1 /*-
2 * Copyright (c) 1990, 1993, 1994
3 * The Regents of the University of California. All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Margo Seltzer.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 */
32
33 #define _DEFAULT_SOURCE
34 #include <sys/param.h>
35 #if defined(LIBC_SCCS) && !defined(lint)
36 static char sccsid[] = "@(#)hash_bigkey.c 8.3 (Berkeley) 5/31/94";
37 #endif /* LIBC_SCCS and not lint */
38 #include <sys/cdefs.h>
39
/*
 * PACKAGE:  hash
 * DESCRIPTION:
 *	Big key/data handling for the hashing package.
 *
 * ROUTINES:
 * External
 *	__big_keydata
 *	__big_split
 *	__big_insert
 *	__big_return
 *	__big_delete
 *	__find_bigpair
 *	__find_last_page
 * Internal
 *	collect_key
 *	collect_data
 */
57
58 #include <sys/param.h>
59
60 #include <errno.h>
61 #include <stdio.h>
62 #include <stdlib.h>
63 #include <string.h>
64
65 #ifdef DEBUG
66 #include <assert.h>
67 #endif
68
69 #include "db_local.h"
70 #include "hash.h"
71 #include "page.h"
72 #include "extern.h"
73
74 static size_t collect_key(HTAB *, BUFHEAD *, size_t, DBT *, int);
75 static size_t collect_data(HTAB *, BUFHEAD *, size_t, int);
76
77 /*
78 * Big_insert
79 *
80 * You need to do an insert and the key/data pair is too big
81 *
82 * Returns:
83 * 0 ==> OK
84 *-1 ==> ERROR
85 */
86 extern int
__big_insert(HTAB * hashp,BUFHEAD * bufp,const DBT * key,const DBT * val)87 __big_insert(HTAB *hashp,
88 BUFHEAD *bufp,
89 const DBT *key,
90 const DBT *val)
91 {
92 __uint16_t *p;
93 size_t key_size, n;
94 size_t val_size;
95 __uint16_t space, move_bytes, off;
96 char *cp, *key_data, *val_data;
97
98 cp = bufp->page; /* Character pointer of p. */
99 p = (__uint16_t *)cp;
100
101 key_data = (char *)key->data;
102 key_size = key->size;
103 val_data = (char *)val->data;
104 val_size = val->size;
105
106 /* First move the Key */
107 for (space = FREESPACE(p) - BIGOVERHEAD; key_size;
108 space = FREESPACE(p) - BIGOVERHEAD) {
109 move_bytes = MIN(space, key_size);
110 off = OFFSET(p) - move_bytes;
111 memmove(cp + off, key_data, move_bytes);
112 key_size -= move_bytes;
113 key_data += move_bytes;
114 n = p[0];
115 p[++n] = off;
116 p[0] = ++n;
117 FREESPACE(p) = off - PAGE_META(n);
118 OFFSET(p) = off;
119 p[n] = PARTIAL_KEY;
120 bufp = __add_ovflpage(hashp, bufp);
121 if (!bufp)
122 return (-1);
123 n = p[0];
124 if (!key_size) {
125 if (FREESPACE(p)) {
126 move_bytes = MIN(FREESPACE(p), val_size);
127 off = OFFSET(p) - move_bytes;
128 p[n] = off;
129 memmove(cp + off, val_data, move_bytes);
130 val_data += move_bytes;
131 val_size -= move_bytes;
132 p[n - 2] = FULL_KEY_DATA;
133 FREESPACE(p) = FREESPACE(p) - move_bytes;
134 OFFSET(p) = off;
135 } else
136 p[n - 2] = FULL_KEY;
137 }
138 p = (__uint16_t *)bufp->page;
139 cp = bufp->page;
140 bufp->flags |= BUF_MOD;
141 }
142
143 /* Now move the data */
144 for (space = FREESPACE(p) - BIGOVERHEAD; val_size;
145 space = FREESPACE(p) - BIGOVERHEAD) {
146 move_bytes = MIN(space, val_size);
147 /*
148 * Here's the hack to make sure that if the data ends on the
149 * same page as the key ends, FREESPACE is at least one.
150 */
151 if (space == val_size && val_size == val->size)
152 move_bytes--;
153 off = OFFSET(p) - move_bytes;
154 memmove(cp + off, val_data, move_bytes);
155 val_size -= move_bytes;
156 val_data += move_bytes;
157 n = p[0];
158 p[++n] = off;
159 p[0] = ++n;
160 FREESPACE(p) = off - PAGE_META(n);
161 OFFSET(p) = off;
162 if (val_size) {
163 p[n] = FULL_KEY;
164 bufp = __add_ovflpage(hashp, bufp);
165 if (!bufp)
166 return (-1);
167 cp = bufp->page;
168 p = (__uint16_t *)cp;
169 } else
170 p[n] = FULL_KEY_DATA;
171 bufp->flags |= BUF_MOD;
172 }
173 return (0);
174 }
175
176 /*
177 * Called when bufp's page contains a partial key (index should be 1)
178 *
179 * All pages in the big key/data pair except bufp are freed. We cannot
180 * free bufp because the page pointing to it is lost and we can't get rid
181 * of its pointer.
182 *
183 * Returns:
184 * 0 => OK
185 *-1 => ERROR
186 */
187 extern int
__big_delete(HTAB * hashp,BUFHEAD * bufp)188 __big_delete(HTAB *hashp,
189 BUFHEAD *bufp)
190 {
191 BUFHEAD *last_bfp, *rbufp;
192 __uint16_t *bp, pageno;
193 int key_done, n;
194
195 rbufp = bufp;
196 last_bfp = NULL;
197 bp = (__uint16_t *)bufp->page;
198 pageno = 0;
199 key_done = 0;
200
201 while (!key_done || (bp[2] != FULL_KEY_DATA)) {
202 if (bp[2] == FULL_KEY || bp[2] == FULL_KEY_DATA)
203 key_done = 1;
204
205 /*
206 * If there is freespace left on a FULL_KEY_DATA page, then
207 * the data is short and fits entirely on this page, and this
208 * is the last page.
209 */
210 if (bp[2] == FULL_KEY_DATA && FREESPACE(bp))
211 break;
212 pageno = bp[bp[0] - 1];
213 rbufp->flags |= BUF_MOD;
214 rbufp = __get_buf(hashp, pageno, rbufp, 0);
215 if (last_bfp)
216 __free_ovflpage(hashp, last_bfp);
217 last_bfp = rbufp;
218 if (!rbufp)
219 return (-1); /* Error. */
220 bp = (__uint16_t *)rbufp->page;
221 }
222
223 /*
224 * If we get here then rbufp points to the last page of the big
225 * key/data pair. Bufp points to the first one -- it should now be
226 * empty pointing to the next page after this pair. Can't free it
227 * because we don't have the page pointing to it.
228 */
229
230 /* This is information from the last page of the pair. */
231 n = bp[0];
232 pageno = bp[n - 1];
233
234 /* Now, bp is the first page of the pair. */
235 bp = (__uint16_t *)bufp->page;
236 if (n > 2) {
237 /* There is an overflow page. */
238 bp[1] = pageno;
239 bp[2] = OVFLPAGE;
240 bufp->ovfl = rbufp->ovfl;
241 } else
242 /* This is the last page. */
243 bufp->ovfl = NULL;
244 n -= 2;
245 bp[0] = n;
246 FREESPACE(bp) = hashp->BSIZE - PAGE_META(n);
247 OFFSET(bp) = hashp->BSIZE - 1;
248
249 bufp->flags |= BUF_MOD;
250 if (rbufp)
251 __free_ovflpage(hashp, rbufp);
252 if (last_bfp != rbufp)
253 __free_ovflpage(hashp, last_bfp);
254
255 hashp->NKEYS--;
256 return (0);
257 }
258 /*
259 * Returns:
260 * 0 = key not found
261 * -1 = get next overflow page
262 * -2 means key not found and this is big key/data
263 * -3 error
264 */
265 extern int
__find_bigpair(HTAB * hashp,BUFHEAD * bufp,int ndx,char * key,int size)266 __find_bigpair(HTAB *hashp,
267 BUFHEAD *bufp,
268 int ndx,
269 char *key,
270 int size)
271 {
272 __uint16_t *bp;
273 char *p;
274 int ksize;
275 __uint16_t bytes;
276 char *kkey;
277
278 bp = (__uint16_t *)bufp->page;
279 p = bufp->page;
280 ksize = size;
281 kkey = key;
282
283 for (bytes = hashp->BSIZE - bp[ndx];
284 (int) bytes <= size && bp[ndx + 1] == PARTIAL_KEY;
285 bytes = hashp->BSIZE - bp[ndx]) {
286 if (memcmp(p + bp[ndx], kkey, bytes))
287 return (-2);
288 kkey += bytes;
289 ksize -= bytes;
290 bufp = __get_buf(hashp, bp[ndx + 2], bufp, 0);
291 if (!bufp)
292 return (-3);
293 p = bufp->page;
294 bp = (__uint16_t *)p;
295 ndx = 1;
296 }
297
298 if ((int) bytes != ksize || memcmp(p + bp[ndx], kkey, bytes)) {
299 #ifdef HASH_STATISTICS
300 ++hash_collisions;
301 #endif
302 return (-2);
303 } else
304 return (ndx);
305 }
306
307 /*
308 * Given the buffer pointer of the first overflow page of a big pair,
309 * find the end of the big pair
310 *
311 * This will set bpp to the buffer header of the last page of the big pair.
312 * It will return the pageno of the overflow page following the last page
313 * of the pair; 0 if there isn't any (i.e. big pair is the last key in the
314 * bucket)
315 */
316 extern __uint16_t
__find_last_page(HTAB * hashp,BUFHEAD ** bpp)317 __find_last_page(HTAB *hashp,
318 BUFHEAD **bpp)
319 {
320 BUFHEAD *bufp;
321 __uint16_t *bp, pageno;
322 int n;
323
324 bufp = *bpp;
325 bp = (__uint16_t *)bufp->page;
326 for (;;) {
327 n = bp[0];
328
329 /*
330 * This is the last page if: the tag is FULL_KEY_DATA and
331 * either only 2 entries OVFLPAGE marker is explicit there
332 * is freespace on the page.
333 */
334 if (bp[2] == FULL_KEY_DATA &&
335 ((n == 2) || (bp[n] == OVFLPAGE) || (FREESPACE(bp))))
336 break;
337
338 pageno = bp[n - 1];
339 bufp = __get_buf(hashp, pageno, bufp, 0);
340 if (!bufp)
341 return (0); /* Need to indicate an error! */
342 bp = (__uint16_t *)bufp->page;
343 }
344
345 *bpp = bufp;
346 if (bp[0] > 2)
347 return (bp[3]);
348 else
349 return (0);
350 }
351
352 /*
353 * Return the data for the key/data pair that begins on this page at this
354 * index (index should always be 1).
355 */
356 extern int
__big_return(HTAB * hashp,BUFHEAD * bufp,int ndx,DBT * val,int set_current)357 __big_return(HTAB *hashp,
358 BUFHEAD *bufp,
359 int ndx,
360 DBT *val,
361 int set_current)
362 {
363 BUFHEAD *save_p;
364 __uint16_t *bp, len, off, save_addr;
365 char *tp;
366
367 bp = (__uint16_t *)bufp->page;
368 while (bp[ndx + 1] == PARTIAL_KEY) {
369 bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0);
370 if (!bufp)
371 return (-1);
372 bp = (__uint16_t *)bufp->page;
373 ndx = 1;
374 }
375
376 if (bp[ndx + 1] == FULL_KEY) {
377 bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0);
378 if (!bufp)
379 return (-1);
380 bp = (__uint16_t *)bufp->page;
381 save_p = bufp;
382 save_addr = save_p->addr;
383 off = bp[1];
384 len = 0;
385 } else
386 if (!FREESPACE(bp)) {
387 /*
388 * This is a hack. We can't distinguish between
389 * FULL_KEY_DATA that contains complete data or
390 * incomplete data, so we require that if the data
391 * is complete, there is at least 1 byte of free
392 * space left.
393 */
394 off = bp[bp[0]];
395 len = bp[1] - off;
396 save_p = bufp;
397 save_addr = bufp->addr;
398 bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0);
399 if (!bufp)
400 return (-1);
401 bp = (__uint16_t *)bufp->page;
402 } else {
403 /* The data is all on one page. */
404 tp = (char *)bp;
405 off = bp[bp[0]];
406 val->data = (u_char *)tp + off;
407 val->size = bp[1] - off;
408 if (set_current) {
409 if (bp[0] == 2) { /* No more buckets in
410 * chain */
411 hashp->cpage = NULL;
412 hashp->cbucket++;
413 hashp->cndx = 1;
414 } else {
415 hashp->cpage = __get_buf(hashp,
416 bp[bp[0] - 1], bufp, 0);
417 if (!hashp->cpage)
418 return (-1);
419 hashp->cndx = 1;
420 if (!((__uint16_t *)
421 hashp->cpage->page)[0]) {
422 hashp->cbucket++;
423 hashp->cpage = NULL;
424 }
425 }
426 }
427 return (0);
428 }
429
430 val->size = collect_data(hashp, bufp, len, set_current);
431 if (val->size == (size_t) -1)
432 return (-1);
433 if (save_p->addr != save_addr) {
434 /* We are pretty short on buffers. */
435 errno = EINVAL; /* OUT OF BUFFERS */
436 return (-1);
437 }
438 memmove(hashp->tmp_buf, (save_p->page) + off, len);
439 val->data = (u_char *)hashp->tmp_buf;
440 return (0);
441 }
442 /*
443 * Count how big the total datasize is by recursing through the pages. Then
444 * allocate a buffer and copy the data as you recurse up.
445 */
446 static size_t
collect_data(HTAB * hashp,BUFHEAD * bufp,size_t len,int set)447 collect_data(HTAB *hashp,
448 BUFHEAD *bufp,
449 size_t len,
450 int set)
451 {
452 __uint16_t *bp;
453 char *p;
454 BUFHEAD *xbp;
455 __uint16_t save_addr;
456 size_t mylen, totlen;
457
458 p = bufp->page;
459 bp = (__uint16_t *)p;
460 mylen = hashp->BSIZE - bp[1];
461 save_addr = bufp->addr;
462
463 if (bp[2] == FULL_KEY_DATA) { /* End of Data */
464 totlen = len + mylen;
465 if (hashp->tmp_buf)
466 free(hashp->tmp_buf);
467 if ((hashp->tmp_buf = (char *)malloc(totlen)) == NULL)
468 return (-1);
469 if (set) {
470 hashp->cndx = 1;
471 if (bp[0] == 2) { /* No more buckets in chain */
472 hashp->cpage = NULL;
473 hashp->cbucket++;
474 } else {
475 hashp->cpage =
476 __get_buf(hashp, bp[bp[0] - 1], bufp, 0);
477 if (!hashp->cpage)
478 return (-1);
479 else if (!((__uint16_t *)hashp->cpage->page)[0]) {
480 hashp->cbucket++;
481 hashp->cpage = NULL;
482 }
483 }
484 }
485 } else {
486 xbp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0);
487 if (!xbp)
488 return (-1);
489 totlen = collect_data(hashp, xbp, len + mylen, set);
490 if (totlen < 1 || totlen == (size_t) -1)
491 return (-1);
492 }
493 if (bufp->addr != save_addr) {
494 errno = EINVAL; /* Out of buffers. */
495 return (-1);
496 }
497 memmove(&hashp->tmp_buf[len], (bufp->page) + bp[1], mylen);
498 return (totlen);
499 }
500
501 /*
502 * Fill in the key and data for this big pair.
503 */
504 extern int
__big_keydata(HTAB * hashp,BUFHEAD * bufp,DBT * key,DBT * val,int set)505 __big_keydata(HTAB *hashp,
506 BUFHEAD *bufp,
507 DBT *key,
508 DBT *val,
509 int set)
510 {
511 key->size = collect_key(hashp, bufp, 0, val, set);
512 if (key->size == (size_t) -1)
513 return (-1);
514 key->data = (u_char *)hashp->tmp_key;
515 return (0);
516 }
517
518 /*
519 * Count how big the total key size is by recursing through the pages. Then
520 * collect the data, allocate a buffer and copy the key as you recurse up.
521 */
522 static size_t
collect_key(HTAB * hashp,BUFHEAD * bufp,size_t len,DBT * val,int set)523 collect_key(HTAB *hashp,
524 BUFHEAD *bufp,
525 size_t len,
526 DBT *val,
527 int set)
528 {
529 BUFHEAD *xbp;
530 char *p;
531 size_t mylen, totlen;
532 __uint16_t *bp, save_addr;
533
534 p = bufp->page;
535 bp = (__uint16_t *)p;
536 mylen = hashp->BSIZE - bp[1];
537
538 save_addr = bufp->addr;
539 totlen = len + mylen;
540 if (bp[2] == FULL_KEY || bp[2] == FULL_KEY_DATA) { /* End of Key. */
541 if (hashp->tmp_key != NULL)
542 free(hashp->tmp_key);
543 if ((hashp->tmp_key = (char *)malloc(totlen)) == NULL)
544 return (-1);
545 if (__big_return(hashp, bufp, 1, val, set))
546 return (-1);
547 } else {
548 xbp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0);
549 if (!xbp || ((totlen =
550 collect_key(hashp, xbp, totlen, val, set)) < 1))
551 return (-1);
552 }
553 if (bufp->addr != save_addr) {
554 errno = EINVAL; /* MIS -- OUT OF BUFFERS */
555 return (-1);
556 }
557 memmove(&hashp->tmp_key[len], (bufp->page) + bp[1], mylen);
558 return (totlen);
559 }
560
561 /*
562 * Returns:
563 * 0 => OK
564 * -1 => error
565 */
566 extern int
__big_split(HTAB * hashp,BUFHEAD * op,BUFHEAD * np,BUFHEAD * big_keyp,int addr,__uint32_t obucket,SPLIT_RETURN * ret)567 __big_split(
568 HTAB *hashp,
569 BUFHEAD *op, /* Pointer to where to put keys that go in old bucket */
570 BUFHEAD *np, /* Pointer to new bucket page */
571 BUFHEAD *big_keyp, /* Pointer to first page containing the big key/data */
572 int addr, /* Address of big_keyp */
573 __uint32_t obucket, /* Old Bucket */
574 SPLIT_RETURN *ret)
575 {
576 BUFHEAD *tmpp;
577 __uint16_t *tp;
578 BUFHEAD *bp;
579 DBT key, val;
580 __uint32_t change;
581 __uint16_t free_space, n, off;
582
583 bp = big_keyp;
584
585 /* Now figure out where the big key/data goes */
586 if (__big_keydata(hashp, big_keyp, &key, &val, 0))
587 return (-1);
588 change = (__call_hash(hashp, key.data, key.size) != obucket);
589
590 if ( (ret->next_addr = __find_last_page(hashp, &big_keyp)) ) {
591 if (!(ret->nextp =
592 __get_buf(hashp, ret->next_addr, big_keyp, 0)))
593 return (-1);;
594 } else
595 ret->nextp = NULL;
596
597 /* Now make one of np/op point to the big key/data pair */
598 #ifdef DEBUG
599 assert(np->ovfl == NULL);
600 #endif
601 if (change)
602 tmpp = np;
603 else
604 tmpp = op;
605
606 tmpp->flags |= BUF_MOD;
607 #ifdef DEBUG1
608 (void)fprintf(stderr,
609 "BIG_SPLIT: %d->ovfl was %d is now %d\n", tmpp->addr,
610 (tmpp->ovfl ? tmpp->ovfl->addr : 0), (bp ? bp->addr : 0));
611 #endif
612 tmpp->ovfl = bp; /* one of op/np point to big_keyp */
613 tp = (__uint16_t *)tmpp->page;
614 #ifdef DEBUG
615 assert(FREESPACE(tp) >= OVFLSIZE);
616 #endif
617 n = tp[0];
618 off = OFFSET(tp);
619 free_space = FREESPACE(tp);
620 tp[++n] = (__uint16_t)addr;
621 tp[++n] = OVFLPAGE;
622 tp[0] = n;
623 OFFSET(tp) = off;
624 FREESPACE(tp) = free_space - OVFLSIZE;
625
626 /*
627 * Finally, set the new and old return values. BIG_KEYP contains a
628 * pointer to the last page of the big key_data pair. Make sure that
629 * big_keyp has no following page (2 elements) or create an empty
630 * following page.
631 */
632
633 ret->newp = np;
634 ret->oldp = op;
635
636 tp = (__uint16_t *)big_keyp->page;
637 big_keyp->flags |= BUF_MOD;
638 if (tp[0] > 2) {
639 /*
640 * There may be either one or two offsets on this page. If
641 * there is one, then the overflow page is linked on normally
642 * and tp[4] is OVFLPAGE. If there are two, tp[4] contains
643 * the second offset and needs to get stuffed in after the
644 * next overflow page is added.
645 */
646 n = tp[4];
647 free_space = FREESPACE(tp);
648 off = OFFSET(tp);
649 tp[0] -= 2;
650 FREESPACE(tp) = free_space + OVFLSIZE;
651 OFFSET(tp) = off;
652 tmpp = __add_ovflpage(hashp, big_keyp);
653 if (!tmpp)
654 return (-1);
655 tp[4] = n;
656 } else
657 tmpp = big_keyp;
658
659 if (change)
660 ret->newp = tmpp;
661 else
662 ret->oldp = tmpp;
663 return (0);
664 }
665