1 /*-
2 * Copyright (c) 1990, 1993, 1994
3 * The Regents of the University of California. All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Margo Seltzer.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 */
32
33 #define _DEFAULT_SOURCE
34 #include <sys/param.h>
35 #if defined(LIBC_SCCS) && !defined(lint)
36 static char sccsid[] = "@(#)hash_bigkey.c 8.3 (Berkeley) 5/31/94";
37 #endif /* LIBC_SCCS and not lint */
38
39 /*
40 * PACKAGE: hash
41 * DESCRIPTION:
42 * Big key/data handling for the hashing package.
43 *
44 * ROUTINES:
45 * External
46 * __big_keydata
47 * __big_split
48 * __big_insert
49 * __big_return
50 * __big_delete
51 * __find_last_page
52 * Internal
53 * collect_key
54 * collect_data
55 */
56
57 #include <sys/param.h>
58
59 #include <errno.h>
60 #include <stdio.h>
61 #include <stdlib.h>
62 #include <string.h>
63
64 #ifdef DEBUG
65 #include <assert.h>
66 #endif
67
68 #include "db_local.h"
69 #include "hash.h"
70 #include "page.h"
71 #include "extern.h"
72
73 static size_t collect_key(HTAB *, BUFHEAD *, size_t, DBT *, int);
74 static size_t collect_data(HTAB *, BUFHEAD *, size_t, int);
75
76 /*
77 * Big_insert
78 *
79 * You need to do an insert and the key/data pair is too big
80 *
81 * Returns:
82 * 0 ==> OK
83 *-1 ==> ERROR
84 */
85 extern int
__big_insert(HTAB * hashp,BUFHEAD * bufp,const DBT * key,const DBT * val)86 __big_insert(HTAB *hashp,
87 BUFHEAD *bufp,
88 const DBT *key,
89 const DBT *val)
90 {
91 __uint16_t *p;
92 size_t key_size, n;
93 size_t val_size;
94 __uint16_t space, move_bytes, off;
95 char *cp, *key_data, *val_data;
96
97 cp = bufp->page; /* Character pointer of p. */
98 p = (__uint16_t *)cp;
99
100 key_data = (char *)key->data;
101 key_size = key->size;
102 val_data = (char *)val->data;
103 val_size = val->size;
104
105 /* First move the Key */
106 for (space = FREESPACE(p) - BIGOVERHEAD; key_size;
107 space = FREESPACE(p) - BIGOVERHEAD) {
108 move_bytes = MIN(space, key_size);
109 off = OFFSET(p) - move_bytes;
110 memmove(cp + off, key_data, move_bytes);
111 key_size -= move_bytes;
112 key_data += move_bytes;
113 n = p[0];
114 p[++n] = off;
115 p[0] = ++n;
116 FREESPACE(p) = off - PAGE_META(n);
117 OFFSET(p) = off;
118 p[n] = PARTIAL_KEY;
119 bufp = __add_ovflpage(hashp, bufp);
120 if (!bufp)
121 return (-1);
122 n = p[0];
123 if (!key_size) {
124 if (FREESPACE(p)) {
125 move_bytes = MIN(FREESPACE(p), val_size);
126 off = OFFSET(p) - move_bytes;
127 p[n] = off;
128 memmove(cp + off, val_data, move_bytes);
129 val_data += move_bytes;
130 val_size -= move_bytes;
131 p[n - 2] = FULL_KEY_DATA;
132 FREESPACE(p) = FREESPACE(p) - move_bytes;
133 OFFSET(p) = off;
134 } else
135 p[n - 2] = FULL_KEY;
136 }
137 p = (__uint16_t *)bufp->page;
138 cp = bufp->page;
139 bufp->flags |= BUF_MOD;
140 }
141
142 /* Now move the data */
143 for (space = FREESPACE(p) - BIGOVERHEAD; val_size;
144 space = FREESPACE(p) - BIGOVERHEAD) {
145 move_bytes = MIN(space, val_size);
146 /*
147 * Here's the hack to make sure that if the data ends on the
148 * same page as the key ends, FREESPACE is at least one.
149 */
150 if (space == val_size && val_size == val->size)
151 move_bytes--;
152 off = OFFSET(p) - move_bytes;
153 memmove(cp + off, val_data, move_bytes);
154 val_size -= move_bytes;
155 val_data += move_bytes;
156 n = p[0];
157 p[++n] = off;
158 p[0] = ++n;
159 FREESPACE(p) = off - PAGE_META(n);
160 OFFSET(p) = off;
161 if (val_size) {
162 p[n] = FULL_KEY;
163 bufp = __add_ovflpage(hashp, bufp);
164 if (!bufp)
165 return (-1);
166 cp = bufp->page;
167 p = (__uint16_t *)cp;
168 } else
169 p[n] = FULL_KEY_DATA;
170 bufp->flags |= BUF_MOD;
171 }
172 return (0);
173 }
174
175 /*
176 * Called when bufp's page contains a partial key (index should be 1)
177 *
178 * All pages in the big key/data pair except bufp are freed. We cannot
179 * free bufp because the page pointing to it is lost and we can't get rid
180 * of its pointer.
181 *
182 * Returns:
183 * 0 => OK
184 *-1 => ERROR
185 */
186 extern int
__big_delete(HTAB * hashp,BUFHEAD * bufp)187 __big_delete(HTAB *hashp,
188 BUFHEAD *bufp)
189 {
190 BUFHEAD *last_bfp, *rbufp;
191 __uint16_t *bp, pageno;
192 int key_done, n;
193
194 rbufp = bufp;
195 last_bfp = NULL;
196 bp = (__uint16_t *)bufp->page;
197 pageno = 0;
198 key_done = 0;
199
200 while (!key_done || (bp[2] != FULL_KEY_DATA)) {
201 if (bp[2] == FULL_KEY || bp[2] == FULL_KEY_DATA)
202 key_done = 1;
203
204 /*
205 * If there is freespace left on a FULL_KEY_DATA page, then
206 * the data is short and fits entirely on this page, and this
207 * is the last page.
208 */
209 if (bp[2] == FULL_KEY_DATA && FREESPACE(bp))
210 break;
211 pageno = bp[bp[0] - 1];
212 rbufp->flags |= BUF_MOD;
213 rbufp = __get_buf(hashp, pageno, rbufp, 0);
214 if (last_bfp)
215 __free_ovflpage(hashp, last_bfp);
216 last_bfp = rbufp;
217 if (!rbufp)
218 return (-1); /* Error. */
219 bp = (__uint16_t *)rbufp->page;
220 }
221
222 /*
223 * If we get here then rbufp points to the last page of the big
224 * key/data pair. Bufp points to the first one -- it should now be
225 * empty pointing to the next page after this pair. Can't free it
226 * because we don't have the page pointing to it.
227 */
228
229 /* This is information from the last page of the pair. */
230 n = bp[0];
231 pageno = bp[n - 1];
232
233 /* Now, bp is the first page of the pair. */
234 bp = (__uint16_t *)bufp->page;
235 if (n > 2) {
236 /* There is an overflow page. */
237 bp[1] = pageno;
238 bp[2] = OVFLPAGE;
239 bufp->ovfl = rbufp->ovfl;
240 } else
241 /* This is the last page. */
242 bufp->ovfl = NULL;
243 n -= 2;
244 bp[0] = n;
245 FREESPACE(bp) = hashp->BSIZE - PAGE_META(n);
246 OFFSET(bp) = hashp->BSIZE - 1;
247
248 bufp->flags |= BUF_MOD;
249 if (rbufp)
250 __free_ovflpage(hashp, rbufp);
251 if (last_bfp != rbufp)
252 __free_ovflpage(hashp, last_bfp);
253
254 hashp->NKEYS--;
255 return (0);
256 }
257 /*
258 * Returns:
259 * 0 = key not found
260 * -1 = get next overflow page
261 * -2 means key not found and this is big key/data
262 * -3 error
263 */
264 extern int
__find_bigpair(HTAB * hashp,BUFHEAD * bufp,int ndx,char * key,int size)265 __find_bigpair(HTAB *hashp,
266 BUFHEAD *bufp,
267 int ndx,
268 char *key,
269 int size)
270 {
271 __uint16_t *bp;
272 char *p;
273 int ksize;
274 __uint16_t bytes;
275 char *kkey;
276
277 bp = (__uint16_t *)bufp->page;
278 p = bufp->page;
279 ksize = size;
280 kkey = key;
281
282 for (bytes = hashp->BSIZE - bp[ndx];
283 (int) bytes <= size && bp[ndx + 1] == PARTIAL_KEY;
284 bytes = hashp->BSIZE - bp[ndx]) {
285 if (memcmp(p + bp[ndx], kkey, bytes))
286 return (-2);
287 kkey += bytes;
288 ksize -= bytes;
289 bufp = __get_buf(hashp, bp[ndx + 2], bufp, 0);
290 if (!bufp)
291 return (-3);
292 p = bufp->page;
293 bp = (__uint16_t *)p;
294 ndx = 1;
295 }
296
297 if ((int) bytes != ksize || memcmp(p + bp[ndx], kkey, bytes)) {
298 #ifdef HASH_STATISTICS
299 ++hash_collisions;
300 #endif
301 return (-2);
302 } else
303 return (ndx);
304 }
305
306 /*
307 * Given the buffer pointer of the first overflow page of a big pair,
308 * find the end of the big pair
309 *
310 * This will set bpp to the buffer header of the last page of the big pair.
311 * It will return the pageno of the overflow page following the last page
312 * of the pair; 0 if there isn't any (i.e. big pair is the last key in the
313 * bucket)
314 */
315 extern __uint16_t
__find_last_page(HTAB * hashp,BUFHEAD ** bpp)316 __find_last_page(HTAB *hashp,
317 BUFHEAD **bpp)
318 {
319 BUFHEAD *bufp;
320 __uint16_t *bp, pageno;
321 int n;
322
323 bufp = *bpp;
324 bp = (__uint16_t *)bufp->page;
325 for (;;) {
326 n = bp[0];
327
328 /*
329 * This is the last page if: the tag is FULL_KEY_DATA and
330 * either only 2 entries OVFLPAGE marker is explicit there
331 * is freespace on the page.
332 */
333 if (bp[2] == FULL_KEY_DATA &&
334 ((n == 2) || (bp[n] == OVFLPAGE) || (FREESPACE(bp))))
335 break;
336
337 pageno = bp[n - 1];
338 bufp = __get_buf(hashp, pageno, bufp, 0);
339 if (!bufp)
340 return (0); /* Need to indicate an error! */
341 bp = (__uint16_t *)bufp->page;
342 }
343
344 *bpp = bufp;
345 if (bp[0] > 2)
346 return (bp[3]);
347 else
348 return (0);
349 }
350
351 /*
352 * Return the data for the key/data pair that begins on this page at this
353 * index (index should always be 1).
354 */
355 extern int
__big_return(HTAB * hashp,BUFHEAD * bufp,int ndx,DBT * val,int set_current)356 __big_return(HTAB *hashp,
357 BUFHEAD *bufp,
358 int ndx,
359 DBT *val,
360 int set_current)
361 {
362 BUFHEAD *save_p;
363 __uint16_t *bp, len, off, save_addr;
364 char *tp;
365
366 bp = (__uint16_t *)bufp->page;
367 while (bp[ndx + 1] == PARTIAL_KEY) {
368 bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0);
369 if (!bufp)
370 return (-1);
371 bp = (__uint16_t *)bufp->page;
372 ndx = 1;
373 }
374
375 if (bp[ndx + 1] == FULL_KEY) {
376 bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0);
377 if (!bufp)
378 return (-1);
379 bp = (__uint16_t *)bufp->page;
380 save_p = bufp;
381 save_addr = save_p->addr;
382 off = bp[1];
383 len = 0;
384 } else
385 if (!FREESPACE(bp)) {
386 /*
387 * This is a hack. We can't distinguish between
388 * FULL_KEY_DATA that contains complete data or
389 * incomplete data, so we require that if the data
390 * is complete, there is at least 1 byte of free
391 * space left.
392 */
393 off = bp[bp[0]];
394 len = bp[1] - off;
395 save_p = bufp;
396 save_addr = bufp->addr;
397 bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0);
398 if (!bufp)
399 return (-1);
400 bp = (__uint16_t *)bufp->page;
401 } else {
402 /* The data is all on one page. */
403 tp = (char *)bp;
404 off = bp[bp[0]];
405 val->data = (u_char *)tp + off;
406 val->size = bp[1] - off;
407 if (set_current) {
408 if (bp[0] == 2) { /* No more buckets in
409 * chain */
410 hashp->cpage = NULL;
411 hashp->cbucket++;
412 hashp->cndx = 1;
413 } else {
414 hashp->cpage = __get_buf(hashp,
415 bp[bp[0] - 1], bufp, 0);
416 if (!hashp->cpage)
417 return (-1);
418 hashp->cndx = 1;
419 if (!((__uint16_t *)
420 hashp->cpage->page)[0]) {
421 hashp->cbucket++;
422 hashp->cpage = NULL;
423 }
424 }
425 }
426 return (0);
427 }
428
429 val->size = collect_data(hashp, bufp, len, set_current);
430 if (val->size == (size_t) -1)
431 return (-1);
432 if (save_p->addr != save_addr) {
433 /* We are pretty short on buffers. */
434 errno = EINVAL; /* OUT OF BUFFERS */
435 return (-1);
436 }
437 memmove(hashp->tmp_buf, (save_p->page) + off, len);
438 val->data = (u_char *)hashp->tmp_buf;
439 return (0);
440 }
441 /*
442 * Count how big the total datasize is by recursing through the pages. Then
443 * allocate a buffer and copy the data as you recurse up.
444 */
445 static size_t
collect_data(HTAB * hashp,BUFHEAD * bufp,size_t len,int set)446 collect_data(HTAB *hashp,
447 BUFHEAD *bufp,
448 size_t len,
449 int set)
450 {
451 __uint16_t *bp;
452 char *p;
453 BUFHEAD *xbp;
454 __uint16_t save_addr;
455 size_t mylen, totlen;
456
457 p = bufp->page;
458 bp = (__uint16_t *)p;
459 mylen = hashp->BSIZE - bp[1];
460 save_addr = bufp->addr;
461
462 if (bp[2] == FULL_KEY_DATA) { /* End of Data */
463 totlen = len + mylen;
464 if (hashp->tmp_buf)
465 free(hashp->tmp_buf);
466 if ((hashp->tmp_buf = (char *)malloc(totlen)) == NULL)
467 return (-1);
468 if (set) {
469 hashp->cndx = 1;
470 if (bp[0] == 2) { /* No more buckets in chain */
471 hashp->cpage = NULL;
472 hashp->cbucket++;
473 } else {
474 hashp->cpage =
475 __get_buf(hashp, bp[bp[0] - 1], bufp, 0);
476 if (!hashp->cpage)
477 return (-1);
478 else if (!((__uint16_t *)hashp->cpage->page)[0]) {
479 hashp->cbucket++;
480 hashp->cpage = NULL;
481 }
482 }
483 }
484 } else {
485 xbp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0);
486 if (!xbp)
487 return (-1);
488 totlen = collect_data(hashp, xbp, len + mylen, set);
489 if (totlen < 1 || totlen == (size_t) -1)
490 return (-1);
491 }
492 if (bufp->addr != save_addr) {
493 errno = EINVAL; /* Out of buffers. */
494 return (-1);
495 }
496 memmove(&hashp->tmp_buf[len], (bufp->page) + bp[1], mylen);
497 return (totlen);
498 }
499
500 /*
501 * Fill in the key and data for this big pair.
502 */
503 extern int
__big_keydata(HTAB * hashp,BUFHEAD * bufp,DBT * key,DBT * val,int set)504 __big_keydata(HTAB *hashp,
505 BUFHEAD *bufp,
506 DBT *key,
507 DBT *val,
508 int set)
509 {
510 key->size = collect_key(hashp, bufp, 0, val, set);
511 if (key->size == (size_t) -1)
512 return (-1);
513 key->data = (u_char *)hashp->tmp_key;
514 return (0);
515 }
516
517 /*
518 * Count how big the total key size is by recursing through the pages. Then
519 * collect the data, allocate a buffer and copy the key as you recurse up.
520 */
521 static size_t
collect_key(HTAB * hashp,BUFHEAD * bufp,size_t len,DBT * val,int set)522 collect_key(HTAB *hashp,
523 BUFHEAD *bufp,
524 size_t len,
525 DBT *val,
526 int set)
527 {
528 BUFHEAD *xbp;
529 char *p;
530 size_t mylen, totlen;
531 __uint16_t *bp, save_addr;
532
533 p = bufp->page;
534 bp = (__uint16_t *)p;
535 mylen = hashp->BSIZE - bp[1];
536
537 save_addr = bufp->addr;
538 totlen = len + mylen;
539 if (bp[2] == FULL_KEY || bp[2] == FULL_KEY_DATA) { /* End of Key. */
540 if (hashp->tmp_key != NULL)
541 free(hashp->tmp_key);
542 if ((hashp->tmp_key = (char *)malloc(totlen)) == NULL)
543 return (-1);
544 if (__big_return(hashp, bufp, 1, val, set))
545 return (-1);
546 } else {
547 xbp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0);
548 if (!xbp || ((totlen =
549 collect_key(hashp, xbp, totlen, val, set)) < 1))
550 return (-1);
551 }
552 if (bufp->addr != save_addr) {
553 errno = EINVAL; /* MIS -- OUT OF BUFFERS */
554 return (-1);
555 }
556 memmove(&hashp->tmp_key[len], (bufp->page) + bp[1], mylen);
557 return (totlen);
558 }
559
560 /*
561 * Returns:
562 * 0 => OK
563 * -1 => error
564 */
565 extern int
__big_split(HTAB * hashp,BUFHEAD * op,BUFHEAD * np,BUFHEAD * big_keyp,int addr,__uint32_t obucket,SPLIT_RETURN * ret)566 __big_split(
567 HTAB *hashp,
568 BUFHEAD *op, /* Pointer to where to put keys that go in old bucket */
569 BUFHEAD *np, /* Pointer to new bucket page */
570 BUFHEAD *big_keyp, /* Pointer to first page containing the big key/data */
571 int addr, /* Address of big_keyp */
572 __uint32_t obucket, /* Old Bucket */
573 SPLIT_RETURN *ret)
574 {
575 BUFHEAD *tmpp;
576 __uint16_t *tp;
577 BUFHEAD *bp;
578 DBT key, val;
579 __uint32_t change;
580 __uint16_t free_space, n, off;
581
582 bp = big_keyp;
583
584 /* Now figure out where the big key/data goes */
585 if (__big_keydata(hashp, big_keyp, &key, &val, 0))
586 return (-1);
587 change = (__call_hash(hashp, key.data, key.size) != obucket);
588
589 if ( (ret->next_addr = __find_last_page(hashp, &big_keyp)) ) {
590 if (!(ret->nextp =
591 __get_buf(hashp, ret->next_addr, big_keyp, 0)))
592 return (-1);;
593 } else
594 ret->nextp = NULL;
595
596 /* Now make one of np/op point to the big key/data pair */
597 #ifdef DEBUG
598 assert(np->ovfl == NULL);
599 #endif
600 if (change)
601 tmpp = np;
602 else
603 tmpp = op;
604
605 tmpp->flags |= BUF_MOD;
606 #ifdef DEBUG1
607 (void)fprintf(stderr,
608 "BIG_SPLIT: %d->ovfl was %d is now %d\n", tmpp->addr,
609 (tmpp->ovfl ? tmpp->ovfl->addr : 0), (bp ? bp->addr : 0));
610 #endif
611 tmpp->ovfl = bp; /* one of op/np point to big_keyp */
612 tp = (__uint16_t *)tmpp->page;
613 #ifdef DEBUG
614 assert(FREESPACE(tp) >= OVFLSIZE);
615 #endif
616 n = tp[0];
617 off = OFFSET(tp);
618 free_space = FREESPACE(tp);
619 tp[++n] = (__uint16_t)addr;
620 tp[++n] = OVFLPAGE;
621 tp[0] = n;
622 OFFSET(tp) = off;
623 FREESPACE(tp) = free_space - OVFLSIZE;
624
625 /*
626 * Finally, set the new and old return values. BIG_KEYP contains a
627 * pointer to the last page of the big key_data pair. Make sure that
628 * big_keyp has no following page (2 elements) or create an empty
629 * following page.
630 */
631
632 ret->newp = np;
633 ret->oldp = op;
634
635 tp = (__uint16_t *)big_keyp->page;
636 big_keyp->flags |= BUF_MOD;
637 if (tp[0] > 2) {
638 /*
639 * There may be either one or two offsets on this page. If
640 * there is one, then the overflow page is linked on normally
641 * and tp[4] is OVFLPAGE. If there are two, tp[4] contains
642 * the second offset and needs to get stuffed in after the
643 * next overflow page is added.
644 */
645 n = tp[4];
646 free_space = FREESPACE(tp);
647 off = OFFSET(tp);
648 tp[0] -= 2;
649 FREESPACE(tp) = free_space + OVFLSIZE;
650 OFFSET(tp) = off;
651 tmpp = __add_ovflpage(hashp, big_keyp);
652 if (!tmpp)
653 return (-1);
654 tp[4] = n;
655 } else
656 tmpp = big_keyp;
657
658 if (change)
659 ret->newp = tmpp;
660 else
661 ret->oldp = tmpp;
662 return (0);
663 }
664