diff options
Diffstat (limited to 'db2/hash/hash_dup.c')
-rw-r--r-- | db2/hash/hash_dup.c | 544 |
1 files changed, 544 insertions, 0 deletions
diff --git a/db2/hash/hash_dup.c b/db2/hash/hash_dup.c new file mode 100644 index 0000000000..059eec6f92 --- /dev/null +++ b/db2/hash/hash_dup.c @@ -0,0 +1,544 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 1997 + * Sleepycat Software. All rights reserved. + */ +/* + * Copyright (c) 1990, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Margo Seltzer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +#include "config.h" + +#ifndef lint +static const char sccsid[] = "@(#)hash_dup.c 10.5 (Sleepycat) 7/27/97"; +#endif /* not lint */ + +/* + * PACKAGE: hashing + * + * DESCRIPTION: + * Manipulation of duplicates for the hash package. + * + * ROUTINES: + * + * External + * __add_dup + * Internal + */ + +#ifndef NO_SYSTEM_INCLUDES +#include <sys/types.h> + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#endif + +#include "db_int.h" +#include "db_page.h" +#include "db_swap.h" +#include "hash.h" + +static int __ham_check_move __P((HTAB *, HASH_CURSOR *, int32_t)); +static int __ham_dup_convert __P((HTAB *, HASH_CURSOR *)); +static int __ham_make_dup __P((const DBT *, DBT *d, void **, u_int32_t *)); + +/* + * Called from hash_access to add a duplicate key. nval is the new + * value that we want to add. The flags correspond to the flag values + * to cursor_put indicating where to add the new element. + * There are 4 cases. + * Case 1: The existing duplicate set already resides on a separate page. + * We can use common code for this. + * Case 2: The element is small enough to just be added to the existing set. + * Case 3: The element is large enough to be a big item, so we're going to + * have to push the set onto a new page. + * Case 4: The element is large enough to push the duplicate set onto a + * separate page. + * + * PUBLIC: int __ham_add_dup __P((HTAB *, HASH_CURSOR *, DBT *, int)); + */ +int +__ham_add_dup(hashp, hcp, nval, flags) + HTAB *hashp; + HASH_CURSOR *hcp; + DBT *nval; + int flags; +{ + DBT pval, tmp_val; + HKEYDATA *hk; + u_int32_t del_len, new_size; + int ret; + + if (flags == DB_CURRENT && hcp->dpgno == PGNO_INVALID) + del_len = hcp->dup_len; + else + del_len = 0; + + if ((ret = __ham_check_move(hashp, hcp, + (int32_t)DUP_SIZE(nval->size) - (int32_t)del_len)) != 0) + return (ret); + + /* + * Check if resulting duplicate set is going to need to go + * onto a separate duplicate page. If so, convert the + * duplicate set and add the new one. After conversion, + * hcp->dndx is the first free ndx or the index of the + * current pointer into the duplicate set. + */ + hk = H_PAIRDATA(hcp->pagep, hcp->bndx); + new_size = DUP_SIZE(nval->size) - del_len + LEN_HKEYDATA(hcp->pagep, + hashp->hdr->pagesize, H_DATAINDEX(hcp->bndx)); + + /* + * We convert to off-page duplicates if the item is a big item, + * the addition of the new item will make the set large, or + * if there isn't enough room on this page to add the next item. + */ + if (hk->type != H_OFFDUP && + (hk->type == H_OFFPAGE || ISBIG(hashp, new_size) || + DUP_SIZE(nval->size) - del_len > P_FREESPACE(hcp->pagep))) { + + if ((ret = __ham_dup_convert(hashp, hcp)) != 0) + return (ret); + else + hk = H_PAIRDATA(hcp->pagep, hcp->bndx); + } + + /* There are two separate cases here: on page and off page. */ + if (hk->type != H_OFFDUP) { + if (hk->type != H_DUPLICATE) { + hk->type = H_DUPLICATE; + pval.flags = 0; + pval.data = hk->data; + pval.size = LEN_HDATA(hcp->pagep, hashp->hdr->pagesize, + hcp->bndx); + if ((ret = __ham_make_dup(&pval, &tmp_val, &hcp->big_data, + &hcp->big_datalen)) != 0 || + (ret = __ham_replpair(hashp, hcp, &tmp_val, 1)) != 0) + return (ret); + } + + /* Now make the new entry a duplicate. */ + if ((ret = __ham_make_dup(nval, + &tmp_val, &hcp->big_data, &hcp->big_datalen)) != 0) + return (ret); + + tmp_val.dlen = 0; + switch (flags) { /* On page. */ + case DB_KEYFIRST: + tmp_val.doff = 0; + break; + case DB_KEYLAST: + tmp_val.doff = LEN_HDATA(hcp->pagep, + hashp->hdr->pagesize, hcp->bndx); + break; + case DB_CURRENT: + tmp_val.doff = hcp->dup_off; + tmp_val.dlen = DUP_SIZE(hcp->dup_len); + break; + case DB_BEFORE: + tmp_val.doff = hcp->dup_off; + break; + case DB_AFTER: + tmp_val.doff = hcp->dup_off + DUP_SIZE(hcp->dup_len); + break; + } + /* Add the duplicate. */ + ret = __ham_replpair(hashp, hcp, &tmp_val, 0); + if (ret == 0) + ret = __ham_dirty_page(hashp, hcp->pagep); + __ham_c_update(hashp, hcp, hcp->pgno, tmp_val.size, 1, 1); + return (ret); + } + + /* If we get here, then we're on duplicate pages. */ + if (hcp->dpgno == PGNO_INVALID) { + memcpy(&hcp->dpgno, + (u_int8_t *)hk + SSZ(HOFFDUP, pgno), sizeof(db_pgno_t)); + hcp->dndx = 0; + } + + switch (flags) { + case DB_KEYFIRST: + /* + * The only way that we are already on a dup page is + * if we just converted the on-page representation. + * In that case, we've only got one page of duplicates. + */ + if (hcp->dpagep == NULL && (ret = + __db_dend(hashp->dbp, hcp->dpgno, &hcp->dpagep)) != 0) + return (ret); + hcp->dndx = 0; + break; + case DB_KEYLAST: + if (hcp->dpagep == NULL && (ret = + __db_dend(hashp->dbp, hcp->dpgno, &hcp->dpagep)) != 0) + return (ret); + hcp->dpgno = PGNO(hcp->dpagep); + hcp->dndx = NUM_ENT(hcp->dpagep); + break; + case DB_CURRENT: + if ((ret = __db_ditem(hashp->dbp, hcp->dpagep, hcp->dndx, + BKEYDATA_SIZE(GET_BKEYDATA(hcp->dpagep, hcp->dndx)->len))) + != 0) + return (ret); + break; + case DB_BEFORE: /* The default behavior is correct. */ + break; + case DB_AFTER: + hcp->dndx++; + break; + } + + ret = __db_dput(hashp->dbp, + nval, &hcp->dpagep, &hcp->dndx, __ham_overflow_page); + hcp->pgno = PGNO(hcp->pagep); + __ham_c_update(hashp, hcp, hcp->pgno, nval->size, 1, 1); + return (ret); +} + +/* + * Convert an on-page set of duplicates to an offpage set of duplicates. + */ +static int +__ham_dup_convert(hashp, hcp) + HTAB *hashp; + HASH_CURSOR *hcp; +{ + BOVERFLOW bo; + DBT dbt; + HOFFPAGE ho; + db_indx_t dndx, len; + int ret; + u_int8_t *p, *pend; + + /* + * Create a new page for the duplicates. + */ + if ((ret = + __ham_overflow_page(hashp->dbp, P_DUPLICATE, &hcp->dpagep)) != 0) + return (ret); + hcp->dpagep->type = P_DUPLICATE; + hcp->dpgno = PGNO(hcp->dpagep); + + /* + * Now put the duplicates onto the new page. + */ + dbt.flags = 0; + switch (((HKEYDATA *)H_PAIRDATA(hcp->pagep, hcp->bndx))->type) { + case H_KEYDATA: + /* Simple case, one key on page; move it to dup page. */ + dndx = 0; + dbt.size = + LEN_HDATA(hcp->pagep, hashp->hdr->pagesize, hcp->bndx); + dbt.data = + ((HKEYDATA *)H_PAIRDATA(hcp->pagep, hcp->bndx))->data; + ret = __db_pitem(hashp->dbp, hcp->dpagep, + (u_int32_t)dndx, BKEYDATA_SIZE(dbt.size), NULL, &dbt); + if (ret == 0) + __ham_dirty_page(hashp, hcp->dpagep); + break; + case H_OFFPAGE: + /* Simple case, one key on page; move it to dup page. */ + dndx = 0; + memcpy(&ho, + P_ENTRY(hcp->pagep, H_DATAINDEX(hcp->bndx)), HOFFPAGE_SIZE); + bo.deleted = 0; + bo.type = ho.type; + bo.pgno = ho.pgno; + bo.tlen = ho.tlen; + dbt.size = BOVERFLOW_SIZE; + dbt.data = &bo; + + ret = __db_pitem(hashp->dbp, hcp->dpagep, + (u_int32_t)dndx, dbt.size, &dbt, NULL); + if (ret == 0) + __ham_dirty_page(hashp, hcp->dpagep); + break; + case H_DUPLICATE: + p = ((HKEYDATA *)H_PAIRDATA(hcp->pagep, hcp->bndx))->data; + pend = p + + LEN_HDATA(hcp->pagep, hashp->hdr->pagesize, hcp->bndx); + + for (dndx = 0; p < pend; dndx++) { + memcpy(&len, p, sizeof(db_indx_t)); + dbt.size = len; + p += sizeof(db_indx_t); + dbt.data = p; + p += len + sizeof(db_indx_t); + ret = __db_dput(hashp->dbp, &dbt, + &hcp->dpagep, &dndx, __ham_overflow_page); + if (ret != 0) + break; + } + break; + default: + ret = __db_pgfmt(hashp->dbp, (u_long)hcp->pgno); + } + if (ret == 0) { + /* + * Now attach this to the source page in place of + * the old duplicate item. + */ + __ham_move_offpage(hashp, hcp->pagep, + (u_int32_t)H_DATAINDEX(hcp->bndx), hcp->dpgno); + + /* Can probably just do a "put" here. */ + ret = __ham_dirty_page(hashp, hcp->pagep); + } else { + (void)__ham_del_page(hashp->dbp, hcp->dpagep); + hcp->dpagep = NULL; + } + return (ret); +} + +static int +__ham_make_dup(notdup, dup, bufp, sizep) + const DBT *notdup; + DBT *dup; + void **bufp; + u_int32_t *sizep; +{ + db_indx_t tsize, item_size; + int ret; + u_int8_t *p; + + item_size = (db_indx_t)notdup->size; + tsize = DUP_SIZE(item_size); + if ((ret = __ham_init_dbt(dup, tsize, bufp, sizep)) != 0) + return (ret); + + dup->dlen = 0; + dup->flags = notdup->flags; + F_SET(dup, DB_DBT_PARTIAL); + + p = dup->data; + memcpy(p, &item_size, sizeof(db_indx_t)); + p += sizeof(db_indx_t); + memcpy(p, notdup->data, notdup->size); + p += notdup->size; + memcpy(p, &item_size, sizeof(db_indx_t)); + + dup->doff = 0; + dup->dlen = notdup->size; + + return (0); +} + +static int +__ham_check_move(hashp, hcp, add_len) + HTAB *hashp; + HASH_CURSOR *hcp; + int32_t add_len; +{ + DBT k, d; + DB_LSN new_lsn; + HKEYDATA *hk; + PAGE *next_pagep; + db_pgno_t next_pgno; + int rectype, ret; + u_int32_t new_datalen, old_len; + + /* + * Check if we can do whatever we need to on this page. If not, + * then we'll have to move the current element to a new page. + */ + + hk = H_PAIRDATA(hcp->pagep, hcp->bndx); + + /* + * If the item is already off page duplicates or an offpage item, + * then we know we can do whatever we need to do in-place + */ + if (hk->type == H_OFFDUP || hk->type == H_OFFPAGE) + return (0); + + old_len = + LEN_HITEM(hcp->pagep, hashp->hdr->pagesize, H_DATAINDEX(hcp->bndx)); + new_datalen = old_len - HKEYDATA_SIZE(0) + add_len; + + /* + * We need to add a new page under two conditions: + * 1. The addition makes the total data length cross the BIG + * threshold and the OFFDUP structure won't fit on this page. + * 2. The addition does not make the total data cross the + * threshold, but the new data won't fit on the page. + * If neither of these is true, then we can return. + */ + if (ISBIG(hashp, new_datalen) && (old_len > HOFFDUP_SIZE || + HOFFDUP_SIZE - old_len <= P_FREESPACE(hcp->pagep))) + return (0); + + if (!ISBIG(hashp, new_datalen) && + add_len <= (int32_t)P_FREESPACE(hcp->pagep)) + return (0); + + /* + * If we get here, then we need to move the item to a new page. + * Check if there are more pages in the chain. + */ + + new_datalen = ISBIG(hashp, new_datalen) ? + HOFFDUP_SIZE : HKEYDATA_SIZE(new_datalen); + + next_pagep = NULL; + for (next_pgno = NEXT_PGNO(hcp->pagep); next_pgno != PGNO_INVALID; + next_pgno = NEXT_PGNO(next_pagep)) { + if (next_pagep != NULL && + (ret = __ham_put_page(hashp->dbp, next_pagep, 0)) != 0) + return (ret); + + if ((ret = __ham_get_page(hashp->dbp, next_pgno, &next_pagep)) != 0) + return (ret); + + if (P_FREESPACE(next_pagep) >= new_datalen) + break; + } + + /* No more pages, add one. */ + if (next_pagep == NULL && + (ret = __ham_add_ovflpage(hashp, hcp->pagep, 0, &next_pagep)) != 0) + return (ret); + + /* Add new page at the end of the chain. */ + if (P_FREESPACE(next_pagep) < new_datalen && + (ret = __ham_add_ovflpage(hashp, next_pagep, 1, &next_pagep)) != 0) + return (ret); + + /* Copy the item to the new page. */ + if (DB_LOGGING(hashp->dbp)) { + rectype = PUTPAIR; + k.flags = 0; + d.flags = 0; + if (H_PAIRKEY(hcp->pagep, hcp->bndx)->type == H_OFFPAGE) { + rectype |= PAIR_KEYMASK; + k.data = H_PAIRKEY(hcp->pagep, hcp->bndx); + k.size = HOFFPAGE_SIZE; + } else { + k.data = H_PAIRKEY(hcp->pagep, hcp->bndx)->data; + k.size = LEN_HKEY(hcp->pagep, + hashp->hdr->pagesize, hcp->bndx); + } + + if (hk->type == H_OFFPAGE) { + rectype |= PAIR_DATAMASK; + d.data = H_PAIRDATA(hcp->pagep, hcp->bndx); + d.size = HOFFPAGE_SIZE; + } else { + d.data = H_PAIRDATA(hcp->pagep, hcp->bndx)->data; + d.size = LEN_HDATA(hcp->pagep, + hashp->hdr->pagesize, hcp->bndx); + } + + + if ((ret = __ham_insdel_log(hashp->dbp->dbenv->lg_info, + (DB_TXN *)hashp->dbp->txn, &new_lsn, 0, rectype, + hashp->dbp->log_fileid, PGNO(next_pagep), + (u_int32_t)H_NUMPAIRS(next_pagep), &LSN(next_pagep), + &k, &d)) != 0) + return (ret); + + /* Move lsn onto page. */ + LSN(next_pagep) = new_lsn; /* Structure assignment. */ + } + + __ham_copy_item(hashp, hcp->pagep, H_KEYINDEX(hcp->bndx), next_pagep); + __ham_copy_item(hashp, hcp->pagep, H_DATAINDEX(hcp->bndx), next_pagep); + + /* Now delete the pair from the current page. */ + ret = __ham_del_pair(hashp, hcp); + + (void)__ham_put_page(hashp->dbp, hcp->pagep, 1); + hcp->pagep = next_pagep; + hcp->pgno = PGNO(hcp->pagep); + hcp->bndx = H_NUMPAIRS(hcp->pagep) - 1; + F_SET(hcp, H_EXPAND); + return (ret); +} + +/* + * Replace an onpage set of duplicates with the OFFDUP structure that + * references the duplicate page. + * XXX This is really just a special case of __onpage_replace; we should + * probably combine them. + * PUBLIC: void __ham_move_offpage __P((HTAB *, PAGE *, u_int32_t, db_pgno_t)); + */ +void +__ham_move_offpage(hashp, pagep, ndx, pgno) + HTAB *hashp; + PAGE *pagep; + u_int32_t ndx; + db_pgno_t pgno; +{ + DBT new_dbt; + DBT old_dbt; + HOFFDUP od; + db_indx_t i; + int32_t shrink; + u_int8_t *src; + + od.type = H_OFFDUP; + od.pgno = pgno; + + if (DB_LOGGING(hashp->dbp)) { + new_dbt.data = &od; + new_dbt.size = HOFFDUP_SIZE; + old_dbt.data = P_ENTRY(pagep, ndx); + old_dbt.size = LEN_HITEM(pagep, hashp->hdr->pagesize, ndx); + (void)__ham_replace_log(hashp->dbp->dbenv->lg_info, + (DB_TXN *)hashp->dbp->txn, &LSN(pagep), 0, + hashp->dbp->log_fileid, PGNO(pagep), (u_int32_t)ndx, + &LSN(pagep), -1, &old_dbt, &new_dbt, 0); + } + + shrink = + LEN_HITEM(pagep, hashp->hdr->pagesize, ndx) - HOFFDUP_SIZE; + + if (shrink != 0) { + /* Copy data. */ + src = (u_int8_t *)(pagep) + HOFFSET(pagep); + memmove(src + shrink, src, pagep->inp[ndx] - HOFFSET(pagep)); + HOFFSET(pagep) += shrink; + + /* Update index table. */ + for (i = ndx; i < NUM_ENT(pagep); i++) + pagep->inp[i] += shrink; + } + + /* Now copy the offdup entry onto the page. */ + memcpy(P_ENTRY(pagep, ndx), &od, HOFFDUP_SIZE); +} |