diff options
Diffstat (limited to 'db2/btree/bt_rsearch.c')
-rw-r--r-- | db2/btree/bt_rsearch.c | 164 |
1 files changed, 86 insertions, 78 deletions
diff --git a/db2/btree/bt_rsearch.c b/db2/btree/bt_rsearch.c index caa6b3515e..8efe4059a8 100644 --- a/db2/btree/bt_rsearch.c +++ b/db2/btree/bt_rsearch.c @@ -44,7 +44,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)bt_rsearch.c 10.15 (Sleepycat) 5/6/98"; +static const char sccsid[] = "@(#)bt_rsearch.c 10.21 (Sleepycat) 12/2/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -59,39 +59,37 @@ static const char sccsid[] = "@(#)bt_rsearch.c 10.15 (Sleepycat) 5/6/98"; * __bam_rsearch -- * Search a btree for a record number. * - * PUBLIC: int __bam_rsearch __P((DB *, db_recno_t *, u_int32_t, int, int *)); + * PUBLIC: int __bam_rsearch __P((DBC *, db_recno_t *, u_int32_t, int, int *)); */ int -__bam_rsearch(dbp, recnop, flags, stop, exactp) - DB *dbp; +__bam_rsearch(dbc, recnop, flags, stop, exactp) + DBC *dbc; db_recno_t *recnop; u_int32_t flags; int stop, *exactp; { BINTERNAL *bi; - BTREE *t; + CURSOR *cp; + DB *dbp; DB_LOCK lock; PAGE *h; RINTERNAL *ri; db_indx_t indx, top; db_pgno_t pg; db_recno_t i, recno, total; - int isappend, ret, stack; + int ret, stack; - t = dbp->internal; + dbp = dbc->dbp; + cp = dbc->internal; - /* - * We test for groups of flags, S_APPEND is the only one that can be - * OR'd into the set. Clear it now so that the tests for equality - * will work. - */ - if ((isappend = LF_ISSET(S_APPEND)) != 0) - LF_CLR(S_APPEND); + BT_STK_CLR(cp); /* * There are several ways we search a btree tree. The flags argument * specifies if we're acquiring read or write locks and if we are - * locking pairs of pages. See btree.h for more details. + * locking pairs of pages. In addition, if we're adding or deleting + * an item, we have to lock the entire tree, regardless. See btree.h + * for more details. * * If write-locking pages, we need to know whether or not to acquire a * write lock on a page before getting it. This depends on how deep it @@ -102,15 +100,36 @@ __bam_rsearch(dbp, recnop, flags, stop, exactp) * Retrieve the root page. */ pg = PGNO_ROOT; - if ((ret = __bam_lget(dbp, 0, PGNO_ROOT, - flags == S_INSERT || flags == S_DELETE ? - DB_LOCK_WRITE : DB_LOCK_READ, &lock)) != 0) + stack = LF_ISSET(S_STACK); + if ((ret = __bam_lget(dbc, + 0, pg, stack ? DB_LOCK_WRITE : DB_LOCK_READ, &lock)) != 0) return (ret); - if ((ret = __bam_pget(dbp, &h, &pg, 0)) != 0) { - (void)__BT_LPUT(dbp, lock); + if ((ret = memp_fget(dbp->mpf, &pg, 0, &h)) != 0) { + (void)__BT_LPUT(dbc, lock); return (ret); } - total = RE_NREC(h); + + /* + * Decide if we need to save this page; if we do, write lock it. + * We deliberately don't lock-couple on this call. If the tree + * is tiny, i.e., one page, and two threads are busily updating + * the root page, we're almost guaranteed deadlocks galore, as + * each one gets a read lock and then blocks the other's attempt + * for a write lock. + */ + if (!stack && + ((LF_ISSET(S_PARENT) && (u_int8_t)(stop + 1) >= h->level) || + (LF_ISSET(S_WRITE) && h->level == LEAFLEVEL))) { + (void)memp_fput(dbp->mpf, h, 0); + (void)__BT_LPUT(dbc, lock); + if ((ret = __bam_lget(dbc, 0, pg, DB_LOCK_WRITE, &lock)) != 0) + return (ret); + if ((ret = memp_fget(dbp->mpf, &pg, 0, &h)) != 0) { + (void)__BT_LPUT(dbc, lock); + return (ret); + } + stack = 1; + } /* * If appending to the tree, set the record number now -- we have the @@ -124,7 +143,8 @@ __bam_rsearch(dbp, recnop, flags, stop, exactp) * for the record immediately after the last record in the tree, so do * a fast check now. */ - if (isappend) { + total = RE_NREC(h); + if (LF_ISSET(S_APPEND)) { *exactp = 0; *recnop = recno = total + 1; } else { @@ -133,33 +153,14 @@ __bam_rsearch(dbp, recnop, flags, stop, exactp) *exactp = 1; else { *exactp = 0; - if (!PAST_END_OK(flags) || recno > total + 1) { + if (!LF_ISSET(S_PAST_EOF) || recno > total + 1) { (void)memp_fput(dbp->mpf, h, 0); - (void)__BT_LPUT(dbp, lock); + (void)__BT_LPUT(dbc, lock); return (DB_NOTFOUND); } } } - /* Decide if we're building a stack based on the operation. */ - BT_STK_CLR(t); - stack = flags == S_DELETE || flags == S_INSERT; - - /* - * Decide if we need to save this page; if we do, write lock it, and - * start to build a stack. - */ - if (LF_ISSET(S_PARENT) && (u_int8_t)(stop + 1) >= h->level) { - (void)memp_fput(dbp->mpf, h, 0); - if ((ret = __bam_lget(dbp, 1, pg, DB_LOCK_WRITE, &lock)) != 0) - return (ret); - if ((ret = __bam_pget(dbp, &h, &pg, 0)) != 0) { - (void)__BT_LPUT(dbp, lock); - return (ret); - } - stack = 1; - } - /* * !!! * Record numbers in the tree are 0-based, but the recno is @@ -177,7 +178,7 @@ __bam_rsearch(dbp, recnop, flags, stop, exactp) * not exist if there are enough deleted records in the * page. */ - if (recno <= NUM_ENT(h)) + if (recno <= (db_recno_t)NUM_ENT(h) / P_INDX) for (i = recno - 1;; --i) { if (B_DISSET(GET_BKEYDATA(h, i * P_INDX + O_INDX)->type)) @@ -185,10 +186,10 @@ __bam_rsearch(dbp, recnop, flags, stop, exactp) if (i == 0) break; } - if (recno > NUM_ENT(h)) { + if (recno > (db_recno_t)NUM_ENT(h) / P_INDX) { *exactp = 0; - if (!PAST_END_OK(flags) || - recno > (db_recno_t)(NUM_ENT(h) + 1)) { + if (!LF_ISSET(S_PAST_EOF) || recno > + (db_recno_t)(NUM_ENT(h) / P_INDX + 1)) { ret = DB_NOTFOUND; goto err; } @@ -197,7 +198,7 @@ __bam_rsearch(dbp, recnop, flags, stop, exactp) /* Correct from 1-based to 0-based for a page offset. */ --recno; - BT_STK_ENTER(t, h, recno * P_INDX, lock, ret); + BT_STK_ENTER(cp, h, recno * P_INDX, lock, ret); return (ret); case P_IBTREE: for (indx = 0, top = NUM_ENT(h);;) { @@ -213,7 +214,7 @@ __bam_rsearch(dbp, recnop, flags, stop, exactp) /* Correct from 1-based to 0-based for a page offset. */ --recno; - BT_STK_ENTER(t, h, recno, lock, ret); + BT_STK_ENTER(cp, h, recno, lock, ret); return (ret); case P_IRECNO: for (indx = 0, top = NUM_ENT(h);;) { @@ -232,42 +233,42 @@ __bam_rsearch(dbp, recnop, flags, stop, exactp) if (stack) { /* Return if this is the lowest page wanted. */ if (LF_ISSET(S_PARENT) && stop == h->level) { - BT_STK_ENTER(t, h, indx, lock, ret); + BT_STK_ENTER(cp, h, indx, lock, ret); return (ret); } - BT_STK_PUSH(t, h, indx, lock, ret); - if (ret) + BT_STK_PUSH(cp, h, indx, lock, ret); + if (ret != 0) goto err; - if ((ret = __bam_lget(dbp, 0, pg, - LF_ISSET(S_WRITE) ? DB_LOCK_WRITE : DB_LOCK_READ, - &lock)) != 0) + if ((ret = + __bam_lget(dbc, 0, pg, DB_LOCK_WRITE, &lock)) != 0) goto err; } else { - (void)memp_fput(dbp->mpf, h, 0); - /* * Decide if we want to return a pointer to the next * page in the stack. If we do, write lock it and * never unlock it. */ - if (LF_ISSET(S_PARENT) && - (u_int8_t)(stop + 1) >= (u_int8_t)(h->level - 1)) + if ((LF_ISSET(S_PARENT) && + (u_int8_t)(stop + 1) >= (u_int8_t)(h->level - 1)) || + (h->level - 1) == LEAFLEVEL) stack = 1; - if ((ret = __bam_lget(dbp, 1, pg, - LF_ISSET(S_WRITE) ? DB_LOCK_WRITE : DB_LOCK_READ, - &lock)) != 0) + (void)memp_fput(dbp->mpf, h, 0); + + if ((ret = + __bam_lget(dbc, 1, pg, stack && LF_ISSET(S_WRITE) ? + DB_LOCK_WRITE : DB_LOCK_READ, &lock)) != 0) goto err; } - if ((ret = __bam_pget(dbp, &h, &pg, 0)) != 0) + if ((ret = memp_fget(dbp->mpf, &pg, 0, &h)) != 0) goto err; } /* NOTREACHED */ -err: BT_STK_POP(t); - __bam_stkrel(dbp); +err: BT_STK_POP(cp); + __bam_stkrel(dbc, 0); return (ret); } @@ -275,25 +276,29 @@ err: BT_STK_POP(t); * __bam_adjust -- * Adjust the tree after adding or deleting a record. * - * PUBLIC: int __bam_adjust __P((DB *, BTREE *, int32_t)); + * PUBLIC: int __bam_adjust __P((DBC *, int32_t)); */ int -__bam_adjust(dbp, t, adjust) - DB *dbp; - BTREE *t; +__bam_adjust(dbc, adjust) + DBC *dbc; int32_t adjust; { + CURSOR *cp; + DB *dbp; EPG *epg; PAGE *h; int ret; + dbp = dbc->dbp; + cp = dbc->internal; + /* Update the record counts for the tree. */ - for (epg = t->bt_sp; epg <= t->bt_csp; ++epg) { + for (epg = cp->sp; epg <= cp->csp; ++epg) { h = epg->page; if (TYPE(h) == P_IBTREE || TYPE(h) == P_IRECNO) { - if (DB_LOGGING(dbp) && + if (DB_LOGGING(dbc) && (ret = __bam_cadjust_log(dbp->dbenv->lg_info, - dbp->txn, &LSN(h), 0, dbp->log_fileid, + dbc->txn, &LSN(h), 0, dbp->log_fileid, PGNO(h), &LSN(h), (u_int32_t)epg->indx, adjust, 1)) != 0) return (ret); @@ -317,28 +322,31 @@ __bam_adjust(dbp, t, adjust) * __bam_nrecs -- * Return the number of records in the tree. * - * PUBLIC: int __bam_nrecs __P((DB *, db_recno_t *)); + * PUBLIC: int __bam_nrecs __P((DBC *, db_recno_t *)); */ int -__bam_nrecs(dbp, rep) - DB *dbp; +__bam_nrecs(dbc, rep) + DBC *dbc; db_recno_t *rep; { + DB *dbp; DB_LOCK lock; PAGE *h; db_pgno_t pgno; int ret; + dbp = dbc->dbp; + pgno = PGNO_ROOT; - if ((ret = __bam_lget(dbp, 0, pgno, DB_LOCK_READ, &lock)) != 0) + if ((ret = __bam_lget(dbc, 0, pgno, DB_LOCK_READ, &lock)) != 0) return (ret); - if ((ret = __bam_pget(dbp, &h, &pgno, 0)) != 0) + if ((ret = memp_fget(dbp->mpf, &pgno, 0, &h)) != 0) return (ret); *rep = RE_NREC(h); (void)memp_fput(dbp->mpf, h, 0); - (void)__BT_TLPUT(dbp, lock); + (void)__BT_TLPUT(dbc, lock); return (0); } |