diff options
Diffstat (limited to 'db2/include/btree.h')
-rw-r--r-- | db2/include/btree.h | 233 |
1 files changed, 76 insertions, 157 deletions
diff --git a/db2/include/btree.h b/db2/include/btree.h index 1660d331e7..b0c04b1508 100644 --- a/db2/include/btree.h +++ b/db2/include/btree.h @@ -43,38 +43,19 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * @(#)btree.h 10.21 (Sleepycat) 5/23/98 + * @(#)btree.h 10.26 (Sleepycat) 12/16/98 */ /* Forward structure declarations. */ struct __btree; typedef struct __btree BTREE; struct __cursor; typedef struct __cursor CURSOR; struct __epg; typedef struct __epg EPG; -struct __rcursor; typedef struct __rcursor RCURSOR; struct __recno; typedef struct __recno RECNO; -#undef DEFMINKEYPAGE /* Minimum keys per page */ #define DEFMINKEYPAGE (2) -#undef ISINTERNAL /* If an internal page. */ -#define ISINTERNAL(p) (TYPE(p) == P_IBTREE || TYPE(p) == P_IRECNO) -#undef ISLEAF /* If a leaf page. */ -#define ISLEAF(p) (TYPE(p) == P_LBTREE || TYPE(p) == P_LRECNO) - -/* Allocate and discard thread structures. */ -#define GETHANDLE(dbp, set_txn, dbpp, ret) { \ - if (F_ISSET(dbp, DB_AM_THREAD)) { \ - if ((ret = __db_gethandle(dbp, __bam_bdup, dbpp)) != 0) \ - return (ret); \ - } else \ - *dbpp = dbp; \ - *dbpp->txn = set_txn; \ -} -#define PUTHANDLE(dbp) { \ - dbp->txn = NULL; \ - if (F_ISSET(dbp, DB_AM_THREAD)) \ - __db_puthandle(dbp); \ -} +#define ISINTERNAL(p) (TYPE(p) == P_IBTREE || TYPE(p) == P_IRECNO) +#define ISLEAF(p) (TYPE(p) == P_LBTREE || TYPE(p) == P_LRECNO) /* * If doing transactions we have to hold the locks associated with a data item @@ -82,15 +63,15 @@ struct __recno; typedef struct __recno RECNO; * locks associated with walking the tree. Distinguish between the two so that * we don't tie up the internal pages of the tree longer than necessary. */ -#define __BT_LPUT(dbp, lock) \ - (F_ISSET((dbp), DB_AM_LOCKING) ? \ - lock_put((dbp)->dbenv->lk_info, lock) : 0) -#define __BT_TLPUT(dbp, lock) \ - (F_ISSET((dbp), DB_AM_LOCKING) && (dbp)->txn == NULL ? \ - lock_put((dbp)->dbenv->lk_info, lock) : 0) +#define __BT_LPUT(dbc, lock) \ + (F_ISSET((dbc)->dbp, DB_AM_LOCKING) ? \ + lock_put((dbc)->dbp->dbenv->lk_info, lock) : 0) +#define __BT_TLPUT(dbc, lock) \ + (F_ISSET((dbc)->dbp, DB_AM_LOCKING) && (dbc)->txn == NULL ? \ + lock_put((dbc)->dbp->dbenv->lk_info, lock) : 0) /* - * Flags to __bt_search() and __rec_search(). + * Flags to __bam_search() and __bam_rsearch(). * * Note, internal page searches must find the largest record less than key in * the tree so that descents work. Leaf page searches must find the smallest @@ -113,22 +94,19 @@ struct __recno; typedef struct __recno RECNO; #define S_EXACT 0x00400 /* Exact items only. */ #define S_PARENT 0x00800 /* Lock page pair. */ #define S_STACK 0x01000 /* Need a complete stack. */ +#define S_PAST_EOF 0x02000 /* If doing insert search (or keyfirst + * or keylast operations), or a split + * on behalf of an insert, it's okay to + * return an entry one past end-of-page. + */ #define S_DELETE (S_WRITE | S_DUPFIRST | S_DELNO | S_EXACT | S_STACK) #define S_FIND (S_READ | S_DUPFIRST | S_DELNO) -#define S_INSERT (S_WRITE | S_DUPLAST | S_STACK) -#define S_KEYFIRST (S_WRITE | S_DUPFIRST | S_STACK) -#define S_KEYLAST (S_WRITE | S_DUPLAST | S_STACK) -#define S_WRPAIR (S_WRITE | S_DUPLAST | S_PARENT) - -/* - * If doing insert search (including keyfirst or keylast operations) or a - * split search on behalf of an insert, it's okay to return the entry one - * past the end of the page. - */ -#define PAST_END_OK(f) \ - ((f) == S_INSERT || \ - (f) == S_KEYFIRST || (f) == S_KEYLAST || (f) == S_WRPAIR) +#define S_FIND_WR (S_WRITE | S_DUPFIRST | S_DELNO) +#define S_INSERT (S_WRITE | S_DUPLAST | S_PAST_EOF | S_STACK) +#define S_KEYFIRST (S_WRITE | S_DUPFIRST | S_PAST_EOF | S_STACK) +#define S_KEYLAST (S_WRITE | S_DUPLAST | S_PAST_EOF | S_STACK) +#define S_WRPAIR (S_WRITE | S_DUPLAST | S_PAST_EOF | S_PARENT) /* * Flags to __bam_iitem(). @@ -149,23 +127,32 @@ struct __epg { }; /* - * All cursors are queued from the master DB structure. Convert the user's - * DB reference to the master DB reference. We lock the master DB mutex - * so that we can walk the cursor queue. There's no race in accessing the - * cursors, because if we're modifying a page, we have a write lock on it, - * and therefore no other thread than the current one can have a cursor that - * references the page. + * We maintain a stack of the pages that we're locking in the tree. Btree's + * (currently) only save two levels of the tree at a time, so the default + * stack is always large enough. Recno trees have to lock the entire tree to + * do inserts/deletes, however. Grow the stack as necessary. */ -#define CURSOR_SETUP(dbp) { \ - (dbp) = (dbp)->master; \ - DB_THREAD_LOCK(dbp); \ -} -#define CURSOR_TEARDOWN(dbp) \ - DB_THREAD_UNLOCK(dbp); +#define BT_STK_CLR(c) \ + ((c)->csp = (c)->sp) + +#define BT_STK_ENTER(c, pagep, page_indx, lock, ret) do { \ + if ((ret = \ + (c)->csp == (c)->esp ? __bam_stkgrow(c) : 0) == 0) { \ + (c)->csp->page = pagep; \ + (c)->csp->indx = page_indx; \ + (c)->csp->lock = lock; \ + } \ +} while (0) + +#define BT_STK_PUSH(c, pagep, page_indx, lock, ret) do { \ + BT_STK_ENTER(c, pagep, page_indx, lock, ret); \ + ++(c)->csp; \ +} while (0) + +#define BT_STK_POP(c) \ + ((c)->csp == (c)->stack ? NULL : --(c)->csp) /* - * Btree cursor. - * * Arguments passed to __bam_ca_replace(). */ typedef enum { @@ -173,9 +160,27 @@ typedef enum { REPLACE_SUCCESS, REPLACE_FAILED } ca_replace_arg; + +/* Arguments passed to __ram_ca(). */ +typedef enum { + CA_DELETE, + CA_IAFTER, + CA_IBEFORE +} ca_recno_arg; + +#define RECNO_OOB 0 /* Illegal record number. */ + +/* Btree/Recno cursor. */ struct __cursor { DBC *dbc; /* Enclosing DBC. */ + /* Per-thread information: shared by btree/recno. */ + EPG *sp; /* Stack pointer. */ + EPG *csp; /* Current stack entry. */ + EPG *esp; /* End stack pointer. */ + EPG stack[5]; + + /* Per-thread information: btree private. */ PAGE *page; /* Cursor page. */ db_pgno_t pgno; /* Page. */ @@ -187,90 +192,25 @@ struct __cursor { DB_LOCK lock; /* Cursor read lock. */ db_lockmode_t mode; /* Lock mode. */ - /* - * If a cursor record is deleted, the key/data pair has to remain on - * the page so that subsequent inserts/deletes don't interrupt the - * cursor progression through the file. This results in interesting - * cases when "standard" operations, e.g., dbp->put() are done in the - * context of "deleted" cursors. - * - * C_DELETED -- The item referenced by the cursor has been "deleted" - * but not physically removed from the page. - * C_REPLACE -- The "deleted" item referenced by a cursor has been - * replaced by a dbp->put(), so the cursor is no longer - * responsible for physical removal from the page. - * C_REPLACE_SETUP -- - * We are about to overwrite a "deleted" item, flag any - * cursors referencing it for transition to C_REPLACE - * state. - */ -#define C_DELETED 0x0001 -#define C_REPLACE 0x0002 -#define C_REPLACE_SETUP 0x0004 - - /* - * Internal cursor held for DB->get; don't hold locks unless involved - * in a TXN. - */ -#define C_INTERNAL 0x0008 - u_int32_t flags; -}; - -/* - * Recno cursor. - * - * Arguments passed to __ram_ca(). - */ -typedef enum { - CA_DELETE, - CA_IAFTER, - CA_IBEFORE -} ca_recno_arg; -struct __rcursor { - DBC *dbc; /* Enclosing DBC. */ - + /* Per-thread information: recno private. */ db_recno_t recno; /* Current record number. */ /* - * Cursors referencing "deleted" records are positioned between - * two records, and so must be specially adjusted until they are - * moved. + * Btree: + * We set a flag in the cursor structure if the underlying object has + * been deleted. It's not strictly necessary, we could get the same + * information by looking at the page itself. + * + * Recno: + * When renumbering recno databases during deletes, cursors referencing + * "deleted" records end up positioned between two records, and so must + * be specially adjusted on the next operation. */ -#define CR_DELETED 0x0001 /* Record deleted. */ +#define C_DELETED 0x0001 /* Record was deleted. */ u_int32_t flags; }; /* - * We maintain a stack of the pages that we're locking in the tree. Btree's - * (currently) only save two levels of the tree at a time, so the default - * stack is always large enough. Recno trees have to lock the entire tree to - * do inserts/deletes, however. Grow the stack as necessary. - */ -#undef BT_STK_CLR -#define BT_STK_CLR(t) \ - ((t)->bt_csp = (t)->bt_sp) - -#undef BT_STK_ENTER -#define BT_STK_ENTER(t, pagep, page_indx, lock, ret) do { \ - if ((ret = \ - (t)->bt_csp == (t)->bt_esp ? __bam_stkgrow(t) : 0) == 0) { \ - (t)->bt_csp->page = pagep; \ - (t)->bt_csp->indx = page_indx; \ - (t)->bt_csp->lock = lock; \ - } \ -} while (0) - -#undef BT_STK_PUSH -#define BT_STK_PUSH(t, pagep, page_indx, lock, ret) do { \ - BT_STK_ENTER(t, pagep, page_indx, lock, ret); \ - ++(t)->bt_csp; \ -} while (0) - -#undef BT_STK_POP -#define BT_STK_POP(t) \ - ((t)->bt_csp == (t)->bt_stack ? NULL : --(t)->bt_csp) - -/* * The in-memory recno data structure. * * !!! @@ -278,9 +218,6 @@ struct __rcursor { * are no transaction semantics associated with backing files, nor is there * any thread protection. */ -#undef RECNO_OOB -#define RECNO_OOB 0 /* Illegal record number. */ - struct __recno { int re_delim; /* Variable-length delimiting byte. */ int re_pad; /* Fixed-length padding byte. */ @@ -294,7 +231,7 @@ struct __recno { void *re_emap; /* End of mapped space. */ size_t re_msize; /* Size of mapped region. */ /* Recno input function. */ - int (*re_irec) __P((DB *, db_recno_t)); + int (*re_irec) __P((DBC *, db_recno_t)); #define RECNO_EOF 0x0001 /* EOF on backing source file. */ #define RECNO_MODIFIED 0x0002 /* Tree was modified. */ @@ -302,31 +239,11 @@ struct __recno { }; /* - * The in-memory btree data structure. + * The in-memory, per-tree btree data structure. */ struct __btree { -/* - * These fields are per-thread and are initialized when the BTREE structure - * is created. - */ db_pgno_t bt_lpgno; /* Last insert location. */ - DBT bt_rkey; /* Returned key. */ - DBT bt_rdata; /* Returned data. */ - - EPG *bt_sp; /* Stack pointer. */ - EPG *bt_csp; /* Current stack entry. */ - EPG *bt_esp; /* End stack pointer. */ - EPG bt_stack[5]; - - RECNO *bt_recno; /* Private recno structure. */ - - DB_BTREE_LSTAT lstat; /* Btree local statistics. */ - -/* - * These fields are copied from the original BTREE structure and never - * change. - */ db_indx_t bt_maxkey; /* Maximum keys per page. */ db_indx_t bt_minkey; /* Minimum keys per page. */ @@ -336,6 +253,8 @@ struct __btree { __P((const DBT *, const DBT *)); db_indx_t bt_ovflsize; /* Maximum key/data on-page size. */ + + RECNO *recno; /* Private recno structure. */ }; #include "btree_auto.h" |