about summary refs log tree commit diff
path: root/db2/include/btree.h
diff options
context:
space:
mode:
Diffstat (limited to 'db2/include/btree.h')
-rw-r--r--db2/include/btree.h233
1 files changed, 76 insertions, 157 deletions
diff --git a/db2/include/btree.h b/db2/include/btree.h
index 1660d331e7..b0c04b1508 100644
--- a/db2/include/btree.h
+++ b/db2/include/btree.h
@@ -43,38 +43,19 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- *	@(#)btree.h	10.21 (Sleepycat) 5/23/98
+ *	@(#)btree.h	10.26 (Sleepycat) 12/16/98
  */
 
 /* Forward structure declarations. */
 struct __btree;		typedef struct __btree BTREE;
 struct __cursor;	typedef struct __cursor CURSOR;
 struct __epg;		typedef struct __epg EPG;
-struct __rcursor;	typedef struct __rcursor RCURSOR;
 struct __recno;		typedef struct __recno RECNO;
 
-#undef	DEFMINKEYPAGE			/* Minimum keys per page */
 #define	DEFMINKEYPAGE	 (2)
 
-#undef	ISINTERNAL			/* If an internal page. */
-#define	ISINTERNAL(p)	(TYPE(p) == P_IBTREE || TYPE(p) ==  P_IRECNO)
-#undef	ISLEAF				/* If a leaf page. */
-#define	ISLEAF(p)	(TYPE(p) == P_LBTREE || TYPE(p) ==  P_LRECNO)
-
-/* Allocate and discard thread structures. */
-#define	GETHANDLE(dbp, set_txn, dbpp, ret) {				\
-	if (F_ISSET(dbp, DB_AM_THREAD)) {				\
-		if ((ret = __db_gethandle(dbp, __bam_bdup, dbpp)) != 0)	\
-			return (ret);					\
-	} else								\
-		*dbpp = dbp;						\
-	*dbpp->txn = set_txn;						\
-}
-#define	PUTHANDLE(dbp) {						\
-	dbp->txn = NULL;						\
-	if (F_ISSET(dbp, DB_AM_THREAD))					\
-		__db_puthandle(dbp);					\
-}
+#define	ISINTERNAL(p)	(TYPE(p) == P_IBTREE || TYPE(p) == P_IRECNO)
+#define	ISLEAF(p)	(TYPE(p) == P_LBTREE || TYPE(p) == P_LRECNO)
 
 /*
  * If doing transactions we have to hold the locks associated with a data item
@@ -82,15 +63,15 @@ struct __recno;		typedef struct __recno RECNO;
  * locks associated with walking the tree.  Distinguish between the two so that
  * we don't tie up the internal pages of the tree longer than necessary.
  */
-#define	__BT_LPUT(dbp, lock)						\
-	(F_ISSET((dbp), DB_AM_LOCKING) ?				\
-	    lock_put((dbp)->dbenv->lk_info, lock) : 0)
-#define	__BT_TLPUT(dbp, lock)						\
-	(F_ISSET((dbp), DB_AM_LOCKING) && (dbp)->txn == NULL ?		\
-	    lock_put((dbp)->dbenv->lk_info, lock) : 0)
+#define	__BT_LPUT(dbc, lock)						\
+	(F_ISSET((dbc)->dbp, DB_AM_LOCKING) ?				\
+	    lock_put((dbc)->dbp->dbenv->lk_info, lock) : 0)
+#define	__BT_TLPUT(dbc, lock)						\
+	(F_ISSET((dbc)->dbp, DB_AM_LOCKING) && (dbc)->txn == NULL ?	\
+	    lock_put((dbc)->dbp->dbenv->lk_info, lock) : 0)
 
 /*
- * Flags to __bt_search() and __rec_search().
+ * Flags to __bam_search() and __bam_rsearch().
  *
  * Note, internal page searches must find the largest record less than key in
  * the tree so that descents work.  Leaf page searches must find the smallest
@@ -113,22 +94,19 @@ struct __recno;		typedef struct __recno RECNO;
 #define	S_EXACT		0x00400		/* Exact items only. */
 #define	S_PARENT	0x00800		/* Lock page pair. */
 #define	S_STACK		0x01000		/* Need a complete stack. */
+#define	S_PAST_EOF	0x02000		/* If doing insert search (or keyfirst
+					 * or keylast operations), or a split
+					 * on behalf of an insert, it's okay to
+					 * return an entry one past end-of-page.
+					 */
 
 #define	S_DELETE	(S_WRITE | S_DUPFIRST | S_DELNO | S_EXACT | S_STACK)
 #define	S_FIND		(S_READ | S_DUPFIRST | S_DELNO)
-#define	S_INSERT	(S_WRITE | S_DUPLAST | S_STACK)
-#define	S_KEYFIRST	(S_WRITE | S_DUPFIRST | S_STACK)
-#define	S_KEYLAST	(S_WRITE | S_DUPLAST | S_STACK)
-#define	S_WRPAIR	(S_WRITE | S_DUPLAST | S_PARENT)
-
-/*
- * If doing insert search (including keyfirst or keylast operations) or a
- * split search on behalf of an insert, it's okay to return the entry one
- * past the end of the page.
- */
-#define	PAST_END_OK(f)							\
-	((f) == S_INSERT ||						\
-	(f) == S_KEYFIRST || (f) == S_KEYLAST || (f) == S_WRPAIR)
+#define	S_FIND_WR	(S_WRITE | S_DUPFIRST | S_DELNO)
+#define	S_INSERT	(S_WRITE | S_DUPLAST | S_PAST_EOF | S_STACK)
+#define	S_KEYFIRST	(S_WRITE | S_DUPFIRST | S_PAST_EOF | S_STACK)
+#define	S_KEYLAST	(S_WRITE | S_DUPLAST | S_PAST_EOF | S_STACK)
+#define	S_WRPAIR	(S_WRITE | S_DUPLAST | S_PAST_EOF | S_PARENT)
 
 /*
  * Flags to __bam_iitem().
@@ -149,23 +127,32 @@ struct __epg {
 };
 
 /*
- * All cursors are queued from the master DB structure.  Convert the user's
- * DB reference to the master DB reference.  We lock the master DB mutex
- * so that we can walk the cursor queue.  There's no race in accessing the
- * cursors, because if we're modifying a page, we have a write lock on it,
- * and therefore no other thread than the current one can have a cursor that
- * references the page.
+ * We maintain a stack of the pages that we're locking in the tree.  Btree's
+ * (currently) only save two levels of the tree at a time, so the default
+ * stack is always large enough.  Recno trees have to lock the entire tree to
+ * do inserts/deletes, however.  Grow the stack as necessary.
  */
-#define	CURSOR_SETUP(dbp) {						\
-	(dbp) = (dbp)->master;						\
-	DB_THREAD_LOCK(dbp);						\
-}
-#define	CURSOR_TEARDOWN(dbp)						\
-	DB_THREAD_UNLOCK(dbp);
+#define	BT_STK_CLR(c)							\
+	((c)->csp = (c)->sp)
+
+#define	BT_STK_ENTER(c, pagep, page_indx, lock, ret) do {		\
+	if ((ret =							\
+	    (c)->csp == (c)->esp ? __bam_stkgrow(c) : 0) == 0) {	\
+		(c)->csp->page = pagep;					\
+		(c)->csp->indx = page_indx;				\
+		(c)->csp->lock = lock;					\
+	}								\
+} while (0)
+
+#define	BT_STK_PUSH(c, pagep, page_indx, lock, ret) do {		\
+	BT_STK_ENTER(c, pagep, page_indx, lock, ret);			\
+	++(c)->csp;							\
+} while (0)
+
+#define	BT_STK_POP(c)							\
+	((c)->csp == (c)->stack ? NULL : --(c)->csp)
 
 /*
- * Btree cursor.
- *
  * Arguments passed to __bam_ca_replace().
  */
 typedef enum {
@@ -173,9 +160,27 @@ typedef enum {
 	REPLACE_SUCCESS,
 	REPLACE_FAILED
 } ca_replace_arg;
+
+/* Arguments passed to __ram_ca(). */
+typedef enum {
+	CA_DELETE,
+	CA_IAFTER,
+	CA_IBEFORE
+} ca_recno_arg;
+
+#define	RECNO_OOB	0		/* Illegal record number. */
+
+/* Btree/Recno cursor. */
 struct __cursor {
 	DBC		*dbc;		/* Enclosing DBC. */
 
+	/* Per-thread information: shared by btree/recno. */
+	EPG		*sp;		/* Stack pointer. */
+	EPG	 	*csp;		/* Current stack entry. */
+	EPG		*esp;		/* End stack pointer. */
+	EPG		 stack[5];
+
+	/* Per-thread information: btree private. */
 	PAGE		*page;		/* Cursor page. */
 
 	db_pgno_t	 pgno;		/* Page. */
@@ -187,90 +192,25 @@ struct __cursor {
 	DB_LOCK		 lock;		/* Cursor read lock. */
 	db_lockmode_t	 mode;		/* Lock mode. */
 
-	/*
-	 * If a cursor record is deleted, the key/data pair has to remain on
-	 * the page so that subsequent inserts/deletes don't interrupt the
-	 * cursor progression through the file.  This results in interesting
-	 * cases when "standard" operations, e.g., dbp->put() are done in the
-	 * context of "deleted" cursors.
-	 *
-	 * C_DELETED -- The item referenced by the cursor has been "deleted"
-	 *		but not physically removed from the page.
-	 * C_REPLACE -- The "deleted" item referenced by a cursor has been
-	 *		replaced by a dbp->put(), so the cursor is no longer
-	 *		responsible for physical removal from the page.
-	 * C_REPLACE_SETUP --
-	 *		We are about to overwrite a "deleted" item, flag any
-	 *		cursors referencing it for transition to C_REPLACE
-	 *		state.
-	 */
-#define	C_DELETED	0x0001
-#define	C_REPLACE	0x0002
-#define	C_REPLACE_SETUP	0x0004
-
-	/*
-	 * Internal cursor held for DB->get; don't hold locks unless involved
-	 * in a TXN.
-	 */
-#define	C_INTERNAL	0x0008
-	u_int32_t	 flags;
-};
-
-/*
- * Recno cursor.
- *
- * Arguments passed to __ram_ca().
- */
-typedef enum {
-	CA_DELETE,
-	CA_IAFTER,
-	CA_IBEFORE
-} ca_recno_arg;
-struct __rcursor {
-	DBC		*dbc;		/* Enclosing DBC. */
-
+	/* Per-thread information: recno private. */
 	db_recno_t	 recno;		/* Current record number. */
 
 	/*
-	 * Cursors referencing "deleted" records are positioned between
-	 * two records, and so must be specially adjusted until they are
-	 * moved.
+	 * Btree:
+	 * We set a flag in the cursor structure if the underlying object has
+	 * been deleted.  It's not strictly necessary, we could get the same
+	 * information by looking at the page itself.
+	 *
+	 * Recno:
+	 * When renumbering recno databases during deletes, cursors referencing
+	 * "deleted" records end up positioned between two records, and so must
+	 * be specially adjusted on the next operation.
 	 */
-#define	CR_DELETED	0x0001		/* Record deleted. */
+#define	C_DELETED	0x0001		/* Record was deleted. */
 	u_int32_t	 flags;
 };
 
 /*
- * We maintain a stack of the pages that we're locking in the tree.  Btree's
- * (currently) only save two levels of the tree at a time, so the default
- * stack is always large enough.  Recno trees have to lock the entire tree to
- * do inserts/deletes, however.  Grow the stack as necessary.
- */
-#undef	BT_STK_CLR
-#define	BT_STK_CLR(t)							\
-	((t)->bt_csp = (t)->bt_sp)
-
-#undef	BT_STK_ENTER
-#define	BT_STK_ENTER(t, pagep, page_indx, lock, ret) do {		\
-	if ((ret =							\
-	    (t)->bt_csp == (t)->bt_esp ? __bam_stkgrow(t) : 0) == 0) {	\
-		(t)->bt_csp->page = pagep;				\
-		(t)->bt_csp->indx = page_indx;				\
-		(t)->bt_csp->lock = lock;				\
-	}								\
-} while (0)
-
-#undef	BT_STK_PUSH
-#define	BT_STK_PUSH(t, pagep, page_indx, lock, ret) do {		\
-	BT_STK_ENTER(t, pagep, page_indx, lock, ret);			\
-	++(t)->bt_csp;							\
-} while (0)
-
-#undef	BT_STK_POP
-#define	BT_STK_POP(t)							\
-	((t)->bt_csp == (t)->bt_stack ? NULL : --(t)->bt_csp)
-
-/*
  * The in-memory recno data structure.
  *
  * !!!
@@ -278,9 +218,6 @@ struct __rcursor {
  * are no transaction semantics associated with backing files, nor is there
  * any thread protection.
  */
-#undef	RECNO_OOB
-#define	RECNO_OOB	0		/* Illegal record number. */
-
 struct __recno {
 	int		 re_delim;	/* Variable-length delimiting byte. */
 	int		 re_pad;	/* Fixed-length padding byte. */
@@ -294,7 +231,7 @@ struct __recno {
 	void		*re_emap;	/* End of mapped space. */
 	size_t		 re_msize;	/* Size of mapped region. */
 					/* Recno input function. */
-	int (*re_irec) __P((DB *, db_recno_t));
+	int (*re_irec) __P((DBC *, db_recno_t));
 
 #define	RECNO_EOF	0x0001		/* EOF on backing source file. */
 #define	RECNO_MODIFIED	0x0002		/* Tree was modified. */
@@ -302,31 +239,11 @@ struct __recno {
 };
 
 /*
- * The in-memory btree data structure.
+ * The in-memory, per-tree btree data structure.
  */
 struct __btree {
-/*
- * These fields are per-thread and are initialized when the BTREE structure
- * is created.
- */
 	db_pgno_t	 bt_lpgno;	/* Last insert location. */
 
-	DBT		 bt_rkey;	/* Returned key. */
-	DBT		 bt_rdata;	/* Returned data. */
-
-	EPG		*bt_sp;		/* Stack pointer. */
-	EPG	 	*bt_csp;	/* Current stack entry. */
-	EPG		*bt_esp;	/* End stack pointer. */
-	EPG		 bt_stack[5];
-
-	RECNO		*bt_recno;	/* Private recno structure. */
-
-	DB_BTREE_LSTAT lstat;		/* Btree local statistics. */
-
-/*
- * These fields are copied from the original BTREE structure and never
- * change.
- */
 	db_indx_t 	 bt_maxkey;	/* Maximum keys per page. */
 	db_indx_t 	 bt_minkey;	/* Minimum keys per page. */
 
@@ -336,6 +253,8 @@ struct __btree {
 	    __P((const DBT *, const DBT *));
 
 	db_indx_t	 bt_ovflsize;	/* Maximum key/data on-page size. */
+
+	RECNO		*recno;		/* Private recno structure. */
 };
 
 #include "btree_auto.h"