about summary refs log tree commit diff
path: root/db2/btree
diff options
context:
space:
mode:
Diffstat (limited to 'db2/btree')
-rw-r--r--db2/btree/bt_close.c19
-rw-r--r--db2/btree/bt_compare.c23
-rw-r--r--db2/btree/bt_conv.c4
-rw-r--r--db2/btree/bt_cursor.c381
-rw-r--r--db2/btree/bt_delete.c92
-rw-r--r--db2/btree/bt_open.c14
-rw-r--r--db2/btree/bt_page.c28
-rw-r--r--db2/btree/bt_put.c176
-rw-r--r--db2/btree/bt_rec.c15
-rw-r--r--db2/btree/bt_recno.c230
-rw-r--r--db2/btree/bt_rsearch.c98
-rw-r--r--db2/btree/bt_search.c30
-rw-r--r--db2/btree/bt_split.c22
-rw-r--r--db2/btree/bt_stat.c9
-rw-r--r--db2/btree/btree.src10
-rw-r--r--db2/btree/btree_auto.c186
16 files changed, 795 insertions, 542 deletions
diff --git a/db2/btree/bt_close.c b/db2/btree/bt_close.c
index ecccc9fe08..9df5c717e6 100644
--- a/db2/btree/bt_close.c
+++ b/db2/btree/bt_close.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 /*
@@ -47,18 +47,13 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)bt_close.c	10.25 (Sleepycat) 1/6/98";
+static const char sccsid[] = "@(#)bt_close.c	10.32 (Sleepycat) 5/6/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
 #include <sys/types.h>
-#include <sys/mman.h>
 
-#include <errno.h>
-#include <stdio.h>
-#include <stdlib.h>
 #include <string.h>
-#include <unistd.h>
 #endif
 
 #include "db_int.h"
@@ -104,12 +99,12 @@ __bam_close(dbp)
  * __bam_sync --
  *	Sync the btree to disk.
  *
- * PUBLIC: int __bam_sync __P((DB *, int));
+ * PUBLIC: int __bam_sync __P((DB *, u_int32_t));
  */
 int
 __bam_sync(argdbp, flags)
 	DB *argdbp;
-	int flags;
+	u_int32_t flags;
 {
 	DB *dbp;
 	int ret;
@@ -146,7 +141,7 @@ __bam_upstat(dbp)
 	BTMETA *meta;
 	DB_LOCK metalock;
 	db_pgno_t pgno;
-	int flags, ret;
+	u_int32_t flags;
 
 	/*
 	 * We use a no-op log call to log the update of the statistics onto the
@@ -166,8 +161,8 @@ __bam_upstat(dbp)
 	if (__bam_pget(dbp, (PAGE **)&meta, &pgno, 0) == 0) {
 		/* Log the change. */
 		if (DB_LOGGING(dbp) &&
-		    (ret = __db_noop_log(dbp->dbenv->lg_info, dbp->txn,
-		    &LSN(meta), 0)) == 0)
+		    __db_noop_log(dbp->dbenv->lg_info, dbp->txn, &LSN(meta), 0,
+		    dbp->log_fileid, PGNO_METADATA, &LSN(meta)) != 0)
 			goto err;
 
 		/* Update the statistics. */
diff --git a/db2/btree/bt_compare.c b/db2/btree/bt_compare.c
index a68b1fa891..5c6d1e38ca 100644
--- a/db2/btree/bt_compare.c
+++ b/db2/btree/bt_compare.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 /*
@@ -47,14 +47,12 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)bt_compare.c	10.4 (Sleepycat) 9/3/97";
+static const char sccsid[] = "@(#)bt_compare.c	10.9 (Sleepycat) 5/6/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
 #include <sys/types.h>
 
-#include <stdio.h>
-#include <stdlib.h>
 #include <string.h>
 #endif
 
@@ -106,7 +104,6 @@ __bam_cmp(dbp, k1, e)
 		if (B_TYPE(bk->type) == B_OVERFLOW)
 			bo = (BOVERFLOW *)bk;
 		else {
-			memset(&k2, 0, sizeof(k2));
 			k2.data = bk->data;
 			k2.size = bk->len;
 		}
@@ -115,7 +112,6 @@ __bam_cmp(dbp, k1, e)
 		if (B_TYPE(bi->type) == B_OVERFLOW)
 			bo = (BOVERFLOW *)(bi->data);
 		else {
-			memset(&k2, 0, sizeof(k2));
 			k2.data = bi->data;
 			k2.size = bi->len;
 		}
@@ -139,10 +135,21 @@ __bam_cmp(dbp, k1, e)
 		 * Otherwise, we need a contiguous record so we can hand it
 		 * to the user's routine.
 		 */
+		memset(&k2, 0, sizeof(k2));
 		if (__db_goff(dbp, &k2, bo->tlen,
-		    bo->pgno, &t->bt_rdata.data, &t->bt_rdata.ulen) != 0)
-			abort();
+		    bo->pgno, &t->bt_rdata.data, &t->bt_rdata.ulen) != 0) {
+			(void)__db_panic(dbp);
+			return (0);
+		}
 	}
+
+	/*
+	 * XXX
+	 * Note, we have not cleared the k2 DBT in this path.  This should
+	 * be okay, because the user's comparison routine had better not be
+	 * looking at any fields other than the data/size.  We don't clear
+	 * it because we go through this path a lot and it's expensive.
+	 */
 	return ((*t->bt_compare)(k1, &k2));
 }
 
diff --git a/db2/btree/bt_conv.c b/db2/btree/bt_conv.c
index c89493cbed..3da4507723 100644
--- a/db2/btree/bt_conv.c
+++ b/db2/btree/bt_conv.c
@@ -1,14 +1,14 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)bt_conv.c	10.5 (Sleepycat) 9/15/97";
+static const char sccsid[] = "@(#)bt_conv.c	10.6 (Sleepycat) 4/10/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
diff --git a/db2/btree/bt_cursor.c b/db2/btree/bt_cursor.c
index f526c965e5..cfa388741e 100644
--- a/db2/btree/bt_cursor.c
+++ b/db2/btree/bt_cursor.c
@@ -1,22 +1,20 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)bt_cursor.c	10.41 (Sleepycat) 1/8/98";
+static const char sccsid[] = "@(#)bt_cursor.c	10.53 (Sleepycat) 5/25/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
 #include <sys/types.h>
 
 #include <errno.h>
-#include <stdio.h>
-#include <stdlib.h>
 #include <string.h>
 #endif
 
@@ -25,24 +23,30 @@ static const char sccsid[] = "@(#)bt_cursor.c	10.41 (Sleepycat) 1/8/98";
 #include "btree.h"
 
 static int __bam_c_close __P((DBC *));
-static int __bam_c_del __P((DBC *, int));
+static int __bam_c_del __P((DBC *, u_int32_t));
 static int __bam_c_first __P((DB *, CURSOR *));
-static int __bam_c_get __P((DBC *, DBT *, DBT *, int));
+static int __bam_c_get __P((DBC *, DBT *, DBT *, u_int32_t));
+static int __bam_c_getstack __P((DB *, CURSOR *));
 static int __bam_c_last __P((DB *, CURSOR *));
 static int __bam_c_next __P((DB *, CURSOR *, int));
 static int __bam_c_physdel __P((DB *, CURSOR *, PAGE *));
 static int __bam_c_prev __P((DB *, CURSOR *));
-static int __bam_c_put __P((DBC *, DBT *, DBT *, int));
-static int __bam_c_rget __P((DB *, CURSOR *, DBT *, int));
-static int __bam_c_search __P((DB *, CURSOR *, const DBT *, u_int, int, int *));
+static int __bam_c_put __P((DBC *, DBT *, DBT *, u_int32_t));
+static int __bam_c_rget __P((DB *, CURSOR *, DBT *, u_int32_t));
+static int __bam_c_search
+	       __P((DB *, CURSOR *, const DBT *, u_int32_t, int, int *));
 
 /* Discard the current page/lock held by a cursor. */
 #undef	DISCARD
 #define	DISCARD(dbp, cp) {						\
-	(void)memp_fput(dbp->mpf, (cp)->page, 0);			\
-	(cp)->page = NULL;						\
-	(void)__BT_TLPUT((dbp), (cp)->lock);				\
-	(cp)->lock = LOCK_INVALID;					\
+	if ((cp)->page != NULL) {					\
+		(void)memp_fput(dbp->mpf, (cp)->page, 0);		\
+		(cp)->page = NULL;					\
+	}								\
+	if ((cp)->lock != LOCK_INVALID) {				\
+		(void)__BT_TLPUT((dbp), (cp)->lock);			\
+		(cp)->lock = LOCK_INVALID;				\
+	}								\
 }
 
 /*
@@ -85,9 +89,9 @@ __bam_cursor(dbp, txn, dbcp)
 	 * All cursors are queued from the master DB structure.  Add the
 	 * cursor to that queue.
 	 */
-	DB_THREAD_LOCK(dbp);
+	CURSOR_SETUP(dbp);
 	TAILQ_INSERT_HEAD(&dbp->curs_queue, dbc, links);
-	DB_THREAD_UNLOCK(dbp);
+	CURSOR_TEARDOWN(dbp);
 
 	*dbcp = dbc;
 	return (0);
@@ -128,13 +132,6 @@ __bam_c_iclose(dbp, dbc)
 	CURSOR *cp;
 	int ret;
 
-	/*
-	 * All cursors are queued from the master DB structure.  For
-	 * now, discard the DB handle which triggered this call, and
-	 * replace it with the cursor's reference.
-	 */
-	dbp = dbc->dbp;
-
 	/* If a cursor key was deleted, perform the actual deletion.  */
 	cp = dbc->internal;
 	ret = F_ISSET(cp, C_DELETED) ? __bam_c_physdel(dbp, cp, NULL) : 0;
@@ -144,9 +141,9 @@ __bam_c_iclose(dbp, dbc)
 		(void)__BT_TLPUT(dbp, cp->lock);
 
 	/* Remove the cursor from the queue. */
-	DB_THREAD_LOCK(dbp);
+	CURSOR_SETUP(dbp);
 	TAILQ_REMOVE(&dbp->curs_queue, dbc, links);
-	DB_THREAD_UNLOCK(dbp);
+	CURSOR_TEARDOWN(dbp);
 
 	/* Discard the structures. */
 	FREE(dbc->internal, sizeof(CURSOR));
@@ -162,8 +159,9 @@ __bam_c_iclose(dbp, dbc)
 static int
 __bam_c_del(dbc, flags)
 	DBC *dbc;
-	int flags;
+	u_int32_t flags;
 {
+	BTREE *t;
 	CURSOR *cp;
 	DB *dbp;
 	DB_LOCK lock;
@@ -175,6 +173,7 @@ __bam_c_del(dbc, flags)
 	DEBUG_LWRITE(dbc->dbp, dbc->txn, "bam_c_del", NULL, NULL, flags);
 
 	cp = dbc->internal;
+	h = NULL;
 
 	/* Check for invalid flags. */
 	if ((ret = __db_cdelchk(dbc->dbp, flags,
@@ -186,6 +185,7 @@ __bam_c_del(dbc, flags)
 		return (DB_KEYEMPTY);
 
 	GETHANDLE(dbc->dbp, dbc->txn, &dbp, ret);
+	t = dbp->internal;
 
 	/*
 	 * We don't physically delete the record until the cursor moves,
@@ -235,8 +235,21 @@ __bam_c_del(dbc, flags)
 	(void)__bam_ca_delete(dbp, pgno, indx, NULL, 0);
 
 	ret = memp_fput(dbp->mpf, h, DB_MPOOL_DIRTY);
+	h = NULL;
+
+	/*
+	 * If it's a btree with record numbers, we have to adjust the
+	 * counts.
+	 */
+	if (F_ISSET(dbp, DB_BT_RECNUM) &&
+	    (ret = __bam_c_getstack(dbp, cp)) == 0) {
+		ret = __bam_adjust(dbp, t, -1);
+		(void)__bam_stkrel(dbp);
+	}
 
-err:	PUTHANDLE(dbp);
+err:	if (h != NULL)
+		(void)memp_fput(dbp->mpf, h, 0);
+	PUTHANDLE(dbp);
 	return (ret);
 }
 
@@ -244,14 +257,14 @@ err:	PUTHANDLE(dbp);
  * __bam_get --
  *	Retrieve a key/data pair from the tree.
  *
- * PUBLIC: int __bam_get __P((DB *, DB_TXN *, DBT *, DBT *, int));
+ * PUBLIC: int __bam_get __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t));
  */
 int
 __bam_get(argdbp, txn, key, data, flags)
 	DB *argdbp;
 	DB_TXN *txn;
 	DBT *key, *data;
-	int flags;
+	u_int32_t flags;
 {
 	DBC dbc;
 	CURSOR cp;
@@ -289,7 +302,7 @@ static int
 __bam_c_get(dbc, key, data, flags)
 	DBC *dbc;
 	DBT *key, *data;
-	int flags;
+	u_int32_t flags;
 {
 	BTREE *t;
 	CURSOR *cp, copy;
@@ -448,7 +461,7 @@ __bam_c_rget(dbp, cp, data, flags)
 	DB *dbp;
 	CURSOR *cp;
 	DBT *data;
-	int flags;
+	u_int32_t flags;
 {
 	BTREE *t;
 	DBT dbt;
@@ -491,7 +504,7 @@ static int
 __bam_c_put(dbc, key, data, flags)
 	DBC *dbc;
 	DBT *key, *data;
-	int flags;
+	u_int32_t flags;
 {
 	BTREE *t;
 	CURSOR *cp, copy;
@@ -499,7 +512,8 @@ __bam_c_put(dbc, key, data, flags)
 	DBT dbt;
 	db_indx_t indx;
 	db_pgno_t pgno;
-	int exact, needkey, ret;
+	u_int32_t iiflags;
+	int exact, needkey, ret, stack;
 	void *arg;
 
 	DEBUG_LWRITE(dbc->dbp, dbc->txn, "bam_c_put",
@@ -524,29 +538,34 @@ __bam_c_put(dbc, key, data, flags)
 	 * To split, we need a valid key for the page.  Since it's a cursor,
 	 * we have to build one.
 	 */
+	stack = 0;
 	if (0) {
-split:		if (needkey) {
+split:		/* Acquire a copy of a key from the page. */
+		if (needkey) {
 			memset(&dbt, 0, sizeof(DBT));
-			ret = __db_ret(dbp, cp->page, indx,
-			    &dbt, &t->bt_rkey.data, &t->bt_rkey.ulen);
-
-			DISCARD(dbp, cp);
-
-			if (ret)
+			if ((ret = __db_ret(dbp, cp->page, indx,
+			    &dbt, &t->bt_rkey.data, &t->bt_rkey.ulen)) != 0)
 				goto err;
 			arg = &dbt;
-		} else {
-			(void)__bam_stkrel(dbp);
+		} else
 			arg = key;
-		}
+
+		/* Discard any pinned pages. */
+		if (stack) {
+			(void)__bam_stkrel(dbp);
+			stack = 0;
+		} else
+			DISCARD(dbp, cp);
+
 		if ((ret = __bam_split(dbp, arg)) != 0)
 			goto err;
 	}
 
-	/* If there's no key supplied, use the cursor. */
-	if (flags == DB_KEYFIRST || flags == DB_KEYLAST)
-		needkey = 0;
-	else {
+	ret = 0;
+	switch (flags) {
+	case DB_AFTER:
+	case DB_BEFORE:
+	case DB_CURRENT:
 		needkey = 1;
 		if (cp->dpgno == PGNO_INVALID) {
 			pgno = cp->pgno;
@@ -555,41 +574,53 @@ split:		if (needkey) {
 			pgno = cp->dpgno;
 			indx = cp->dindx;
 		}
-		/* Acquire the current page. */
-		if ((ret = __bam_lget(dbp,
-		    0, cp->pgno, DB_LOCK_WRITE, &cp->lock)) != 0)
-			goto err;
-		if ((ret = __bam_pget(dbp, &cp->page, &pgno, 0)) != 0)
-			goto err;
-	}
+		/*
+		 * XXX
+		 * This test is right -- we don't currently support duplicates
+		 * in the presence of record numbers, so we don't worry about
+		 * them if DB_BT_RECNUM is set.
+		 */
+		if (F_ISSET(dbp, DB_BT_RECNUM) &&
+		    (flags != DB_CURRENT || F_ISSET(cp, C_DELETED))) {
+			/* Acquire a complete stack. */
+			if ((ret = __bam_c_getstack(dbp, cp)) != 0)
+				goto err;
+			cp->page = t->bt_csp->page;
 
-	ret = 0;
-	switch (flags) {
-	case DB_AFTER:
-	case DB_BEFORE:
-	case DB_CURRENT:
+			stack = 1;
+			iiflags = BI_DOINCR;
+		} else {
+			/* Acquire the current page. */
+			if ((ret = __bam_lget(dbp,
+			    0, cp->pgno, DB_LOCK_WRITE, &cp->lock)) == 0)
+				ret = __bam_pget(dbp, &cp->page, &pgno, 0);
+			if (ret != 0)
+				goto err;
+
+			iiflags = 0;
+		}
 		if ((ret = __bam_iitem(dbp, &cp->page,
-		    &indx, key, data, flags, 0)) == DB_NEEDSPLIT)
+		    &indx, key, data, flags, iiflags)) == DB_NEEDSPLIT)
 			goto split;
 		break;
 	case DB_KEYFIRST:
-		exact = 0;
+		exact = needkey = 0;
 		if ((ret =
 		    __bam_c_search(dbp, cp, key, S_KEYFIRST, 0, &exact)) != 0)
 			goto err;
+		stack = 1;
 
 		indx = cp->dpgno == PGNO_INVALID ? cp->indx : cp->dindx;
 		if ((ret = __bam_iitem(dbp, &cp->page, &indx, key,
 		    data, DB_BEFORE, exact ? 0 : BI_NEWKEY)) == DB_NEEDSPLIT)
 			goto split;
-		if (ret)
-			goto err;
 		break;
 	case DB_KEYLAST:
-		exact = 0;
+		exact = needkey = 0;
 		if ((ret =
 		    __bam_c_search(dbp, cp, key, S_KEYLAST, 0, &exact)) != 0)
 			goto err;
+		stack = 1;
 
 		indx = cp->dpgno == PGNO_INVALID ? cp->indx : cp->dindx;
 		if ((ret = __bam_iitem(dbp, &cp->page, &indx, key,
@@ -623,13 +654,27 @@ split:		if (needkey) {
 	if (copy.lock != LOCK_INVALID)
 		(void)__BT_TLPUT(dbp, copy.lock);
 
-	/* Discard the pinned page. */
-	ret = memp_fput(dbp->mpf, cp->page, 0);
+	/*
+	 * Discard any pages pinned in the tree and their locks, except for
+	 * the leaf page, for which we only discard the pin, not the lock.
+	 *
+	 * Note, the leaf page participated in the stack we acquired, and so
+	 * we have to adjust the stack as necessary.  If there was only a
+	 * single page on the stack, we don't have to free further stack pages.
+	 */
+
+	if (stack && BT_STK_POP(t) != NULL)
+		(void)__bam_stkrel(dbp);
+
+	if ((ret = memp_fput(dbp->mpf, cp->page, 0)) != 0)
+		goto err;
+
 	if (0) {
-err:		if (cp->page != NULL)
-			(void)memp_fput(dbp->mpf, cp->page, 0);
-		if (cp->lock != LOCK_INVALID)
-			(void)__BT_TLPUT(dbp, cp->lock);
+err:		/* Discard any pinned pages. */
+		if (stack)
+			(void)__bam_stkrel(dbp);
+		else
+			DISCARD(dbp, cp);
 		*cp = copy;
 	}
 
@@ -976,7 +1021,7 @@ __bam_c_search(dbp, cp, key, flags, isrecno, exactp)
 	DB *dbp;
 	CURSOR *cp;
 	const DBT *key;
-	u_int flags;
+	u_int32_t flags;
 	int isrecno, *exactp;
 {
 	BTREE *t;
@@ -1032,6 +1077,18 @@ __bam_c_search(dbp, cp, key, flags, isrecno, exactp)
 		} else
 			if ((ret = __bam_c_next(dbp, cp, 0)) != 0)
 				return (ret);
+	/*
+	 * If we don't specify an exact match (the DB_KEYFIRST/DB_KEYLAST or
+	 * DB_SET_RANGE flags were set) __bam_search() may return a deleted
+	 * item.  For DB_KEYFIRST/DB_KEYLAST, we don't care since we're only
+	 * using it for a tree position.  For DB_SET_RANGE, we're returning
+	 * the key, so we have to adjust it.
+	 */
+	if (LF_ISSET(S_DELNO) && cp->dpgno == PGNO_INVALID &&
+	    B_DISSET(GET_BKEYDATA(cp->page, cp->indx + O_INDX)->type))
+		if ((ret = __bam_c_next(dbp, cp, 0)) != 0)
+			return (ret);
+
 	return (0);
 }
 
@@ -1101,7 +1158,7 @@ __bam_cprint(dbp)
 	CURSOR *cp;
 	DBC *dbc;
 
-	DB_THREAD_LOCK(dbp);
+	CURSOR_SETUP(dbp);
 	for (dbc = TAILQ_FIRST(&dbp->curs_queue);
 	    dbc != NULL; dbc = TAILQ_NEXT(dbc, links)) {
 		cp = (CURSOR *)dbc->internal;
@@ -1113,7 +1170,8 @@ __bam_cprint(dbp)
 			fprintf(stderr, "(deleted)");
 		fprintf(stderr, "\n");
 	}
-	DB_THREAD_UNLOCK(dbp);
+	CURSOR_TEARDOWN(dbp);
+
 	return (0);
 }
 #endif /* DEBUG */
@@ -1135,7 +1193,7 @@ __bam_ca_delete(dbp, pgno, indx, curs, key_delete)
 {
 	DBC *dbc;
 	CURSOR *cp;
-	int count;
+	int count;		/* !!!: Has to contain max number of cursors. */
 
 	/*
 	 * Adjust the cursors.  We don't have to review the cursors for any
@@ -1148,8 +1206,7 @@ __bam_ca_delete(dbp, pgno, indx, curs, key_delete)
 	 * locks on the same page, but, cursors within a thread must be single
 	 * threaded, so all we're locking here is the cursor linked list.
 	 */
-	DB_THREAD_LOCK(dbp);
-
+	CURSOR_SETUP(dbp);
 	for (count = 0, dbc = TAILQ_FIRST(&dbp->curs_queue);
 	    dbc != NULL; dbc = TAILQ_NEXT(dbc, links)) {
 		cp = (CURSOR *)dbc->internal;
@@ -1180,8 +1237,8 @@ __bam_ca_delete(dbp, pgno, indx, curs, key_delete)
 				F_SET(cp, C_DELETED);
 			}
 	}
+	CURSOR_TEARDOWN(dbp);
 
-	DB_THREAD_UNLOCK(dbp);
 	return (count);
 }
 
@@ -1192,11 +1249,11 @@ __bam_ca_delete(dbp, pgno, indx, curs, key_delete)
  * PUBLIC: void __bam_ca_di __P((DB *, db_pgno_t, u_int32_t, int));
  */
 void
-__bam_ca_di(dbp, pgno, indx, value)
+__bam_ca_di(dbp, pgno, indx, adjust)
 	DB *dbp;
 	db_pgno_t pgno;
 	u_int32_t indx;
-	int value;
+	int adjust;
 {
 	CURSOR *cp;
 	DBC *dbc;
@@ -1208,16 +1265,16 @@ __bam_ca_di(dbp, pgno, indx, value)
 	/*
 	 * Adjust the cursors.  See the comment in __bam_ca_delete().
 	 */
-	DB_THREAD_LOCK(dbp);
+	CURSOR_SETUP(dbp);
 	for (dbc = TAILQ_FIRST(&dbp->curs_queue);
 	    dbc != NULL; dbc = TAILQ_NEXT(dbc, links)) {
 		cp = (CURSOR *)dbc->internal;
 		if (cp->pgno == pgno && cp->indx >= indx)
-			cp->indx += value;
+			cp->indx += adjust;
 		if (cp->dpgno == pgno && cp->dindx >= indx)
-			cp->dindx += value;
+			cp->dindx += adjust;
 	}
-	DB_THREAD_UNLOCK(dbp);
+	CURSOR_TEARDOWN(dbp);
 }
 
 /*
@@ -1242,7 +1299,7 @@ __bam_ca_dup(dbp, fpgno, first, fi, tpgno, ti)
 	 * No need to test duplicates, this only gets called when moving
 	 * leaf page data items onto a duplicates page.
 	 */
-	DB_THREAD_LOCK(dbp);
+	CURSOR_SETUP(dbp);
 	for (dbc = TAILQ_FIRST(&dbp->curs_queue);
 	    dbc != NULL; dbc = TAILQ_NEXT(dbc, links)) {
 		cp = (CURSOR *)dbc->internal;
@@ -1258,7 +1315,7 @@ __bam_ca_dup(dbp, fpgno, first, fi, tpgno, ti)
 			cp->dindx = ti;
 		}
 	}
-	DB_THREAD_UNLOCK(dbp);
+	CURSOR_TEARDOWN(dbp);
 }
 
 /*
@@ -1285,14 +1342,14 @@ __bam_ca_move(dbp, fpgno, tpgno)
 	 * No need to test duplicates, this only gets called when copying
 	 * over the root page with a leaf or internal page.
 	 */
-	DB_THREAD_LOCK(dbp);
+	CURSOR_SETUP(dbp);
 	for (dbc = TAILQ_FIRST(&dbp->curs_queue);
 	    dbc != NULL; dbc = TAILQ_NEXT(dbc, links)) {
 		cp = (CURSOR *)dbc->internal;
 		if (cp->pgno == fpgno)
 			cp->pgno = tpgno;
 	}
-	DB_THREAD_UNLOCK(dbp);
+	CURSOR_TEARDOWN(dbp);
 }
 
 /*
@@ -1333,7 +1390,7 @@ __bam_ca_replace(dbp, pgno, indx, pass)
 	 * for the cursor as it may have been changed by other cursor update
 	 * routines as the item was deleted/inserted.
 	 */
-	DB_THREAD_LOCK(dbp);
+	CURSOR_SETUP(dbp);
 	switch (pass) {
 	case REPLACE_SETUP:			/* Setup. */
 		for (dbc = TAILQ_FIRST(&dbp->curs_queue);
@@ -1372,7 +1429,7 @@ __bam_ca_replace(dbp, pgno, indx, pass)
 		}
 		break;
 	}
-	DB_THREAD_UNLOCK(dbp);
+	CURSOR_TEARDOWN(dbp);
 }
 
 /*
@@ -1406,7 +1463,7 @@ __bam_ca_split(dbp, ppgno, lpgno, rpgno, split_indx, cleft)
 	 * the cursor is on the right page, it is decremented by the number of
 	 * records split to the left page.
 	 */
-	DB_THREAD_LOCK(dbp);
+	CURSOR_SETUP(dbp);
 	for (dbc = TAILQ_FIRST(&dbp->curs_queue);
 	    dbc != NULL; dbc = TAILQ_NEXT(dbc, links)) {
 		cp = (CURSOR *)dbc->internal;
@@ -1427,7 +1484,7 @@ __bam_ca_split(dbp, ppgno, lpgno, rpgno, split_indx, cleft)
 				cp->dindx -= split_indx;
 			}
 	}
-	DB_THREAD_UNLOCK(dbp);
+	CURSOR_TEARDOWN(dbp);
 }
 
 /*
@@ -1440,16 +1497,17 @@ __bam_c_physdel(dbp, cp, h)
 	CURSOR *cp;
 	PAGE *h;
 {
+	enum { DELETE_ITEM, DELETE_PAGE, NOTHING_FURTHER } cmd;
 	BOVERFLOW bo;
 	BTREE *t;
 	DBT dbt;
 	DB_LOCK lock;
 	db_indx_t indx;
 	db_pgno_t pgno, next_pgno, prev_pgno;
-	int local, normal, ret;
+	int delete_page, local_page, ret;
 
 	t = dbp->internal;
-	ret = 0;
+	delete_page = ret = 0;
 
 	/* Figure out what we're deleting. */
 	if (cp->dpgno == PGNO_INVALID) {
@@ -1476,9 +1534,9 @@ __bam_c_physdel(dbp, cp, h)
 			return (ret);
 		if ((ret = __bam_pget(dbp, &h, &pgno, 0)) != 0)
 			return (ret);
-		local = 1;
+		local_page = 1;
 	} else
-		local = 0;
+		local_page = 0;
 
 	/*
 	 * If we're deleting a duplicate entry and there are other duplicate
@@ -1515,9 +1573,9 @@ __bam_c_physdel(dbp, cp, h)
 
 		if (NUM_ENT(h) == 1 &&
 		    prev_pgno == PGNO_INVALID && next_pgno == PGNO_INVALID)
-			normal = 1;
+			cmd = DELETE_PAGE;
 		else {
-			normal = 0;
+			cmd = DELETE_ITEM;
 
 			/* Delete the duplicate. */
 			if ((ret = __db_drem(dbp, &h, indx, __bam_free)) != 0)
@@ -1536,18 +1594,27 @@ __bam_c_physdel(dbp, cp, h)
 			 */
 			if ((h != NULL && pgno == h->pgno) ||
 			    prev_pgno != PGNO_INVALID)
-				goto done;
+				cmd = NOTHING_FURTHER;
 		}
 
-		/* Release any page we're holding and its lock. */
-		if (local) {
+		/*
+		 * Release any page we're holding and its lock.
+		 *
+		 * !!!
+		 * If there is no subsequent page in the duplicate chain, then
+		 * __db_drem will have put page "h" and set it to NULL.
+		*/
+		if (local_page) {
 			if (h != NULL)
 				(void)memp_fput(dbp->mpf, h, 0);
 			(void)__BT_TLPUT(dbp, lock);
-			local = 0;
+			local_page = 0;
 		}
 
-		/* Acquire the parent page. */
+		if (cmd == NOTHING_FURTHER)
+			goto done;
+
+		/* Acquire the parent page and switch the index to its entry. */
 		if ((ret =
 		    __bam_lget(dbp, 0, cp->pgno, DB_LOCK_WRITE, &lock)) != 0)
 			goto err;
@@ -1555,11 +1622,10 @@ __bam_c_physdel(dbp, cp, h)
 			(void)__BT_TLPUT(dbp, lock);
 			goto err;
 		}
-		local = 1;
-
-		/* Switch to the parent page's entry. */
+		local_page = 1;
 		indx = cp->indx;
-		if (normal)
+
+		if (cmd == DELETE_PAGE)
 			goto btd;
 
 		/*
@@ -1582,47 +1648,60 @@ __bam_c_physdel(dbp, cp, h)
 		goto done;
 	}
 
-	/* Otherwise, do a normal btree delete. */
-btd:	if ((ret = __bam_ditem(dbp, h, indx)) != 0)
-		goto err;
-	if ((ret = __bam_ditem(dbp, h, indx)) != 0)
-		goto err;
-
-	/*
-	 * If the page is empty, delete it.  To delete a leaf page we need a
-	 * copy of a key from the page.  We use the first one that was there,
-	 * since it's the last key that the page held.  We malloc the page
-	 * information instead of using the return key/data memory because
-	 * we've already set them -- the reason that we've already set them
-	 * is because we're (potentially) about to do a reverse split, which
-	 * would make our saved page information useless.
+btd:	/*
+	 * If the page is going to be emptied, delete it.  To delete a leaf
+	 * page we need a copy of a key from the page.  We use the 0th page
+	 * index since it's the last key that the page held.
+	 *
+	 * We malloc the page information instead of using the return key/data
+	 * memory because we've already set them -- the reason we've already
+	 * set them is because we're (potentially) about to do a reverse split,
+	 * which would make our saved page information useless.
 	 *
 	 * XXX
 	 * The following operations to delete a page might deadlock.  I think
 	 * that's OK.  The problem is if we're deleting an item because we're
 	 * closing cursors because we've already deadlocked and want to call
-	 * txn_abort().  If we fail due to deadlock, we'll leave an locked
-	 * empty page in the tree, which won't be empty long because we're
-	 * going to undo the delete.
+	 * txn_abort().  If we fail due to deadlock, we leave a locked empty
+	 * page in the tree, which won't be empty long because we're going to
+	 * undo the delete.
 	 */
-	if (NUM_ENT(h) == 0 && h->pgno != PGNO_ROOT) {
+	if (NUM_ENT(h) == 2 && h->pgno != PGNO_ROOT) {
 		memset(&dbt, 0, sizeof(DBT));
 		dbt.flags = DB_DBT_MALLOC | DB_DBT_INTERNAL;
 		if ((ret = __db_ret(dbp, h, 0, &dbt, NULL, NULL)) != 0)
 			goto err;
+		delete_page = 1;
+	}
 
-		if (local) {
-			(void)memp_fput(dbp->mpf, h, 0);
-			(void)__BT_TLPUT(dbp, lock);
-			local = 0;
-		}
+	/*
+	 * Do a normal btree delete.
+	 *
+	 * XXX
+	 * Delete the key item first, otherwise the duplicate checks in
+	 * __bam_ditem() won't work!
+	 */
+	if ((ret = __bam_ditem(dbp, h, indx)) != 0)
+		goto err;
+	if ((ret = __bam_ditem(dbp, h, indx)) != 0)
+		goto err;
 
-		ret = __bam_dpage(dbp, &dbt);
-		__db_free(dbt.data);
+	/* Discard any remaining locks/pages. */
+	if (local_page) {
+		(void)memp_fput(dbp->mpf, h, 0);
+		(void)__BT_TLPUT(dbp, lock);
+		local_page = 0;
 	}
 
+	/* Delete the page if it was emptied. */
+	if (delete_page)
+		ret = __bam_dpage(dbp, &dbt);
+
 err:
-done:	if (local) {
+done:	if (delete_page)
+		__db_free(dbt.data);
+
+	if (local_page) {
 		(void)memp_fput(dbp->mpf, h, 0);
 		(void)__BT_TLPUT(dbp, lock);
 	}
@@ -1631,3 +1710,43 @@ done:	if (local) {
 		++t->lstat.bt_deleted;
 	return (ret);
 }
+
+/*
+ * __bam_c_getstack --
+ *	Acquire a full stack for a cursor.
+ */
+static int
+__bam_c_getstack(dbp, cp)
+	DB *dbp;
+	CURSOR *cp;
+{
+	DBT dbt;
+	PAGE *h;
+	db_pgno_t pgno;
+	int exact, ret;
+
+	ret = 0;
+	h = NULL;
+	memset(&dbt, 0, sizeof(DBT));
+
+	/* Get the page with the current item on it. */
+	pgno = cp->pgno;
+	if ((ret = __bam_pget(dbp, &h, &pgno, 0)) != 0)
+		return (ret);
+
+	/* Get a copy of a key from the page. */
+	dbt.flags = DB_DBT_MALLOC | DB_DBT_INTERNAL;
+	if ((ret = __db_ret(dbp, h, 0, &dbt, NULL, NULL)) != 0)
+		goto err;
+
+	/* Get a write-locked stack for that page. */
+	exact = 0;
+	ret = __bam_search(dbp, &dbt, S_KEYFIRST, 1, NULL, &exact);
+
+	/* We no longer need the key or the page. */
+err:	if (h != NULL)
+		(void)memp_fput(dbp->mpf, h, 0);
+	if (dbt.data != NULL)
+		__db_free(dbt.data);
+	return (ret);
+}
diff --git a/db2/btree/bt_delete.c b/db2/btree/bt_delete.c
index baa8a25401..7e71037e46 100644
--- a/db2/btree/bt_delete.c
+++ b/db2/btree/bt_delete.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 /*
@@ -47,13 +47,12 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)bt_delete.c	10.25 (Sleepycat) 1/8/98";
+static const char sccsid[] = "@(#)bt_delete.c	10.31 (Sleepycat) 5/6/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
 #include <sys/types.h>
 
-#include <stdio.h>
 #include <string.h>
 #endif
 
@@ -67,14 +66,14 @@ static int __bam_dpages __P((DB *, BTREE *));
  * __bam_delete --
  *	Delete the items referenced by a key.
  *
- * PUBLIC: int __bam_delete __P((DB *, DB_TXN *, DBT *, int));
+ * PUBLIC: int __bam_delete __P((DB *, DB_TXN *, DBT *, u_int32_t));
  */
 int
 __bam_delete(argdbp, txn, key, flags)
 	DB *argdbp;
 	DB_TXN *txn;
 	DBT *key;
-	int flags;
+	u_int32_t flags;
 {
 	BTREE *t;
 	DB *dbp;
@@ -87,8 +86,8 @@ __bam_delete(argdbp, txn, key, flags)
 	stack = 0;
 
 	/* Check for invalid flags. */
-	if ((ret =
-	    __db_delchk(argdbp, flags, F_ISSET(argdbp, DB_AM_RDONLY))) != 0)
+	if ((ret = __db_delchk(argdbp,
+	    key, flags, F_ISSET(argdbp, DB_AM_RDONLY))) != 0)
 		return (ret);
 
 	GETHANDLE(argdbp, txn, &dbp, ret);
@@ -107,6 +106,11 @@ __bam_delete(argdbp, txn, key, flags)
 			break;
 	for (; cnt > 0; --cnt, ++t->lstat.bt_deleted)
 		if (__bam_ca_delete(dbp, h->pgno, indx, NULL, 1) == 0) {
+			/*
+			 * XXX
+			 * Delete the key item first, otherwise the duplicate
+			 * checks in __bam_ditem() won't work!
+			 */
 			if ((ret = __bam_ditem(dbp, h, indx)) != 0)
 				goto err;
 			if ((ret = __bam_ditem(dbp, h, indx)) != 0)
@@ -138,14 +142,14 @@ err:	if (stack)
  * __ram_delete --
  *	Delete the items referenced by a key.
  *
- * PUBLIC: int __ram_delete __P((DB *, DB_TXN *, DBT *, int));
+ * PUBLIC: int __ram_delete __P((DB *, DB_TXN *, DBT *, u_int32_t));
  */
 int
 __ram_delete(argdbp, txn, key, flags)
 	DB *argdbp;
 	DB_TXN *txn;
 	DBT *key;
-	int flags;
+	u_int32_t flags;
 {
 	BKEYDATA bk;
 	BTREE *t;
@@ -159,8 +163,8 @@ __ram_delete(argdbp, txn, key, flags)
 	stack = 0;
 
 	/* Check for invalid flags. */
-	if ((ret =
-	    __db_delchk(argdbp, flags, F_ISSET(argdbp, DB_AM_RDONLY))) != 0)
+	if ((ret = __db_delchk(argdbp,
+	    key, flags, F_ISSET(argdbp, DB_AM_RDONLY))) != 0)
 		return (ret);
 
 	GETHANDLE(argdbp, txn, &dbp, ret);
@@ -284,19 +288,32 @@ __bam_ditem(dbp, h, indx)
 	case P_LBTREE:
 		/*
 		 * If it's a duplicate key, discard the index and don't touch
-		 * the actual page item.  This works because no data item can
-		 * have an index that matches any other index so even if the
-		 * data item is in an index "slot", it won't match any other
-		 * index.
+		 * the actual page item.
+		 *
+		 * XXX
+		 * This works because no data item can have an index matching
+		 * any other index so even if the data item is in a key "slot",
+		 * it won't match any other index.
 		 */
-		if (!(indx % 2)) {
-			if (indx > 0 && h->inp[indx] == h->inp[indx - P_INDX])
-				return (__bam_adjindx(dbp,
-				    h, indx, indx - P_INDX, 0));
+		if ((indx % 2) == 0) {
+			/*
+			 * Check for a duplicate after us on the page.  NOTE:
+			 * we have to delete the key item before deleting the
+			 * data item, otherwise the "indx + P_INDX" calculation
+			 * won't work!
+			 */
 			if (indx + P_INDX < (u_int32_t)NUM_ENT(h) &&
 			    h->inp[indx] == h->inp[indx + P_INDX])
 				return (__bam_adjindx(dbp,
 				    h, indx, indx + O_INDX, 0));
+			/*
+			 * Check for a duplicate before us on the page.  It
+			 * doesn't matter if we delete the key item before or
+			 * after the data item for the purposes of this one.
+			 */
+			if (indx > 0 && h->inp[indx] == h->inp[indx - P_INDX])
+				return (__bam_adjindx(dbp,
+				    h, indx, indx - P_INDX, 0));
 		}
 		/* FALLTHROUGH */
 	case P_LRECNO:
@@ -396,7 +413,8 @@ __bam_dpage(dbp, key)
 	DB_LOCK lock;
 	PAGE *h;
 	db_pgno_t pgno;
-	int exact, level, ret;
+	int level;		/* !!!: has to hold number of tree levels. */
+	int exact, ret;
 
 	ret = 0;
 	t = dbp->internal;
@@ -527,13 +545,14 @@ __bam_dpages(dbp, t)
 		goto release;
 
 	/*
-	 * If we deleted the next-to-last item from the root page, the tree
-	 * can collapse a level.  Try and write lock the remaining root + 1
-	 * page and copy it onto the root page.  If we can't get the lock,
-	 * that's okay, the tree just stays a level deeper than we'd like.
+	 * If we just deleted the last or next-to-last item from the root page,
+	 * the tree can collapse a level.  Write lock the last page referenced
+	 * by the root page and copy it over the root page.  If we can't get a
+	 * write lock, that's okay, the tree just remains a level deeper than
+	 * we'd like.
 	 */
 	h = epg->page;
-	if (h->pgno == PGNO_ROOT && NUM_ENT(h) == 1) {
+	if (h->pgno == PGNO_ROOT && NUM_ENT(h) <= 1) {
 		pgno = TYPE(epg->page) == P_IBTREE ?
 		    GET_BINTERNAL(epg->page, 0)->pgno :
 		    GET_RINTERNAL(epg->page, 0)->pgno;
@@ -573,13 +592,21 @@ __bam_dpages(dbp, t)
 		(void)memp_fset(dbp->mpf, epg->page, DB_MPOOL_DIRTY);
 
 		/*
-		 * Free the last page in that level of the btree and discard
-		 * the lock.  (The call to __bam_free discards our reference
+		 * Free the page copied onto the root page and discard its
+		 * lock.  (The call to __bam_free() discards our reference
 		 * to the page.)
+		 *
+		 * It's possible that the reverse split we're doing involves
+		 * pages from the stack of pages we're deleting.  Don't free
+		 * the page twice.
 		 */
-		(void)__bam_free(dbp, h);
+		 if (h->pgno == (epg + 1)->page->pgno)
+			(void)memp_fput(dbp->mpf, h, 0);
+		else {
+			(void)__bam_free(dbp, h);
+			++t->lstat.bt_freed;
+		}
 		(void)__BT_TLPUT(dbp, lock);
-		++t->lstat.bt_freed;
 
 		/* Adjust the cursors. */
 		__bam_ca_move(dbp, h->pgno, PGNO_ROOT);
@@ -596,12 +623,17 @@ __bam_dpages(dbp, t)
 	 * Don't bother checking for errors.  We've unlinked the subtree from
 	 * the tree, and there's no possibility of recovery.
 	 */
-	for (; ++epg <= t->bt_csp; ++t->lstat.bt_freed) {
+	while (++epg <= t->bt_csp) {
+		/*
+		 * XXX
+		 * Why do we need to do this?  Isn't the page already empty?
+		 */
 		if (NUM_ENT(epg->page) != 0)
 			(void)__bam_ditem(dbp, epg->page, epg->indx);
 
 		(void)__bam_free(dbp, epg->page);
 		(void)__BT_TLPUT(dbp, epg->lock);
+		++t->lstat.bt_freed;
 	}
 	return (0);
 
diff --git a/db2/btree/bt_open.c b/db2/btree/bt_open.c
index dd9f10927a..f5974ec61e 100644
--- a/db2/btree/bt_open.c
+++ b/db2/btree/bt_open.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 /*
@@ -47,7 +47,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)bt_open.c	10.22 (Sleepycat) 1/6/98";
+static const char sccsid[] = "@(#)bt_open.c	10.27 (Sleepycat) 5/6/98";
 #endif /* not lint */
 
 /*
@@ -60,21 +60,15 @@ static const char sccsid[] = "@(#)bt_open.c	10.22 (Sleepycat) 1/6/98";
 
 #ifndef NO_SYSTEM_INCLUDES
 #include <sys/types.h>
-#include <sys/stat.h>
 
 #include <errno.h>
-#include <fcntl.h>
 #include <limits.h>
-#include <stdio.h>
-#include <stdlib.h>
 #include <string.h>
-#include <unistd.h>
 #endif
 
 #include "db_int.h"
 #include "db_page.h"
 #include "btree.h"
-#include "common_ext.h"
 
 static int __bam_keyalloc __P((BTREE *));
 static int __bam_setmeta __P((DB *, BTREE *));
@@ -295,6 +289,7 @@ __bam_setmeta(dbp, t)
 	}
 
 	/* Initialize the tree structure metadata information. */
+	memset(meta, 0, sizeof(BTMETA));
 	ZERO_LSN(meta->lsn);
 	meta->pgno = PGNO_METADATA;
 	meta->magic = DB_BTREEMAGIC;
@@ -303,7 +298,6 @@ __bam_setmeta(dbp, t)
 	meta->maxkey = t->bt_maxkey;
 	meta->minkey = t->bt_minkey;
 	meta->free = PGNO_INVALID;
-	meta->flags = 0;
 	if (dbp->type == DB_RECNO)
 		F_SET(meta, BTM_RECNO);
 	if (F_ISSET(dbp, DB_AM_DUP))
@@ -314,8 +308,6 @@ __bam_setmeta(dbp, t)
 		F_SET(meta, BTM_RECNUM);
 	if (F_ISSET(dbp, DB_RE_RENUMBER))
 		F_SET(meta, BTM_RENUMBER);
-	meta->re_len = 0;
-	meta->re_pad = 0;
 	memcpy(meta->uid, dbp->lock.fileid, DB_FILE_ID_LEN);
 
 	/* Create and initialize a root page. */
diff --git a/db2/btree/bt_page.c b/db2/btree/bt_page.c
index 853317e835..87f2811398 100644
--- a/db2/btree/bt_page.c
+++ b/db2/btree/bt_page.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 /*
@@ -47,14 +47,13 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)bt_page.c	10.7 (Sleepycat) 1/7/98";
+static const char sccsid[] = "@(#)bt_page.c	10.12 (Sleepycat) 5/6/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
 #include <sys/types.h>
 
 #include <errno.h>
-#include <stdio.h>
 #include <string.h>
 #endif
 
@@ -142,7 +141,8 @@ __bam_free(dbp, h)
 	DBT ldbt;
 	DB_LOCK metalock;
 	db_pgno_t pgno;
-	int is_dirty, ret, t_ret;
+	u_int32_t dirty_flag;
+	int ret, t_ret;
 
 	/*
 	 * Retrieve the metadata page and insert the page at the head of
@@ -150,7 +150,7 @@ __bam_free(dbp, h)
 	 * fail, then we need to put the page with which we were called
 	 * back because our caller assumes we take care of it.
 	 */
-	is_dirty = 0;
+	dirty_flag = 0;
 	pgno = PGNO_METADATA;
 	if ((ret = __bam_lget(dbp, 0, pgno, DB_LOCK_WRITE, &metalock)) != 0)
 		goto err;
@@ -178,7 +178,7 @@ __bam_free(dbp, h)
 	 * The page should have nothing interesting on it, re-initialize it,
 	 * leaving only the page number and the LSN.
 	 */
-#ifdef DEBUG
+#ifdef DIAGNOSTIC
 	{ db_pgno_t __pgno; DB_LSN __lsn;
 		__pgno = h->pgno;
 		__lsn = h->lsn;
@@ -198,8 +198,8 @@ __bam_free(dbp, h)
 		ret = t_ret;
 
 	/* Discard the caller's page reference. */
-	is_dirty = DB_MPOOL_DIRTY;
-err:	if ((t_ret = memp_fput(dbp->mpf, h, is_dirty)) != 0 && ret == 0)
+	dirty_flag = DB_MPOOL_DIRTY;
+err:	if ((t_ret = memp_fput(dbp->mpf, h, dirty_flag)) != 0 && ret == 0)
 		ret = t_ret;
 
 	/*
@@ -248,8 +248,10 @@ __bam_lget(dbp, do_couple, pgno, mode, lockp)
 	u_int32_t locker;
 	int ret;
 
-	if (!F_ISSET(dbp, DB_AM_LOCKING))
+	if (!F_ISSET(dbp, DB_AM_LOCKING)) {
+		*lockp = LOCK_INVALID;
 		return (0);
+	}
 
 	locker = dbp->txn == NULL ? dbp->locker : dbp->txn->txnid;
 	dbp->lock.pgno = pgno;
@@ -300,15 +302,15 @@ __bam_lput(dbp, lock)
  * __bam_pget --
  *	The standard page get call.
  *
- * PUBLIC: int __bam_pget __P((DB *, PAGE **, db_pgno_t *, int));
+ * PUBLIC: int __bam_pget __P((DB *, PAGE **, db_pgno_t *, u_int32_t));
  */
 int
-__bam_pget(dbp, hp, pgnop, mflags)
+__bam_pget(dbp, hp, pgnop, mpool_flags)
 	DB *dbp;
 	PAGE **hp;
 	db_pgno_t *pgnop;
-	int mflags;
+	u_int32_t mpool_flags;
 {
 	return (memp_fget((dbp)->mpf,
-	    pgnop, mflags, hp) == 0 ? 0 : __db_pgerr(dbp, *pgnop));
+	    pgnop, mpool_flags, hp) == 0 ? 0 : __db_pgerr(dbp, *pgnop));
 }
diff --git a/db2/btree/bt_put.c b/db2/btree/bt_put.c
index 87f3fd9aff..a93faac98c 100644
--- a/db2/btree/bt_put.c
+++ b/db2/btree/bt_put.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 /*
@@ -47,15 +47,13 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)bt_put.c	10.38 (Sleepycat) 1/8/98";
+static const char sccsid[] = "@(#)bt_put.c	10.45 (Sleepycat) 5/25/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
 #include <sys/types.h>
 
 #include <errno.h>
-#include <stdio.h>
-#include <stdlib.h>
 #include <string.h>
 #endif
 
@@ -75,21 +73,22 @@ static u_int32_t __bam_partsize __P((DBT *, PAGE *, u_int32_t));
  * __bam_put --
  *	Add a new key/data pair or replace an existing pair (btree).
  *
- * PUBLIC: int __bam_put __P((DB *, DB_TXN *, DBT *, DBT *, int));
+ * PUBLIC: int __bam_put __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t));
  */
 int
 __bam_put(argdbp, txn, key, data, flags)
 	DB *argdbp;
 	DB_TXN *txn;
 	DBT *key, *data;
-	int flags;
+	u_int32_t flags;
 {
 	BTREE *t;
 	CURSOR c;
 	DB *dbp;
 	PAGE *h;
 	db_indx_t indx;
-	int exact, iflags, isdeleted, newkey, replace, ret, stack;
+	u_int32_t iitem_flags, insert_flags;
+	int exact, isdeleted, newkey, ret, stack;
 
 	DEBUG_LWRITE(argdbp, txn, "bam_put", key, data, flags);
 
@@ -121,14 +120,13 @@ retry:	/*
 	 * been marked for deletion, we do a replace, otherwise, it has to be
 	 * a set of duplicates, and we simply append a new one to the set.
 	 */
-	isdeleted = replace = 0;
+	isdeleted = 0;
 	if (exact) {
 		if ((ret = __bam_isdeleted(dbp, h, indx, &isdeleted)) != 0)
 			goto err;
-		if (isdeleted) {
-			replace = 1;
+		if (isdeleted)
 			__bam_ca_replace(dbp, h->pgno, indx, REPLACE_SETUP);
-		} else
+		else
 			if (flags == DB_NOOVERWRITE) {
 				ret = DB_KEYEXIST;
 				goto err;
@@ -179,42 +177,38 @@ retry:	/*
 				t->bt_csp->page = h = c.page;
 				indx = c.dindx;
 			}
-			iflags = DB_AFTER;
+			insert_flags = DB_AFTER;
 		} else
-			iflags = DB_CURRENT;
+			insert_flags = DB_CURRENT;
 	} else
-		iflags = DB_BEFORE;
+		insert_flags = DB_BEFORE;
 
 	/*
 	 * The pages we're using may be modified by __bam_iitem(), so make
 	 * sure we reset the stack.
 	 */
-	ret = __bam_iitem(dbp,
-	    &h, &indx, key, data, iflags, newkey ? BI_NEWKEY : 0);
+	iitem_flags = 0;
+	if (newkey)
+		iitem_flags |= BI_NEWKEY;
+	if (isdeleted)
+		iitem_flags |= BI_DOINCR;
+	ret = __bam_iitem(dbp, &h, &indx, key, data, insert_flags, iitem_flags);
 	t->bt_csp->page = h;
 	t->bt_csp->indx = indx;
 
 	switch (ret) {
 	case 0:
-		/*
-		 * Done.  Clean up the cursor, and, if we're doing record
-		 * numbers, adjust the internal page counts.
-		 */
-		if (replace)
+		/* Done.  Clean up the cursor. */
+		if (isdeleted)
 			__bam_ca_replace(dbp, h->pgno, indx, REPLACE_SUCCESS);
-
-		if (!replace && F_ISSET(dbp, DB_BT_RECNUM))
-			ret = __bam_adjust(dbp, t, 1);
 		break;
 	case DB_NEEDSPLIT:
 		/*
 		 * We have to split the page.  Back out the cursor setup,
 		 * discard the stack of pages, and do the split.
 		 */
-		if (replace) {
-			replace = 0;
+		if (isdeleted)
 			__bam_ca_replace(dbp, h->pgno, indx, REPLACE_FAILED);
-		}
 
 		(void)__bam_stkrel(dbp);
 		stack = 0;
@@ -225,7 +219,7 @@ retry:	/*
 		goto retry;
 		/* NOTREACHED */
 	default:
-		if (replace)
+		if (isdeleted)
 			__bam_ca_replace(dbp, h->pgno, indx, REPLACE_FAILED);
 		break;
 	}
@@ -393,7 +387,8 @@ __bam_lookup(dbp, key, exactp)
 				for (indx = 0;
 				    indx < (db_indx_t)(NUM_ENT(h) - P_INDX) &&
 				    h->inp[indx] == h->inp[indx + P_INDX];
-				    indx += P_INDX);
+				    indx += P_INDX)
+					;
 				e.indx = indx;
 			}
 			goto fast;
@@ -427,7 +422,7 @@ slow:	return (__bam_search(dbp, key, S_INSERT, 1, NULL, exactp));
  *	Insert an item into the tree.
  *
  * PUBLIC: int __bam_iitem __P((DB *,
- * PUBLIC:    PAGE **, db_indx_t *, DBT *, DBT *, int, int));
+ * PUBLIC:    PAGE **, db_indx_t *, DBT *, DBT *, u_int32_t, u_int32_t));
  */
 int
 __bam_iitem(dbp, hp, indxp, key, data, op, flags)
@@ -435,13 +430,13 @@ __bam_iitem(dbp, hp, indxp, key, data, op, flags)
 	PAGE **hp;
 	db_indx_t *indxp;
 	DBT *key, *data;
-	int op, flags;
+	u_int32_t op, flags;
 {
 	BTREE *t;
 	BKEYDATA *bk;
 	DBT tdbt;
 	PAGE *h;
-	db_indx_t indx;
+	db_indx_t indx, nbytes;
 	u_int32_t data_size, have_bytes, need_bytes, needed;
 	int bigkey, bigdata, dupadjust, replace, ret;
 
@@ -466,12 +461,27 @@ __bam_iitem(dbp, hp, indxp, key, data, op, flags)
 			++*indxp;
 
 		/* Remove the current item if it's a DB_CURRENT op. */
-		if (op == DB_CURRENT && (ret = __db_ditem(dbp, *hp, *indxp,
-		    BKEYDATA_SIZE(GET_BKEYDATA(*hp, *indxp)->len))) != 0)
-			return (ret);
+		if (op == DB_CURRENT) {
+			bk = GET_BKEYDATA(*hp, *indxp);
+			switch (B_TYPE(bk->type)) {
+			case B_KEYDATA:
+				nbytes = BKEYDATA_SIZE(bk->len);
+				break;
+			case B_OVERFLOW:
+				nbytes = BOVERFLOW_SIZE;
+				break;
+			default:
+				return (__db_pgfmt(dbp, h->pgno));
+			}
+			if ((ret = __db_ditem(dbp, *hp, *indxp, nbytes)) != 0)
+				return (ret);
+		}
 
 		/* Put the new/replacement item onto the page. */
-		return (__db_dput(dbp, data, hp, indxp, __bam_new));
+		if ((ret = __db_dput(dbp, data, hp, indxp, __bam_new)) != 0)
+			return (ret);
+
+		goto done;
 	}
 
 	/* Handle fixed-length records: build the real record. */
@@ -568,7 +578,7 @@ __bam_iitem(dbp, hp, indxp, key, data, op, flags)
 		case DB_BEFORE:		/* 2. Insert a new key/data pair. */
 			break;
 		default:
-			abort();
+			return (EINVAL);
 		}
 
 		/* Add the key. */
@@ -638,7 +648,7 @@ __bam_iitem(dbp, hp, indxp, key, data, op, flags)
 			replace = 1;
 			break;
 		default:
-			abort();
+			return (EINVAL);
 		}
 	}
 
@@ -666,9 +676,8 @@ __bam_iitem(dbp, hp, indxp, key, data, op, flags)
 			return (ret);
 	}
 
-	++t->lstat.bt_added;
-
-	ret = memp_fset(dbp->mpf, h, DB_MPOOL_DIRTY);
+	if ((ret = memp_fset(dbp->mpf, h, DB_MPOOL_DIRTY)) != 0)
+		return (ret);
 
 	/*
 	 * If the page is at least 50% full, and we added a duplicate, see if
@@ -681,9 +690,25 @@ __bam_iitem(dbp, hp, indxp, key, data, op, flags)
 			return (ret);
 	}
 
+	/*
+	 * If we've changed the record count, update the tree.  Record counts
+	 * need to be updated in recno databases and in btree databases where
+	 * we are supporting records.  In both cases, adjust the count if the
+	 * operation wasn't performed on the current record or when the caller
+	 * overrides and wants the adjustment made regardless.
+	 */
+done:	if (LF_ISSET(BI_DOINCR) ||
+	    (op != DB_CURRENT &&
+	    (F_ISSET(dbp, DB_BT_RECNUM) || dbp->type == DB_RECNO)))
+		if ((ret = __bam_adjust(dbp, t, 1)) != 0)
+			return (ret);
+
+	/* If we've modified a recno file, set the flag */
 	if (t->bt_recno != NULL)
 		F_SET(t->bt_recno, RECNO_MODIFIED);
 
+	++t->lstat.bt_added;
+
 	return (ret);
 }
 
@@ -1036,8 +1061,8 @@ __bam_partial(dbp, dbt, h, indx, nbytes)
 	BOVERFLOW *bo;
 	DBT copy;
 	u_int32_t len, tlen;
-	int ret;
 	u_int8_t *p;
+	int ret;
 
 	COMPQUIET(bo, NULL);
 
@@ -1065,59 +1090,62 @@ __bam_partial(dbp, dbt, h, indx, nbytes)
 		bk->len = 0;
 	}
 
-	/* We use nul bytes for extending the record, get it over with. */
+	/*
+	 * We use nul bytes for any part of the record that isn't specified,
+	 * get it over with.
+	 */
 	memset(t->bt_rdata.data, 0, nbytes);
 
-	tlen = 0;
 	if (B_TYPE(bk->type) == B_OVERFLOW) {
-		/* Take up to doff bytes from the record. */
+		/*
+		 * In the case of an overflow record, we shift things around
+		 * in the current record rather than allocate a separate copy.
+		 */
 		memset(&copy, 0, sizeof(copy));
 		if ((ret = __db_goff(dbp, &copy, bo->tlen,
 		    bo->pgno, &t->bt_rdata.data, &t->bt_rdata.ulen)) != 0)
 			return (ret);
-		tlen += dbt->doff;
+
+		/* Skip any leading data from the original record. */
+		tlen = dbt->doff;
+		p = (u_int8_t *)t->bt_rdata.data + dbt->doff;
 
 		/*
-		 * If the original record was larger than the offset:
-		 *	If dlen > size, shift the remaining data down.
-		 *	If dlen < size, shift the remaining data up.
+		 * Copy in any trailing data from the original record.
+		 *
+		 * If the original record was larger than the original offset
+		 * plus the bytes being deleted, there is trailing data in the
+		 * original record we need to preserve.  If we aren't deleting
+		 * the same number of bytes as we're inserting, copy it up or
+		 * down, into place.
+		 *
 		 * Use memmove(), the regions may overlap.
 		 */
-		p = t->bt_rdata.data;
-		if (bo->tlen > dbt->doff)
-			if (dbt->dlen > dbt->size) {
-				tlen += len = bo->tlen -
-				    dbt->doff - (dbt->dlen - dbt->size);
-				memmove(p + dbt->doff + dbt->size,
-				    p + dbt->doff + dbt->dlen, len);
-			} else if (dbt->dlen < dbt->size) {
-				tlen += len = bo->tlen -
-				    dbt->doff - (dbt->size - dbt->dlen);
-				memmove(p + dbt->doff + dbt->dlen,
-				    p + dbt->doff + dbt->size, len);
-			} else
-				tlen += bo->tlen - dbt->doff;
+		if (bo->tlen > dbt->doff + dbt->dlen) {
+			len = bo->tlen - (dbt->doff + dbt->dlen);
+			if (dbt->dlen != dbt->size)
+				memmove(p + dbt->size, p + dbt->dlen, len);
+			tlen += len;
+		}
 
-		/* Copy in the user's data. */
-		memcpy((u_int8_t *)t->bt_rdata.data + dbt->doff,
-		    dbt->data, dbt->size);
+		/* Copy in the application provided data. */
+		memcpy(p, dbt->data, dbt->size);
 		tlen += dbt->size;
 	} else {
-		/* Take up to doff bytes from the record. */
+		/* Copy in any leading data from the original record. */
 		memcpy(t->bt_rdata.data,
 		    bk->data, dbt->doff > bk->len ? bk->len : dbt->doff);
-		tlen += dbt->doff;
+		tlen = dbt->doff;
+		p = (u_int8_t *)t->bt_rdata.data + dbt->doff;
 
-		/* Copy in the user's data. */
-		memcpy((u_int8_t *)t->bt_rdata.data +
-		    dbt->doff, dbt->data, dbt->size);
+		/* Copy in the application provided data. */
+		memcpy(p, dbt->data, dbt->size);
 		tlen += dbt->size;
 
-		/* Copy in any remaining data. */
+		/* Copy in any trailing data from the original record. */
 		len = dbt->doff + dbt->dlen;
 		if (bk->len > len) {
-			memcpy((u_int8_t *)t->bt_rdata.data + dbt->doff +
-			    dbt->size, bk->data + len, bk->len - len);
+			memcpy(p + dbt->size, bk->data + len, bk->len - len);
 			tlen += bk->len - len;
 		}
 	}
diff --git a/db2/btree/bt_rec.c b/db2/btree/bt_rec.c
index 90ee13764e..fe33825ec4 100644
--- a/db2/btree/bt_rec.c
+++ b/db2/btree/bt_rec.c
@@ -1,23 +1,20 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)bt_rec.c	10.18 (Sleepycat) 12/15/97";
+static const char sccsid[] = "@(#)bt_rec.c	10.21 (Sleepycat) 4/28/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
 #include <sys/types.h>
 
-#include <ctype.h>
 #include <errno.h>
-#include <stddef.h>
-#include <stdlib.h>
 #include <string.h>
 #endif
 
@@ -27,7 +24,6 @@ static const char sccsid[] = "@(#)bt_rec.c	10.18 (Sleepycat) 12/15/97";
 #include "hash.h"
 #include "btree.h"
 #include "log.h"
-#include "db_dispatch.h"
 #include "common_ext.h"
 
 /*
@@ -51,7 +47,7 @@ __bam_pg_alloc_recover(logp, dbtp, lsnp, redo, info)
 	PAGE *pagep;
 	DB *file_dbp, *mdbp;
 	db_pgno_t pgno;
-	int cmp_n, cmp_p, created, modified, ret;
+	int cmp_n, cmp_p, modified, ret;
 
 	REC_PRINT(__bam_pg_alloc_print);
 	REC_INTRO(__bam_pg_alloc_read);
@@ -86,18 +82,17 @@ __bam_pg_alloc_recover(logp, dbtp, lsnp, redo, info)
 	}
 
 	/* Fix up the allocated page. */
-	created = IS_ZERO_LSN(LSN(pagep));
 	modified = 0;
 	cmp_n = log_compare(lsnp, &LSN(pagep));
 	cmp_p = log_compare(&LSN(pagep), &argp->page_lsn);
-	if ((created || cmp_p == 0) && redo) {
+	if (cmp_p == 0 && redo) {
 		/* Need to redo update described. */
 		P_INIT(pagep, file_dbp->pgsize,
 		    argp->pgno, PGNO_INVALID, PGNO_INVALID, 0, argp->ptype);
 
 		pagep->lsn = *lsnp;
 		modified = 1;
-	} else if ((created || cmp_n == 0) && !redo) {
+	} else if (cmp_n == 0 && !redo) {
 		/* Need to undo update described. */
 		P_INIT(pagep, file_dbp->pgsize,
 		    argp->pgno, PGNO_INVALID, meta->free, 0, P_INVALID);
diff --git a/db2/btree/bt_recno.c b/db2/btree/bt_recno.c
index 70ab63b8d4..38dbbd1c55 100644
--- a/db2/btree/bt_recno.c
+++ b/db2/btree/bt_recno.c
@@ -1,14 +1,14 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1997
+ * Copyright (c) 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)bt_recno.c	10.26 (Sleepycat) 1/8/98";
+static const char sccsid[] = "@(#)bt_recno.c	10.37 (Sleepycat) 5/23/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -16,8 +16,6 @@ static const char sccsid[] = "@(#)bt_recno.c	10.26 (Sleepycat) 1/8/98";
 
 #include <errno.h>
 #include <limits.h>
-#include <stdio.h>
-#include <stdlib.h>
 #include <string.h>
 #endif
 
@@ -25,16 +23,17 @@ static const char sccsid[] = "@(#)bt_recno.c	10.26 (Sleepycat) 1/8/98";
 #include "db_page.h"
 #include "btree.h"
 
-static int __ram_add __P((DB *, db_recno_t *, DBT *, int, int));
+static int __ram_add __P((DB *, db_recno_t *, DBT *, u_int32_t, u_int32_t));
 static int __ram_c_close __P((DBC *));
-static int __ram_c_del __P((DBC *, int));
-static int __ram_c_get __P((DBC *, DBT *, DBT *, int));
-static int __ram_c_put __P((DBC *, DBT *, DBT *, int));
+static int __ram_c_del __P((DBC *, u_int32_t));
+static int __ram_c_get __P((DBC *, DBT *, DBT *, u_int32_t));
+static int __ram_c_put __P((DBC *, DBT *, DBT *, u_int32_t));
 static int __ram_fmap __P((DB *, db_recno_t));
-static int __ram_get __P((DB *, DB_TXN *, DBT *, DBT *, int));
-static int __ram_put __P((DB *, DB_TXN *, DBT *, DBT *, int));
+static int __ram_get __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t));
+static int __ram_iget __P((DB *, DBT *, DBT *));
+static int __ram_put __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t));
 static int __ram_source __P((DB *, RECNO *, const char *));
-static int __ram_sync __P((DB *, int));
+static int __ram_sync __P((DB *, u_int32_t));
 static int __ram_update __P((DB *, db_recno_t, int));
 static int __ram_vmap __P((DB *, db_recno_t));
 static int __ram_writeback __P((DB *));
@@ -142,7 +141,7 @@ __ram_open(dbp, type, dbinfo)
 
 err:	/* If we mmap'd a source file, discard it. */
 	if (rp->re_smap != NULL)
-		(void)__db_unmap(rp->re_smap, rp->re_msize);
+		(void)__db_unmapfile(rp->re_smap, rp->re_msize);
 
 	/* If we opened a source file, discard it. */
 	if (rp->re_fd != -1)
@@ -199,9 +198,9 @@ __ram_cursor(dbp, txn, dbcp)
 	 * All cursors are queued from the master DB structure.  Add the
 	 * cursor to that queue.
 	 */
-	DB_THREAD_LOCK(dbp);
+	CURSOR_SETUP(dbp);
 	TAILQ_INSERT_HEAD(&dbp->curs_queue, dbc, links);
-	DB_THREAD_UNLOCK(dbp);
+	CURSOR_TEARDOWN(dbp);
 
 	*dbcp = dbc;
 	return (0);
@@ -216,16 +215,10 @@ __ram_get(argdbp, txn, key, data, flags)
 	DB *argdbp;
 	DB_TXN *txn;
 	DBT *key, *data;
-	int flags;
+	u_int32_t flags;
 {
-	BTREE *t;
 	DB *dbp;
-	PAGE *h;
-	db_indx_t indx;
-	db_recno_t recno;
-	int exact, ret, stack;
-
-	stack = 0;
+	int ret;
 
 	DEBUG_LWRITE(argdbp, txn, "ram_get", key, NULL, flags);
 
@@ -234,6 +227,30 @@ __ram_get(argdbp, txn, key, data, flags)
 		return (ret);
 
 	GETHANDLE(argdbp, txn, &dbp, ret);
+
+	ret = __ram_iget(dbp, key, data);
+
+	PUTHANDLE(dbp);
+	return (ret);
+}
+
+/*
+ * __ram_iget --
+ *	Internal ram get function, called for both standard and cursor
+ *	get after the flags have been checked.
+ */
+static int
+__ram_iget(dbp, key, data)
+	DB *dbp;
+	DBT *key, *data;
+{
+	BTREE *t;
+	PAGE *h;
+	db_indx_t indx;
+	db_recno_t recno;
+	int exact, ret, stack;
+
+	stack = 0;
 	t = dbp->internal;
 
 	/* Check the user's record number and fill in as necessary. */
@@ -265,7 +282,6 @@ done:	/* Discard the stack. */
 	if (stack)
 		__bam_stkrel(dbp);
 
-	PUTHANDLE(dbp);
 	return (ret);
 }
 
@@ -278,7 +294,7 @@ __ram_put(argdbp, txn, key, data, flags)
 	DB *argdbp;
 	DB_TXN *txn;
 	DBT *key, *data;
-	int flags;
+	u_int32_t flags;
 {
 	BTREE *t;
 	DB *dbp;
@@ -324,7 +340,7 @@ __ram_put(argdbp, txn, key, data, flags)
 static int
 __ram_sync(argdbp, flags)
 	DB *argdbp;
-	int flags;
+	u_int32_t flags;
 {
 	DB *dbp;
 	int ret;
@@ -361,7 +377,7 @@ __ram_close(argdbp)
 
 	/* Close any underlying mmap region. */
 	if (rp->re_smap != NULL)
-		(void)__db_unmap(rp->re_smap, rp->re_msize);
+		(void)__db_unmapfile(rp->re_smap, rp->re_msize);
 
 	/* Close any backing source file descriptor. */
 	if (rp->re_fd != -1)
@@ -403,17 +419,10 @@ __ram_c_iclose(dbp, dbc)
 	DB *dbp;
 	DBC *dbc;
 {
-	/*
-	 * All cursors are queued from the master DB structure.  For
-	 * now, discard the DB handle which triggered this call, and
-	 * replace it with the cursor's reference.
-	 */
-	dbp = dbc->dbp;
-
 	/* Remove the cursor from the queue. */
-	DB_THREAD_LOCK(dbp);
+	CURSOR_SETUP(dbp);
 	TAILQ_REMOVE(&dbp->curs_queue, dbc, links);
-	DB_THREAD_UNLOCK(dbp);
+	CURSOR_TEARDOWN(dbp);
 
 	/* Discard the structures. */
 	FREE(dbc->internal, sizeof(RCURSOR));
@@ -429,7 +438,7 @@ __ram_c_iclose(dbp, dbc)
 static int
 __ram_c_del(dbc, flags)
 	DBC *dbc;
-	int flags;
+	u_int32_t flags;
 {
 	DBT key;
 	RCURSOR *cp;
@@ -466,7 +475,7 @@ static int
 __ram_c_get(dbc, key, data, flags)
 	DBC *dbc;
 	DBT *key, *data;
-	int flags;
+	u_int32_t flags;
 {
 	BTREE *t;
 	DB *dbp;
@@ -537,7 +546,7 @@ retry:	/* Update the record number. */
 
 	/*
 	 * Return the key if the user didn't give us one, and then pass it
-	 * into __ram_get().
+	 * into __ram_iget().
 	 */
 	if (flags != DB_SET && flags != DB_SET_RANGE &&
 	    (ret = __db_retcopy(key, &cp->recno, sizeof(cp->recno),
@@ -555,7 +564,7 @@ retry:	/* Update the record number. */
 	 *
 	 * Skip any keys that don't really exist.
 	 */
-	if ((ret = __ram_get(dbp, dbc->txn, key, data, 0)) != 0)
+	if ((ret = __ram_iget(dbp, key, data)) != 0)
 		if (ret == DB_KEYEMPTY &&
 		    (flags == DB_NEXT || flags == DB_PREV))
 			goto retry;
@@ -575,7 +584,7 @@ static int
 __ram_c_put(dbc, key, data, flags)
 	DBC *dbc;
 	DBT *key, *data;
-	int flags;
+	u_int32_t flags;
 {
 	BTREE *t;
 	RCURSOR *cp, copy;
@@ -624,28 +633,21 @@ split:		arg = &cp->recno;
 	if ((ret = __bam_stkrel(dbp)) != 0)
 		goto err;
 
-	if (flags != DB_CURRENT) {
-		/* Adjust the counts. */
-		if ((ret = __bam_adjust(dbp, t, 1)) != 0)
-			goto err;
-
-		switch (flags) {
-		case DB_AFTER:
-			/* Adjust the cursors. */
-			__ram_ca(dbp, cp->recno, CA_IAFTER);
-
-			/* Set this cursor to reference the new record. */
-			cp->recno = copy.recno + 1;
-			break;
-		case DB_BEFORE:
-			/* Adjust the cursors. */
-			__ram_ca(dbp, cp->recno, CA_IBEFORE);
+	switch (flags) {
+	case DB_AFTER:
+		/* Adjust the cursors. */
+		__ram_ca(dbp, cp->recno, CA_IAFTER);
 
-			/* Set this cursor to reference the new record. */
-			cp->recno = copy.recno;
-			break;
-		}
+		/* Set this cursor to reference the new record. */
+		cp->recno = copy.recno + 1;
+		break;
+	case DB_BEFORE:
+		/* Adjust the cursors. */
+		__ram_ca(dbp, cp->recno, CA_IBEFORE);
 
+		/* Set this cursor to reference the new record. */
+		cp->recno = copy.recno;
+		break;
 	}
 
 	/*
@@ -679,7 +681,7 @@ __ram_ca(dbp, recno, op)
 	/*
 	 * Adjust the cursors.  See the comment in __bam_ca_delete().
 	 */
-	DB_THREAD_LOCK(dbp);
+	CURSOR_SETUP(dbp);
 	for (dbc = TAILQ_FIRST(&dbp->curs_queue);
 	    dbc != NULL; dbc = TAILQ_NEXT(dbc, links)) {
 		cp = (RCURSOR *)dbc->internal;
@@ -698,7 +700,7 @@ __ram_ca(dbp, recno, op)
 			break;
 		}
 	}
-	DB_THREAD_UNLOCK(dbp);
+	CURSOR_TEARDOWN(dbp);
 }
 
 #ifdef DEBUG
@@ -715,14 +717,15 @@ __ram_cprint(dbp)
 	DBC *dbc;
 	RCURSOR *cp;
 
-	DB_THREAD_LOCK(dbp);
+	CURSOR_SETUP(dbp);
 	for (dbc = TAILQ_FIRST(&dbp->curs_queue);
 	    dbc != NULL; dbc = TAILQ_NEXT(dbc, links)) {
 		cp = (RCURSOR *)dbc->internal;
 		fprintf(stderr,
 		    "%#0x: recno: %lu\n", (u_int)cp, (u_long)cp->recno);
 	}
-	DB_THREAD_UNLOCK(dbp);
+	CURSOR_TEARDOWN(dbp);
+
 	return (0);
 }
 #endif /* DEBUG */
@@ -853,11 +856,11 @@ __ram_source(dbp, rp, fname)
 	const char *fname;
 {
 	size_t size;
-	u_int32_t mbytes, bytes;
-	int oflags, ret;
+	u_int32_t bytes, mbytes, oflags;
+	int ret;
 
 	if ((ret = __db_appname(dbp->dbenv,
-	    DB_APP_DATA, NULL, fname, NULL, &rp->re_source)) != 0)
+	    DB_APP_DATA, NULL, fname, 0, NULL, &rp->re_source)) != 0)
 		return (ret);
 
 	oflags = F_ISSET(dbp, DB_AM_RDONLY) ? DB_RDONLY : 0;
@@ -886,7 +889,8 @@ __ram_source(dbp, rp, fname)
 	}
 
 	size = mbytes * MEGABYTE + bytes;
-	if ((ret = __db_map(rp->re_fd, (size_t)size, 1, 1, &rp->re_smap)) != 0)
+	if ((ret = __db_mapfile(rp->re_source,
+	    rp->re_fd, (size_t)size, 1, &rp->re_smap)) != 0)
 		goto err;
 	rp->re_cmap = rp->re_smap;
 	rp->re_emap = (u_int8_t *)rp->re_smap + (rp->re_msize = size);
@@ -952,7 +956,7 @@ __ram_writeback(dbp)
 	 * open will fail.
 	 */
 	if (rp->re_smap != NULL) {
-		(void)__db_unmap(rp->re_smap, rp->re_msize);
+		(void)__db_unmapfile(rp->re_smap, rp->re_msize);
 		rp->re_smap = NULL;
 	}
 
@@ -1078,19 +1082,22 @@ __ram_fmap(dbp, top)
 
 	sp = (u_int8_t *)rp->re_cmap;
 	ep = (u_int8_t *)rp->re_emap;
-	while (recno <= top) {
+	while (recno < top) {
 		if (sp >= ep) {
 			F_SET(rp, RECNO_EOF);
 			return (DB_NOTFOUND);
 		}
 		len = rp->re_len;
 		for (p = t->bt_rdata.data;
-		    sp < ep && len > 0; *p++ = *sp++, --len);
+		    sp < ep && len > 0; *p++ = *sp++, --len)
+			;
 
 		/*
-		 * Another process may have read some portion of the input
-		 * file already, in which case we just want to discard the
-		 * new record.
+		 * Another process may have read this record from the input
+		 * file and stored it into the database already, in which
+		 * case we don't need to repeat that operation.  We detect
+		 * this by checking if the last record we've read is greater
+		 * or equal to the number of records in the database.
 		 *
 		 * XXX
 		 * We should just do a seek, since the records are fixed
@@ -1138,17 +1145,20 @@ __ram_vmap(dbp, top)
 
 	sp = (u_int8_t *)rp->re_cmap;
 	ep = (u_int8_t *)rp->re_emap;
-	while (recno <= top) {
+	while (recno < top) {
 		if (sp >= ep) {
 			F_SET(rp, RECNO_EOF);
 			return (DB_NOTFOUND);
 		}
-		for (data.data = sp; sp < ep && *sp != delim; ++sp);
+		for (data.data = sp; sp < ep && *sp != delim; ++sp)
+			;
 
 		/*
-		 * Another process may have read some portion of the input
-		 * file already, in which case we just want to discard the
-		 * new record.
+		 * Another process may have read this record from the input
+		 * file and stored it into the database already, in which
+		 * case we don't need to repeat that operation.  We detect
+		 * this by checking if the last record we've read is greater
+		 * or equal to the number of records in the database.
 		 */
 		if (rp->re_last >= recno) {
 			data.size = sp - (u_int8_t *)data.data;
@@ -1172,12 +1182,13 @@ __ram_add(dbp, recnop, data, flags, bi_flags)
 	DB *dbp;
 	db_recno_t *recnop;
 	DBT *data;
-	int flags, bi_flags;
+	u_int32_t flags, bi_flags;
 {
+	BKEYDATA *bk;
 	BTREE *t;
 	PAGE *h;
 	db_indx_t indx;
-	int exact, ret, stack;
+	int exact, isdeleted, ret, stack;
 
 	t = dbp->internal;
 
@@ -1190,34 +1201,63 @@ retry:	/* Find the slot for insertion. */
 	stack = 1;
 
 	/*
-	 * The recno access method doesn't currently support duplicates, so
-	 * if an identical key is already in the tree we're either overwriting
-	 * it or an error is returned.
+	 * If DB_NOOVERWRITE is set and the item already exists in the tree,
+	 * return an error unless the item has been marked for deletion.
 	 */
-	if (exact && LF_ISSET(DB_NOOVERWRITE)) {
-		ret = DB_KEYEXIST;
-		goto err;
+	isdeleted = 0;
+	if (exact) {
+		bk = GET_BKEYDATA(h, indx);
+		if (B_DISSET(bk->type)) {
+			isdeleted = 1;
+			__bam_ca_replace(dbp, h->pgno, indx, REPLACE_SETUP);
+		} else
+			if (LF_ISSET(DB_NOOVERWRITE)) {
+				ret = DB_KEYEXIST;
+				goto err;
+			}
 	}
 
 	/*
 	 * Select the arguments for __bam_iitem() and do the insert.  If the
 	 * key is an exact match, or we're replacing the data item with a
-	 * new data item.  If the key isn't an exact match, we're inserting
-	 * a new key/data pair, before the search location.
+	 * new data item, replace the current item.  If the key isn't an exact
+	 * match, we're inserting a new key/data pair, before the search
+	 * location.
 	 */
-	if ((ret = __bam_iitem(dbp, &h, &indx, NULL,
-	    data, exact ? DB_CURRENT : DB_BEFORE, bi_flags)) == DB_NEEDSPLIT) {
+	switch (ret = __bam_iitem(dbp,
+	    &h, &indx, NULL, data, exact ? DB_CURRENT : DB_BEFORE, bi_flags)) {
+	case 0:
+		/*
+		 * Done.  Clean up the cursor and adjust the internal page
+		 * counts.
+		 */
+		if (isdeleted)
+			__bam_ca_replace(dbp, h->pgno, indx, REPLACE_SUCCESS);
+		break;
+	case DB_NEEDSPLIT:
+		/*
+		 * We have to split the page.  Back out the cursor setup,
+		 * discard the stack of pages, and do the split.
+		 */
+		if (isdeleted)
+			__bam_ca_replace(dbp, h->pgno, indx, REPLACE_FAILED);
+
 		(void)__bam_stkrel(dbp);
 		stack = 0;
+
 		if ((ret = __bam_split(dbp, recnop)) != 0)
-			goto err;
+			break;
+
 		goto retry;
+		/* NOTREACHED */
+	default:
+		if (isdeleted)
+			__bam_ca_replace(dbp, h->pgno, indx, REPLACE_FAILED);
+		break;
 	}
 
-	if (!exact && ret == 0)
-		__bam_adjust(dbp, t, 1);
-
 err:	if (stack)
 		__bam_stkrel(dbp);
+
 	return (ret);
 }
diff --git a/db2/btree/bt_rsearch.c b/db2/btree/bt_rsearch.c
index ee26221e25..caa6b3515e 100644
--- a/db2/btree/bt_rsearch.c
+++ b/db2/btree/bt_rsearch.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 /*
@@ -44,14 +44,11 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)bt_rsearch.c	10.8 (Sleepycat) 8/24/97";
+static const char sccsid[] = "@(#)bt_rsearch.c	10.15 (Sleepycat) 5/6/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
 #include <sys/types.h>
-
-#include <stdio.h>
-#include <stdlib.h>
 #endif
 
 #include "db_int.h"
@@ -62,13 +59,13 @@ static const char sccsid[] = "@(#)bt_rsearch.c	10.8 (Sleepycat) 8/24/97";
  * __bam_rsearch --
  *	Search a btree for a record number.
  *
- * PUBLIC: int __bam_rsearch __P((DB *, db_recno_t *, u_int, int, int *));
+ * PUBLIC: int __bam_rsearch __P((DB *, db_recno_t *, u_int32_t, int, int *));
  */
 int
 __bam_rsearch(dbp, recnop, flags, stop, exactp)
 	DB *dbp;
 	db_recno_t *recnop;
-	u_int flags;
+	u_int32_t flags;
 	int stop, *exactp;
 {
 	BINTERNAL *bi;
@@ -78,7 +75,7 @@ __bam_rsearch(dbp, recnop, flags, stop, exactp)
 	RINTERNAL *ri;
 	db_indx_t indx, top;
 	db_pgno_t pg;
-	db_recno_t recno, total;
+	db_recno_t i, recno, total;
 	int isappend, ret, stack;
 
 	t = dbp->internal;
@@ -136,8 +133,7 @@ __bam_rsearch(dbp, recnop, flags, stop, exactp)
 			*exactp = 1;
 		else {
 			*exactp = 0;
-			if (flags == S_DELETE ||
-			    flags == S_FIND || recno > total + 1) {
+			if (!PAST_END_OK(flags) || recno > total + 1) {
 				(void)memp_fput(dbp->mpf, h, 0);
 				(void)__BT_LPUT(dbp, lock);
 				return (DB_NOTFOUND);
@@ -164,30 +160,65 @@ __bam_rsearch(dbp, recnop, flags, stop, exactp)
 		stack = 1;
 	}
 
-	/* Records in the tree are 0-based, and record numbers are 1-based. */
-	--recno;
-
+	/*
+	 * !!!
+	 * Record numbers in the tree are 0-based, but the recno is
+	 * 1-based.  All of the calculations below have to take this
+	 * into account.
+	 */
 	for (total = 0;;) {
 		switch (TYPE(h)) {
 		case P_LBTREE:
-			BT_STK_ENTER(t, h, (recno - total) * P_INDX, lock, ret);
+			recno -= total;
+
+			/*
+			 * There may be logically deleted records on the page,
+			 * walk the page correcting for them.  The record may
+			 * not exist if there are enough deleted records in the
+			 * page.
+			 */
+			if (recno <= NUM_ENT(h))
+				for (i = recno - 1;; --i) {
+					if (B_DISSET(GET_BKEYDATA(h,
+					    i * P_INDX + O_INDX)->type))
+						++recno;
+					if (i == 0)
+						break;
+				}
+			if (recno > NUM_ENT(h)) {
+				*exactp = 0;
+				if (!PAST_END_OK(flags) ||
+				    recno > (db_recno_t)(NUM_ENT(h) + 1)) {
+					ret = DB_NOTFOUND;
+					goto err;
+				}
+
+			}
+
+			/* Correct from 1-based to 0-based for a page offset. */
+			--recno;
+			BT_STK_ENTER(t, h, recno * P_INDX, lock, ret);
 			return (ret);
 		case P_IBTREE:
 			for (indx = 0, top = NUM_ENT(h);;) {
 				bi = GET_BINTERNAL(h, indx);
-				if (++indx == top || total + bi->nrecs > recno)
+				if (++indx == top || total + bi->nrecs >= recno)
 					break;
 				total += bi->nrecs;
 			}
 			pg = bi->pgno;
 			break;
 		case P_LRECNO:
-			BT_STK_ENTER(t, h, recno - total, lock, ret);
+			recno -= total;
+
+			/* Correct from 1-based to 0-based for a page offset. */
+			--recno;
+			BT_STK_ENTER(t, h, recno, lock, ret);
 			return (ret);
 		case P_IRECNO:
 			for (indx = 0, top = NUM_ENT(h);;) {
 				ri = GET_RINTERNAL(h, indx);
-				if (++indx == top || total + ri->nrecs > recno)
+				if (++indx == top || total + ri->nrecs >= recno)
 					break;
 				total += ri->nrecs;
 			}
@@ -244,13 +275,13 @@ err:	BT_STK_POP(t);
  * __bam_adjust --
  *	Adjust the tree after adding or deleting a record.
  *
- * PUBLIC: int __bam_adjust __P((DB *, BTREE *, int));
+ * PUBLIC: int __bam_adjust __P((DB *, BTREE *, int32_t));
  */
 int
 __bam_adjust(dbp, t, adjust)
 	DB *dbp;
 	BTREE *t;
-	int adjust;
+	int32_t adjust;
 {
 	EPG *epg;
 	PAGE *h;
@@ -264,7 +295,7 @@ __bam_adjust(dbp, t, adjust)
 			    (ret = __bam_cadjust_log(dbp->dbenv->lg_info,
 			    dbp->txn, &LSN(h), 0, dbp->log_fileid,
 			    PGNO(h), &LSN(h), (u_int32_t)epg->indx,
-			    (int32_t)adjust, 1)) != 0)
+			    adjust, 1)) != 0)
 				return (ret);
 
 			if (TYPE(h) == P_IBTREE)
@@ -322,26 +353,31 @@ db_recno_t
 __bam_total(h)
 	PAGE *h;
 {
-	db_recno_t recs;
-	db_indx_t nxt, top;
+	db_recno_t nrecs;
+	db_indx_t indx, top;
+
+	nrecs = 0;
+	top = NUM_ENT(h);
 
 	switch (TYPE(h)) {
 	case P_LBTREE:
-		recs = NUM_ENT(h) / 2;
+		/* Check for logically deleted records. */
+		for (indx = 0; indx < top; indx += P_INDX)
+			if (!B_DISSET(GET_BKEYDATA(h, indx + O_INDX)->type))
+				++nrecs;
 		break;
 	case P_IBTREE:
-		for (recs = 0, nxt = 0, top = NUM_ENT(h); nxt < top; ++nxt)
-			recs += GET_BINTERNAL(h, nxt)->nrecs;
+		for (indx = 0; indx < top; indx += O_INDX)
+			nrecs += GET_BINTERNAL(h, indx)->nrecs;
 		break;
 	case P_LRECNO:
-		recs = NUM_ENT(h);
+		nrecs = NUM_ENT(h);
 		break;
 	case P_IRECNO:
-		for (recs = 0, nxt = 0, top = NUM_ENT(h); nxt < top; ++nxt)
-			recs += GET_RINTERNAL(h, nxt)->nrecs;
+		for (indx = 0; indx < top; indx += O_INDX)
+			nrecs += GET_RINTERNAL(h, indx)->nrecs;
 		break;
-	default:
-		abort();
 	}
-	return (recs);
+
+	return (nrecs);
 }
diff --git a/db2/btree/bt_search.c b/db2/btree/bt_search.c
index c39c9af322..09ce46d90a 100644
--- a/db2/btree/bt_search.c
+++ b/db2/btree/bt_search.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 /*
@@ -47,15 +47,13 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)bt_search.c	10.9 (Sleepycat) 11/18/97";
+static const char sccsid[] = "@(#)bt_search.c	10.15 (Sleepycat) 5/6/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
 #include <sys/types.h>
 
 #include <errno.h>
-#include <stdio.h>
-#include <stdlib.h>
 #include <string.h>
 #endif
 
@@ -68,13 +66,13 @@ static const char sccsid[] = "@(#)bt_search.c	10.9 (Sleepycat) 11/18/97";
  *	Search a btree for a key.
  *
  * PUBLIC: int __bam_search __P((DB *,
- * PUBLIC:     const DBT *, u_int, int, db_recno_t *, int *));
+ * PUBLIC:     const DBT *, u_int32_t, int, db_recno_t *, int *));
  */
 int
 __bam_search(dbp, key, flags, stop, recnop, exactp)
 	DB *dbp;
 	const DBT *key;
-	u_int flags;
+	u_int32_t flags;
 	int stop, *exactp;
 	db_recno_t *recnop;
 {
@@ -109,8 +107,7 @@ __bam_search(dbp, key, flags, stop, recnop, exactp)
 	 * Retrieve the root page.
 	 */
 	pg = PGNO_ROOT;
-	stack = F_ISSET(dbp, DB_BT_RECNUM) &&
-	    (flags == S_INSERT || flags == S_DELETE);
+	stack = F_ISSET(dbp, DB_BT_RECNUM) && LF_ISSET(S_STACK);
 	if ((ret = __bam_lget(dbp,
 	    0, pg, stack ? DB_LOCK_WRITE : DB_LOCK_READ, &lock)) != 0)
 		return (ret);
@@ -179,6 +176,14 @@ __bam_search(dbp, key, flags, stop, recnop, exactp)
 			if (LF_ISSET(S_EXACT))
 				goto notfound;
 
+			/*
+			 * !!!
+			 * Possibly returning a deleted record -- DB_SET_RANGE,
+			 * DB_KEYFIRST and DB_KEYLAST don't require an exact
+			 * match, and we don't want to walk multiple pages here
+			 * to find an undeleted record.  This is handled in the
+			 * __bam_c_search() routine.
+			 */
 			BT_STK_ENTER(t, h, base, lock, ret);
 			return (ret);
 		}
@@ -249,7 +254,10 @@ match:	*exactp = 1;
 	/*
 	 * If we got here, we know that we have a btree leaf page.
 	 *
-	 * If there are duplicates, go to the first/last one.
+	 * If there are duplicates, go to the first/last one.  This is
+	 * safe because we know that we're not going to leave the page,
+	 * all duplicate sets that are not on overflow pages exist on a
+	 * single leaf page.
 	 */
 	if (LF_ISSET(S_DUPLAST))
 		while (indx < (db_indx_t)(NUM_ENT(h) - P_INDX) &&
@@ -261,8 +269,8 @@ match:	*exactp = 1;
 			indx -= P_INDX;
 
 	/*
-	 * Now check if we are allowed to return deleted item; if not
-	 * find/last the first non-deleted item.
+	 * Now check if we are allowed to return deleted items; if not
+	 * find the next (or previous) non-deleted item.
 	 */
 	if (LF_ISSET(S_DELNO)) {
 		if (LF_ISSET(S_DUPLAST))
diff --git a/db2/btree/bt_split.c b/db2/btree/bt_split.c
index 219d486dc5..da9417c781 100644
--- a/db2/btree/bt_split.c
+++ b/db2/btree/bt_split.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 /*
@@ -44,7 +44,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)bt_split.c	10.18 (Sleepycat) 11/23/97";
+static const char sccsid[] = "@(#)bt_split.c	10.23 (Sleepycat) 5/23/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -52,8 +52,6 @@ static const char sccsid[] = "@(#)bt_split.c	10.18 (Sleepycat) 11/23/97";
 
 #include <errno.h>
 #include <limits.h>
-#include <stdio.h>
-#include <stdlib.h>
 #include <string.h>
 #endif
 
@@ -168,8 +166,10 @@ __bam_root(dbp, cp)
 	t = dbp->internal;
 
 	/* Yeah, right. */
-	if (cp->page->level >= MAXBTREELEVEL)
-		return (ENOSPC);
+	if (cp->page->level >= MAXBTREELEVEL) {
+		ret = ENOSPC;
+		goto err;
+	}
 
 	/* Create new left and right pages for the split. */
 	lp = rp = NULL;
@@ -237,18 +237,16 @@ __bam_page(dbp, pp, cp)
 	DB *dbp;
 	EPG *pp, *cp;
 {
-	BTREE *t;
 	DB_LOCK tplock;
 	PAGE *lp, *rp, *tp;
 	int ret;
 
-	t = dbp->internal;
 	lp = rp = tp = NULL;
 	ret = -1;
 
 	/* Create new right page for the split. */
 	if ((ret = __bam_new(dbp, TYPE(cp->page), &rp)) != 0)
-		return (ret);
+		goto err;
 	P_INIT(rp, dbp->pgsize, rp->pgno,
 	    ISINTERNAL(cp->page) ? PGNO_INVALID : cp->page->pgno,
 	    ISINTERNAL(cp->page) ? PGNO_INVALID : cp->page->next_pgno,
@@ -259,7 +257,7 @@ __bam_page(dbp, pp, cp)
 		ret = ENOMEM;
 		goto err;
 	}
-#ifdef DEBUG
+#ifdef DIAGNOSTIC
 	memset(lp, 0xff, dbp->pgsize);
 #endif
 	P_INIT(lp, dbp->pgsize, cp->page->pgno,
@@ -906,13 +904,13 @@ __bam_copy(dbp, pp, cp, nxt, stop)
 	PAGE *pp, *cp;
 	u_int32_t nxt, stop;
 {
-	db_indx_t dup, nbytes, off;
+	db_indx_t nbytes, off;
 
 	/*
 	 * Copy the rest of the data to the right page.  Nxt is the next
 	 * offset placed on the target page.
 	 */
-	for (dup = off = 0; nxt < stop; ++nxt, ++NUM_ENT(cp), ++off) {
+	for (off = 0; nxt < stop; ++nxt, ++NUM_ENT(cp), ++off) {
 		switch (TYPE(pp)) {
 		case P_IBTREE:
 			if (B_TYPE(GET_BINTERNAL(pp, nxt)->type) == B_KEYDATA)
diff --git a/db2/btree/bt_stat.c b/db2/btree/bt_stat.c
index e88b5dac2d..2236434b38 100644
--- a/db2/btree/bt_stat.c
+++ b/db2/btree/bt_stat.c
@@ -1,21 +1,20 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)bt_stat.c	10.14 (Sleepycat) 10/25/97";
+static const char sccsid[] = "@(#)bt_stat.c	10.17 (Sleepycat) 4/26/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
 #include <sys/types.h>
 
 #include <errno.h>
-#include <stdlib.h>
 #include <string.h>
 #endif
 
@@ -29,14 +28,14 @@ static void __bam_add_rstat __P((DB_BTREE_LSTAT *, DB_BTREE_STAT *));
  * __bam_stat --
  *	Gather/print the btree statistics
  *
- * PUBLIC: int __bam_stat __P((DB *, void *, void *(*)(size_t), int));
+ * PUBLIC: int __bam_stat __P((DB *, void *, void *(*)(size_t), u_int32_t));
  */
 int
 __bam_stat(argdbp, spp, db_malloc, flags)
 	DB *argdbp;
 	void *spp;
 	void *(*db_malloc) __P((size_t));
-	int flags;
+	u_int32_t flags;
 {
 	BTMETA *meta;
 	BTREE *t;
diff --git a/db2/btree/btree.src b/db2/btree/btree.src
index 6145696d28..928dce2196 100644
--- a/db2/btree/btree.src
+++ b/db2/btree/btree.src
@@ -1,16 +1,12 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
+ *
+ *	@(#)btree.src	10.8 (Sleepycat) 4/10/98
  */
 
-#include "config.h"
-
-#ifndef lint
-static const char sccsid[] = "@(#)btree.src	10.6 (Sleepycat) 11/2/97";
-#endif /* not lint */
-
 PREFIX	bam
 
 /*
diff --git a/db2/btree/btree_auto.c b/db2/btree/btree_auto.c
index 18bbd5db37..75eadb1d62 100644
--- a/db2/btree/btree_auto.c
+++ b/db2/btree/btree_auto.c
@@ -15,8 +15,6 @@
 #include "db_dispatch.h"
 #include "btree.h"
 #include "db_am.h"
-#include "common_ext.h"
-
 /*
  * PUBLIC: int __bam_pg_alloc_log
  * PUBLIC:     __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
@@ -85,7 +83,7 @@ int __bam_pg_alloc_log(logp, txnid, ret_lsnp, flags,
 	bp += sizeof(ptype);
 	memcpy(bp, &next, sizeof(next));
 	bp += sizeof(next);
-#ifdef DEBUG
+#ifdef DIAGNOSTIC
 	if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size)
 		fprintf(stderr, "Error in log record length");
 #endif
@@ -101,22 +99,23 @@ int __bam_pg_alloc_log(logp, txnid, ret_lsnp, flags,
  * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
  */
 int
-__bam_pg_alloc_print(notused1, dbtp, lsnp, notused3, notused4)
+__bam_pg_alloc_print(notused1, dbtp, lsnp, notused2, notused3)
 	DB_LOG *notused1;
 	DBT *dbtp;
 	DB_LSN *lsnp;
-	int notused3;
-	void *notused4;
+	int notused2;
+	void *notused3;
 {
 	__bam_pg_alloc_args *argp;
 	u_int32_t i;
-	int c, ret;
+	u_int ch;
+	int ret;
 
 	i = 0;
-	c = 0;
+	ch = 0;
 	notused1 = NULL;
-	notused3 = 0;
-	notused4 = NULL;
+	notused2 = 0;
+	notused3 = NULL;
 
 	if ((ret = __bam_pg_alloc_read(dbtp->data, &argp)) != 0)
 		return (ret);
@@ -249,7 +248,7 @@ int __bam_pg_free_log(logp, txnid, ret_lsnp, flags,
 	}
 	memcpy(bp, &next, sizeof(next));
 	bp += sizeof(next);
-#ifdef DEBUG
+#ifdef DIAGNOSTIC
 	if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size)
 		fprintf(stderr, "Error in log record length");
 #endif
@@ -265,22 +264,23 @@ int __bam_pg_free_log(logp, txnid, ret_lsnp, flags,
  * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
  */
 int
-__bam_pg_free_print(notused1, dbtp, lsnp, notused3, notused4)
+__bam_pg_free_print(notused1, dbtp, lsnp, notused2, notused3)
 	DB_LOG *notused1;
 	DBT *dbtp;
 	DB_LSN *lsnp;
-	int notused3;
-	void *notused4;
+	int notused2;
+	void *notused3;
 {
 	__bam_pg_free_args *argp;
 	u_int32_t i;
-	int c, ret;
+	u_int ch;
+	int ret;
 
 	i = 0;
-	c = 0;
+	ch = 0;
 	notused1 = NULL;
-	notused3 = 0;
-	notused4 = NULL;
+	notused2 = 0;
+	notused3 = NULL;
 
 	if ((ret = __bam_pg_free_read(dbtp->data, &argp)) != 0)
 		return (ret);
@@ -297,11 +297,11 @@ __bam_pg_free_print(notused1, dbtp, lsnp, notused3, notused4)
 	    (u_long)argp->meta_lsn.file, (u_long)argp->meta_lsn.offset);
 	printf("\theader: ");
 	for (i = 0; i < argp->header.size; i++) {
-		c = ((char *)argp->header.data)[i];
-		if (isprint(c) || c == 0xa)
-			putchar(c);
+		ch = ((u_int8_t *)argp->header.data)[i];
+		if (isprint(ch) || ch == 0xa)
+			putchar(ch);
 		else
-			printf("%#x ", c);
+			printf("%#x ", ch);
 	}
 	printf("\n");
 	printf("\tnext: %lu\n", (u_long)argp->next);
@@ -443,7 +443,7 @@ int __bam_split_log(logp, txnid, ret_lsnp, flags,
 		memcpy(bp, pg->data, pg->size);
 		bp += pg->size;
 	}
-#ifdef DEBUG
+#ifdef DIAGNOSTIC
 	if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size)
 		fprintf(stderr, "Error in log record length");
 #endif
@@ -459,22 +459,23 @@ int __bam_split_log(logp, txnid, ret_lsnp, flags,
  * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
  */
 int
-__bam_split_print(notused1, dbtp, lsnp, notused3, notused4)
+__bam_split_print(notused1, dbtp, lsnp, notused2, notused3)
 	DB_LOG *notused1;
 	DBT *dbtp;
 	DB_LSN *lsnp;
-	int notused3;
-	void *notused4;
+	int notused2;
+	void *notused3;
 {
 	__bam_split_args *argp;
 	u_int32_t i;
-	int c, ret;
+	u_int ch;
+	int ret;
 
 	i = 0;
-	c = 0;
+	ch = 0;
 	notused1 = NULL;
-	notused3 = 0;
-	notused4 = NULL;
+	notused2 = 0;
+	notused3 = NULL;
 
 	if ((ret = __bam_split_read(dbtp->data, &argp)) != 0)
 		return (ret);
@@ -498,11 +499,11 @@ __bam_split_print(notused1, dbtp, lsnp, notused3, notused4)
 	    (u_long)argp->nlsn.file, (u_long)argp->nlsn.offset);
 	printf("\tpg: ");
 	for (i = 0; i < argp->pg.size; i++) {
-		c = ((char *)argp->pg.data)[i];
-		if (isprint(c) || c == 0xa)
-			putchar(c);
+		ch = ((u_int8_t *)argp->pg.data)[i];
+		if (isprint(ch) || ch == 0xa)
+			putchar(ch);
 		else
-			printf("%#x ", c);
+			printf("%#x ", ch);
 	}
 	printf("\n");
 	printf("\n");
@@ -639,7 +640,7 @@ int __bam_rsplit_log(logp, txnid, ret_lsnp, flags,
 	else
 		memset(bp, 0, sizeof(*rootlsn));
 	bp += sizeof(*rootlsn);
-#ifdef DEBUG
+#ifdef DIAGNOSTIC
 	if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size)
 		fprintf(stderr, "Error in log record length");
 #endif
@@ -655,22 +656,23 @@ int __bam_rsplit_log(logp, txnid, ret_lsnp, flags,
  * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
  */
 int
-__bam_rsplit_print(notused1, dbtp, lsnp, notused3, notused4)
+__bam_rsplit_print(notused1, dbtp, lsnp, notused2, notused3)
 	DB_LOG *notused1;
 	DBT *dbtp;
 	DB_LSN *lsnp;
-	int notused3;
-	void *notused4;
+	int notused2;
+	void *notused3;
 {
 	__bam_rsplit_args *argp;
 	u_int32_t i;
-	int c, ret;
+	u_int ch;
+	int ret;
 
 	i = 0;
-	c = 0;
+	ch = 0;
 	notused1 = NULL;
-	notused3 = 0;
-	notused4 = NULL;
+	notused2 = 0;
+	notused3 = NULL;
 
 	if ((ret = __bam_rsplit_read(dbtp->data, &argp)) != 0)
 		return (ret);
@@ -685,21 +687,21 @@ __bam_rsplit_print(notused1, dbtp, lsnp, notused3, notused4)
 	printf("\tpgno: %lu\n", (u_long)argp->pgno);
 	printf("\tpgdbt: ");
 	for (i = 0; i < argp->pgdbt.size; i++) {
-		c = ((char *)argp->pgdbt.data)[i];
-		if (isprint(c) || c == 0xa)
-			putchar(c);
+		ch = ((u_int8_t *)argp->pgdbt.data)[i];
+		if (isprint(ch) || ch == 0xa)
+			putchar(ch);
 		else
-			printf("%#x ", c);
+			printf("%#x ", ch);
 	}
 	printf("\n");
 	printf("\tnrec: %lu\n", (u_long)argp->nrec);
 	printf("\trootent: ");
 	for (i = 0; i < argp->rootent.size; i++) {
-		c = ((char *)argp->rootent.data)[i];
-		if (isprint(c) || c == 0xa)
-			putchar(c);
+		ch = ((u_int8_t *)argp->rootent.data)[i];
+		if (isprint(ch) || ch == 0xa)
+			putchar(ch);
 		else
-			printf("%#x ", c);
+			printf("%#x ", ch);
 	}
 	printf("\n");
 	printf("\trootlsn: [%lu][%lu]\n",
@@ -817,7 +819,7 @@ int __bam_adj_log(logp, txnid, ret_lsnp, flags,
 	bp += sizeof(indx_copy);
 	memcpy(bp, &is_insert, sizeof(is_insert));
 	bp += sizeof(is_insert);
-#ifdef DEBUG
+#ifdef DIAGNOSTIC
 	if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size)
 		fprintf(stderr, "Error in log record length");
 #endif
@@ -833,22 +835,23 @@ int __bam_adj_log(logp, txnid, ret_lsnp, flags,
  * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
  */
 int
-__bam_adj_print(notused1, dbtp, lsnp, notused3, notused4)
+__bam_adj_print(notused1, dbtp, lsnp, notused2, notused3)
 	DB_LOG *notused1;
 	DBT *dbtp;
 	DB_LSN *lsnp;
-	int notused3;
-	void *notused4;
+	int notused2;
+	void *notused3;
 {
 	__bam_adj_args *argp;
 	u_int32_t i;
-	int c, ret;
+	u_int ch;
+	int ret;
 
 	i = 0;
-	c = 0;
+	ch = 0;
 	notused1 = NULL;
-	notused3 = 0;
-	notused4 = NULL;
+	notused2 = 0;
+	notused3 = NULL;
 
 	if ((ret = __bam_adj_read(dbtp->data, &argp)) != 0)
 		return (ret);
@@ -975,7 +978,7 @@ int __bam_cadjust_log(logp, txnid, ret_lsnp, flags,
 	bp += sizeof(adjust);
 	memcpy(bp, &total, sizeof(total));
 	bp += sizeof(total);
-#ifdef DEBUG
+#ifdef DIAGNOSTIC
 	if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size)
 		fprintf(stderr, "Error in log record length");
 #endif
@@ -991,22 +994,23 @@ int __bam_cadjust_log(logp, txnid, ret_lsnp, flags,
  * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
  */
 int
-__bam_cadjust_print(notused1, dbtp, lsnp, notused3, notused4)
+__bam_cadjust_print(notused1, dbtp, lsnp, notused2, notused3)
 	DB_LOG *notused1;
 	DBT *dbtp;
 	DB_LSN *lsnp;
-	int notused3;
-	void *notused4;
+	int notused2;
+	void *notused3;
 {
 	__bam_cadjust_args *argp;
 	u_int32_t i;
-	int c, ret;
+	u_int ch;
+	int ret;
 
 	i = 0;
-	c = 0;
+	ch = 0;
 	notused1 = NULL;
-	notused3 = 0;
-	notused4 = NULL;
+	notused2 = 0;
+	notused3 = NULL;
 
 	if ((ret = __bam_cadjust_read(dbtp->data, &argp)) != 0)
 		return (ret);
@@ -1124,7 +1128,7 @@ int __bam_cdel_log(logp, txnid, ret_lsnp, flags,
 	bp += sizeof(*lsn);
 	memcpy(bp, &indx, sizeof(indx));
 	bp += sizeof(indx);
-#ifdef DEBUG
+#ifdef DIAGNOSTIC
 	if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size)
 		fprintf(stderr, "Error in log record length");
 #endif
@@ -1140,22 +1144,23 @@ int __bam_cdel_log(logp, txnid, ret_lsnp, flags,
  * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
  */
 int
-__bam_cdel_print(notused1, dbtp, lsnp, notused3, notused4)
+__bam_cdel_print(notused1, dbtp, lsnp, notused2, notused3)
 	DB_LOG *notused1;
 	DBT *dbtp;
 	DB_LSN *lsnp;
-	int notused3;
-	void *notused4;
+	int notused2;
+	void *notused3;
 {
 	__bam_cdel_args *argp;
 	u_int32_t i;
-	int c, ret;
+	u_int ch;
+	int ret;
 
 	i = 0;
-	c = 0;
+	ch = 0;
 	notused1 = NULL;
-	notused3 = 0;
-	notused4 = NULL;
+	notused2 = 0;
+	notused3 = NULL;
 
 	if ((ret = __bam_cdel_read(dbtp->data, &argp)) != 0)
 		return (ret);
@@ -1307,7 +1312,7 @@ int __bam_repl_log(logp, txnid, ret_lsnp, flags,
 	bp += sizeof(prefix);
 	memcpy(bp, &suffix, sizeof(suffix));
 	bp += sizeof(suffix);
-#ifdef DEBUG
+#ifdef DIAGNOSTIC
 	if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size)
 		fprintf(stderr, "Error in log record length");
 #endif
@@ -1323,22 +1328,23 @@ int __bam_repl_log(logp, txnid, ret_lsnp, flags,
  * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
  */
 int
-__bam_repl_print(notused1, dbtp, lsnp, notused3, notused4)
+__bam_repl_print(notused1, dbtp, lsnp, notused2, notused3)
 	DB_LOG *notused1;
 	DBT *dbtp;
 	DB_LSN *lsnp;
-	int notused3;
-	void *notused4;
+	int notused2;
+	void *notused3;
 {
 	__bam_repl_args *argp;
 	u_int32_t i;
-	int c, ret;
+	u_int ch;
+	int ret;
 
 	i = 0;
-	c = 0;
+	ch = 0;
 	notused1 = NULL;
-	notused3 = 0;
-	notused4 = NULL;
+	notused2 = 0;
+	notused3 = NULL;
 
 	if ((ret = __bam_repl_read(dbtp->data, &argp)) != 0)
 		return (ret);
@@ -1357,20 +1363,20 @@ __bam_repl_print(notused1, dbtp, lsnp, notused3, notused4)
 	printf("\tisdeleted: %lu\n", (u_long)argp->isdeleted);
 	printf("\torig: ");
 	for (i = 0; i < argp->orig.size; i++) {
-		c = ((char *)argp->orig.data)[i];
-		if (isprint(c) || c == 0xa)
-			putchar(c);
+		ch = ((u_int8_t *)argp->orig.data)[i];
+		if (isprint(ch) || ch == 0xa)
+			putchar(ch);
 		else
-			printf("%#x ", c);
+			printf("%#x ", ch);
 	}
 	printf("\n");
 	printf("\trepl: ");
 	for (i = 0; i < argp->repl.size; i++) {
-		c = ((char *)argp->repl.data)[i];
-		if (isprint(c) || c == 0xa)
-			putchar(c);
+		ch = ((u_int8_t *)argp->repl.data)[i];
+		if (isprint(ch) || ch == 0xa)
+			putchar(ch);
 		else
-			printf("%#x ", c);
+			printf("%#x ", ch);
 	}
 	printf("\n");
 	printf("\tprefix: %lu\n", (u_long)argp->prefix);