about summary refs log tree commit diff
path: root/db2/btree
diff options
context:
space:
mode:
Diffstat (limited to 'db2/btree')
-rw-r--r-- db2/btree/bt_cursor.c 152
-rw-r--r-- db2/btree/bt_delete.c 27
-rw-r--r-- db2/btree/bt_open.c 10
-rw-r--r-- db2/btree/bt_put.c 461
-rw-r--r-- db2/btree/bt_rec.c 116
-rw-r--r-- db2/btree/bt_recno.c 39
-rw-r--r-- db2/btree/bt_search.c 4
-rw-r--r-- db2/btree/bt_split.c 44
-rw-r--r-- db2/btree/bt_stat.c 11
-rw-r--r-- db2/btree/btree.src 25
-rw-r--r-- db2/btree/btree_auto.c 294
11 files changed, 846 insertions, 337 deletions
diff --git a/db2/btree/bt_cursor.c b/db2/btree/bt_cursor.c
index a1266bcd3c..e5f3faeb70 100644
--- a/db2/btree/bt_cursor.c
+++ b/db2/btree/bt_cursor.c
@@ -8,7 +8,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)bt_cursor.c	10.33 (Sleepycat) 9/24/97";
+static const char sccsid[] = "@(#)bt_cursor.c	10.35 (Sleepycat) 10/25/97";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -62,10 +62,10 @@ __bam_cursor(dbp, txn, dbcp)
 
 	DEBUG_LWRITE(dbp, txn, "bam_cursor", NULL, NULL, 0);
 
-	if ((dbc = (DBC *)calloc(1, sizeof(DBC))) == NULL)
+	if ((dbc = (DBC *)__db_calloc(1, sizeof(DBC))) == NULL)
 		return (ENOMEM);
-	if ((cp = (CURSOR *)calloc(1, sizeof(CURSOR))) == NULL) {
-		free(dbc);
+	if ((cp = (CURSOR *)__db_calloc(1, sizeof(CURSOR))) == NULL) {
+		__db_free(dbc);
 		return (ENOMEM);
 	}
 
@@ -474,7 +474,7 @@ __bam_c_rget(dbp, cp, key, data, flags)
 	__bam_stkrel(dbp);
 
 err:	(void)memp_fput(dbp->mpf, cp->page, 0);
-	free(dbt.data);
+	__db_free(dbt.data);
 	return (ret);
 }
 
@@ -1422,7 +1422,7 @@ __bam_c_physdel(dbp, cp, h)
 	DB_LOCK lock;
 	db_indx_t indx;
 	db_pgno_t pgno, next_pgno, prev_pgno;
-	int local, ret;
+	int local, normal, ret;
 
 	t = dbp->internal;
 	ret = 0;
@@ -1457,51 +1457,65 @@ __bam_c_physdel(dbp, cp, h)
 		local = 0;
 
 	/*
-	 * If we're deleting a duplicate entry, call the common code to do
-	 * the work.
+	 * If we're deleting a duplicate entry and there are other duplicate
+	 * entries remaining, call the common code to do the work and fix up
+	 * the parent page as necessary.  Otherwise, do a normal btree delete.
+	 *
+	 * There are 5 possible cases:
+	 *
+	 * 1. It's not a duplicate item: do a normal btree delete.
+	 * 2. It's a duplicate item:
+	 *	2a: We delete an item from a page of duplicates, but there are
+	 *	    more items on the page.
+	 *      2b: We delete the last item from a page of duplicates, deleting
+	 *	    the last duplicate.
+	 *      2c: We delete the last item from a page of duplicates, but there
+	 *	    is a previous page of duplicates.
+	 *      2d: We delete the last item from a page of duplicates, but there
+	 *	    is a following page of duplicates.
+	 *
+	 * In the case of:
+	 *
+	 *  1: There's nothing further to do.
+	 * 2a: There's nothing further to do.
+	 * 2b: Do the normal btree delete instead of a duplicate delete, as
+	 *     that deletes both the duplicate chain and the parent page's
+	 *     entry.
+	 * 2c: There's nothing further to do.
+	 * 2d: Delete the duplicate, and update the parent page's entry.
 	 */
 	if (TYPE(h) == P_DUPLICATE) {
 		pgno = PGNO(h);
 		prev_pgno = PREV_PGNO(h);
 		next_pgno = NEXT_PGNO(h);
-		if ((ret = __db_drem(dbp, &h, indx, __bam_free)) != 0)
-			goto err;
 
-		/*
-		 * There are 4 cases:
-		 *
-		 * 1. We removed an item on a page, but there are more items
-		 *    on the page.
-		 * 2. We removed the last item on a page, removing the last
-		 *    duplicate.
-		 * 3. We removed the last item on a page, but there is a
-		 *    following page of duplicates.
-		 * 4. We removed the last item on a page, but there is a
-		 *    previous page of duplicates.
-		 *
-		 * In case 1, h != NULL, h->pgno == pgno
-		 * In case 2, h == NULL,
-		 *    prev_pgno == PGNO_INVALID, next_pgno == PGNO_INVALID
-		 * In case 3, h != NULL, next_pgno != PGNO_INVALID
-		 * In case 4, h == NULL, prev_pgno != PGNO_INVALID
-		 *
-		 * In case 1, there's nothing else to do.
-		 * In case 2, remove the entry from the parent page.
-		 * In case 3 or 4, if the deleted page was the first in a chain
-		 *    of duplicate pages, update the parent page's entry.
-		 *
-		 * Test:
-		 *	If there were previous pages of duplicates or we didn't
-		 *	empty the current page of duplicates, we don't need to
-		 *	touch the parent page.
-		 */
-		if (prev_pgno != PGNO_INVALID || (h != NULL && pgno == h->pgno))
-			goto done;
+		if (NUM_ENT(h) == 1 &&
+		    prev_pgno == PGNO_INVALID && next_pgno == PGNO_INVALID)
+			normal = 1;
+		else {
+			normal = 0;
 
-		/*
-		 * Release any page we're holding and the lock on the deleted
-		 * page.
-		 */
+			/* Delete the duplicate. */
+			if ((ret = __db_drem(dbp, &h, indx, __bam_free)) != 0)
+				goto err;
+
+			/*
+			 * 2a: h != NULL, h->pgno == pgno
+			 * 2b: We don't reach this clause, as the above test
+			 *     was true.
+			 * 2c: h == NULL, prev_pgno != PGNO_INVALID
+			 * 2d: h != NULL, next_pgno != PGNO_INVALID
+			 *
+			 * Test for 2a and 2c: if we didn't empty the current
+			 * page or there was a previous page of duplicates, we
+			 * don't need to touch the parent page.
+			 */
+			if ((h != NULL && pgno == h->pgno) ||
+			    prev_pgno != PGNO_INVALID)
+				goto done;
+		}
+
+		/* Release any page we're holding and its lock. */
 		if (local) {
 			if (h != NULL)
 				(void)memp_fput(dbp->mpf, h, 0);
@@ -1519,37 +1533,33 @@ __bam_c_physdel(dbp, cp, h)
 		}
 		local = 1;
 
-		/*
-		 * If we deleted the last duplicate, we can fall out and do a
-		 * normal btree delete in the context of the parent page.  If
-		 * not, we have to update the parent's page.
-		 */
+		/* Switch to the parent page's entry. */
 		indx = cp->indx;
-		if (next_pgno != PGNO_INVALID) {
-			/*
-			 * Copy, delete, update and re-insert the parent page's
-			 * entry.
-			 */
-			bo = *GET_BOVERFLOW(h, indx);
-			(void)__db_ditem(dbp, h, indx, BOVERFLOW_SIZE);
-			bo.pgno = next_pgno;
-			memset(&dbt, 0, sizeof(dbt));
-			dbt.data = &bo;
-			dbt.size = BOVERFLOW_SIZE;
-			(void)__db_pitem(dbp,
-			    h, indx, BOVERFLOW_SIZE, &dbt, NULL);
-
-			/* Discard the parent page. */
-			(void)memp_fput(dbp->mpf, h, 0);
-			(void)__BT_TLPUT(dbp, lock);
-			local = 0;
+		if (normal)
+			goto btd;
 
-			goto done;
-		}
+		/*
+		 * Copy, delete, update, add-back the parent page's data entry.
+		 *
+		 * XXX
+		 * This may be a performance/logging problem.  We should add a
+		 * log message which simply logs/updates a random set of bytes
+		 * on a page, and use it instead of doing a delete/add pair.
+		 */
+		indx += O_INDX;
+		bo = *GET_BOVERFLOW(h, indx);
+		(void)__db_ditem(dbp, h, indx, BOVERFLOW_SIZE);
+		bo.pgno = next_pgno;
+		memset(&dbt, 0, sizeof(dbt));
+		dbt.data = &bo;
+		dbt.size = BOVERFLOW_SIZE;
+		(void)__db_pitem(dbp, h, indx, BOVERFLOW_SIZE, &dbt, NULL);
+		(void)memp_fset(dbp->mpf, h, DB_MPOOL_DIRTY);
+		goto done;
 	}
 
 	/* Otherwise, do a normal btree delete. */
-	if ((ret = __bam_ditem(dbp, h, indx)) != 0)
+btd:	if ((ret = __bam_ditem(dbp, h, indx)) != 0)
 		goto err;
 	if ((ret = __bam_ditem(dbp, h, indx)) != 0)
 		goto err;
@@ -1584,7 +1594,7 @@ __bam_c_physdel(dbp, cp, h)
 		}
 
 		ret = __bam_dpage(dbp, &dbt);
-		free(dbt.data);
+		__db_free(dbt.data);
 	}
 
 err:
diff --git a/db2/btree/bt_delete.c b/db2/btree/bt_delete.c
index 98929540e4..9593d0109c 100644
--- a/db2/btree/bt_delete.c
+++ b/db2/btree/bt_delete.c
@@ -47,7 +47,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)bt_delete.c	10.21 (Sleepycat) 9/3/97";
+static const char sccsid[] = "@(#)bt_delete.c	10.22 (Sleepycat) 11/2/97";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -266,9 +266,10 @@ __bam_ditem(dbp, h, indx)
 		case B_DUPLICATE:
 		case B_OVERFLOW:
 			nbytes = BINTERNAL_SIZE(bi->len);
+			bo = (BOVERFLOW *)bi->data;
 			goto offpage;
 		case B_KEYDATA:
-			nbytes = BKEYDATA_SIZE(bi->len);
+			nbytes = BINTERNAL_SIZE(bi->len);
 			break;
 		default:
 			return (__db_pgfmt(dbp, h->pgno));
@@ -289,7 +290,7 @@ __bam_ditem(dbp, h, indx)
 			if (indx > 0 && h->inp[indx] == h->inp[indx - P_INDX])
 				return (__bam_adjindx(dbp,
 				    h, indx, indx - P_INDX, 0));
-			if (indx < (u_int32_t)(NUM_ENT(h) - P_INDX) &&
+			if (indx + P_INDX < (u_int32_t)NUM_ENT(h) &&
 			    h->inp[indx] == h->inp[indx + P_INDX])
 				return (__bam_adjindx(dbp,
 				    h, indx, indx + O_INDX, 0));
@@ -301,9 +302,9 @@ __bam_ditem(dbp, h, indx)
 		case B_DUPLICATE:
 		case B_OVERFLOW:
 			nbytes = BOVERFLOW_SIZE;
+			bo = GET_BOVERFLOW(h, indx);
 
 offpage:		/* Delete duplicate/offpage chains. */
-			bo = GET_BOVERFLOW(h, indx);
 			if (B_TYPE(bo->type) == B_DUPLICATE) {
 				if ((ret =
 				    __db_ddup(dbp, bo->pgno, __bam_free)) != 0)
@@ -523,7 +524,7 @@ __bam_dpages(dbp, t)
 
 	/*
 	 * If we deleted the next-to-last item from the root page, the tree
-	 * has collapsed a level.  Try and write lock the remaining root + 1
+	 * can collapse a level.  Try and write lock the remaining root + 1
 	 * page and copy it onto the root page.  If we can't get the lock,
 	 * that's okay, the tree just stays a level deeper than we'd like.
 	 */
@@ -546,8 +547,8 @@ __bam_dpages(dbp, t)
 			b.data = P_ENTRY(epg->page, 0);
 			b.size = BINTERNAL_SIZE(((BINTERNAL *)b.data)->len);
 			__bam_rsplit_log(dbp->dbenv->lg_info, dbp->txn,
-			   &h->lsn, 0, dbp->log_fileid, h->pgno, &a, &b,
-			   &epg->page->lsn);
+			   &h->lsn, 0, dbp->log_fileid, h->pgno, &a,
+			   RE_NREC(epg->page), &b, &epg->page->lsn);
 		}
 
 		/*
@@ -565,15 +566,19 @@ __bam_dpages(dbp, t)
 		if (TYPE(h) == P_IRECNO ||
 		    (TYPE(h) == P_IBTREE && F_ISSET(dbp, DB_BT_RECNUM)))
 			RE_NREC_SET(epg->page, rcnt);
+		(void)memp_fset(dbp->mpf, epg->page, DB_MPOOL_DIRTY);
 
-		/* Free the last page in that level of the btree. */
-		++t->lstat.bt_freed;
+		/*
+		 * Free the last page in that level of the btree and discard
+		 * the lock.  (The call to __bam_free discards our reference
+		 * to the page.)
+		 */
 		(void)__bam_free(dbp, h);
+		(void)__BT_TLPUT(dbp, lock);
+		++t->lstat.bt_freed;
 
 		/* Adjust the cursors. */
 		__bam_ca_move(dbp, t, h->pgno, PGNO_ROOT);
-
-		(void)__BT_TLPUT(dbp, lock);
 	}
 
 	/* Release the top page in the subtree. */
diff --git a/db2/btree/bt_open.c b/db2/btree/bt_open.c
index 354888c6c2..2361f69a3e 100644
--- a/db2/btree/bt_open.c
+++ b/db2/btree/bt_open.c
@@ -47,7 +47,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)bt_open.c	10.20 (Sleepycat) 8/19/97";
+static const char sccsid[] = "@(#)bt_open.c	10.21 (Sleepycat) 10/25/97";
 #endif /* not lint */
 
 /*
@@ -95,7 +95,7 @@ __bam_open(dbp, type, dbinfo)
 	int ret;
 
 	/* Allocate the btree internal structure. */
-	if ((t = (BTREE *)calloc(1, sizeof(BTREE))) == NULL)
+	if ((t = (BTREE *)__db_calloc(1, sizeof(BTREE))) == NULL)
 		return (ENOMEM);
 
 	t->bt_sp = t->bt_csp = t->bt_stack;
@@ -179,7 +179,7 @@ einval:	ret = EINVAL;
 err:	if (t != NULL) {
 		/* If we allocated room for key/data return, discard it. */
 		if (t->bt_rkey.data != NULL)
-			free(t->bt_rkey.data);
+			__db_free(t->bt_rkey.data);
 
 		FREE(t, sizeof(BTREE));
 	}
@@ -201,7 +201,7 @@ __bam_bdup(orig, new)
 
 	ot = orig->internal;
 
-	if ((t = (BTREE *)calloc(1, sizeof(*t))) == NULL)
+	if ((t = (BTREE *)__db_calloc(1, sizeof(*t))) == NULL)
 		return (ENOMEM);
 
 	/*
@@ -248,7 +248,7 @@ __bam_keyalloc(t)
 	 * Recno keys are always the same size, and we don't want to have
 	 * to check for space on each return.  Allocate it now.
 	 */
-	if ((t->bt_rkey.data = (void *)malloc(sizeof(db_recno_t))) == NULL)
+	if ((t->bt_rkey.data = (void *)__db_malloc(sizeof(db_recno_t))) == NULL)
 		return (ENOMEM);
 	t->bt_rkey.ulen = sizeof(db_recno_t);
 	return (0);
diff --git a/db2/btree/bt_put.c b/db2/btree/bt_put.c
index af09f76d41..b3d775bb0f 100644
--- a/db2/btree/bt_put.c
+++ b/db2/btree/bt_put.c
@@ -47,7 +47,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)bt_put.c	10.25 (Sleepycat) 9/17/97";
+static const char sccsid[] = "@(#)bt_put.c	10.31 (Sleepycat) 10/26/97";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -66,7 +66,10 @@ static const char sccsid[] = "@(#)bt_put.c	10.25 (Sleepycat) 9/17/97";
 static int __bam_fixed __P((BTREE *, DBT *));
 static int __bam_lookup __P((DB *, DBT *, int *));
 static int __bam_ndup __P((DB *, PAGE *, u_int32_t));
-static int __bam_partial __P((DB *, DBT *, PAGE *, u_int32_t));
+static int __bam_ovput __P((DB *, PAGE *, u_int32_t, DBT *));
+static int __bam_partial __P((DB *, DBT *, PAGE *, u_int32_t, u_int32_t));
+static u_int32_t
+	   __bam_partsize __P((DB *, DBT *, PAGE *, u_int32_t));
 
 /*
  * __bam_put --
@@ -334,21 +337,6 @@ slow:	return (__bam_search(dbp, key, S_INSERT, 1, NULL, exactp));
 }
 
 /*
- * OVPUT --
- *	Copy an overflow item onto a page.
- */
-#undef	OVPUT
-#define	OVPUT(h, indx, bo) do {						\
-	DBT __hdr;							\
-	memset(&__hdr, 0, sizeof(__hdr));				\
-	__hdr.data = &bo;						\
-	__hdr.size = BOVERFLOW_SIZE;					\
-	if ((ret = __db_pitem(dbp,					\
-	    h, indx, BOVERFLOW_SIZE, &__hdr, NULL)) != 0)		\
-		return (ret);						\
-} while (0)
-
-/*
  * __bam_iitem --
  *	Insert an item into the tree.
  *
@@ -365,19 +353,18 @@ __bam_iitem(dbp, hp, indxp, key, data, op, flags)
 {
 	BTREE *t;
 	BKEYDATA *bk;
-	BOVERFLOW kbo, dbo;
 	DBT tdbt;
 	PAGE *h;
 	db_indx_t indx;
-	u_int32_t have_bytes, need_bytes, needed;
-	int bigkey, bigdata, dcopy, dupadjust, ret;
+	u_int32_t data_size, have_bytes, need_bytes, needed;
+	int bigkey, bigdata, dupadjust, replace, ret;
 
 	t = dbp->internal;
 	h = *hp;
 	indx = *indxp;
 
-	dupadjust = 0;
 	bk = NULL;			/* XXX: Shut the compiler up. */
+	dupadjust = replace = 0;
 
 	/*
 	 * If it's a page of duplicates, call the common code to do the work.
@@ -385,7 +372,7 @@ __bam_iitem(dbp, hp, indxp, key, data, op, flags)
 	 * !!!
 	 * Here's where the hp and indxp are important.  The duplicate code
 	 * may decide to rework/rearrange the pages and indices we're using,
-	 * so the caller must understand that the stack has to change.
+	 * so the caller must understand that the page stack may change.
 	 */
 	if (TYPE(h) == P_DUPLICATE) {
 		/* Adjust the index for the new item if it's a DB_AFTER op. */
@@ -401,24 +388,7 @@ __bam_iitem(dbp, hp, indxp, key, data, op, flags)
 		return (__db_dput(dbp, data, hp, indxp, __bam_new));
 	}
 
-	/*
-	 * XXX
-	 * Handle partial puts.
-	 *
-	 * This is truly awful from a performance standput.  We don't optimize
-	 * for partial puts at all, we delete the record and add it back in,
-	 * regardless of size or if we're simply overwriting current data.
-	 * The hash access method does this a lot better than we do, and we're
-	 * eventually going to have to fix it.
-	 */
-	if (F_ISSET(data, DB_DBT_PARTIAL)) {
-		tdbt = *data;
-		if ((ret = __bam_partial(dbp, &tdbt, h, indx)) != 0)
-			return (ret);
-		data = &tdbt;
-	}
-
-	/* If it's a short fixed-length record, fix it up. */
+	/* Handle fixed-length records: build the real record. */
 	if (F_ISSET(dbp, DB_RE_FIXEDLEN) && data->size != t->bt_recno->re_len) {
 		tdbt = *data;
 		if ((ret = __bam_fixed(t, &tdbt)) != 0)
@@ -427,30 +397,15 @@ __bam_iitem(dbp, hp, indxp, key, data, op, flags)
 	}
 
 	/*
-	 * If the key or data item won't fit on a page, store it in the
-	 * overflow pages.
-	 *
-	 * !!!
-	 * From this point on, we have to recover the allocated overflow
-	 * pages on error.
+	 * Figure out how much space the data will take, including if it's a
+	 * partial record.  If either of the key or data items won't fit on
+	 * a page, we'll have to store them on overflow pages.
 	 */
-	bigkey = bigdata = 0;
-	if (LF_ISSET(BI_NEWKEY) && key->size > t->bt_ovflsize) {
-		B_TSET(kbo.type, B_OVERFLOW, 0);
-		kbo.tlen = key->size;
-		if ((ret = __db_poff(dbp, key, &kbo.pgno, __bam_new)) != 0)
-			goto err;
-		bigkey = 1;
-	}
-	if (data->size > t->bt_ovflsize) {
-		B_TSET(dbo.type, B_OVERFLOW, 0);
-		dbo.tlen = data->size;
-		if ((ret = __db_poff(dbp, data, &dbo.pgno, __bam_new)) != 0)
-			goto err;
-		bigdata = 1;
-	}
+	bigkey = LF_ISSET(BI_NEWKEY) && key->size > t->bt_ovflsize;
+	data_size = F_ISSET(data, DB_DBT_PARTIAL) ?
+	    __bam_partsize(dbp, data, h, indx) : data->size;
+	bigdata = data_size > t->bt_ovflsize;
 
-	dcopy = 0;
 	needed = 0;
 	if (LF_ISSET(BI_NEWKEY)) {
 		/* If BI_NEWKEY is set we're adding a new key and data pair. */
@@ -461,7 +416,7 @@ __bam_iitem(dbp, hp, indxp, key, data, op, flags)
 		if (bigdata)
 			needed += BOVERFLOW_PSIZE;
 		else
-			needed += BKEYDATA_PSIZE(data->size);
+			needed += BKEYDATA_PSIZE(data_size);
 	} else {
 		/*
 		 * We're either overwriting the data item of a key/data pair
@@ -482,16 +437,8 @@ __bam_iitem(dbp, hp, indxp, key, data, op, flags)
 		if (bigdata)
 			need_bytes += BOVERFLOW_PSIZE;
 		else
-			need_bytes += BKEYDATA_PSIZE(data->size);
+			need_bytes += BKEYDATA_PSIZE(data_size);
 
-		/*
-		 * If we're overwriting a data item, we copy it if it's not a
-		 * special record type and it's the same size (including any
-		 * alignment) and do a delete/insert otherwise.
-		 */
-		if (op == DB_CURRENT && !bigdata &&
-		    B_TYPE(bk->type) == B_KEYDATA && have_bytes == need_bytes)
-			dcopy = 1;
 		if (have_bytes < need_bytes)
 			needed += need_bytes - have_bytes;
 	}
@@ -505,9 +452,15 @@ __bam_iitem(dbp, hp, indxp, key, data, op, flags)
 	 * check in the btree split code, so we don't undo it there!?!?
 	 */
 	if (P_FREESPACE(h) < needed ||
-	    (t->bt_maxkey != 0 && NUM_ENT(h) > t->bt_maxkey)) {
-		ret = DB_NEEDSPLIT;
-		goto err;
+	    (t->bt_maxkey != 0 && NUM_ENT(h) > t->bt_maxkey))
+		return (DB_NEEDSPLIT);
+
+	/* Handle partial puts: build the real record. */
+	if (F_ISSET(data, DB_DBT_PARTIAL)) {
+		tdbt = *data;
+		if ((ret = __bam_partial(dbp, &tdbt, h, indx, data_size)) != 0)
+			return (ret);
+		data = &tdbt;
 	}
 
 	/*
@@ -515,10 +468,10 @@ __bam_iitem(dbp, hp, indxp, key, data, op, flags)
 	 *
 	 * 1. Append a new key/data pair.
 	 * 2. Insert a new key/data pair.
-	 * 3. Copy the data item.
-	 * 4. Delete/insert the data item.
-	 * 5. Append a new data item.
-	 * 6. Insert a new data item.
+	 * 3. Append a new data item (a new duplicate).
+	 * 4. Insert a new data item (a new duplicate).
+	 * 5. Overflow item: delete and re-add the data item.
+	 * 6. Replace the data item.
 	 */
 	if (LF_ISSET(BI_NEWKEY)) {
 		switch (op) {
@@ -533,42 +486,17 @@ __bam_iitem(dbp, hp, indxp, key, data, op, flags)
 		}
 
 		/* Add the key. */
-		if (bigkey)
-			OVPUT(h, indx, kbo);
-		else {
-			DBT __data;
-			memset(&__data, 0, sizeof(__data));
-			__data.data = key->data;
-			__data.size = key->size;
+		if (bigkey) {
+			if ((ret = __bam_ovput(dbp, h, indx, key)) != 0)
+				return (ret);
+		} else
 			if ((ret = __db_pitem(dbp, h, indx,
-			    BKEYDATA_SIZE(key->size), NULL, &__data)) != 0)
-				goto err;
-		}
+			    BKEYDATA_SIZE(key->size), NULL, key)) != 0)
+				return (ret);
 		++indx;
 	} else {
 		switch (op) {
-		case DB_CURRENT:	/* 3. Copy the data item. */
-			/*
-			 * If we're not logging and it's possible, overwrite
-			 * the current item.
-			 *
-			 * XXX
-			 * We should add a separate logging message so that
-			 * we can do this anytime it's possible, including
-			 * for partial record puts.
-			 */
-			if (dcopy && !DB_LOGGING(dbp)) {
-				bk->len = data->size;
-				memcpy(bk->data, data->data, data->size);
-				goto done;
-			}
-					/* 4. Delete/insert the data item. */
-			if (TYPE(h) == P_LBTREE)
-				++indx;
-			if ((ret = __bam_ditem(dbp, h, indx)) != 0)
-				goto err;
-			break;
-		case DB_AFTER:		/* 5. Append a new data item. */
+		case DB_AFTER:		/* 3. Append a new data item. */
 			if (TYPE(h) == P_LBTREE) {
 				/*
 				 * Adjust the cursor and copy in the key for
@@ -576,7 +504,7 @@ __bam_iitem(dbp, hp, indxp, key, data, op, flags)
 				 */
 				if ((ret = __bam_adjindx(dbp,
 				    h, indx + P_INDX, indx, 1)) != 0)
-					goto err;
+					return (ret);
 
 				indx += 3;
 				dupadjust = 1;
@@ -589,7 +517,7 @@ __bam_iitem(dbp, hp, indxp, key, data, op, flags)
 				*indxp += 1;
 			}
 			break;
-		case DB_BEFORE:		/* 6. Insert a new data item. */
+		case DB_BEFORE:		/* 4. Insert a new data item. */
 			if (TYPE(h) == P_LBTREE) {
 				/*
 				 * Adjust the cursor and copy in the key for
@@ -597,43 +525,62 @@ __bam_iitem(dbp, hp, indxp, key, data, op, flags)
 				 */
 				if ((ret =
 				    __bam_adjindx(dbp, h, indx, indx, 1)) != 0)
-					goto err;
+					return (ret);
 
 				++indx;
 				dupadjust = 1;
 			} else
 				__bam_ca_di(dbp, h->pgno, indx, 1);
 			break;
+		case DB_CURRENT:
+			if (TYPE(h) == P_LBTREE)
+				++indx;
+
+			/*
+			 * 5. Delete/re-add the data item.
+			 *
+			 * If we're dealing with offpage items, we have to
+			 * delete and then re-add the item.
+			 */
+			if (bigdata || B_TYPE(bk->type) == B_OVERFLOW) {
+				if ((ret = __bam_ditem(dbp, h, indx)) != 0)
+					return (ret);
+				break;
+			}
+
+			/* 6. Replace the data item. */
+			replace = 1;
+			break;
 		default:
 			abort();
 		}
 	}
 
 	/* Add the data. */
-	if (bigdata)
-		OVPUT(h, indx, dbo);
-	else {
+	if (bigdata) {
+		if ((ret = __bam_ovput(dbp, h, indx, data)) != 0)
+			return (ret);
+	} else {
 		BKEYDATA __bk;
-		DBT __hdr, __data;
-		memset(&__data, 0, sizeof(__data));
-		__data.data = data->data;
-		__data.size = data->size;
+		DBT __hdr;
 
 		if (LF_ISSET(BI_DELETED)) {
 			B_TSET(__bk.type, B_KEYDATA, 1);
-			__bk.len = __data.size;
+			__bk.len = data->size;
 			__hdr.data = &__bk;
 			__hdr.size = SSZA(BKEYDATA, data);
 			ret = __db_pitem(dbp, h, indx,
-			    BKEYDATA_SIZE(__data.size), &__hdr, &__data);
-		} else
+			    BKEYDATA_SIZE(data->size), &__hdr, data);
+		} else if (replace)
+			ret = __bam_ritem(dbp, h, indx, data);
+		else
 			ret = __db_pitem(dbp, h, indx,
-			    BKEYDATA_SIZE(data->size), NULL, &__data);
+			    BKEYDATA_SIZE(data->size), NULL, data);
 		if (ret != 0)
-			goto err;
+			return (ret);
 	}
 
-done:	++t->lstat.bt_added;
+	++t->lstat.bt_added;
 
 	ret = memp_fset(dbp->mpf, h, DB_MPOOL_DIRTY);
 
@@ -645,22 +592,206 @@ done:	++t->lstat.bt_added;
 	if (dupadjust && P_FREESPACE(h) <= dbp->pgsize / 2) {
 		--indx;
 		if ((ret = __bam_ndup(dbp, h, indx)) != 0)
-			goto err;
+			return (ret);
 	}
 
 	if (t->bt_recno != NULL)
 		F_SET(t->bt_recno, RECNO_MODIFIED);
 
-	if (0) {
-err:		if (bigkey)
-			(void)__db_doff(dbp, kbo.pgno, __bam_free);
-		if (bigdata)
-			(void)__db_doff(dbp, dbo.pgno, __bam_free);
-	}
 	return (ret);
 }
 
 /*
+ * __bam_partsize --
+ *	Figure out how much total space a partial data item will occupy.
+ */
+static u_int32_t
+__bam_partsize(dbp, data, h, indx)
+	DB *dbp;
+	DBT *data;
+	PAGE *h;
+	u_int32_t indx;
+{
+	BKEYDATA *bk;
+	u_int32_t nbytes;
+
+	/*
+	 * Figure out how much total space we'll need.  If the record doesn't
+	 * already exist, it's simply the data we're provided.
+	 */
+	if (indx >= NUM_ENT(h))
+		return (data->doff + data->size);
+
+	/*
+	 * Otherwise, it's the data provided plus any already existing data
+	 * that we're not replacing.
+	 */
+	bk = GET_BKEYDATA(h, indx + (TYPE(h) == P_LBTREE ? O_INDX : 0));
+	nbytes =
+	    B_TYPE(bk->type) == B_OVERFLOW ? ((BOVERFLOW *)bk)->tlen : bk->len;
+
+	/*
+	 * There are really two cases here:
+	 *
+	 * Case 1: We are replacing some bytes that do not exist (i.e., they
+	 * are past the end of the record).  In this case the number of bytes
+	 * we are replacing is irrelevant and all we care about is how many
+	 * bytes we are going to add from offset.  So, the new record length
+	 * is going to be the size of the new bytes (size) plus wherever those
+	 * new bytes begin (doff).
+	 *
+	 * Case 2: All the bytes we are replacing exist.  Therefore, the new
+	 * size is the oldsize (nbytes) minus the bytes we are replacing (dlen)
+	 * plus the bytes we are adding (size).
+	 */
+	if (nbytes < data->doff + data->dlen)		/* Case 1 */
+		return (data->doff + data->size);
+
+	return (nbytes + data->size - data->dlen);	/* Case 2 */
+}
+
+/*
+ * OVPUT --
+ *	Copy an overflow item onto a page.
+ */
+#undef	OVPUT
+#define	OVPUT(h, indx, bo) do {						\
+	DBT __hdr;							\
+	memset(&__hdr, 0, sizeof(__hdr));				\
+	__hdr.data = &bo;						\
+	__hdr.size = BOVERFLOW_SIZE;					\
+	if ((ret = __db_pitem(dbp,					\
+	    h, indx, BOVERFLOW_SIZE, &__hdr, NULL)) != 0)		\
+		return (ret);						\
+} while (0)
+
+/*
+ * __bam_ovput --
+ *	Build an overflow item and put it on the page.
+ */
+static int
+__bam_ovput(dbp, h, indx, item)
+	DB *dbp;
+	PAGE *h;
+	u_int32_t indx;
+	DBT *item;
+{
+	BOVERFLOW bo;
+	int ret;
+
+	B_TSET(bo.type, B_OVERFLOW, 0);
+	bo.tlen = item->size;
+	if ((ret = __db_poff(dbp, item, &bo.pgno, __bam_new)) != 0)
+		return (ret);
+
+	OVPUT(h, indx, bo);
+
+	return (0);
+}
+
+/*
+ * __bam_ritem --
+ *	Replace an item on a page.
+ *
+ * PUBLIC: int __bam_ritem __P((DB *, PAGE *, u_int32_t, DBT *));
+ */
+int
+__bam_ritem(dbp, h, indx, data)
+	DB *dbp;
+	PAGE *h;
+	u_int32_t indx;
+	DBT *data;
+{
+	BKEYDATA *bk;
+	DBT orig, repl;
+	db_indx_t lo, ln, min, off, prefix, suffix;
+	int32_t nbytes;
+	int cnt, ret;
+	u_int8_t *p, *t;
+
+	/*
+	 * Replace a single item onto a page.  The logic figuring out where
+	 * to insert and whether it fits is handled in the caller.  All we do
+	 * here is manage the page shuffling.
+	 */
+	bk = GET_BKEYDATA(h, indx);
+
+	/* Log the change. */
+	if (DB_LOGGING(dbp)) {
+		/*
+		 * We might as well check to see if the two data items share
+		 * a common prefix and suffix -- it can save us a lot of log
+		 * message if they're large.
+		 */
+		min = data->size < bk->len ? data->size : bk->len;
+		for (prefix = 0,
+		    p = bk->data, t = data->data;
+		    prefix < min && *p == *t; ++prefix, ++p, ++t)
+			;
+
+		min -= prefix;
+		for (suffix = 0,
+		    p = (u_int8_t *)bk->data + bk->len - 1,
+		    t = (u_int8_t *)data->data + data->size - 1;
+		    suffix < min && *p == *t; ++suffix, --p, --t)
+			;
+
+		/* We only log the parts of the items that have changed. */
+		orig.data = (u_int8_t *)bk->data + prefix;
+		orig.size = bk->len - (prefix + suffix);
+		repl.data = (u_int8_t *)data->data + prefix;
+		repl.size = data->size - (prefix + suffix);
+		if ((ret = __bam_repl_log(dbp->dbenv->lg_info, dbp->txn,
+		    &LSN(h), 0, dbp->log_fileid, PGNO(h), &LSN(h),
+		    (u_int32_t)indx, (u_int32_t)B_DISSET(bk->type),
+		    &orig, &repl, (u_int32_t)prefix, (u_int32_t)suffix)) != 0)
+			return (ret);
+	}
+
+	/*
+	 * Set references to the first in-use byte on the page and the
+	 * first byte of the item being replaced.
+	 */
+	p = (u_int8_t *)h + HOFFSET(h);
+	t = (u_int8_t *)bk;
+
+	/*
+	 * If the entry is growing in size, shift the beginning of the data
+	 * part of the page down.  If the entry is shrinking in size, shift
+	 * the beginning of the data part of the page up.  Use memmove(3),
+	 * because the regions overlap.
+	 */
+	lo = BKEYDATA_SIZE(bk->len);
+	ln = BKEYDATA_SIZE(data->size);
+	if (lo != ln) {
+		nbytes = lo - ln;		/* Signed difference. */
+		if (p == t)			/* First index is fast. */
+			h->inp[indx] += nbytes;
+		else {				/* Else, shift the page. */
+			memmove(p + nbytes, p, t - p);
+
+			/* Adjust the indices' offsets. */
+			off = h->inp[indx];
+			for (cnt = 0; cnt < NUM_ENT(h); ++cnt)
+				if (h->inp[cnt] <= off)
+					h->inp[cnt] += nbytes;
+		}
+
+		/* Clean up the page and adjust the item's reference. */
+		HOFFSET(h) += nbytes;
+		t += nbytes;
+	}
+
+	/* Copy the new item onto the page. */
+	bk = (BKEYDATA *)t;
+	B_TSET(bk->type, B_KEYDATA, 0);
+	bk->len = data->size;
+	memcpy(bk->data, data->data, data->size);
+
+	return (0);
+}
+
+/*
  * __bam_ndup --
  *	Check to see if the duplicate set at indx should have its own page.
  *	If it should, create it.
@@ -766,16 +897,21 @@ __bam_fixed(t, dbt)
 	rp = t->bt_recno;
 
 	/*
-	 * If using fixed-length records, and the record is long, return
-	 * EINVAL.  If it's short, pad it out.  Use the record data return
-	 * memory, it's only short-term.
+	 * If the database contains fixed-length records and the record is
+	 * longer than the fixed length, return EINVAL.
 	 */
 	if (dbt->size > rp->re_len)
 		return (EINVAL);
+
+	/*
+	 * The caller checked to see if it was just right, so we know it's
+	 * short.  Pad it out.  We use the record data return memory, it's
+	 * only a short-term use.
+	 */
 	if (t->bt_rdata.ulen < rp->re_len) {
 		t->bt_rdata.data = t->bt_rdata.data == NULL ?
-		    (void *)malloc(rp->re_len) :
-		    (void *)realloc(t->bt_rdata.data, rp->re_len);
+		    (void *)__db_malloc(rp->re_len) :
+		    (void *)__db_realloc(t->bt_rdata.data, rp->re_len);
 		if (t->bt_rdata.data == NULL) {
 			t->bt_rdata.ulen = 0;
 			return (ENOMEM);
@@ -786,12 +922,16 @@ __bam_fixed(t, dbt)
 	memset((u_int8_t *)t->bt_rdata.data + dbt->size,
 	    rp->re_pad, rp->re_len - dbt->size);
 
-	/* Set the DBT to reference our new record. */
+	/*
+	 * Clean up our flags and other information just in case, and
+	 * change the caller's DBT to reference our created record.
+	 */
 	t->bt_rdata.size = rp->re_len;
 	t->bt_rdata.dlen = 0;
 	t->bt_rdata.doff = 0;
 	t->bt_rdata.flags = 0;
 	*dbt = t->bt_rdata;
+
 	return (0);
 }
 
@@ -800,47 +940,28 @@ __bam_fixed(t, dbt)
  *	Build the real record for a partial put.
  */
 static int
-__bam_partial(dbp, dbt, h, indx)
+__bam_partial(dbp, dbt, h, indx, nbytes)
 	DB *dbp;
 	DBT *dbt;
 	PAGE *h;
-	u_int32_t indx;
+	u_int32_t indx, nbytes;
 {
 	BTREE *t;
 	BKEYDATA *bk, tbk;
 	BOVERFLOW *bo;
 	DBT copy;
-	u_int32_t len, nbytes, tlen;
+	u_int32_t len, tlen;
 	int ret;
 	u_int8_t *p;
 
 	bo = NULL;			/* XXX: Shut the compiler up. */
 	t = dbp->internal;
 
-	/*
-	 * Figure out how much total space we'll need.  Worst case is where
-	 * the record is 0 bytes long, in which case doff causes the record
-	 * to extend, and the put data is appended to it.
-	 */
-	if (indx < NUM_ENT(h)) {
-		bk = GET_BKEYDATA(h, indx + (TYPE(h) == P_LBTREE ? O_INDX : 0));
-		if (B_TYPE(bk->type) == B_OVERFLOW) {
-			bo = (BOVERFLOW *)bk;
-			nbytes = bo->tlen;
-		} else
-			nbytes = bk->len;
-	} else {
-		bk = &tbk;
-		B_TSET(bk->type, B_KEYDATA, 0);
-		nbytes = bk->len = 0;
-	}
-	nbytes += dbt->doff + dbt->size + dbt->dlen;
-
-	/* Allocate the space. */
+	/* We use the record data return memory, it's only a short-term use. */
 	if (t->bt_rdata.ulen < nbytes) {
 		t->bt_rdata.data = t->bt_rdata.data == NULL ?
-		    (void *)malloc(nbytes) :
-		    (void *)realloc(t->bt_rdata.data, nbytes);
+		    (void *)__db_malloc(nbytes) :
+		    (void *)__db_realloc(t->bt_rdata.data, nbytes);
 		if (t->bt_rdata.data == NULL) {
 			t->bt_rdata.ulen = 0;
 			return (ENOMEM);
@@ -848,6 +969,16 @@ __bam_partial(dbp, dbt, h, indx)
 		t->bt_rdata.ulen = nbytes;
 	}
 
+	/* Find the current record. */
+	if (indx < NUM_ENT(h)) {
+		bk = GET_BKEYDATA(h, indx + (TYPE(h) == P_LBTREE ? O_INDX : 0));
+		bo = (BOVERFLOW *)bk;
+	} else {
+		bk = &tbk;
+		B_TSET(bk->type, B_KEYDATA, 0);
+		bk->len = 0;
+	}
+
 	/* We use nul bytes for extending the record, get it over with. */
 	memset(t->bt_rdata.data, 0, nbytes);
 
diff --git a/db2/btree/bt_rec.c b/db2/btree/bt_rec.c
index 9aeb395f27..c0b7c8ae4c 100644
--- a/db2/btree/bt_rec.c
+++ b/db2/btree/bt_rec.c
@@ -8,7 +8,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)bt_rec.c	10.14 (Sleepycat) 9/6/97";
+static const char sccsid[] = "@(#)bt_rec.c	10.17 (Sleepycat) 11/2/97";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -298,8 +298,8 @@ __bam_split_recover(logp, dbtp, lsnp, redo, info)
 			goto done;
 
 		/* Allocate and initialize new left/right child pages. */
-		if ((_lp = (PAGE *)malloc(file_dbp->pgsize)) == NULL ||
-		    (_rp = (PAGE *)malloc(file_dbp->pgsize)) == NULL) {
+		if ((_lp = (PAGE *)__db_malloc(file_dbp->pgsize)) == NULL ||
+		    (_rp = (PAGE *)__db_malloc(file_dbp->pgsize)) == NULL) {
 			ret = ENOMEM;
 			__db_err(file_dbp->dbenv, "%s", strerror(ret));
 			goto out;
@@ -490,9 +490,9 @@ out:	/* Free any pages that weren't dirtied. */
 
 	/* Free any allocated space. */
 	if (_lp != NULL)
-		free(_lp);
+		__db_free(_lp);
 	if (_rp != NULL)
-		free(_rp);
+		__db_free(_rp);
 
 	REC_CLOSE;
 }
@@ -541,7 +541,8 @@ __bam_rsplit_recover(logp, dbtp, lsnp, redo, info)
 	} else if (cmp_n == 0 && !redo) {
 		/* Need to undo update described. */
 		P_INIT(pagep, file_dbp->pgsize, PGNO_ROOT,
-		    PGNO_INVALID, PGNO_INVALID, pagep->level + 1, TYPE(pagep));
+		    argp->nrec, PGNO_INVALID, pagep->level + 1,
+		    file_dbp->type == DB_BTREE ? P_IBTREE : P_IRECNO);
 		if ((ret = __db_pitem(file_dbp, pagep, 0,
 		    argp->rootent.size, &argp->rootent, NULL)) != 0)
 			goto out;
@@ -764,3 +765,106 @@ __bam_cdel_recover(logp, dbtp, lsnp, redo, info)
 
 out:	REC_CLOSE;
 }
+
+/*
+ * __bam_repl_recover --
+ *	Recovery function for page item replacement.
+ *
+ * PUBLIC: int __bam_repl_recover
+ * PUBLIC:   __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+ */
+int
+__bam_repl_recover(logp, dbtp, lsnp, redo, info)
+	DB_LOG *logp;
+	DBT *dbtp;
+	DB_LSN *lsnp;
+	int redo;
+	void *info;
+{
+	__bam_repl_args *argp;
+	BKEYDATA *bk;
+	DB *file_dbp, *mdbp;
+	DBT dbt;
+	DB_MPOOLFILE *mpf;
+	PAGE *pagep;
+	int cmp_n, cmp_p, modified, ret;
+	u_int8_t *p;
+
+	REC_PRINT(__bam_repl_print);
+	REC_INTRO(__bam_repl_read);
+
+	if ((ret = memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) {
+		(void)__db_pgerr(file_dbp, argp->pgno);
+		pagep = NULL;
+		goto out;
+	}
+	bk = GET_BKEYDATA(pagep, argp->indx);
+
+	modified = 0;
+	cmp_n = log_compare(lsnp, &LSN(pagep));
+	cmp_p = log_compare(&LSN(pagep), &argp->lsn);
+	if (cmp_p == 0 && redo) {
+		/*
+		 * Need to redo update described.
+		 *
+		 * Re-build the replacement item.
+		 */
+		memset(&dbt, 0, sizeof(dbt));
+		dbt.size = argp->prefix + argp->suffix + argp->repl.size;
+		if ((dbt.data = __db_malloc(dbt.size)) == NULL) {
+			ret = ENOMEM;
+			goto err;
+		}
+		p = dbt.data;
+		memcpy(p, bk->data, argp->prefix);
+		p += argp->prefix;
+		memcpy(p, argp->repl.data, argp->repl.size);
+		p += argp->repl.size;
+		memcpy(p, bk->data + (bk->len - argp->suffix), argp->suffix);
+
+		ret = __bam_ritem(file_dbp, pagep, argp->indx, &dbt);
+		__db_free(dbt.data);
+		if (ret != 0)
+			goto err;
+
+		LSN(pagep) = *lsnp;
+		modified = 1;
+	} else if (cmp_n == 0 && !redo) {
+		/*
+		 * Need to undo update described.
+		 *
+		 * Re-build the original item.
+		 */
+		memset(&dbt, 0, sizeof(dbt));
+		dbt.size = argp->prefix + argp->suffix + argp->orig.size;
+		if ((dbt.data = __db_malloc(dbt.size)) == NULL) {
+			ret = ENOMEM;
+			goto err;
+		}
+		p = dbt.data;
+		memcpy(p, bk->data, argp->prefix);
+		p += argp->prefix;
+		memcpy(p, argp->orig.data, argp->orig.size);
+		p += argp->orig.size;
+		memcpy(p, bk->data + (bk->len - argp->suffix), argp->suffix);
+
+		ret = __bam_ritem(file_dbp, pagep, argp->indx, &dbt);
+		__db_free(dbt.data);
+		if (ret != 0)
+			goto err;
+
+		/* Reset the deleted flag, if necessary. */
+		if (argp->isdeleted)
+			B_DSET(GET_BKEYDATA(pagep, argp->indx)->type);
+
+		LSN(pagep) = argp->lsn;
+		modified = 1;
+	}
+	if ((ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) == 0)
+		*lsnp = argp->prev_lsn;
+
+	if (0) {
+err:		(void)memp_fput(mpf, pagep, 0);
+	}
+out:	REC_CLOSE;
+}
diff --git a/db2/btree/bt_recno.c b/db2/btree/bt_recno.c
index f7c5cffdc6..5e1cbc426c 100644
--- a/db2/btree/bt_recno.c
+++ b/db2/btree/bt_recno.c
@@ -8,7 +8,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)bt_recno.c	10.19 (Sleepycat) 9/20/97";
+static const char sccsid[] = "@(#)bt_recno.c	10.22 (Sleepycat) 10/25/97";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -75,7 +75,7 @@ __ram_open(dbp, type, dbinfo)
 	ret = 0;
 
 	/* Allocate and initialize the private RECNO structure. */
-	if ((rp = (RECNO *)calloc(1, sizeof(*rp))) == NULL)
+	if ((rp = (RECNO *)__db_calloc(1, sizeof(*rp))) == NULL)
 		return (ENOMEM);
 
 	if (dbinfo != NULL) {
@@ -140,7 +140,7 @@ __ram_open(dbp, type, dbinfo)
 
 err:	/* If we mmap'd a source file, discard it. */
 	if (rp->re_smap != NULL)
-		(void)__db_munmap(rp->re_smap, rp->re_msize);
+		(void)__db_unmap(rp->re_smap, rp->re_msize);
 
 	/* If we opened a source file, discard it. */
 	if (rp->re_fd != -1)
@@ -151,7 +151,7 @@ err:	/* If we mmap'd a source file, discard it. */
 	/* If we allocated room for key/data return, discard it. */
 	t = dbp->internal;
 	if (t != NULL && t->bt_rkey.data != NULL)
-		free(t->bt_rkey.data);
+		__db_free(t->bt_rkey.data);
 
 	FREE(rp, sizeof(*rp));
 
@@ -175,10 +175,10 @@ __ram_cursor(dbp, txn, dbcp)
 
 	DEBUG_LWRITE(dbp, txn, "ram_cursor", NULL, NULL, 0);
 
-	if ((dbc = (DBC *)calloc(1, sizeof(DBC))) == NULL)
+	if ((dbc = (DBC *)__db_calloc(1, sizeof(DBC))) == NULL)
 		return (ENOMEM);
-	if ((cp = (RCURSOR *)calloc(1, sizeof(RCURSOR))) == NULL) {
-		free(dbc);
+	if ((cp = (RCURSOR *)__db_calloc(1, sizeof(RCURSOR))) == NULL) {
+		__db_free(dbc);
 		return (ENOMEM);
 	}
 
@@ -359,7 +359,7 @@ __ram_close(argdbp)
 
 	/* Close any underlying mmap region. */
 	if (rp->re_smap != NULL)
-		(void)__db_munmap(rp->re_smap, rp->re_msize);
+		(void)__db_unmap(rp->re_smap, rp->re_msize);
 
 	/* Close any backing source file descriptor. */
 	if (rp->re_fd != -1)
@@ -814,8 +814,8 @@ __ram_update(dbp, recno, can_create)
 	if (F_ISSET(dbp, DB_RE_FIXEDLEN)) {
 		if (t->bt_rdata.ulen < rp->re_len) {
 			t->bt_rdata.data = t->bt_rdata.data == NULL ?
-			    (void *)malloc(rp->re_len) :
-			    (void *)realloc(t->bt_rdata.data, rp->re_len);
+			    (void *)__db_malloc(rp->re_len) :
+			    (void *)__db_realloc(t->bt_rdata.data, rp->re_len);
 			if (t->bt_rdata.data == NULL) {
 				t->bt_rdata.ulen = 0;
 				return (ENOMEM);
@@ -853,7 +853,7 @@ __ram_source(dbp, rp, fname)
 
 	oflags = F_ISSET(dbp, DB_AM_RDONLY) ? DB_RDONLY : 0;
 	if ((ret =
-	    __db_fdopen(rp->re_source, oflags, oflags, 0, &rp->re_fd)) != 0) {
+	    __db_open(rp->re_source, oflags, oflags, 0, &rp->re_fd)) != 0) {
 		__db_err(dbp->dbenv, "%s: %s", rp->re_source, strerror(ret));
 		goto err;
 	}
@@ -866,15 +866,16 @@ __ram_source(dbp, rp, fname)
 	 * compiler will perpetrate, doing the comparison in a portable way is
 	 * flatly impossible.  Hope that mmap fails if the file is too large.
 	 */
-	if ((ret =
-	    __db_stat(dbp->dbenv, rp->re_source, rp->re_fd, &size, NULL)) != 0)
+	if ((ret = __db_ioinfo(rp->re_source, rp->re_fd, &size, NULL)) != 0) {
+		__db_err(dbp->dbenv, "%s: %s", rp->re_source, strerror(ret));
 		goto err;
+	}
 	if (size == 0) {
 		F_SET(rp, RECNO_EOF);
 		return (0);
 	}
 
-	if ((ret = __db_mmap(rp->re_fd, (size_t)size, 1, 1, &rp->re_smap)) != 0)
+	if ((ret = __db_map(rp->re_fd, (size_t)size, 1, 1, &rp->re_smap)) != 0)
 		goto err;
 	rp->re_cmap = rp->re_smap;
 	rp->re_emap = (u_int8_t *)rp->re_smap + (rp->re_msize = size);
@@ -940,7 +941,7 @@ __ram_writeback(dbp)
 	 * open will fail.
 	 */
 	if (rp->re_smap != NULL) {
-		(void)__db_munmap(rp->re_smap, rp->re_msize);
+		(void)__db_unmap(rp->re_smap, rp->re_msize);
 		rp->re_smap = NULL;
 	}
 
@@ -951,7 +952,7 @@ __ram_writeback(dbp)
 	}
 
 	/* Open the file, truncating it. */
-	if ((ret = __db_fdopen(rp->re_source,
+	if ((ret = __db_open(rp->re_source,
 	    DB_SEQUENTIAL | DB_TRUNCATE,
 	    DB_SEQUENTIAL | DB_TRUNCATE, 0, &fd)) != 0) {
 		__db_err(dbp->dbenv, "%s: %s", rp->re_source, strerror(ret));
@@ -974,7 +975,7 @@ __ram_writeback(dbp)
 	 */
 	delim = rp->re_delim;
 	if (F_ISSET(dbp, DB_RE_FIXEDLEN)) {
-		if ((pad = malloc(rp->re_len)) == NULL) {
+		if ((pad = (u_int8_t *)__db_malloc(rp->re_len)) == NULL) {
 			ret = ENOMEM;
 			goto err;
 		}
@@ -1051,8 +1052,8 @@ __ram_fmap(dbp, top)
 	rp = t->bt_recno;
 	if (t->bt_rdata.ulen < rp->re_len) {
 		t->bt_rdata.data = t->bt_rdata.data == NULL ?
-		    (void *)malloc(rp->re_len) :
-		    (void *)realloc(t->bt_rdata.data, rp->re_len);
+		    (void *)__db_malloc(rp->re_len) :
+		    (void *)__db_realloc(t->bt_rdata.data, rp->re_len);
 		if (t->bt_rdata.data == NULL) {
 			t->bt_rdata.ulen = 0;
 			return (ENOMEM);
diff --git a/db2/btree/bt_search.c b/db2/btree/bt_search.c
index fa3e018313..a21a8208bc 100644
--- a/db2/btree/bt_search.c
+++ b/db2/btree/bt_search.c
@@ -47,7 +47,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)bt_search.c	10.7 (Sleepycat) 9/3/97";
+static const char sccsid[] = "@(#)bt_search.c	10.8 (Sleepycat) 10/25/97";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -323,7 +323,7 @@ __bam_stkgrow(t)
 
 	entries = t->bt_esp - t->bt_sp;
 
-	if ((p = (EPG *)calloc(entries * 2, sizeof(EPG))) == NULL)
+	if ((p = (EPG *)__db_calloc(entries * 2, sizeof(EPG))) == NULL)
 		return (ENOMEM);
 	memcpy(p, t->bt_sp, entries * sizeof(EPG));
 	if (t->bt_sp != t->bt_stack)
diff --git a/db2/btree/bt_split.c b/db2/btree/bt_split.c
index 25cfacc4d0..bc09131b00 100644
--- a/db2/btree/bt_split.c
+++ b/db2/btree/bt_split.c
@@ -44,7 +44,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)bt_split.c	10.14 (Sleepycat) 9/3/97";
+static const char sccsid[] = "@(#)bt_split.c	10.17 (Sleepycat) 11/2/97";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -255,7 +255,7 @@ __bam_page(dbp, pp, cp)
 	    cp->page->level, TYPE(cp->page));
 
 	/* Create new left page for the split. */
-	if ((lp = (PAGE *)malloc(dbp->pgsize)) == NULL) {
+	if ((lp = (PAGE *)__db_malloc(dbp->pgsize)) == NULL) {
 		ret = ENOMEM;
 		goto err;
 	}
@@ -389,6 +389,9 @@ __bam_broot(dbp, rootp, lp, rp)
 	P_INIT(rootp, dbp->pgsize,
 	    PGNO_ROOT, PGNO_INVALID, PGNO_INVALID, lp->level + 1, P_IBTREE);
 
+	memset(&data, 0, sizeof(data));
+	memset(&hdr, 0, sizeof(hdr));
+
 	/*
 	 * The btree comparison code guarantees that the left-most key on any
 	 * level of the tree is never used, so it doesn't need to be filled in.
@@ -399,15 +402,12 @@ __bam_broot(dbp, rootp, lp, rp)
 	if (F_ISSET(dbp, DB_BT_RECNUM)) {
 		bi.nrecs = __bam_total(lp);
 		RE_NREC_SET(rootp, bi.nrecs);
-	}
-	memset(&hdr, 0, sizeof(hdr));
+	} else
+		bi.nrecs = 0;
 	hdr.data = &bi;
 	hdr.size = SSZA(BINTERNAL, data);
-	memset(&data, 0, sizeof(data));
-	data.data = (char *)"";
-	data.size = 0;
 	if ((ret =
-	    __db_pitem(dbp, rootp, 0, BINTERNAL_SIZE(0), &hdr, &data)) != 0)
+	    __db_pitem(dbp, rootp, 0, BINTERNAL_SIZE(0), &hdr, NULL)) != 0)
 		return (ret);
 
 	switch (TYPE(rp)) {
@@ -431,9 +431,10 @@ __bam_broot(dbp, rootp, lp, rp)
 			return (ret);
 
 		/* Increment the overflow ref count. */
-		if (B_TYPE(child_bi->type) == B_OVERFLOW && (ret =
-		    __db_ioff(dbp, ((BOVERFLOW *)(child_bi->data))->pgno)) != 0)
-			return (ret);
+		if (B_TYPE(child_bi->type) == B_OVERFLOW)
+			if ((ret = __db_ovref(dbp,
+			    ((BOVERFLOW *)(child_bi->data))->pgno, 1)) != 0)
+				return (ret);
 		break;
 	case P_LBTREE:
 		/* Copy the first key of the child page onto the root page. */
@@ -473,9 +474,10 @@ __bam_broot(dbp, rootp, lp, rp)
 				return (ret);
 
 			/* Increment the overflow ref count. */
-			if (B_TYPE(child_bk->type) == B_OVERFLOW && (ret =
-			    __db_ioff(dbp, ((BOVERFLOW *)child_bk)->pgno)) != 0)
-				return (ret);
+			if (B_TYPE(child_bk->type) == B_OVERFLOW)
+				if ((ret = __db_ovref(dbp,
+				    ((BOVERFLOW *)child_bk)->pgno, 1)) != 0)
+					return (ret);
 			break;
 		default:
 			return (__db_pgfmt(dbp, rp->pgno));
@@ -604,9 +606,10 @@ __bam_pinsert(dbp, parent, lchild, rchild)
 			return (ret);
 
 		/* Increment the overflow ref count. */
-		if (B_TYPE(child_bi->type) == B_OVERFLOW && (ret =
-		    __db_ioff(dbp, ((BOVERFLOW *)(child_bi->data))->pgno)) != 0)
-			return (ret);
+		if (B_TYPE(child_bi->type) == B_OVERFLOW)
+			if ((ret = __db_ovref(dbp,
+			    ((BOVERFLOW *)(child_bi->data))->pgno, 1)) != 0)
+				return (ret);
 		break;
 	case P_LBTREE:
 		child_bk = GET_BKEYDATA(rchild, 0);
@@ -673,9 +676,10 @@ noprefix:			nksize = child_bk->len;
 				return (ret);
 
 			/* Increment the overflow ref count. */
-			if (B_TYPE(child_bk->type) == B_OVERFLOW && (ret =
-			    __db_ioff(dbp, ((BOVERFLOW *)child_bk)->pgno)) != 0)
-				return (ret);
+			if (B_TYPE(child_bk->type) == B_OVERFLOW)
+				if ((ret = __db_ovref(dbp,
+				    ((BOVERFLOW *)child_bk)->pgno, 1)) != 0)
+					return (ret);
 			break;
 		default:
 			return (__db_pgfmt(dbp, rchild->pgno));
diff --git a/db2/btree/bt_stat.c b/db2/btree/bt_stat.c
index ab3bc4c431..e88b5dac2d 100644
--- a/db2/btree/bt_stat.c
+++ b/db2/btree/bt_stat.c
@@ -8,7 +8,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)bt_stat.c	10.12 (Sleepycat) 9/3/97";
+static const char sccsid[] = "@(#)bt_stat.c	10.14 (Sleepycat) 10/25/97";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -61,7 +61,7 @@ __bam_stat(argdbp, spp, db_malloc, flags)
 
 	/* Allocate and clear the structure. */
 	if ((sp = db_malloc == NULL ?
-	    (DB_BTREE_STAT *)malloc(sizeof(*sp)) :
+	    (DB_BTREE_STAT *)__db_malloc(sizeof(*sp)) :
 	    (DB_BTREE_STAT *)db_malloc(sizeof(*sp))) == NULL) {
 		ret = ENOMEM;
 		goto err;
@@ -100,14 +100,13 @@ __bam_stat(argdbp, spp, db_malloc, flags)
 	if (F_ISSET(meta, BTM_RENUMBER))
 		sp->bt_flags |= DB_RENUMBER;
 
-	/*
-	 * Get the maxkey, minkey, re_len and re_pad fields from the
-	 * metadata.
-	 */
+	/* Get the remaining metadata fields. */
 	sp->bt_minkey = meta->minkey;
 	sp->bt_maxkey = meta->maxkey;
 	sp->bt_re_len = meta->re_len;
 	sp->bt_re_pad = meta->re_pad;
+	sp->bt_magic = meta->magic;
+	sp->bt_version = meta->version;
 
 	/* Get the page size from the DB. */
 	sp->bt_pagesize = dbp->pgsize;
diff --git a/db2/btree/btree.src b/db2/btree/btree.src
index 7c8c4b125f..6145696d28 100644
--- a/db2/btree/btree.src
+++ b/db2/btree/btree.src
@@ -8,7 +8,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)btree.src	10.4 (Sleepycat) 8/27/97";
+static const char sccsid[] = "@(#)btree.src	10.6 (Sleepycat) 11/2/97";
 #endif /* not lint */
 
 PREFIX	bam
@@ -75,6 +75,7 @@ END
  *
  * pgno:	the page number of the page copied over the root.
  * pgdbt:	the page being copied on the root page.
+ * nrec:	the tree's record count.
  * rootent:	last entry on the root page.
  * rootlsn:	the root page's original lsn.
  */
@@ -82,6 +83,7 @@ BEGIN rsplit
 ARG	fileid		u_int32_t	lu
 ARG	pgno		db_pgno_t	lu
 DBT	pgdbt		DBT		s
+ARG	nrec		db_pgno_t	lu
 DBT	rootent		DBT		s
 POINTER rootlsn		DB_LSN *	lu
 END
@@ -135,3 +137,24 @@ ARG	pgno		db_pgno_t	lu
 POINTER	lsn		DB_LSN *	lu
 ARG	indx		u_int32_t	lu
 END
+
+/*
+ * BTREE-repl: used to log the replacement of an item.
+ *
+ * pgno:	the page modified.
+ * lsn:		the page's original lsn.
+ * orig:	the original data.
+ * repl:	the replacement data.
+ * prefix, suffix: sizes of the unchanged leading and trailing bytes.
+ */
+BEGIN repl
+ARG	fileid		u_int32_t	lu
+ARG	pgno		db_pgno_t	lu
+POINTER	lsn		DB_LSN *	lu
+ARG	indx		u_int32_t	lu
+ARG	isdeleted	u_int32_t	lu
+DBT	orig		DBT		s
+DBT	repl		DBT		s
+ARG	prefix		u_int32_t	lu
+ARG	suffix		u_int32_t	lu
+END
diff --git a/db2/btree/btree_auto.c b/db2/btree/btree_auto.c
index 353ee7bc27..45232bbc41 100644
--- a/db2/btree/btree_auto.c
+++ b/db2/btree/btree_auto.c
@@ -57,7 +57,7 @@ int __bam_pg_alloc_log(logp, txnid, ret_lsnp, flags,
 	    + sizeof(pgno)
 	    + sizeof(ptype)
 	    + sizeof(next);
-	if ((logrec.data = (void *)malloc(logrec.size)) == NULL)
+	if ((logrec.data = (void *)__db_malloc(logrec.size)) == NULL)
 		return (ENOMEM);
 
 	bp = logrec.data;
@@ -92,7 +92,7 @@ int __bam_pg_alloc_log(logp, txnid, ret_lsnp, flags,
 	ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags);
 	if (txnid != NULL)
 		txnid->last_lsn = *ret_lsnp;
-	free(logrec.data);
+	__db_free(logrec.data);
 	return (ret);
 }
 
@@ -137,7 +137,7 @@ __bam_pg_alloc_print(notused1, dbtp, lsnp, notused3, notused4)
 	printf("\tptype: %lu\n", (u_long)argp->ptype);
 	printf("\tnext: %lu\n", (u_long)argp->next);
 	printf("\n");
-	free(argp);
+	__db_free(argp);
 	return (0);
 }
 
@@ -152,7 +152,7 @@ __bam_pg_alloc_read(recbuf, argpp)
 	__bam_pg_alloc_args *argp;
 	u_int8_t *bp;
 
-	argp = (__bam_pg_alloc_args *)malloc(sizeof(__bam_pg_alloc_args) +
+	argp = (__bam_pg_alloc_args *)__db_malloc(sizeof(__bam_pg_alloc_args) +
 	    sizeof(DB_TXN));
 	if (argp == NULL)
 		return (ENOMEM);
@@ -219,7 +219,7 @@ int __bam_pg_free_log(logp, txnid, ret_lsnp, flags,
 	    + sizeof(*meta_lsn)
 	    + sizeof(u_int32_t) + (header == NULL ? 0 : header->size)
 	    + sizeof(next);
-	if ((logrec.data = (void *)malloc(logrec.size)) == NULL)
+	if ((logrec.data = (void *)__db_malloc(logrec.size)) == NULL)
 		return (ENOMEM);
 
 	bp = logrec.data;
@@ -257,7 +257,7 @@ int __bam_pg_free_log(logp, txnid, ret_lsnp, flags,
 	ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags);
 	if (txnid != NULL)
 		txnid->last_lsn = *ret_lsnp;
-	free(logrec.data);
+	__db_free(logrec.data);
 	return (ret);
 }
 
@@ -308,7 +308,7 @@ __bam_pg_free_print(notused1, dbtp, lsnp, notused3, notused4)
 	printf("\n");
 	printf("\tnext: %lu\n", (u_long)argp->next);
 	printf("\n");
-	free(argp);
+	__db_free(argp);
 	return (0);
 }
 
@@ -323,7 +323,7 @@ __bam_pg_free_read(recbuf, argpp)
 	__bam_pg_free_args *argp;
 	u_int8_t *bp;
 
-	argp = (__bam_pg_free_args *)malloc(sizeof(__bam_pg_free_args) +
+	argp = (__bam_pg_free_args *)__db_malloc(sizeof(__bam_pg_free_args) +
 	    sizeof(DB_TXN));
 	if (argp == NULL)
 		return (ENOMEM);
@@ -400,7 +400,7 @@ int __bam_split_log(logp, txnid, ret_lsnp, flags,
 	    + sizeof(npgno)
 	    + sizeof(*nlsn)
 	    + sizeof(u_int32_t) + (pg == NULL ? 0 : pg->size);
-	if ((logrec.data = (void *)malloc(logrec.size)) == NULL)
+	if ((logrec.data = (void *)__db_malloc(logrec.size)) == NULL)
 		return (ENOMEM);
 
 	bp = logrec.data;
@@ -452,7 +452,7 @@ int __bam_split_log(logp, txnid, ret_lsnp, flags,
 	ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags);
 	if (txnid != NULL)
 		txnid->last_lsn = *ret_lsnp;
-	free(logrec.data);
+	__db_free(logrec.data);
 	return (ret);
 }
 
@@ -509,7 +509,7 @@ __bam_split_print(notused1, dbtp, lsnp, notused3, notused4)
 	}
 	printf("\n");
 	printf("\n");
-	free(argp);
+	__db_free(argp);
 	return (0);
 }
 
@@ -524,7 +524,7 @@ __bam_split_read(recbuf, argpp)
 	__bam_split_args *argp;
 	u_int8_t *bp;
 
-	argp = (__bam_split_args *)malloc(sizeof(__bam_split_args) +
+	argp = (__bam_split_args *)__db_malloc(sizeof(__bam_split_args) +
 	    sizeof(DB_TXN));
 	if (argp == NULL)
 		return (ENOMEM);
@@ -563,11 +563,11 @@ __bam_split_read(recbuf, argpp)
 /*
  * PUBLIC: int __bam_rsplit_log
  * PUBLIC:     __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
- * PUBLIC:     u_int32_t, db_pgno_t, DBT *, DBT *,
- * PUBLIC:     DB_LSN *));
+ * PUBLIC:     u_int32_t, db_pgno_t, DBT *, db_pgno_t,
+ * PUBLIC:     DBT *, DB_LSN *));
  */
 int __bam_rsplit_log(logp, txnid, ret_lsnp, flags,
-	fileid, pgno, pgdbt, rootent, rootlsn)
+	fileid, pgno, pgdbt, nrec, rootent, rootlsn)
 	DB_LOG *logp;
 	DB_TXN *txnid;
 	DB_LSN *ret_lsnp;
@@ -575,6 +575,7 @@ int __bam_rsplit_log(logp, txnid, ret_lsnp, flags,
 	u_int32_t fileid;
 	db_pgno_t pgno;
 	DBT *pgdbt;
+	db_pgno_t nrec;
 	DBT *rootent;
 	DB_LSN * rootlsn;
 {
@@ -597,9 +598,10 @@ int __bam_rsplit_log(logp, txnid, ret_lsnp, flags,
 	    + sizeof(fileid)
 	    + sizeof(pgno)
 	    + sizeof(u_int32_t) + (pgdbt == NULL ? 0 : pgdbt->size)
+	    + sizeof(nrec)
 	    + sizeof(u_int32_t) + (rootent == NULL ? 0 : rootent->size)
 	    + sizeof(*rootlsn);
-	if ((logrec.data = (void *)malloc(logrec.size)) == NULL)
+	if ((logrec.data = (void *)__db_malloc(logrec.size)) == NULL)
 		return (ENOMEM);
 
 	bp = logrec.data;
@@ -623,6 +625,8 @@ int __bam_rsplit_log(logp, txnid, ret_lsnp, flags,
 		memcpy(bp, pgdbt->data, pgdbt->size);
 		bp += pgdbt->size;
 	}
+	memcpy(bp, &nrec, sizeof(nrec));
+	bp += sizeof(nrec);
 	if (rootent == NULL) {
 		zero = 0;
 		memcpy(bp, &zero, sizeof(u_int32_t));
@@ -645,7 +649,7 @@ int __bam_rsplit_log(logp, txnid, ret_lsnp, flags,
 	ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags);
 	if (txnid != NULL)
 		txnid->last_lsn = *ret_lsnp;
-	free(logrec.data);
+	__db_free(logrec.data);
 	return (ret);
 }
 
@@ -692,6 +696,7 @@ __bam_rsplit_print(notused1, dbtp, lsnp, notused3, notused4)
 			printf("%#x ", c);
 	}
 	printf("\n");
+	printf("\tnrec: %lu\n", (u_long)argp->nrec);
 	printf("\trootent: ");
 	for (i = 0; i < argp->rootent.size; i++) {
 		c = ((char *)argp->rootent.data)[i];
@@ -704,7 +709,7 @@ __bam_rsplit_print(notused1, dbtp, lsnp, notused3, notused4)
 	printf("\trootlsn: [%lu][%lu]\n",
 	    (u_long)argp->rootlsn.file, (u_long)argp->rootlsn.offset);
 	printf("\n");
-	free(argp);
+	__db_free(argp);
 	return (0);
 }
 
@@ -719,7 +724,7 @@ __bam_rsplit_read(recbuf, argpp)
 	__bam_rsplit_args *argp;
 	u_int8_t *bp;
 
-	argp = (__bam_rsplit_args *)malloc(sizeof(__bam_rsplit_args) +
+	argp = (__bam_rsplit_args *)__db_malloc(sizeof(__bam_rsplit_args) +
 	    sizeof(DB_TXN));
 	if (argp == NULL)
 		return (ENOMEM);
@@ -739,6 +744,8 @@ __bam_rsplit_read(recbuf, argpp)
 	bp += sizeof(u_int32_t);
 	argp->pgdbt.data = bp;
 	bp += argp->pgdbt.size;
+	memcpy(&argp->nrec, bp, sizeof(argp->nrec));
+	bp += sizeof(argp->nrec);
 	memcpy(&argp->rootent.size, bp, sizeof(u_int32_t));
 	bp += sizeof(u_int32_t);
 	argp->rootent.data = bp;
@@ -789,7 +796,7 @@ int __bam_adj_log(logp, txnid, ret_lsnp, flags,
 	    + sizeof(indx)
 	    + sizeof(indx_copy)
 	    + sizeof(is_insert);
-	if ((logrec.data = (void *)malloc(logrec.size)) == NULL)
+	if ((logrec.data = (void *)__db_malloc(logrec.size)) == NULL)
 		return (ENOMEM);
 
 	bp = logrec.data;
@@ -821,7 +828,7 @@ int __bam_adj_log(logp, txnid, ret_lsnp, flags,
 	ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags);
 	if (txnid != NULL)
 		txnid->last_lsn = *ret_lsnp;
-	free(logrec.data);
+	__db_free(logrec.data);
 	return (ret);
 }
 
@@ -865,7 +872,7 @@ __bam_adj_print(notused1, dbtp, lsnp, notused3, notused4)
 	printf("\tindx_copy: %lu\n", (u_long)argp->indx_copy);
 	printf("\tis_insert: %lu\n", (u_long)argp->is_insert);
 	printf("\n");
-	free(argp);
+	__db_free(argp);
 	return (0);
 }
 
@@ -880,7 +887,7 @@ __bam_adj_read(recbuf, argpp)
 	__bam_adj_args *argp;
 	u_int8_t *bp;
 
-	argp = (__bam_adj_args *)malloc(sizeof(__bam_adj_args) +
+	argp = (__bam_adj_args *)__db_malloc(sizeof(__bam_adj_args) +
 	    sizeof(DB_TXN));
 	if (argp == NULL)
 		return (ENOMEM);
@@ -948,7 +955,7 @@ int __bam_cadjust_log(logp, txnid, ret_lsnp, flags,
 	    + sizeof(indx)
 	    + sizeof(adjust)
 	    + sizeof(total);
-	if ((logrec.data = (void *)malloc(logrec.size)) == NULL)
+	if ((logrec.data = (void *)__db_malloc(logrec.size)) == NULL)
 		return (ENOMEM);
 
 	bp = logrec.data;
@@ -980,7 +987,7 @@ int __bam_cadjust_log(logp, txnid, ret_lsnp, flags,
 	ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags);
 	if (txnid != NULL)
 		txnid->last_lsn = *ret_lsnp;
-	free(logrec.data);
+	__db_free(logrec.data);
 	return (ret);
 }
 
@@ -1024,7 +1031,7 @@ __bam_cadjust_print(notused1, dbtp, lsnp, notused3, notused4)
 	printf("\tadjust: %ld\n", (long)argp->adjust);
 	printf("\ttotal: %ld\n", (long)argp->total);
 	printf("\n");
-	free(argp);
+	__db_free(argp);
 	return (0);
 }
 
@@ -1039,7 +1046,7 @@ __bam_cadjust_read(recbuf, argpp)
 	__bam_cadjust_args *argp;
 	u_int8_t *bp;
 
-	argp = (__bam_cadjust_args *)malloc(sizeof(__bam_cadjust_args) +
+	argp = (__bam_cadjust_args *)__db_malloc(sizeof(__bam_cadjust_args) +
 	    sizeof(DB_TXN));
 	if (argp == NULL)
 		return (ENOMEM);
@@ -1102,7 +1109,7 @@ int __bam_cdel_log(logp, txnid, ret_lsnp, flags,
 	    + sizeof(pgno)
 	    + sizeof(*lsn)
 	    + sizeof(indx);
-	if ((logrec.data = (void *)malloc(logrec.size)) == NULL)
+	if ((logrec.data = (void *)__db_malloc(logrec.size)) == NULL)
 		return (ENOMEM);
 
 	bp = logrec.data;
@@ -1130,7 +1137,7 @@ int __bam_cdel_log(logp, txnid, ret_lsnp, flags,
 	ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags);
 	if (txnid != NULL)
 		txnid->last_lsn = *ret_lsnp;
-	free(logrec.data);
+	__db_free(logrec.data);
 	return (ret);
 }
 
@@ -1172,7 +1179,7 @@ __bam_cdel_print(notused1, dbtp, lsnp, notused3, notused4)
 	    (u_long)argp->lsn.file, (u_long)argp->lsn.offset);
 	printf("\tindx: %lu\n", (u_long)argp->indx);
 	printf("\n");
-	free(argp);
+	__db_free(argp);
 	return (0);
 }
 
@@ -1187,7 +1194,7 @@ __bam_cdel_read(recbuf, argpp)
 	__bam_cdel_args *argp;
 	u_int8_t *bp;
 
-	argp = (__bam_cdel_args *)malloc(sizeof(__bam_cdel_args) +
+	argp = (__bam_cdel_args *)__db_malloc(sizeof(__bam_cdel_args) +
 	    sizeof(DB_TXN));
 	if (argp == NULL)
 		return (ENOMEM);
@@ -1212,6 +1219,225 @@ __bam_cdel_read(recbuf, argpp)
 }
 
 /*
+ * PUBLIC: int __bam_repl_log
+ * PUBLIC:     __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
+ * PUBLIC:     u_int32_t, db_pgno_t, DB_LSN *, u_int32_t,
+ * PUBLIC:     u_int32_t, DBT *, DBT *, u_int32_t,
+ * PUBLIC:     u_int32_t));
+ */
+int __bam_repl_log(logp, txnid, ret_lsnp, flags,
+	fileid, pgno, lsn, indx, isdeleted, orig,
+	repl, prefix, suffix)
+	DB_LOG *logp;
+	DB_TXN *txnid;
+	DB_LSN *ret_lsnp;
+	u_int32_t flags;
+	u_int32_t fileid;
+	db_pgno_t pgno;
+	DB_LSN * lsn;
+	u_int32_t indx;
+	u_int32_t isdeleted;
+	DBT *orig;
+	DBT *repl;
+	u_int32_t prefix;
+	u_int32_t suffix;
+{
+	DBT logrec;
+	DB_LSN *lsnp, null_lsn;
+	u_int32_t zero;
+	u_int32_t rectype, txn_num;
+	int ret;
+	u_int8_t *bp;
+
+	rectype = DB_bam_repl;
+	txn_num = txnid == NULL ? 0 : txnid->txnid;
+	if (txnid == NULL) {
+		null_lsn.file = 0;
+		null_lsn.offset = 0;
+		lsnp = &null_lsn;
+	} else
+		lsnp = &txnid->last_lsn;
+	logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN)
+	    + sizeof(fileid)
+	    + sizeof(pgno)
+	    + sizeof(*lsn)
+	    + sizeof(indx)
+	    + sizeof(isdeleted)
+	    + sizeof(u_int32_t) + (orig == NULL ? 0 : orig->size)
+	    + sizeof(u_int32_t) + (repl == NULL ? 0 : repl->size)
+	    + sizeof(prefix)
+	    + sizeof(suffix);
+	if ((logrec.data = (void *)__db_malloc(logrec.size)) == NULL)
+		return (ENOMEM);
+
+	bp = logrec.data;
+	memcpy(bp, &rectype, sizeof(rectype));
+	bp += sizeof(rectype);
+	memcpy(bp, &txn_num, sizeof(txn_num));
+	bp += sizeof(txn_num);
+	memcpy(bp, lsnp, sizeof(DB_LSN));
+	bp += sizeof(DB_LSN);
+	memcpy(bp, &fileid, sizeof(fileid));
+	bp += sizeof(fileid);
+	memcpy(bp, &pgno, sizeof(pgno));
+	bp += sizeof(pgno);
+	if (lsn != NULL)
+		memcpy(bp, lsn, sizeof(*lsn));
+	else
+		memset(bp, 0, sizeof(*lsn));
+	bp += sizeof(*lsn);
+	memcpy(bp, &indx, sizeof(indx));
+	bp += sizeof(indx);
+	memcpy(bp, &isdeleted, sizeof(isdeleted));
+	bp += sizeof(isdeleted);
+	if (orig == NULL) {
+		zero = 0;
+		memcpy(bp, &zero, sizeof(u_int32_t));
+		bp += sizeof(u_int32_t);
+	} else {
+		memcpy(bp, &orig->size, sizeof(orig->size));
+		bp += sizeof(orig->size);
+		memcpy(bp, orig->data, orig->size);
+		bp += orig->size;
+	}
+	if (repl == NULL) {
+		zero = 0;
+		memcpy(bp, &zero, sizeof(u_int32_t));
+		bp += sizeof(u_int32_t);
+	} else {
+		memcpy(bp, &repl->size, sizeof(repl->size));
+		bp += sizeof(repl->size);
+		memcpy(bp, repl->data, repl->size);
+		bp += repl->size;
+	}
+	memcpy(bp, &prefix, sizeof(prefix));
+	bp += sizeof(prefix);
+	memcpy(bp, &suffix, sizeof(suffix));
+	bp += sizeof(suffix);
+#ifdef DEBUG
+	if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size)
+		fprintf(stderr, "Error in log record length");
+#endif
+	ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags);
+	if (txnid != NULL)
+		txnid->last_lsn = *ret_lsnp;
+	__db_free(logrec.data);
+	return (ret);
+}
+
+/*
+ * PUBLIC: int __bam_repl_print
+ * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+ */
+
+int
+__bam_repl_print(notused1, dbtp, lsnp, notused3, notused4)
+	DB_LOG *notused1;
+	DBT *dbtp;
+	DB_LSN *lsnp;
+	int notused3;
+	void *notused4;
+{
+	__bam_repl_args *argp;
+	u_int32_t i;
+	int c, ret;
+
+	i = 0;
+	c = 0;
+	notused1 = NULL;
+	notused3 = 0;
+	notused4 = NULL;
+
+	if ((ret = __bam_repl_read(dbtp->data, &argp)) != 0)
+		return (ret);
+	printf("[%lu][%lu]bam_repl: rec: %lu txnid %lx prevlsn [%lu][%lu]\n",
+	    (u_long)lsnp->file,
+	    (u_long)lsnp->offset,
+	    (u_long)argp->type,
+	    (u_long)argp->txnid->txnid,
+	    (u_long)argp->prev_lsn.file,
+	    (u_long)argp->prev_lsn.offset);
+	printf("\tfileid: %lu\n", (u_long)argp->fileid);
+	printf("\tpgno: %lu\n", (u_long)argp->pgno);
+	printf("\tlsn: [%lu][%lu]\n",
+	    (u_long)argp->lsn.file, (u_long)argp->lsn.offset);
+	printf("\tindx: %lu\n", (u_long)argp->indx);
+	printf("\tisdeleted: %lu\n", (u_long)argp->isdeleted);
+	printf("\torig: ");
+	for (i = 0; i < argp->orig.size; i++) {
+		c = ((char *)argp->orig.data)[i];
+		if (isprint(c) || c == 0xa)
+			putchar(c);
+		else
+			printf("%#x ", c);
+	}
+	printf("\n");
+	printf("\trepl: ");
+	for (i = 0; i < argp->repl.size; i++) {
+		c = ((char *)argp->repl.data)[i];
+		if (isprint(c) || c == 0xa)
+			putchar(c);
+		else
+			printf("%#x ", c);
+	}
+	printf("\n");
+	printf("\tprefix: %lu\n", (u_long)argp->prefix);
+	printf("\tsuffix: %lu\n", (u_long)argp->suffix);
+	printf("\n");
+	__db_free(argp);
+	return (0);
+}
+
+/*
+ * PUBLIC: int __bam_repl_read __P((void *, __bam_repl_args **));
+ */
+int
+__bam_repl_read(recbuf, argpp)
+	void *recbuf;
+	__bam_repl_args **argpp;
+{
+	__bam_repl_args *argp;
+	u_int8_t *bp;
+
+	argp = (__bam_repl_args *)__db_malloc(sizeof(__bam_repl_args) +
+	    sizeof(DB_TXN));
+	if (argp == NULL)
+		return (ENOMEM);
+	argp->txnid = (DB_TXN *)&argp[1];
+	bp = recbuf;
+	memcpy(&argp->type, bp, sizeof(argp->type));
+	bp += sizeof(argp->type);
+	memcpy(&argp->txnid->txnid,  bp, sizeof(argp->txnid->txnid));
+	bp += sizeof(argp->txnid->txnid);
+	memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN));
+	bp += sizeof(DB_LSN);
+	memcpy(&argp->fileid, bp, sizeof(argp->fileid));
+	bp += sizeof(argp->fileid);
+	memcpy(&argp->pgno, bp, sizeof(argp->pgno));
+	bp += sizeof(argp->pgno);
+	memcpy(&argp->lsn, bp,  sizeof(argp->lsn));
+	bp += sizeof(argp->lsn);
+	memcpy(&argp->indx, bp, sizeof(argp->indx));
+	bp += sizeof(argp->indx);
+	memcpy(&argp->isdeleted, bp, sizeof(argp->isdeleted));
+	bp += sizeof(argp->isdeleted);
+	memcpy(&argp->orig.size, bp, sizeof(u_int32_t));
+	bp += sizeof(u_int32_t);
+	argp->orig.data = bp;
+	bp += argp->orig.size;
+	memcpy(&argp->repl.size, bp, sizeof(u_int32_t));
+	bp += sizeof(u_int32_t);
+	argp->repl.data = bp;
+	bp += argp->repl.size;
+	memcpy(&argp->prefix, bp, sizeof(argp->prefix));
+	bp += sizeof(argp->prefix);
+	memcpy(&argp->suffix, bp, sizeof(argp->suffix));
+	bp += sizeof(argp->suffix);
+	*argpp = argp;
+	return (0);
+}
+
+/*
  * PUBLIC: int __bam_init_print __P((DB_ENV *));
  */
 int
@@ -1241,6 +1467,9 @@ __bam_init_print(dbenv)
 	if ((ret = __db_add_recovery(dbenv,
 	    __bam_cdel_print, DB_bam_cdel)) != 0)
 		return (ret);
+	if ((ret = __db_add_recovery(dbenv,
+	    __bam_repl_print, DB_bam_repl)) != 0)
+		return (ret);
 	return (0);
 }
 
@@ -1274,6 +1503,9 @@ __bam_init_recover(dbenv)
 	if ((ret = __db_add_recovery(dbenv,
 	    __bam_cdel_recover, DB_bam_cdel)) != 0)
 		return (ret);
+	if ((ret = __db_add_recovery(dbenv,
+	    __bam_repl_recover, DB_bam_repl)) != 0)
+		return (ret);
 	return (0);
 }