about summary refs log tree commit diff
path: root/db2
diff options
context:
space:
mode:
Diffstat (limited to 'db2')
-rw-r--r--db2/Makefile4
-rw-r--r--db2/btree/bt_cursor.c152
-rw-r--r--db2/btree/bt_delete.c27
-rw-r--r--db2/btree/bt_open.c10
-rw-r--r--db2/btree/bt_put.c461
-rw-r--r--db2/btree/bt_rec.c116
-rw-r--r--db2/btree/bt_recno.c39
-rw-r--r--db2/btree/bt_search.c4
-rw-r--r--db2/btree/bt_split.c44
-rw-r--r--db2/btree/bt_stat.c11
-rw-r--r--db2/btree/btree.src25
-rw-r--r--db2/btree/btree_auto.c294
-rw-r--r--db2/common/db_appinit.c40
-rw-r--r--db2/common/db_apprec.c36
-rw-r--r--db2/common/db_byteorder.c14
-rw-r--r--db2/common/db_region.c103
-rw-r--r--db2/db.h135
-rw-r--r--db2/db/db.c30
-rw-r--r--db2/db/db.src10
-rw-r--r--db2/db/db_auto.c75
-rw-r--r--db2/db/db_dispatch.c14
-rw-r--r--db2/db/db_dup.c4
-rw-r--r--db2/db/db_overflow.c32
-rw-r--r--db2/db/db_pr.c9
-rw-r--r--db2/db/db_rec.c21
-rw-r--r--db2/db/db_ret.c8
-rw-r--r--db2/db/db_thread.c10
-rw-r--r--db2/db185/db185.c18
-rw-r--r--db2/db_int.h28
-rw-r--r--db2/hash/hash.c135
-rw-r--r--db2/hash/hash.src24
-rw-r--r--db2/hash/hash_auto.c274
-rw-r--r--db2/hash/hash_dup.c4
-rw-r--r--db2/hash/hash_page.c101
-rw-r--r--db2/hash/hash_rec.c146
-rw-r--r--db2/include/btree_auto.h19
-rw-r--r--db2/include/btree_ext.h17
-rw-r--r--db2/include/clib_ext.h2
-rw-r--r--db2/include/common_ext.h3
-rw-r--r--db2/include/db.h.src135
-rw-r--r--db2/include/db_am.h6
-rw-r--r--db2/include/db_auto.h1
-rw-r--r--db2/include/db_cxx.h19
-rw-r--r--db2/include/db_ext.h6
-rw-r--r--db2/include/db_int.h.src28
-rw-r--r--db2/include/hash.h6
-rw-r--r--db2/include/hash_auto.h18
-rw-r--r--db2/include/hash_ext.h15
-rw-r--r--db2/include/lock.h5
-rw-r--r--db2/include/lock_ext.h2
-rw-r--r--db2/include/log.h19
-rw-r--r--db2/include/log_ext.h2
-rw-r--r--db2/include/mp.h41
-rw-r--r--db2/include/mp_ext.h2
-rw-r--r--db2/include/mutex_ext.h4
-rw-r--r--db2/include/os_ext.h24
-rw-r--r--db2/include/os_func.h76
-rw-r--r--db2/include/txn.h8
-rw-r--r--db2/include/txn_ext.h2
-rw-r--r--db2/lock/lock.c16
-rw-r--r--db2/lock/lock_deadlock.c108
-rw-r--r--db2/log/log.c56
-rw-r--r--db2/log/log_archive.c38
-rw-r--r--db2/log/log_auto.c16
-rw-r--r--db2/log/log_findckp.c12
-rw-r--r--db2/log/log_get.c27
-rw-r--r--db2/log/log_put.c210
-rw-r--r--db2/log/log_rec.c12
-rw-r--r--db2/log/log_register.c12
-rw-r--r--db2/mp/mp_bh.c54
-rw-r--r--db2/mp/mp_fget.c41
-rw-r--r--db2/mp/mp_fopen.c34
-rw-r--r--db2/mp/mp_fput.c26
-rw-r--r--db2/mp/mp_fset.c18
-rw-r--r--db2/mp/mp_open.c9
-rw-r--r--db2/mp/mp_pr.c48
-rw-r--r--db2/mp/mp_region.c33
-rw-r--r--db2/mp/mp_sync.c393
-rw-r--r--db2/mutex/mutex.c21
-rw-r--r--db2/os/db_os_abs.c82
-rw-r--r--db2/os/db_os_dir.c138
-rw-r--r--db2/os/db_os_lseek.c60
-rw-r--r--db2/os/db_os_mmap.c106
-rw-r--r--db2/os/os_abs.c31
-rw-r--r--db2/os/os_dir.c100
-rw-r--r--db2/os/os_fid.c (renamed from db2/os/db_os_fid.c)55
-rw-r--r--db2/os/os_fsync.c34
-rw-r--r--db2/os/os_func.c153
-rw-r--r--db2/os/os_map.c71
-rw-r--r--db2/os/os_oflags.c48
-rw-r--r--db2/os/os_open.c (renamed from db2/os/db_os_open.c)58
-rw-r--r--db2/os/os_rpath.c42
-rw-r--r--db2/os/os_rw.c (renamed from db2/os/db_os_rw.c)7
-rw-r--r--db2/os/os_seek.c42
-rw-r--r--db2/os/os_sleep.c (renamed from db2/os/db_os_sleep.c)16
-rw-r--r--db2/os/os_stat.c (renamed from db2/os/db_os_stat.c)20
-rw-r--r--db2/os/os_unlink.c (renamed from db2/os/db_os_unlink.c)5
-rw-r--r--db2/progs/db_deadlock/db_deadlock.c24
-rw-r--r--db2/progs/db_dump/db_dump.c2
-rw-r--r--db2/progs/db_load/db_load.c8
-rw-r--r--db2/progs/db_recover/db_recover.c4
-rw-r--r--db2/progs/db_stat/db_stat.c182
-rw-r--r--db2/txn/txn.c31
-rw-r--r--db2/txn/txn_auto.c16
-rw-r--r--db2/txn/txn_rec.c6
105 files changed, 3593 insertions, 1850 deletions
diff --git a/db2/Makefile b/db2/Makefile
index e6b35aa51b..8e5cea7b17 100644
--- a/db2/Makefile
+++ b/db2/Makefile
@@ -58,8 +58,8 @@ libdb-routines := bt_close bt_compare bt_conv bt_cursor bt_delete \
 	bt_split bt_stat btree_auto db db_appinit db_apprec \
 	db_auto \
 	db_byteorder db_conv db_dispatch db_dup db_err db_log2 \
-	db_os_abs db_os_dir db_os_fid db_os_lseek db_os_mmap \
-	db_os_open db_os_rw db_os_sleep db_os_stat db_os_unlink \
+	os_abs os_dir os_fid os_fsync os_func os_map os_oflags \
+	os_open os_rpath os_rw os_seek os_sleep os_stat os_unlink \
 	db_overflow db_pr db_rec db_region db_ret db_salloc \
 	db_shash db_thread hash hash_auto hash_conv hash_debug \
 	hash_dup hash_func hash_page hash_rec hash_stat lock \
diff --git a/db2/btree/bt_cursor.c b/db2/btree/bt_cursor.c
index a1266bcd3c..e5f3faeb70 100644
--- a/db2/btree/bt_cursor.c
+++ b/db2/btree/bt_cursor.c
@@ -8,7 +8,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)bt_cursor.c	10.33 (Sleepycat) 9/24/97";
+static const char sccsid[] = "@(#)bt_cursor.c	10.35 (Sleepycat) 10/25/97";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -62,10 +62,10 @@ __bam_cursor(dbp, txn, dbcp)
 
 	DEBUG_LWRITE(dbp, txn, "bam_cursor", NULL, NULL, 0);
 
-	if ((dbc = (DBC *)calloc(1, sizeof(DBC))) == NULL)
+	if ((dbc = (DBC *)__db_calloc(1, sizeof(DBC))) == NULL)
 		return (ENOMEM);
-	if ((cp = (CURSOR *)calloc(1, sizeof(CURSOR))) == NULL) {
-		free(dbc);
+	if ((cp = (CURSOR *)__db_calloc(1, sizeof(CURSOR))) == NULL) {
+		__db_free(dbc);
 		return (ENOMEM);
 	}
 
@@ -474,7 +474,7 @@ __bam_c_rget(dbp, cp, key, data, flags)
 	__bam_stkrel(dbp);
 
 err:	(void)memp_fput(dbp->mpf, cp->page, 0);
-	free(dbt.data);
+	__db_free(dbt.data);
 	return (ret);
 }
 
@@ -1422,7 +1422,7 @@ __bam_c_physdel(dbp, cp, h)
 	DB_LOCK lock;
 	db_indx_t indx;
 	db_pgno_t pgno, next_pgno, prev_pgno;
-	int local, ret;
+	int local, normal, ret;
 
 	t = dbp->internal;
 	ret = 0;
@@ -1457,51 +1457,65 @@ __bam_c_physdel(dbp, cp, h)
 		local = 0;
 
 	/*
-	 * If we're deleting a duplicate entry, call the common code to do
-	 * the work.
+	 * If we're deleting a duplicate entry and there are other duplicate
+	 * entries remaining, call the common code to do the work and fix up
+	 * the parent page as necessary.  Otherwise, do a normal btree delete.
+	 *
+	 * There are 5 possible cases:
+	 *
+	 * 1. It's not a duplicate item: do a normal btree delete.
+	 * 2. It's a duplicate item:
+	 *	2a: We delete an item from a page of duplicates, but there are
+	 *	    more items on the page.
+	 *      2b: We delete the last item from a page of duplicates, deleting
+	 *	    the last duplicate.
+	 *      2c: We delete the last item from a page of duplicates, but there
+	 *	    is a previous page of duplicates.
+	 *      2d: We delete the last item from a page of duplicates, but there
+	 *	    is a following page of duplicates.
+	 *
+	 * In the case of:
+	 *
+	 *  1: There's nothing further to do.
+	 * 2a: There's nothing further to do.
+	 * 2b: Do the normal btree delete instead of a duplicate delete, as
+	 *     that deletes both the duplicate chain and the parent page's
+	 *     entry.
+	 * 2c: There's nothing further to do.
+	 * 2d: Delete the duplicate, and update the parent page's entry.
 	 */
 	if (TYPE(h) == P_DUPLICATE) {
 		pgno = PGNO(h);
 		prev_pgno = PREV_PGNO(h);
 		next_pgno = NEXT_PGNO(h);
-		if ((ret = __db_drem(dbp, &h, indx, __bam_free)) != 0)
-			goto err;
 
-		/*
-		 * There are 4 cases:
-		 *
-		 * 1. We removed an item on a page, but there are more items
-		 *    on the page.
-		 * 2. We removed the last item on a page, removing the last
-		 *    duplicate.
-		 * 3. We removed the last item on a page, but there is a
-		 *    following page of duplicates.
-		 * 4. We removed the last item on a page, but there is a
-		 *    previous page of duplicates.
-		 *
-		 * In case 1, h != NULL, h->pgno == pgno
-		 * In case 2, h == NULL,
-		 *    prev_pgno == PGNO_INVALID, next_pgno == PGNO_INVALID
-		 * In case 3, h != NULL, next_pgno != PGNO_INVALID
-		 * In case 4, h == NULL, prev_pgno != PGNO_INVALID
-		 *
-		 * In case 1, there's nothing else to do.
-		 * In case 2, remove the entry from the parent page.
-		 * In case 3 or 4, if the deleted page was the first in a chain
-		 *    of duplicate pages, update the parent page's entry.
-		 *
-		 * Test:
-		 *	If there were previous pages of duplicates or we didn't
-		 *	empty the current page of duplicates, we don't need to
-		 *	touch the parent page.
-		 */
-		if (prev_pgno != PGNO_INVALID || (h != NULL && pgno == h->pgno))
-			goto done;
+		if (NUM_ENT(h) == 1 &&
+		    prev_pgno == PGNO_INVALID && next_pgno == PGNO_INVALID)
+			normal = 1;
+		else {
+			normal = 0;
 
-		/*
-		 * Release any page we're holding and the lock on the deleted
-		 * page.
-		 */
+			/* Delete the duplicate. */
+			if ((ret = __db_drem(dbp, &h, indx, __bam_free)) != 0)
+				goto err;
+
+			/*
+			 * 2a: h != NULL, h->pgno == pgno
+			 * 2b: We don't reach this clause, as the above test
+			 *     was true.
+			 * 2c: h == NULL, prev_pgno != PGNO_INVALID
+			 * 2d: h != NULL, next_pgno != PGNO_INVALID
+			 *
+			 * Test for 2a and 2c: if we didn't empty the current
+			 * page or there was a previous page of duplicates, we
+			 * don't need to touch the parent page.
+			 */
+			if ((h != NULL && pgno == h->pgno) ||
+			    prev_pgno != PGNO_INVALID)
+				goto done;
+		}
+
+		/* Release any page we're holding and its lock. */
 		if (local) {
 			if (h != NULL)
 				(void)memp_fput(dbp->mpf, h, 0);
@@ -1519,37 +1533,33 @@ __bam_c_physdel(dbp, cp, h)
 		}
 		local = 1;
 
-		/*
-		 * If we deleted the last duplicate, we can fall out and do a
-		 * normal btree delete in the context of the parent page.  If
-		 * not, we have to update the parent's page.
-		 */
+		/* Switch to the parent page's entry. */
 		indx = cp->indx;
-		if (next_pgno != PGNO_INVALID) {
-			/*
-			 * Copy, delete, update and re-insert the parent page's
-			 * entry.
-			 */
-			bo = *GET_BOVERFLOW(h, indx);
-			(void)__db_ditem(dbp, h, indx, BOVERFLOW_SIZE);
-			bo.pgno = next_pgno;
-			memset(&dbt, 0, sizeof(dbt));
-			dbt.data = &bo;
-			dbt.size = BOVERFLOW_SIZE;
-			(void)__db_pitem(dbp,
-			    h, indx, BOVERFLOW_SIZE, &dbt, NULL);
-
-			/* Discard the parent page. */
-			(void)memp_fput(dbp->mpf, h, 0);
-			(void)__BT_TLPUT(dbp, lock);
-			local = 0;
+		if (normal)
+			goto btd;
 
-			goto done;
-		}
+		/*
+		 * Copy, delete, update, add-back the parent page's data entry.
+		 *
+		 * XXX
+		 * This may be a performance/logging problem.  We should add a
+		 * log message that simply logs/updates arbitrary bytes on a
+		 * page, and use it instead of doing a delete/add pair.
+		 */
+		indx += O_INDX;
+		bo = *GET_BOVERFLOW(h, indx);
+		(void)__db_ditem(dbp, h, indx, BOVERFLOW_SIZE);
+		bo.pgno = next_pgno;
+		memset(&dbt, 0, sizeof(dbt));
+		dbt.data = &bo;
+		dbt.size = BOVERFLOW_SIZE;
+		(void)__db_pitem(dbp, h, indx, BOVERFLOW_SIZE, &dbt, NULL);
+		(void)memp_fset(dbp->mpf, h, DB_MPOOL_DIRTY);
+		goto done;
 	}
 
 	/* Otherwise, do a normal btree delete. */
-	if ((ret = __bam_ditem(dbp, h, indx)) != 0)
+btd:	if ((ret = __bam_ditem(dbp, h, indx)) != 0)
 		goto err;
 	if ((ret = __bam_ditem(dbp, h, indx)) != 0)
 		goto err;
@@ -1584,7 +1594,7 @@ __bam_c_physdel(dbp, cp, h)
 		}
 
 		ret = __bam_dpage(dbp, &dbt);
-		free(dbt.data);
+		__db_free(dbt.data);
 	}
 
 err:
diff --git a/db2/btree/bt_delete.c b/db2/btree/bt_delete.c
index 98929540e4..9593d0109c 100644
--- a/db2/btree/bt_delete.c
+++ b/db2/btree/bt_delete.c
@@ -47,7 +47,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)bt_delete.c	10.21 (Sleepycat) 9/3/97";
+static const char sccsid[] = "@(#)bt_delete.c	10.22 (Sleepycat) 11/2/97";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -266,9 +266,10 @@ __bam_ditem(dbp, h, indx)
 		case B_DUPLICATE:
 		case B_OVERFLOW:
 			nbytes = BINTERNAL_SIZE(bi->len);
+			bo = (BOVERFLOW *)bi->data;
 			goto offpage;
 		case B_KEYDATA:
-			nbytes = BKEYDATA_SIZE(bi->len);
+			nbytes = BINTERNAL_SIZE(bi->len);
 			break;
 		default:
 			return (__db_pgfmt(dbp, h->pgno));
@@ -289,7 +290,7 @@ __bam_ditem(dbp, h, indx)
 			if (indx > 0 && h->inp[indx] == h->inp[indx - P_INDX])
 				return (__bam_adjindx(dbp,
 				    h, indx, indx - P_INDX, 0));
-			if (indx < (u_int32_t)(NUM_ENT(h) - P_INDX) &&
+			if (indx + P_INDX < (u_int32_t)NUM_ENT(h) &&
 			    h->inp[indx] == h->inp[indx + P_INDX])
 				return (__bam_adjindx(dbp,
 				    h, indx, indx + O_INDX, 0));
@@ -301,9 +302,9 @@ __bam_ditem(dbp, h, indx)
 		case B_DUPLICATE:
 		case B_OVERFLOW:
 			nbytes = BOVERFLOW_SIZE;
+			bo = GET_BOVERFLOW(h, indx);
 
 offpage:		/* Delete duplicate/offpage chains. */
-			bo = GET_BOVERFLOW(h, indx);
 			if (B_TYPE(bo->type) == B_DUPLICATE) {
 				if ((ret =
 				    __db_ddup(dbp, bo->pgno, __bam_free)) != 0)
@@ -523,7 +524,7 @@ __bam_dpages(dbp, t)
 
 	/*
 	 * If we deleted the next-to-last item from the root page, the tree
-	 * has collapsed a level.  Try and write lock the remaining root + 1
+	 * can collapse a level.  Try and write lock the remaining root + 1
 	 * page and copy it onto the root page.  If we can't get the lock,
 	 * that's okay, the tree just stays a level deeper than we'd like.
 	 */
@@ -546,8 +547,8 @@ __bam_dpages(dbp, t)
 			b.data = P_ENTRY(epg->page, 0);
 			b.size = BINTERNAL_SIZE(((BINTERNAL *)b.data)->len);
 			__bam_rsplit_log(dbp->dbenv->lg_info, dbp->txn,
-			   &h->lsn, 0, dbp->log_fileid, h->pgno, &a, &b,
-			   &epg->page->lsn);
+			   &h->lsn, 0, dbp->log_fileid, h->pgno, &a,
+			   RE_NREC(epg->page), &b, &epg->page->lsn);
 		}
 
 		/*
@@ -565,15 +566,19 @@ __bam_dpages(dbp, t)
 		if (TYPE(h) == P_IRECNO ||
 		    (TYPE(h) == P_IBTREE && F_ISSET(dbp, DB_BT_RECNUM)))
 			RE_NREC_SET(epg->page, rcnt);
+		(void)memp_fset(dbp->mpf, epg->page, DB_MPOOL_DIRTY);
 
-		/* Free the last page in that level of the btree. */
-		++t->lstat.bt_freed;
+		/*
+		 * Free the last page in that level of the btree and discard
+		 * the lock.  (The call to __bam_free discards our reference
+		 * to the page.)
+		 */
 		(void)__bam_free(dbp, h);
+		(void)__BT_TLPUT(dbp, lock);
+		++t->lstat.bt_freed;
 
 		/* Adjust the cursors. */
 		__bam_ca_move(dbp, t, h->pgno, PGNO_ROOT);
-
-		(void)__BT_TLPUT(dbp, lock);
 	}
 
 	/* Release the top page in the subtree. */
diff --git a/db2/btree/bt_open.c b/db2/btree/bt_open.c
index 354888c6c2..2361f69a3e 100644
--- a/db2/btree/bt_open.c
+++ b/db2/btree/bt_open.c
@@ -47,7 +47,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)bt_open.c	10.20 (Sleepycat) 8/19/97";
+static const char sccsid[] = "@(#)bt_open.c	10.21 (Sleepycat) 10/25/97";
 #endif /* not lint */
 
 /*
@@ -95,7 +95,7 @@ __bam_open(dbp, type, dbinfo)
 	int ret;
 
 	/* Allocate the btree internal structure. */
-	if ((t = (BTREE *)calloc(1, sizeof(BTREE))) == NULL)
+	if ((t = (BTREE *)__db_calloc(1, sizeof(BTREE))) == NULL)
 		return (ENOMEM);
 
 	t->bt_sp = t->bt_csp = t->bt_stack;
@@ -179,7 +179,7 @@ einval:	ret = EINVAL;
 err:	if (t != NULL) {
 		/* If we allocated room for key/data return, discard it. */
 		if (t->bt_rkey.data != NULL)
-			free(t->bt_rkey.data);
+			__db_free(t->bt_rkey.data);
 
 		FREE(t, sizeof(BTREE));
 	}
@@ -201,7 +201,7 @@ __bam_bdup(orig, new)
 
 	ot = orig->internal;
 
-	if ((t = (BTREE *)calloc(1, sizeof(*t))) == NULL)
+	if ((t = (BTREE *)__db_calloc(1, sizeof(*t))) == NULL)
 		return (ENOMEM);
 
 	/*
@@ -248,7 +248,7 @@ __bam_keyalloc(t)
 	 * Recno keys are always the same size, and we don't want to have
 	 * to check for space on each return.  Allocate it now.
 	 */
-	if ((t->bt_rkey.data = (void *)malloc(sizeof(db_recno_t))) == NULL)
+	if ((t->bt_rkey.data = (void *)__db_malloc(sizeof(db_recno_t))) == NULL)
 		return (ENOMEM);
 	t->bt_rkey.ulen = sizeof(db_recno_t);
 	return (0);
diff --git a/db2/btree/bt_put.c b/db2/btree/bt_put.c
index af09f76d41..b3d775bb0f 100644
--- a/db2/btree/bt_put.c
+++ b/db2/btree/bt_put.c
@@ -47,7 +47,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)bt_put.c	10.25 (Sleepycat) 9/17/97";
+static const char sccsid[] = "@(#)bt_put.c	10.31 (Sleepycat) 10/26/97";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -66,7 +66,10 @@ static const char sccsid[] = "@(#)bt_put.c	10.25 (Sleepycat) 9/17/97";
 static int __bam_fixed __P((BTREE *, DBT *));
 static int __bam_lookup __P((DB *, DBT *, int *));
 static int __bam_ndup __P((DB *, PAGE *, u_int32_t));
-static int __bam_partial __P((DB *, DBT *, PAGE *, u_int32_t));
+static int __bam_ovput __P((DB *, PAGE *, u_int32_t, DBT *));
+static int __bam_partial __P((DB *, DBT *, PAGE *, u_int32_t, u_int32_t));
+static u_int32_t
+	   __bam_partsize __P((DB *, DBT *, PAGE *, u_int32_t));
 
 /*
  * __bam_put --
@@ -334,21 +337,6 @@ slow:	return (__bam_search(dbp, key, S_INSERT, 1, NULL, exactp));
 }
 
 /*
- * OVPUT --
- *	Copy an overflow item onto a page.
- */
-#undef	OVPUT
-#define	OVPUT(h, indx, bo) do {						\
-	DBT __hdr;							\
-	memset(&__hdr, 0, sizeof(__hdr));				\
-	__hdr.data = &bo;						\
-	__hdr.size = BOVERFLOW_SIZE;					\
-	if ((ret = __db_pitem(dbp,					\
-	    h, indx, BOVERFLOW_SIZE, &__hdr, NULL)) != 0)		\
-		return (ret);						\
-} while (0)
-
-/*
  * __bam_iitem --
  *	Insert an item into the tree.
  *
@@ -365,19 +353,18 @@ __bam_iitem(dbp, hp, indxp, key, data, op, flags)
 {
 	BTREE *t;
 	BKEYDATA *bk;
-	BOVERFLOW kbo, dbo;
 	DBT tdbt;
 	PAGE *h;
 	db_indx_t indx;
-	u_int32_t have_bytes, need_bytes, needed;
-	int bigkey, bigdata, dcopy, dupadjust, ret;
+	u_int32_t data_size, have_bytes, need_bytes, needed;
+	int bigkey, bigdata, dupadjust, replace, ret;
 
 	t = dbp->internal;
 	h = *hp;
 	indx = *indxp;
 
-	dupadjust = 0;
 	bk = NULL;			/* XXX: Shut the compiler up. */
+	dupadjust = replace = 0;
 
 	/*
 	 * If it's a page of duplicates, call the common code to do the work.
@@ -385,7 +372,7 @@ __bam_iitem(dbp, hp, indxp, key, data, op, flags)
 	 * !!!
 	 * Here's where the hp and indxp are important.  The duplicate code
 	 * may decide to rework/rearrange the pages and indices we're using,
-	 * so the caller must understand that the stack has to change.
+	 * so the caller must understand that the page stack may change.
 	 */
 	if (TYPE(h) == P_DUPLICATE) {
 		/* Adjust the index for the new item if it's a DB_AFTER op. */
@@ -401,24 +388,7 @@ __bam_iitem(dbp, hp, indxp, key, data, op, flags)
 		return (__db_dput(dbp, data, hp, indxp, __bam_new));
 	}
 
-	/*
-	 * XXX
-	 * Handle partial puts.
-	 *
-	 * This is truly awful from a performance standput.  We don't optimize
-	 * for partial puts at all, we delete the record and add it back in,
-	 * regardless of size or if we're simply overwriting current data.
-	 * The hash access method does this a lot better than we do, and we're
-	 * eventually going to have to fix it.
-	 */
-	if (F_ISSET(data, DB_DBT_PARTIAL)) {
-		tdbt = *data;
-		if ((ret = __bam_partial(dbp, &tdbt, h, indx)) != 0)
-			return (ret);
-		data = &tdbt;
-	}
-
-	/* If it's a short fixed-length record, fix it up. */
+	/* Handle fixed-length records: build the real record. */
 	if (F_ISSET(dbp, DB_RE_FIXEDLEN) && data->size != t->bt_recno->re_len) {
 		tdbt = *data;
 		if ((ret = __bam_fixed(t, &tdbt)) != 0)
@@ -427,30 +397,15 @@ __bam_iitem(dbp, hp, indxp, key, data, op, flags)
 	}
 
 	/*
-	 * If the key or data item won't fit on a page, store it in the
-	 * overflow pages.
-	 *
-	 * !!!
-	 * From this point on, we have to recover the allocated overflow
-	 * pages on error.
+	 * Figure out how much space the data will take, including if it's a
+	 * partial record.  If either of the key or data items won't fit on
+	 * a page, we'll have to store them on overflow pages.
 	 */
-	bigkey = bigdata = 0;
-	if (LF_ISSET(BI_NEWKEY) && key->size > t->bt_ovflsize) {
-		B_TSET(kbo.type, B_OVERFLOW, 0);
-		kbo.tlen = key->size;
-		if ((ret = __db_poff(dbp, key, &kbo.pgno, __bam_new)) != 0)
-			goto err;
-		bigkey = 1;
-	}
-	if (data->size > t->bt_ovflsize) {
-		B_TSET(dbo.type, B_OVERFLOW, 0);
-		dbo.tlen = data->size;
-		if ((ret = __db_poff(dbp, data, &dbo.pgno, __bam_new)) != 0)
-			goto err;
-		bigdata = 1;
-	}
+	bigkey = LF_ISSET(BI_NEWKEY) && key->size > t->bt_ovflsize;
+	data_size = F_ISSET(data, DB_DBT_PARTIAL) ?
+	    __bam_partsize(dbp, data, h, indx) : data->size;
+	bigdata = data_size > t->bt_ovflsize;
 
-	dcopy = 0;
 	needed = 0;
 	if (LF_ISSET(BI_NEWKEY)) {
 		/* If BI_NEWKEY is set we're adding a new key and data pair. */
@@ -461,7 +416,7 @@ __bam_iitem(dbp, hp, indxp, key, data, op, flags)
 		if (bigdata)
 			needed += BOVERFLOW_PSIZE;
 		else
-			needed += BKEYDATA_PSIZE(data->size);
+			needed += BKEYDATA_PSIZE(data_size);
 	} else {
 		/*
 		 * We're either overwriting the data item of a key/data pair
@@ -482,16 +437,8 @@ __bam_iitem(dbp, hp, indxp, key, data, op, flags)
 		if (bigdata)
 			need_bytes += BOVERFLOW_PSIZE;
 		else
-			need_bytes += BKEYDATA_PSIZE(data->size);
+			need_bytes += BKEYDATA_PSIZE(data_size);
 
-		/*
-		 * If we're overwriting a data item, we copy it if it's not a
-		 * special record type and it's the same size (including any
-		 * alignment) and do a delete/insert otherwise.
-		 */
-		if (op == DB_CURRENT && !bigdata &&
-		    B_TYPE(bk->type) == B_KEYDATA && have_bytes == need_bytes)
-			dcopy = 1;
 		if (have_bytes < need_bytes)
 			needed += need_bytes - have_bytes;
 	}
@@ -505,9 +452,15 @@ __bam_iitem(dbp, hp, indxp, key, data, op, flags)
 	 * check in the btree split code, so we don't undo it there!?!?
 	 */
 	if (P_FREESPACE(h) < needed ||
-	    (t->bt_maxkey != 0 && NUM_ENT(h) > t->bt_maxkey)) {
-		ret = DB_NEEDSPLIT;
-		goto err;
+	    (t->bt_maxkey != 0 && NUM_ENT(h) > t->bt_maxkey))
+		return (DB_NEEDSPLIT);
+
+	/* Handle partial puts: build the real record. */
+	if (F_ISSET(data, DB_DBT_PARTIAL)) {
+		tdbt = *data;
+		if ((ret = __bam_partial(dbp, &tdbt, h, indx, data_size)) != 0)
+			return (ret);
+		data = &tdbt;
 	}
 
 	/*
@@ -515,10 +468,10 @@ __bam_iitem(dbp, hp, indxp, key, data, op, flags)
 	 *
 	 * 1. Append a new key/data pair.
 	 * 2. Insert a new key/data pair.
-	 * 3. Copy the data item.
-	 * 4. Delete/insert the data item.
-	 * 5. Append a new data item.
-	 * 6. Insert a new data item.
+	 * 3. Append a new data item (a new duplicate).
+	 * 4. Insert a new data item (a new duplicate).
+	 * 5. Overflow item: delete and re-add the data item.
+	 * 6. Replace the data item.
 	 */
 	if (LF_ISSET(BI_NEWKEY)) {
 		switch (op) {
@@ -533,42 +486,17 @@ __bam_iitem(dbp, hp, indxp, key, data, op, flags)
 		}
 
 		/* Add the key. */
-		if (bigkey)
-			OVPUT(h, indx, kbo);
-		else {
-			DBT __data;
-			memset(&__data, 0, sizeof(__data));
-			__data.data = key->data;
-			__data.size = key->size;
+		if (bigkey) {
+			if ((ret = __bam_ovput(dbp, h, indx, key)) != 0)
+				return (ret);
+		} else
 			if ((ret = __db_pitem(dbp, h, indx,
-			    BKEYDATA_SIZE(key->size), NULL, &__data)) != 0)
-				goto err;
-		}
+			    BKEYDATA_SIZE(key->size), NULL, key)) != 0)
+				return (ret);
 		++indx;
 	} else {
 		switch (op) {
-		case DB_CURRENT:	/* 3. Copy the data item. */
-			/*
-			 * If we're not logging and it's possible, overwrite
-			 * the current item.
-			 *
-			 * XXX
-			 * We should add a separate logging message so that
-			 * we can do this anytime it's possible, including
-			 * for partial record puts.
-			 */
-			if (dcopy && !DB_LOGGING(dbp)) {
-				bk->len = data->size;
-				memcpy(bk->data, data->data, data->size);
-				goto done;
-			}
-					/* 4. Delete/insert the data item. */
-			if (TYPE(h) == P_LBTREE)
-				++indx;
-			if ((ret = __bam_ditem(dbp, h, indx)) != 0)
-				goto err;
-			break;
-		case DB_AFTER:		/* 5. Append a new data item. */
+		case DB_AFTER:		/* 3. Append a new data item. */
 			if (TYPE(h) == P_LBTREE) {
 				/*
 				 * Adjust the cursor and copy in the key for
@@ -576,7 +504,7 @@ __bam_iitem(dbp, hp, indxp, key, data, op, flags)
 				 */
 				if ((ret = __bam_adjindx(dbp,
 				    h, indx + P_INDX, indx, 1)) != 0)
-					goto err;
+					return (ret);
 
 				indx += 3;
 				dupadjust = 1;
@@ -589,7 +517,7 @@ __bam_iitem(dbp, hp, indxp, key, data, op, flags)
 				*indxp += 1;
 			}
 			break;
-		case DB_BEFORE:		/* 6. Insert a new data item. */
+		case DB_BEFORE:		/* 4. Insert a new data item. */
 			if (TYPE(h) == P_LBTREE) {
 				/*
 				 * Adjust the cursor and copy in the key for
@@ -597,43 +525,62 @@ __bam_iitem(dbp, hp, indxp, key, data, op, flags)
 				 */
 				if ((ret =
 				    __bam_adjindx(dbp, h, indx, indx, 1)) != 0)
-					goto err;
+					return (ret);
 
 				++indx;
 				dupadjust = 1;
 			} else
 				__bam_ca_di(dbp, h->pgno, indx, 1);
 			break;
+		case DB_CURRENT:
+			if (TYPE(h) == P_LBTREE)
+				++indx;
+
+			/*
+			 * 5. Delete/re-add the data item.
+			 *
+			 * If we're dealing with offpage items, we have to
+			 * delete and then re-add the item.
+			 */
+			if (bigdata || B_TYPE(bk->type) == B_OVERFLOW) {
+				if ((ret = __bam_ditem(dbp, h, indx)) != 0)
+					return (ret);
+				break;
+			}
+
+			/* 6. Replace the data item. */
+			replace = 1;
+			break;
 		default:
 			abort();
 		}
 	}
 
 	/* Add the data. */
-	if (bigdata)
-		OVPUT(h, indx, dbo);
-	else {
+	if (bigdata) {
+		if ((ret = __bam_ovput(dbp, h, indx, data)) != 0)
+			return (ret);
+	} else {
 		BKEYDATA __bk;
-		DBT __hdr, __data;
-		memset(&__data, 0, sizeof(__data));
-		__data.data = data->data;
-		__data.size = data->size;
+		DBT __hdr;
 
 		if (LF_ISSET(BI_DELETED)) {
 			B_TSET(__bk.type, B_KEYDATA, 1);
-			__bk.len = __data.size;
+			__bk.len = data->size;
 			__hdr.data = &__bk;
 			__hdr.size = SSZA(BKEYDATA, data);
 			ret = __db_pitem(dbp, h, indx,
-			    BKEYDATA_SIZE(__data.size), &__hdr, &__data);
-		} else
+			    BKEYDATA_SIZE(data->size), &__hdr, data);
+		} else if (replace)
+			ret = __bam_ritem(dbp, h, indx, data);
+		else
 			ret = __db_pitem(dbp, h, indx,
-			    BKEYDATA_SIZE(data->size), NULL, &__data);
+			    BKEYDATA_SIZE(data->size), NULL, data);
 		if (ret != 0)
-			goto err;
+			return (ret);
 	}
 
-done:	++t->lstat.bt_added;
+	++t->lstat.bt_added;
 
 	ret = memp_fset(dbp->mpf, h, DB_MPOOL_DIRTY);
 
@@ -645,22 +592,206 @@ done:	++t->lstat.bt_added;
 	if (dupadjust && P_FREESPACE(h) <= dbp->pgsize / 2) {
 		--indx;
 		if ((ret = __bam_ndup(dbp, h, indx)) != 0)
-			goto err;
+			return (ret);
 	}
 
 	if (t->bt_recno != NULL)
 		F_SET(t->bt_recno, RECNO_MODIFIED);
 
-	if (0) {
-err:		if (bigkey)
-			(void)__db_doff(dbp, kbo.pgno, __bam_free);
-		if (bigdata)
-			(void)__db_doff(dbp, dbo.pgno, __bam_free);
-	}
 	return (ret);
 }
 
 /*
+ * __bam_partsize --
+ *	Figure out how much total space a partial data item will take.
+ */
+static u_int32_t
+__bam_partsize(dbp, data, h, indx)
+	DB *dbp;
+	DBT *data;
+	PAGE *h;
+	u_int32_t indx;
+{
+	BKEYDATA *bk;
+	u_int32_t nbytes;
+
+	/*
+	 * Figure out how much total space we'll need.  If the record doesn't
+	 * already exist, it's simply the data we're provided.
+	 */
+	if (indx >= NUM_ENT(h))
+		return (data->doff + data->size);
+
+	/*
+	 * Otherwise, it's the data provided plus any already existing data
+	 * that we're not replacing.
+	 */
+	bk = GET_BKEYDATA(h, indx + (TYPE(h) == P_LBTREE ? O_INDX : 0));
+	nbytes =
+	    B_TYPE(bk->type) == B_OVERFLOW ? ((BOVERFLOW *)bk)->tlen : bk->len;
+
+	/*
+	 * There are really two cases here:
+	 *
+	 * Case 1: We are replacing some bytes that do not exist (i.e., they
+	 * are past the end of the record).  In this case the number of bytes
+	 * we are replacing is irrelevant and all we care about is how many
+	 * bytes we are going to add from offset.  So, the new record length
+	 * is going to be the size of the new bytes (size) plus wherever those
+	 * new bytes begin (doff).
+	 *
+	 * Case 2: All the bytes we are replacing exist.  Therefore, the new
+	 * size is the oldsize (nbytes) minus the bytes we are replacing (dlen)
+	 * plus the bytes we are adding (size).
+	 */
+	if (nbytes < data->doff + data->dlen)		/* Case 1 */
+		return (data->doff + data->size);
+
+	return (nbytes + data->size - data->dlen);	/* Case 2 */
+}
+
+/*
+ * OVPUT --
+ *	Copy an overflow item onto a page.
+ */
+#undef	OVPUT
+#define	OVPUT(h, indx, bo) do {						\
+	DBT __hdr;							\
+	memset(&__hdr, 0, sizeof(__hdr));				\
+	__hdr.data = &bo;						\
+	__hdr.size = BOVERFLOW_SIZE;					\
+	if ((ret = __db_pitem(dbp,					\
+	    h, indx, BOVERFLOW_SIZE, &__hdr, NULL)) != 0)		\
+		return (ret);						\
+} while (0)
+
+/*
+ * __bam_ovput --
+ *	Build an overflow item and put it on the page.
+ */
+static int
+__bam_ovput(dbp, h, indx, item)
+	DB *dbp;
+	PAGE *h;
+	u_int32_t indx;
+	DBT *item;
+{
+	BOVERFLOW bo;
+	int ret;
+
+	B_TSET(bo.type, B_OVERFLOW, 0);
+	bo.tlen = item->size;
+	if ((ret = __db_poff(dbp, item, &bo.pgno, __bam_new)) != 0)
+		return (ret);
+
+	OVPUT(h, indx, bo);
+
+	return (0);
+}
+
+/*
+ * __bam_ritem --
+ *	Replace an item on a page.
+ *
+ * PUBLIC: int __bam_ritem __P((DB *, PAGE *, u_int32_t, DBT *));
+ */
+int
+__bam_ritem(dbp, h, indx, data)
+	DB *dbp;
+	PAGE *h;
+	u_int32_t indx;
+	DBT *data;
+{
+	BKEYDATA *bk;
+	DBT orig, repl;
+	db_indx_t lo, ln, min, off, prefix, suffix;
+	int32_t nbytes;
+	int cnt, ret;
+	u_int8_t *p, *t;
+
+	/*
+	 * Replace a single item on a page.  The logic figuring out where
+	 * to insert and whether it fits is handled in the caller.  All we do
+	 * here is manage the page shuffling.
+	 */
+	bk = GET_BKEYDATA(h, indx);
+
+	/* Log the change. */
+	if (DB_LOGGING(dbp)) {
+		/*
+		 * We might as well check to see if the two data items share
+		 * a common prefix and suffix -- it can save us a lot of log
+		 * space if they're large.
+		 */
+		min = data->size < bk->len ? data->size : bk->len;
+		for (prefix = 0,
+		    p = bk->data, t = data->data;
+		    prefix < min && *p == *t; ++prefix, ++p, ++t)
+			;
+
+		min -= prefix;
+		for (suffix = 0,
+		    p = (u_int8_t *)bk->data + bk->len - 1,
+		    t = (u_int8_t *)data->data + data->size - 1;
+		    suffix < min && *p == *t; ++suffix, --p, --t)
+			;
+
+		/* We only log the parts of the items that have changed. */
+		orig.data = (u_int8_t *)bk->data + prefix;
+		orig.size = bk->len - (prefix + suffix);
+		repl.data = (u_int8_t *)data->data + prefix;
+		repl.size = data->size - (prefix + suffix);
+		if ((ret = __bam_repl_log(dbp->dbenv->lg_info, dbp->txn,
+		    &LSN(h), 0, dbp->log_fileid, PGNO(h), &LSN(h),
+		    (u_int32_t)indx, (u_int32_t)B_DISSET(bk->type),
+		    &orig, &repl, (u_int32_t)prefix, (u_int32_t)suffix)) != 0)
+			return (ret);
+	}
+
+	/*
+	 * Set references to the first in-use byte on the page and the
+	 * first byte of the item being replaced.
+	 */
+	p = (u_int8_t *)h + HOFFSET(h);
+	t = (u_int8_t *)bk;
+
+	/*
+	 * If the entry is growing in size, shift the beginning of the data
+	 * part of the page down.  If the entry is shrinking in size, shift
+	 * the beginning of the data part of the page up.  Use memmove(3)
+	 * because the regions overlap.
+	 */
+	lo = BKEYDATA_SIZE(bk->len);
+	ln = BKEYDATA_SIZE(data->size);
+	if (lo != ln) {
+		nbytes = lo - ln;		/* Signed difference. */
+		if (p == t)			/* First index is fast. */
+			h->inp[indx] += nbytes;
+		else {				/* Else, shift the page. */
+			memmove(p + nbytes, p, t - p);
+
+			/* Adjust the indices' offsets. */
+			off = h->inp[indx];
+			for (cnt = 0; cnt < NUM_ENT(h); ++cnt)
+				if (h->inp[cnt] <= off)
+					h->inp[cnt] += nbytes;
+		}
+
+		/* Clean up the page and adjust the item's reference. */
+		HOFFSET(h) += nbytes;
+		t += nbytes;
+	}
+
+	/* Copy the new item onto the page. */
+	bk = (BKEYDATA *)t;
+	B_TSET(bk->type, B_KEYDATA, 0);
+	bk->len = data->size;
+	memcpy(bk->data, data->data, data->size);
+
+	return (0);
+}
+
+/*
  * __bam_ndup --
  *	Check to see if the duplicate set at indx should have its own page.
  *	If it should, create it.
@@ -766,16 +897,21 @@ __bam_fixed(t, dbt)
 	rp = t->bt_recno;
 
 	/*
-	 * If using fixed-length records, and the record is long, return
-	 * EINVAL.  If it's short, pad it out.  Use the record data return
-	 * memory, it's only short-term.
+	 * If database contains fixed-length records, and the record is long,
+	 * return EINVAL.
 	 */
 	if (dbt->size > rp->re_len)
 		return (EINVAL);
+
+	/*
+	 * The caller checked to see if it was just right, so we know it's
+	 * short.  Pad it out.  We use the record data return memory, it's
+	 * only a short-term use.
+	 */
 	if (t->bt_rdata.ulen < rp->re_len) {
 		t->bt_rdata.data = t->bt_rdata.data == NULL ?
-		    (void *)malloc(rp->re_len) :
-		    (void *)realloc(t->bt_rdata.data, rp->re_len);
+		    (void *)__db_malloc(rp->re_len) :
+		    (void *)__db_realloc(t->bt_rdata.data, rp->re_len);
 		if (t->bt_rdata.data == NULL) {
 			t->bt_rdata.ulen = 0;
 			return (ENOMEM);
@@ -786,12 +922,16 @@ __bam_fixed(t, dbt)
 	memset((u_int8_t *)t->bt_rdata.data + dbt->size,
 	    rp->re_pad, rp->re_len - dbt->size);
 
-	/* Set the DBT to reference our new record. */
+	/*
+	 * Clean up our flags and other information just in case, and
+	 * change the caller's DBT to reference our created record.
+	 */
 	t->bt_rdata.size = rp->re_len;
 	t->bt_rdata.dlen = 0;
 	t->bt_rdata.doff = 0;
 	t->bt_rdata.flags = 0;
 	*dbt = t->bt_rdata;
+
 	return (0);
 }
 
@@ -800,47 +940,28 @@ __bam_fixed(t, dbt)
  *	Build the real record for a partial put.
  */
 static int
-__bam_partial(dbp, dbt, h, indx)
+__bam_partial(dbp, dbt, h, indx, nbytes)
 	DB *dbp;
 	DBT *dbt;
 	PAGE *h;
-	u_int32_t indx;
+	u_int32_t indx, nbytes;
 {
 	BTREE *t;
 	BKEYDATA *bk, tbk;
 	BOVERFLOW *bo;
 	DBT copy;
-	u_int32_t len, nbytes, tlen;
+	u_int32_t len, tlen;
 	int ret;
 	u_int8_t *p;
 
 	bo = NULL;			/* XXX: Shut the compiler up. */
 	t = dbp->internal;
 
-	/*
-	 * Figure out how much total space we'll need.  Worst case is where
-	 * the record is 0 bytes long, in which case doff causes the record
-	 * to extend, and the put data is appended to it.
-	 */
-	if (indx < NUM_ENT(h)) {
-		bk = GET_BKEYDATA(h, indx + (TYPE(h) == P_LBTREE ? O_INDX : 0));
-		if (B_TYPE(bk->type) == B_OVERFLOW) {
-			bo = (BOVERFLOW *)bk;
-			nbytes = bo->tlen;
-		} else
-			nbytes = bk->len;
-	} else {
-		bk = &tbk;
-		B_TSET(bk->type, B_KEYDATA, 0);
-		nbytes = bk->len = 0;
-	}
-	nbytes += dbt->doff + dbt->size + dbt->dlen;
-
-	/* Allocate the space. */
+	/* We use the record data return memory, it's only a short-term use. */
 	if (t->bt_rdata.ulen < nbytes) {
 		t->bt_rdata.data = t->bt_rdata.data == NULL ?
-		    (void *)malloc(nbytes) :
-		    (void *)realloc(t->bt_rdata.data, nbytes);
+		    (void *)__db_malloc(nbytes) :
+		    (void *)__db_realloc(t->bt_rdata.data, nbytes);
 		if (t->bt_rdata.data == NULL) {
 			t->bt_rdata.ulen = 0;
 			return (ENOMEM);
@@ -848,6 +969,16 @@ __bam_partial(dbp, dbt, h, indx)
 		t->bt_rdata.ulen = nbytes;
 	}
 
+	/* Find the current record. */
+	if (indx < NUM_ENT(h)) {
+		bk = GET_BKEYDATA(h, indx + (TYPE(h) == P_LBTREE ? O_INDX : 0));
+		bo = (BOVERFLOW *)bk;
+	} else {
+		bk = &tbk;
+		B_TSET(bk->type, B_KEYDATA, 0);
+		bk->len = 0;
+	}
+
 	/* We use nul bytes for extending the record, get it over with. */
 	memset(t->bt_rdata.data, 0, nbytes);
 
diff --git a/db2/btree/bt_rec.c b/db2/btree/bt_rec.c
index 9aeb395f27..c0b7c8ae4c 100644
--- a/db2/btree/bt_rec.c
+++ b/db2/btree/bt_rec.c
@@ -8,7 +8,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)bt_rec.c	10.14 (Sleepycat) 9/6/97";
+static const char sccsid[] = "@(#)bt_rec.c	10.17 (Sleepycat) 11/2/97";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -298,8 +298,8 @@ __bam_split_recover(logp, dbtp, lsnp, redo, info)
 			goto done;
 
 		/* Allocate and initialize new left/right child pages. */
-		if ((_lp = (PAGE *)malloc(file_dbp->pgsize)) == NULL ||
-		    (_rp = (PAGE *)malloc(file_dbp->pgsize)) == NULL) {
+		if ((_lp = (PAGE *)__db_malloc(file_dbp->pgsize)) == NULL ||
+		    (_rp = (PAGE *)__db_malloc(file_dbp->pgsize)) == NULL) {
 			ret = ENOMEM;
 			__db_err(file_dbp->dbenv, "%s", strerror(ret));
 			goto out;
@@ -490,9 +490,9 @@ out:	/* Free any pages that weren't dirtied. */
 
 	/* Free any allocated space. */
 	if (_lp != NULL)
-		free(_lp);
+		__db_free(_lp);
 	if (_rp != NULL)
-		free(_rp);
+		__db_free(_rp);
 
 	REC_CLOSE;
 }
@@ -541,7 +541,8 @@ __bam_rsplit_recover(logp, dbtp, lsnp, redo, info)
 	} else if (cmp_n == 0 && !redo) {
 		/* Need to undo update described. */
 		P_INIT(pagep, file_dbp->pgsize, PGNO_ROOT,
-		    PGNO_INVALID, PGNO_INVALID, pagep->level + 1, TYPE(pagep));
+		    argp->nrec, PGNO_INVALID, pagep->level + 1,
+		    file_dbp->type == DB_BTREE ? P_IBTREE : P_IRECNO);
 		if ((ret = __db_pitem(file_dbp, pagep, 0,
 		    argp->rootent.size, &argp->rootent, NULL)) != 0)
 			goto out;
@@ -764,3 +765,106 @@ __bam_cdel_recover(logp, dbtp, lsnp, redo, info)
 
 out:	REC_CLOSE;
 }
+
+/*
+ * __bam_repl_recover --
+ *	Recovery function for page item replacement (redo or undo of one bam_repl record).
+ *
+ * PUBLIC: int __bam_repl_recover
+ * PUBLIC:   __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+ */
+int
+__bam_repl_recover(logp, dbtp, lsnp, redo, info)
+	DB_LOG *logp;
+	DBT *dbtp;
+	DB_LSN *lsnp;
+	int redo;
+	void *info;
+{
+	__bam_repl_args *argp;
+	BKEYDATA *bk;
+	DB *file_dbp, *mdbp;
+	DBT dbt;
+	DB_MPOOLFILE *mpf;
+	PAGE *pagep;
+	int cmp_n, cmp_p, modified, ret;
+	u_int8_t *p;
+
+	REC_PRINT(__bam_repl_print);
+	REC_INTRO(__bam_repl_read);
+	/* Fetch the page named in the record; bk is the item as it currently sits on that page. */
+	if ((ret = memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) {
+		(void)__db_pgerr(file_dbp, argp->pgno);
+		pagep = NULL;
+		goto out;
+	}
+	bk = GET_BKEYDATA(pagep, argp->indx);
+	/* cmp_n: *lsnp against the page LSN; cmp_p: the page LSN against the LSN saved in the record. */
+	modified = 0;
+	cmp_n = log_compare(lsnp, &LSN(pagep));
+	cmp_p = log_compare(&LSN(pagep), &argp->lsn);
+	if (cmp_p == 0 && redo) {
+		/*
+		 * The page LSN equals the LSN saved in the record: redo.
+		 * Re-build the replacement item as the on-page prefix,
+		 * the logged replacement bytes, then the on-page suffix.
+		 */
+		memset(&dbt, 0, sizeof(dbt));
+		dbt.size = argp->prefix + argp->suffix + argp->repl.size;
+		if ((dbt.data = __db_malloc(dbt.size)) == NULL) {
+			ret = ENOMEM;
+			goto err;
+		}
+		p = dbt.data;
+		memcpy(p, bk->data, argp->prefix);
+		p += argp->prefix;
+		memcpy(p, argp->repl.data, argp->repl.size);
+		p += argp->repl.size;
+		memcpy(p, bk->data + (bk->len - argp->suffix), argp->suffix);
+		/* Put the rebuilt item on the page; the buffer is scratch and is freed either way. */
+		ret = __bam_ritem(file_dbp, pagep, argp->indx, &dbt);
+		__db_free(dbt.data);
+		if (ret != 0)
+			goto err;
+
+		LSN(pagep) = *lsnp;
+		modified = 1;
+	} else if (cmp_n == 0 && !redo) {
+		/*
+		 * The page LSN equals *lsnp (what redo stamps on it): undo.
+		 * Re-build the original item as the on-page prefix,
+		 * the logged original bytes, then the on-page suffix.
+		 */
+		memset(&dbt, 0, sizeof(dbt));
+		dbt.size = argp->prefix + argp->suffix + argp->orig.size;
+		if ((dbt.data = __db_malloc(dbt.size)) == NULL) {
+			ret = ENOMEM;
+			goto err;
+		}
+		p = dbt.data;
+		memcpy(p, bk->data, argp->prefix);
+		p += argp->prefix;
+		memcpy(p, argp->orig.data, argp->orig.size);
+		p += argp->orig.size;
+		memcpy(p, bk->data + (bk->len - argp->suffix), argp->suffix);
+		/* Put the rebuilt item on the page; the buffer is scratch and is freed either way. */
+		ret = __bam_ritem(file_dbp, pagep, argp->indx, &dbt);
+		__db_free(dbt.data);
+		if (ret != 0)
+			goto err;
+
+		/* Restore the deleted flag if the record says the original item had it set. */
+		if (argp->isdeleted)
+			B_DSET(GET_BKEYDATA(pagep, argp->indx)->type);
+
+		LSN(pagep) = argp->lsn;
+		modified = 1;
+	}
+	if ((ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) == 0)
+		*lsnp = argp->prev_lsn;
+	/* The block below is entered only through "goto err": it returns the page without the dirty flag. */
+	if (0) {
+err:		(void)memp_fput(mpf, pagep, 0);
+	}
+out:	REC_CLOSE;
+}
diff --git a/db2/btree/bt_recno.c b/db2/btree/bt_recno.c
index f7c5cffdc6..5e1cbc426c 100644
--- a/db2/btree/bt_recno.c
+++ b/db2/btree/bt_recno.c
@@ -8,7 +8,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)bt_recno.c	10.19 (Sleepycat) 9/20/97";
+static const char sccsid[] = "@(#)bt_recno.c	10.22 (Sleepycat) 10/25/97";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -75,7 +75,7 @@ __ram_open(dbp, type, dbinfo)
 	ret = 0;
 
 	/* Allocate and initialize the private RECNO structure. */
-	if ((rp = (RECNO *)calloc(1, sizeof(*rp))) == NULL)
+	if ((rp = (RECNO *)__db_calloc(1, sizeof(*rp))) == NULL)
 		return (ENOMEM);
 
 	if (dbinfo != NULL) {
@@ -140,7 +140,7 @@ __ram_open(dbp, type, dbinfo)
 
 err:	/* If we mmap'd a source file, discard it. */
 	if (rp->re_smap != NULL)
-		(void)__db_munmap(rp->re_smap, rp->re_msize);
+		(void)__db_unmap(rp->re_smap, rp->re_msize);
 
 	/* If we opened a source file, discard it. */
 	if (rp->re_fd != -1)
@@ -151,7 +151,7 @@ err:	/* If we mmap'd a source file, discard it. */
 	/* If we allocated room for key/data return, discard it. */
 	t = dbp->internal;
 	if (t != NULL && t->bt_rkey.data != NULL)
-		free(t->bt_rkey.data);
+		__db_free(t->bt_rkey.data);
 
 	FREE(rp, sizeof(*rp));
 
@@ -175,10 +175,10 @@ __ram_cursor(dbp, txn, dbcp)
 
 	DEBUG_LWRITE(dbp, txn, "ram_cursor", NULL, NULL, 0);
 
-	if ((dbc = (DBC *)calloc(1, sizeof(DBC))) == NULL)
+	if ((dbc = (DBC *)__db_calloc(1, sizeof(DBC))) == NULL)
 		return (ENOMEM);
-	if ((cp = (RCURSOR *)calloc(1, sizeof(RCURSOR))) == NULL) {
-		free(dbc);
+	if ((cp = (RCURSOR *)__db_calloc(1, sizeof(RCURSOR))) == NULL) {
+		__db_free(dbc);
 		return (ENOMEM);
 	}
 
@@ -359,7 +359,7 @@ __ram_close(argdbp)
 
 	/* Close any underlying mmap region. */
 	if (rp->re_smap != NULL)
-		(void)__db_munmap(rp->re_smap, rp->re_msize);
+		(void)__db_unmap(rp->re_smap, rp->re_msize);
 
 	/* Close any backing source file descriptor. */
 	if (rp->re_fd != -1)
@@ -814,8 +814,8 @@ __ram_update(dbp, recno, can_create)
 	if (F_ISSET(dbp, DB_RE_FIXEDLEN)) {
 		if (t->bt_rdata.ulen < rp->re_len) {
 			t->bt_rdata.data = t->bt_rdata.data == NULL ?
-			    (void *)malloc(rp->re_len) :
-			    (void *)realloc(t->bt_rdata.data, rp->re_len);
+			    (void *)__db_malloc(rp->re_len) :
+			    (void *)__db_realloc(t->bt_rdata.data, rp->re_len);
 			if (t->bt_rdata.data == NULL) {
 				t->bt_rdata.ulen = 0;
 				return (ENOMEM);
@@ -853,7 +853,7 @@ __ram_source(dbp, rp, fname)
 
 	oflags = F_ISSET(dbp, DB_AM_RDONLY) ? DB_RDONLY : 0;
 	if ((ret =
-	    __db_fdopen(rp->re_source, oflags, oflags, 0, &rp->re_fd)) != 0) {
+	    __db_open(rp->re_source, oflags, oflags, 0, &rp->re_fd)) != 0) {
 		__db_err(dbp->dbenv, "%s: %s", rp->re_source, strerror(ret));
 		goto err;
 	}
@@ -866,15 +866,16 @@ __ram_source(dbp, rp, fname)
 	 * compiler will perpetrate, doing the comparison in a portable way is
 	 * flatly impossible.  Hope that mmap fails if the file is too large.
 	 */
-	if ((ret =
-	    __db_stat(dbp->dbenv, rp->re_source, rp->re_fd, &size, NULL)) != 0)
+	if ((ret = __db_ioinfo(rp->re_source, rp->re_fd, &size, NULL)) != 0) {
+		__db_err(dbp->dbenv, "%s: %s", rp->re_source, strerror(ret));
 		goto err;
+	}
 	if (size == 0) {
 		F_SET(rp, RECNO_EOF);
 		return (0);
 	}
 
-	if ((ret = __db_mmap(rp->re_fd, (size_t)size, 1, 1, &rp->re_smap)) != 0)
+	if ((ret = __db_map(rp->re_fd, (size_t)size, 1, 1, &rp->re_smap)) != 0)
 		goto err;
 	rp->re_cmap = rp->re_smap;
 	rp->re_emap = (u_int8_t *)rp->re_smap + (rp->re_msize = size);
@@ -940,7 +941,7 @@ __ram_writeback(dbp)
 	 * open will fail.
 	 */
 	if (rp->re_smap != NULL) {
-		(void)__db_munmap(rp->re_smap, rp->re_msize);
+		(void)__db_unmap(rp->re_smap, rp->re_msize);
 		rp->re_smap = NULL;
 	}
 
@@ -951,7 +952,7 @@ __ram_writeback(dbp)
 	}
 
 	/* Open the file, truncating it. */
-	if ((ret = __db_fdopen(rp->re_source,
+	if ((ret = __db_open(rp->re_source,
 	    DB_SEQUENTIAL | DB_TRUNCATE,
 	    DB_SEQUENTIAL | DB_TRUNCATE, 0, &fd)) != 0) {
 		__db_err(dbp->dbenv, "%s: %s", rp->re_source, strerror(ret));
@@ -974,7 +975,7 @@ __ram_writeback(dbp)
 	 */
 	delim = rp->re_delim;
 	if (F_ISSET(dbp, DB_RE_FIXEDLEN)) {
-		if ((pad = malloc(rp->re_len)) == NULL) {
+		if ((pad = (u_int8_t *)__db_malloc(rp->re_len)) == NULL) {
 			ret = ENOMEM;
 			goto err;
 		}
@@ -1051,8 +1052,8 @@ __ram_fmap(dbp, top)
 	rp = t->bt_recno;
 	if (t->bt_rdata.ulen < rp->re_len) {
 		t->bt_rdata.data = t->bt_rdata.data == NULL ?
-		    (void *)malloc(rp->re_len) :
-		    (void *)realloc(t->bt_rdata.data, rp->re_len);
+		    (void *)__db_malloc(rp->re_len) :
+		    (void *)__db_realloc(t->bt_rdata.data, rp->re_len);
 		if (t->bt_rdata.data == NULL) {
 			t->bt_rdata.ulen = 0;
 			return (ENOMEM);
diff --git a/db2/btree/bt_search.c b/db2/btree/bt_search.c
index fa3e018313..a21a8208bc 100644
--- a/db2/btree/bt_search.c
+++ b/db2/btree/bt_search.c
@@ -47,7 +47,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)bt_search.c	10.7 (Sleepycat) 9/3/97";
+static const char sccsid[] = "@(#)bt_search.c	10.8 (Sleepycat) 10/25/97";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -323,7 +323,7 @@ __bam_stkgrow(t)
 
 	entries = t->bt_esp - t->bt_sp;
 
-	if ((p = (EPG *)calloc(entries * 2, sizeof(EPG))) == NULL)
+	if ((p = (EPG *)__db_calloc(entries * 2, sizeof(EPG))) == NULL)
 		return (ENOMEM);
 	memcpy(p, t->bt_sp, entries * sizeof(EPG));
 	if (t->bt_sp != t->bt_stack)
diff --git a/db2/btree/bt_split.c b/db2/btree/bt_split.c
index 25cfacc4d0..bc09131b00 100644
--- a/db2/btree/bt_split.c
+++ b/db2/btree/bt_split.c
@@ -44,7 +44,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)bt_split.c	10.14 (Sleepycat) 9/3/97";
+static const char sccsid[] = "@(#)bt_split.c	10.17 (Sleepycat) 11/2/97";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -255,7 +255,7 @@ __bam_page(dbp, pp, cp)
 	    cp->page->level, TYPE(cp->page));
 
 	/* Create new left page for the split. */
-	if ((lp = (PAGE *)malloc(dbp->pgsize)) == NULL) {
+	if ((lp = (PAGE *)__db_malloc(dbp->pgsize)) == NULL) {
 		ret = ENOMEM;
 		goto err;
 	}
@@ -389,6 +389,9 @@ __bam_broot(dbp, rootp, lp, rp)
 	P_INIT(rootp, dbp->pgsize,
 	    PGNO_ROOT, PGNO_INVALID, PGNO_INVALID, lp->level + 1, P_IBTREE);
 
+	memset(&data, 0, sizeof(data));
+	memset(&hdr, 0, sizeof(hdr));
+
 	/*
 	 * The btree comparison code guarantees that the left-most key on any
 	 * level of the tree is never used, so it doesn't need to be filled in.
@@ -399,15 +402,12 @@ __bam_broot(dbp, rootp, lp, rp)
 	if (F_ISSET(dbp, DB_BT_RECNUM)) {
 		bi.nrecs = __bam_total(lp);
 		RE_NREC_SET(rootp, bi.nrecs);
-	}
-	memset(&hdr, 0, sizeof(hdr));
+	} else
+		bi.nrecs = 0;
 	hdr.data = &bi;
 	hdr.size = SSZA(BINTERNAL, data);
-	memset(&data, 0, sizeof(data));
-	data.data = (char *)"";
-	data.size = 0;
 	if ((ret =
-	    __db_pitem(dbp, rootp, 0, BINTERNAL_SIZE(0), &hdr, &data)) != 0)
+	    __db_pitem(dbp, rootp, 0, BINTERNAL_SIZE(0), &hdr, NULL)) != 0)
 		return (ret);
 
 	switch (TYPE(rp)) {
@@ -431,9 +431,10 @@ __bam_broot(dbp, rootp, lp, rp)
 			return (ret);
 
 		/* Increment the overflow ref count. */
-		if (B_TYPE(child_bi->type) == B_OVERFLOW && (ret =
-		    __db_ioff(dbp, ((BOVERFLOW *)(child_bi->data))->pgno)) != 0)
-			return (ret);
+		if (B_TYPE(child_bi->type) == B_OVERFLOW)
+			if ((ret = __db_ovref(dbp,
+			    ((BOVERFLOW *)(child_bi->data))->pgno, 1)) != 0)
+				return (ret);
 		break;
 	case P_LBTREE:
 		/* Copy the first key of the child page onto the root page. */
@@ -473,9 +474,10 @@ __bam_broot(dbp, rootp, lp, rp)
 				return (ret);
 
 			/* Increment the overflow ref count. */
-			if (B_TYPE(child_bk->type) == B_OVERFLOW && (ret =
-			    __db_ioff(dbp, ((BOVERFLOW *)child_bk)->pgno)) != 0)
-				return (ret);
+			if (B_TYPE(child_bk->type) == B_OVERFLOW)
+				if ((ret = __db_ovref(dbp,
+				    ((BOVERFLOW *)child_bk)->pgno, 1)) != 0)
+					return (ret);
 			break;
 		default:
 			return (__db_pgfmt(dbp, rp->pgno));
@@ -604,9 +606,10 @@ __bam_pinsert(dbp, parent, lchild, rchild)
 			return (ret);
 
 		/* Increment the overflow ref count. */
-		if (B_TYPE(child_bi->type) == B_OVERFLOW && (ret =
-		    __db_ioff(dbp, ((BOVERFLOW *)(child_bi->data))->pgno)) != 0)
-			return (ret);
+		if (B_TYPE(child_bi->type) == B_OVERFLOW)
+			if ((ret = __db_ovref(dbp,
+			    ((BOVERFLOW *)(child_bi->data))->pgno, 1)) != 0)
+				return (ret);
 		break;
 	case P_LBTREE:
 		child_bk = GET_BKEYDATA(rchild, 0);
@@ -673,9 +676,10 @@ noprefix:			nksize = child_bk->len;
 				return (ret);
 
 			/* Increment the overflow ref count. */
-			if (B_TYPE(child_bk->type) == B_OVERFLOW && (ret =
-			    __db_ioff(dbp, ((BOVERFLOW *)child_bk)->pgno)) != 0)
-				return (ret);
+			if (B_TYPE(child_bk->type) == B_OVERFLOW)
+				if ((ret = __db_ovref(dbp,
+				    ((BOVERFLOW *)child_bk)->pgno, 1)) != 0)
+					return (ret);
 			break;
 		default:
 			return (__db_pgfmt(dbp, rchild->pgno));
diff --git a/db2/btree/bt_stat.c b/db2/btree/bt_stat.c
index ab3bc4c431..e88b5dac2d 100644
--- a/db2/btree/bt_stat.c
+++ b/db2/btree/bt_stat.c
@@ -8,7 +8,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)bt_stat.c	10.12 (Sleepycat) 9/3/97";
+static const char sccsid[] = "@(#)bt_stat.c	10.14 (Sleepycat) 10/25/97";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -61,7 +61,7 @@ __bam_stat(argdbp, spp, db_malloc, flags)
 
 	/* Allocate and clear the structure. */
 	if ((sp = db_malloc == NULL ?
-	    (DB_BTREE_STAT *)malloc(sizeof(*sp)) :
+	    (DB_BTREE_STAT *)__db_malloc(sizeof(*sp)) :
 	    (DB_BTREE_STAT *)db_malloc(sizeof(*sp))) == NULL) {
 		ret = ENOMEM;
 		goto err;
@@ -100,14 +100,13 @@ __bam_stat(argdbp, spp, db_malloc, flags)
 	if (F_ISSET(meta, BTM_RENUMBER))
 		sp->bt_flags |= DB_RENUMBER;
 
-	/*
-	 * Get the maxkey, minkey, re_len and re_pad fields from the
-	 * metadata.
-	 */
+	/* Get the remaining metadata fields. */
 	sp->bt_minkey = meta->minkey;
 	sp->bt_maxkey = meta->maxkey;
 	sp->bt_re_len = meta->re_len;
 	sp->bt_re_pad = meta->re_pad;
+	sp->bt_magic = meta->magic;
+	sp->bt_version = meta->version;
 
 	/* Get the page size from the DB. */
 	sp->bt_pagesize = dbp->pgsize;
diff --git a/db2/btree/btree.src b/db2/btree/btree.src
index 7c8c4b125f..6145696d28 100644
--- a/db2/btree/btree.src
+++ b/db2/btree/btree.src
@@ -8,7 +8,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)btree.src	10.4 (Sleepycat) 8/27/97";
+static const char sccsid[] = "@(#)btree.src	10.6 (Sleepycat) 11/2/97";
 #endif /* not lint */
 
 PREFIX	bam
@@ -75,6 +75,7 @@ END
  *
  * pgno:	the page number of the page copied over the root.
  * pgdbt:	the page being copied on the root page.
+ * nrec:	the tree's record count.
  * rootent:	last entry on the root page.
  * rootlsn:	the root page's original lsn.
  */
@@ -82,6 +83,7 @@ BEGIN rsplit
 ARG	fileid		u_int32_t	lu
 ARG	pgno		db_pgno_t	lu
 DBT	pgdbt		DBT		s
+ARG	nrec		db_pgno_t	lu
 DBT	rootent		DBT		s
 POINTER rootlsn		DB_LSN *	lu
 END
@@ -135,3 +137,24 @@ ARG	pgno		db_pgno_t	lu
 POINTER	lsn		DB_LSN *	lu
 ARG	indx		u_int32_t	lu
 END
+
+/*
+ * BTREE-repl: used to log the replacement of an item.
+ *
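+ * pgno:	the page modified (indx: index of the replaced item).
+ * lsn:		the page's original lsn.
+ * isdeleted:	non-zero if the original item was marked deleted.
+ * orig, repl:	the bytes that differ in the original and in the replacement.
+ * prefix, suffix: counts of leading/trailing bytes common to both (not logged).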
+ */
+BEGIN repl
+ARG	fileid		u_int32_t	lu
+ARG	pgno		db_pgno_t	lu
+POINTER	lsn		DB_LSN *	lu
+ARG	indx		u_int32_t	lu
+ARG	isdeleted	u_int32_t	lu
+DBT	orig		DBT		s
+DBT	repl		DBT		s
+ARG	prefix		u_int32_t	lu
+ARG	suffix		u_int32_t	lu
+END
diff --git a/db2/btree/btree_auto.c b/db2/btree/btree_auto.c
index 353ee7bc27..45232bbc41 100644
--- a/db2/btree/btree_auto.c
+++ b/db2/btree/btree_auto.c
@@ -57,7 +57,7 @@ int __bam_pg_alloc_log(logp, txnid, ret_lsnp, flags,
 	    + sizeof(pgno)
 	    + sizeof(ptype)
 	    + sizeof(next);
-	if ((logrec.data = (void *)malloc(logrec.size)) == NULL)
+	if ((logrec.data = (void *)__db_malloc(logrec.size)) == NULL)
 		return (ENOMEM);
 
 	bp = logrec.data;
@@ -92,7 +92,7 @@ int __bam_pg_alloc_log(logp, txnid, ret_lsnp, flags,
 	ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags);
 	if (txnid != NULL)
 		txnid->last_lsn = *ret_lsnp;
-	free(logrec.data);
+	__db_free(logrec.data);
 	return (ret);
 }
 
@@ -137,7 +137,7 @@ __bam_pg_alloc_print(notused1, dbtp, lsnp, notused3, notused4)
 	printf("\tptype: %lu\n", (u_long)argp->ptype);
 	printf("\tnext: %lu\n", (u_long)argp->next);
 	printf("\n");
-	free(argp);
+	__db_free(argp);
 	return (0);
 }
 
@@ -152,7 +152,7 @@ __bam_pg_alloc_read(recbuf, argpp)
 	__bam_pg_alloc_args *argp;
 	u_int8_t *bp;
 
-	argp = (__bam_pg_alloc_args *)malloc(sizeof(__bam_pg_alloc_args) +
+	argp = (__bam_pg_alloc_args *)__db_malloc(sizeof(__bam_pg_alloc_args) +
 	    sizeof(DB_TXN));
 	if (argp == NULL)
 		return (ENOMEM);
@@ -219,7 +219,7 @@ int __bam_pg_free_log(logp, txnid, ret_lsnp, flags,
 	    + sizeof(*meta_lsn)
 	    + sizeof(u_int32_t) + (header == NULL ? 0 : header->size)
 	    + sizeof(next);
-	if ((logrec.data = (void *)malloc(logrec.size)) == NULL)
+	if ((logrec.data = (void *)__db_malloc(logrec.size)) == NULL)
 		return (ENOMEM);
 
 	bp = logrec.data;
@@ -257,7 +257,7 @@ int __bam_pg_free_log(logp, txnid, ret_lsnp, flags,
 	ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags);
 	if (txnid != NULL)
 		txnid->last_lsn = *ret_lsnp;
-	free(logrec.data);
+	__db_free(logrec.data);
 	return (ret);
 }
 
@@ -308,7 +308,7 @@ __bam_pg_free_print(notused1, dbtp, lsnp, notused3, notused4)
 	printf("\n");
 	printf("\tnext: %lu\n", (u_long)argp->next);
 	printf("\n");
-	free(argp);
+	__db_free(argp);
 	return (0);
 }
 
@@ -323,7 +323,7 @@ __bam_pg_free_read(recbuf, argpp)
 	__bam_pg_free_args *argp;
 	u_int8_t *bp;
 
-	argp = (__bam_pg_free_args *)malloc(sizeof(__bam_pg_free_args) +
+	argp = (__bam_pg_free_args *)__db_malloc(sizeof(__bam_pg_free_args) +
 	    sizeof(DB_TXN));
 	if (argp == NULL)
 		return (ENOMEM);
@@ -400,7 +400,7 @@ int __bam_split_log(logp, txnid, ret_lsnp, flags,
 	    + sizeof(npgno)
 	    + sizeof(*nlsn)
 	    + sizeof(u_int32_t) + (pg == NULL ? 0 : pg->size);
-	if ((logrec.data = (void *)malloc(logrec.size)) == NULL)
+	if ((logrec.data = (void *)__db_malloc(logrec.size)) == NULL)
 		return (ENOMEM);
 
 	bp = logrec.data;
@@ -452,7 +452,7 @@ int __bam_split_log(logp, txnid, ret_lsnp, flags,
 	ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags);
 	if (txnid != NULL)
 		txnid->last_lsn = *ret_lsnp;
-	free(logrec.data);
+	__db_free(logrec.data);
 	return (ret);
 }
 
@@ -509,7 +509,7 @@ __bam_split_print(notused1, dbtp, lsnp, notused3, notused4)
 	}
 	printf("\n");
 	printf("\n");
-	free(argp);
+	__db_free(argp);
 	return (0);
 }
 
@@ -524,7 +524,7 @@ __bam_split_read(recbuf, argpp)
 	__bam_split_args *argp;
 	u_int8_t *bp;
 
-	argp = (__bam_split_args *)malloc(sizeof(__bam_split_args) +
+	argp = (__bam_split_args *)__db_malloc(sizeof(__bam_split_args) +
 	    sizeof(DB_TXN));
 	if (argp == NULL)
 		return (ENOMEM);
@@ -563,11 +563,11 @@ __bam_split_read(recbuf, argpp)
 /*
  * PUBLIC: int __bam_rsplit_log
  * PUBLIC:     __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
- * PUBLIC:     u_int32_t, db_pgno_t, DBT *, DBT *,
- * PUBLIC:     DB_LSN *));
+ * PUBLIC:     u_int32_t, db_pgno_t, DBT *, db_pgno_t,
+ * PUBLIC:     DBT *, DB_LSN *));
  */
 int __bam_rsplit_log(logp, txnid, ret_lsnp, flags,
-	fileid, pgno, pgdbt, rootent, rootlsn)
+	fileid, pgno, pgdbt, nrec, rootent, rootlsn)
 	DB_LOG *logp;
 	DB_TXN *txnid;
 	DB_LSN *ret_lsnp;
@@ -575,6 +575,7 @@ int __bam_rsplit_log(logp, txnid, ret_lsnp, flags,
 	u_int32_t fileid;
 	db_pgno_t pgno;
 	DBT *pgdbt;
+	db_pgno_t nrec;
 	DBT *rootent;
 	DB_LSN * rootlsn;
 {
@@ -597,9 +598,10 @@ int __bam_rsplit_log(logp, txnid, ret_lsnp, flags,
 	    + sizeof(fileid)
 	    + sizeof(pgno)
 	    + sizeof(u_int32_t) + (pgdbt == NULL ? 0 : pgdbt->size)
+	    + sizeof(nrec)
 	    + sizeof(u_int32_t) + (rootent == NULL ? 0 : rootent->size)
 	    + sizeof(*rootlsn);
-	if ((logrec.data = (void *)malloc(logrec.size)) == NULL)
+	if ((logrec.data = (void *)__db_malloc(logrec.size)) == NULL)
 		return (ENOMEM);
 
 	bp = logrec.data;
@@ -623,6 +625,8 @@ int __bam_rsplit_log(logp, txnid, ret_lsnp, flags,
 		memcpy(bp, pgdbt->data, pgdbt->size);
 		bp += pgdbt->size;
 	}
+	memcpy(bp, &nrec, sizeof(nrec));
+	bp += sizeof(nrec);
 	if (rootent == NULL) {
 		zero = 0;
 		memcpy(bp, &zero, sizeof(u_int32_t));
@@ -645,7 +649,7 @@ int __bam_rsplit_log(logp, txnid, ret_lsnp, flags,
 	ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags);
 	if (txnid != NULL)
 		txnid->last_lsn = *ret_lsnp;
-	free(logrec.data);
+	__db_free(logrec.data);
 	return (ret);
 }
 
@@ -692,6 +696,7 @@ __bam_rsplit_print(notused1, dbtp, lsnp, notused3, notused4)
 			printf("%#x ", c);
 	}
 	printf("\n");
+	printf("\tnrec: %lu\n", (u_long)argp->nrec);
 	printf("\trootent: ");
 	for (i = 0; i < argp->rootent.size; i++) {
 		c = ((char *)argp->rootent.data)[i];
@@ -704,7 +709,7 @@ __bam_rsplit_print(notused1, dbtp, lsnp, notused3, notused4)
 	printf("\trootlsn: [%lu][%lu]\n",
 	    (u_long)argp->rootlsn.file, (u_long)argp->rootlsn.offset);
 	printf("\n");
-	free(argp);
+	__db_free(argp);
 	return (0);
 }
 
@@ -719,7 +724,7 @@ __bam_rsplit_read(recbuf, argpp)
 	__bam_rsplit_args *argp;
 	u_int8_t *bp;
 
-	argp = (__bam_rsplit_args *)malloc(sizeof(__bam_rsplit_args) +
+	argp = (__bam_rsplit_args *)__db_malloc(sizeof(__bam_rsplit_args) +
 	    sizeof(DB_TXN));
 	if (argp == NULL)
 		return (ENOMEM);
@@ -739,6 +744,8 @@ __bam_rsplit_read(recbuf, argpp)
 	bp += sizeof(u_int32_t);
 	argp->pgdbt.data = bp;
 	bp += argp->pgdbt.size;
+	memcpy(&argp->nrec, bp, sizeof(argp->nrec));
+	bp += sizeof(argp->nrec);
 	memcpy(&argp->rootent.size, bp, sizeof(u_int32_t));
 	bp += sizeof(u_int32_t);
 	argp->rootent.data = bp;
@@ -789,7 +796,7 @@ int __bam_adj_log(logp, txnid, ret_lsnp, flags,
 	    + sizeof(indx)
 	    + sizeof(indx_copy)
 	    + sizeof(is_insert);
-	if ((logrec.data = (void *)malloc(logrec.size)) == NULL)
+	if ((logrec.data = (void *)__db_malloc(logrec.size)) == NULL)
 		return (ENOMEM);
 
 	bp = logrec.data;
@@ -821,7 +828,7 @@ int __bam_adj_log(logp, txnid, ret_lsnp, flags,
 	ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags);
 	if (txnid != NULL)
 		txnid->last_lsn = *ret_lsnp;
-	free(logrec.data);
+	__db_free(logrec.data);
 	return (ret);
 }
 
@@ -865,7 +872,7 @@ __bam_adj_print(notused1, dbtp, lsnp, notused3, notused4)
 	printf("\tindx_copy: %lu\n", (u_long)argp->indx_copy);
 	printf("\tis_insert: %lu\n", (u_long)argp->is_insert);
 	printf("\n");
-	free(argp);
+	__db_free(argp);
 	return (0);
 }
 
@@ -880,7 +887,7 @@ __bam_adj_read(recbuf, argpp)
 	__bam_adj_args *argp;
 	u_int8_t *bp;
 
-	argp = (__bam_adj_args *)malloc(sizeof(__bam_adj_args) +
+	argp = (__bam_adj_args *)__db_malloc(sizeof(__bam_adj_args) +
 	    sizeof(DB_TXN));
 	if (argp == NULL)
 		return (ENOMEM);
@@ -948,7 +955,7 @@ int __bam_cadjust_log(logp, txnid, ret_lsnp, flags,
 	    + sizeof(indx)
 	    + sizeof(adjust)
 	    + sizeof(total);
-	if ((logrec.data = (void *)malloc(logrec.size)) == NULL)
+	if ((logrec.data = (void *)__db_malloc(logrec.size)) == NULL)
 		return (ENOMEM);
 
 	bp = logrec.data;
@@ -980,7 +987,7 @@ int __bam_cadjust_log(logp, txnid, ret_lsnp, flags,
 	ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags);
 	if (txnid != NULL)
 		txnid->last_lsn = *ret_lsnp;
-	free(logrec.data);
+	__db_free(logrec.data);
 	return (ret);
 }
 
@@ -1024,7 +1031,7 @@ __bam_cadjust_print(notused1, dbtp, lsnp, notused3, notused4)
 	printf("\tadjust: %ld\n", (long)argp->adjust);
 	printf("\ttotal: %ld\n", (long)argp->total);
 	printf("\n");
-	free(argp);
+	__db_free(argp);
 	return (0);
 }
 
@@ -1039,7 +1046,7 @@ __bam_cadjust_read(recbuf, argpp)
 	__bam_cadjust_args *argp;
 	u_int8_t *bp;
 
-	argp = (__bam_cadjust_args *)malloc(sizeof(__bam_cadjust_args) +
+	argp = (__bam_cadjust_args *)__db_malloc(sizeof(__bam_cadjust_args) +
 	    sizeof(DB_TXN));
 	if (argp == NULL)
 		return (ENOMEM);
@@ -1102,7 +1109,7 @@ int __bam_cdel_log(logp, txnid, ret_lsnp, flags,
 	    + sizeof(pgno)
 	    + sizeof(*lsn)
 	    + sizeof(indx);
-	if ((logrec.data = (void *)malloc(logrec.size)) == NULL)
+	if ((logrec.data = (void *)__db_malloc(logrec.size)) == NULL)
 		return (ENOMEM);
 
 	bp = logrec.data;
@@ -1130,7 +1137,7 @@ int __bam_cdel_log(logp, txnid, ret_lsnp, flags,
 	ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags);
 	if (txnid != NULL)
 		txnid->last_lsn = *ret_lsnp;
-	free(logrec.data);
+	__db_free(logrec.data);
 	return (ret);
 }
 
@@ -1172,7 +1179,7 @@ __bam_cdel_print(notused1, dbtp, lsnp, notused3, notused4)
 	    (u_long)argp->lsn.file, (u_long)argp->lsn.offset);
 	printf("\tindx: %lu\n", (u_long)argp->indx);
 	printf("\n");
-	free(argp);
+	__db_free(argp);
 	return (0);
 }
 
@@ -1187,7 +1194,7 @@ __bam_cdel_read(recbuf, argpp)
 	__bam_cdel_args *argp;
 	u_int8_t *bp;
 
-	argp = (__bam_cdel_args *)malloc(sizeof(__bam_cdel_args) +
+	argp = (__bam_cdel_args *)__db_malloc(sizeof(__bam_cdel_args) +
 	    sizeof(DB_TXN));
 	if (argp == NULL)
 		return (ENOMEM);
@@ -1212,6 +1219,225 @@ __bam_cdel_read(recbuf, argpp)
 }
 
 /*
+ * PUBLIC: int __bam_repl_log
+ * PUBLIC:     __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
+ * PUBLIC:     u_int32_t, db_pgno_t, DB_LSN *, u_int32_t,
+ * PUBLIC:     u_int32_t, DBT *, DBT *, u_int32_t,
+ * PUBLIC:     u_int32_t));
+ */
+int __bam_repl_log(logp, txnid, ret_lsnp, flags,
+	fileid, pgno, lsn, indx, isdeleted, orig,
+	repl, prefix, suffix)
+	DB_LOG *logp;
+	DB_TXN *txnid;
+	DB_LSN *ret_lsnp;
+	u_int32_t flags;
+	u_int32_t fileid;
+	db_pgno_t pgno;
+	DB_LSN * lsn;
+	u_int32_t indx;
+	u_int32_t isdeleted;
+	DBT *orig;
+	DBT *repl;
+	u_int32_t prefix;
+	u_int32_t suffix;
+{
+	DBT logrec;
+	DB_LSN *lsnp, null_lsn;
+	u_int32_t zero;
+	u_int32_t rectype, txn_num;
+	int ret;
+	u_int8_t *bp;
+	/* Header is record type, txn id and the txn's previous LSN (zeroes when txnid is NULL); each DBT adds a u_int32_t length plus its bytes. */
+	rectype = DB_bam_repl;
+	txn_num = txnid == NULL ? 0 : txnid->txnid;
+	if (txnid == NULL) {
+		null_lsn.file = 0;
+		null_lsn.offset = 0;
+		lsnp = &null_lsn;
+	} else
+		lsnp = &txnid->last_lsn;
+	logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN)
+	    + sizeof(fileid)
+	    + sizeof(pgno)
+	    + sizeof(*lsn)
+	    + sizeof(indx)
+	    + sizeof(isdeleted)
+	    + sizeof(u_int32_t) + (orig == NULL ? 0 : orig->size)
+	    + sizeof(u_int32_t) + (repl == NULL ? 0 : repl->size)
+	    + sizeof(prefix)
+	    + sizeof(suffix);
+	if ((logrec.data = (void *)__db_malloc(logrec.size)) == NULL)
+		return (ENOMEM);
+	/* Serialize in a fixed order; __bam_repl_read consumes the fields in this same order. */
+	bp = logrec.data;
+	memcpy(bp, &rectype, sizeof(rectype));
+	bp += sizeof(rectype);
+	memcpy(bp, &txn_num, sizeof(txn_num));
+	bp += sizeof(txn_num);
+	memcpy(bp, lsnp, sizeof(DB_LSN));
+	bp += sizeof(DB_LSN);
+	memcpy(bp, &fileid, sizeof(fileid));
+	bp += sizeof(fileid);
+	memcpy(bp, &pgno, sizeof(pgno));
+	bp += sizeof(pgno);
+	if (lsn != NULL)
+		memcpy(bp, lsn, sizeof(*lsn));
+	else
+		memset(bp, 0, sizeof(*lsn));
+	bp += sizeof(*lsn);
+	memcpy(bp, &indx, sizeof(indx));
+	bp += sizeof(indx);
+	memcpy(bp, &isdeleted, sizeof(isdeleted));
+	bp += sizeof(isdeleted);
+	if (orig == NULL) {
+		zero = 0;
+		memcpy(bp, &zero, sizeof(u_int32_t));
+		bp += sizeof(u_int32_t);
+	} else {
+		memcpy(bp, &orig->size, sizeof(orig->size));
+		bp += sizeof(orig->size);
+		memcpy(bp, orig->data, orig->size);
+		bp += orig->size;
+	}
+	if (repl == NULL) {
+		zero = 0;
+		memcpy(bp, &zero, sizeof(u_int32_t));
+		bp += sizeof(u_int32_t);
+	} else {
+		memcpy(bp, &repl->size, sizeof(repl->size));
+		bp += sizeof(repl->size);
+		memcpy(bp, repl->data, repl->size);
+		bp += repl->size;
+	}
+	memcpy(bp, &prefix, sizeof(prefix));
+	bp += sizeof(prefix);
+	memcpy(bp, &suffix, sizeof(suffix));
+	bp += sizeof(suffix);
+#ifdef DEBUG
+	if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size)
+		fprintf(stderr, "Error in log record length");
+#endif
+	ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags);
+	if (txnid != NULL)
+		txnid->last_lsn = *ret_lsnp;	/* NOTE(review): also runs when log_put failed -- confirm *ret_lsnp is meaningful then */
+	__db_free(logrec.data);
+	return (ret);
+}
+
+/*
+ * PUBLIC: int __bam_repl_print
+ * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+ */
+
+int
+__bam_repl_print(notused1, dbtp, lsnp, notused3, notused4)
+	DB_LOG *notused1;
+	DBT *dbtp;
+	DB_LSN *lsnp;
+	int notused3;
+	void *notused4;
+{
+	__bam_repl_args *argp;
+	u_int32_t i;
+	int c, ret;
+
+	i = 0;
+	c = 0;
+	notused1 = NULL;
+	notused3 = 0;
+	notused4 = NULL;
+
+	if ((ret = __bam_repl_read(dbtp->data, &argp)) != 0)
+		return (ret);
+	printf("[%lu][%lu]bam_repl: rec: %lu txnid %lx prevlsn [%lu][%lu]\n",
+	    (u_long)lsnp->file,
+	    (u_long)lsnp->offset,
+	    (u_long)argp->type,
+	    (u_long)argp->txnid->txnid,
+	    (u_long)argp->prev_lsn.file,
+	    (u_long)argp->prev_lsn.offset);
+	printf("\tfileid: %lu\n", (u_long)argp->fileid);
+	printf("\tpgno: %lu\n", (u_long)argp->pgno);
+	printf("\tlsn: [%lu][%lu]\n",
+	    (u_long)argp->lsn.file, (u_long)argp->lsn.offset);
+	printf("\tindx: %lu\n", (u_long)argp->indx);
+	printf("\tisdeleted: %lu\n", (u_long)argp->isdeleted);
+	printf("\torig: ");
+	for (i = 0; i < argp->orig.size; i++) {
+		c = ((char *)argp->orig.data)[i];
+		if (isprint(c) || c == 0xa)
+			putchar(c);
+		else
+			printf("%#x ", c);
+	}
+	printf("\n");
+	printf("\trepl: ");
+	for (i = 0; i < argp->repl.size; i++) {
+		c = ((char *)argp->repl.data)[i];
+		if (isprint(c) || c == 0xa)
+			putchar(c);
+		else
+			printf("%#x ", c);
+	}
+	printf("\n");
+	printf("\tprefix: %lu\n", (u_long)argp->prefix);
+	printf("\tsuffix: %lu\n", (u_long)argp->suffix);
+	printf("\n");
+	__db_free(argp);
+	return (0);
+}
+
+/*
+ * PUBLIC: int __bam_repl_read __P((void *, __bam_repl_args **));
+ */
+int
+__bam_repl_read(recbuf, argpp)
+	void *recbuf;
+	__bam_repl_args **argpp;
+{
+	__bam_repl_args *argp;
+	u_int8_t *bp;
+	/* Decode recbuf into a newly allocated __bam_repl_args returned through *argpp; returns 0 or ENOMEM. */
+	argp = (__bam_repl_args *)__db_malloc(sizeof(__bam_repl_args) +
+	    sizeof(DB_TXN));	/* one allocation: the args plus room for a DB_TXN right after them */
+	if (argp == NULL)
+		return (ENOMEM);
+	argp->txnid = (DB_TXN *)&argp[1];	/* txnid points into that same allocation */
+	bp = recbuf;
+	memcpy(&argp->type, bp, sizeof(argp->type));
+	bp += sizeof(argp->type);
+	memcpy(&argp->txnid->txnid,  bp, sizeof(argp->txnid->txnid));
+	bp += sizeof(argp->txnid->txnid);
+	memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN));
+	bp += sizeof(DB_LSN);
+	memcpy(&argp->fileid, bp, sizeof(argp->fileid));
+	bp += sizeof(argp->fileid);
+	memcpy(&argp->pgno, bp, sizeof(argp->pgno));
+	bp += sizeof(argp->pgno);
+	memcpy(&argp->lsn, bp,  sizeof(argp->lsn));
+	bp += sizeof(argp->lsn);
+	memcpy(&argp->indx, bp, sizeof(argp->indx));
+	bp += sizeof(argp->indx);
+	memcpy(&argp->isdeleted, bp, sizeof(argp->isdeleted));
+	bp += sizeof(argp->isdeleted);
+	memcpy(&argp->orig.size, bp, sizeof(u_int32_t));
+	bp += sizeof(u_int32_t);
+	argp->orig.data = bp;	/* points into recbuf; the bytes are not copied */
+	bp += argp->orig.size;
+	memcpy(&argp->repl.size, bp, sizeof(u_int32_t));
+	bp += sizeof(u_int32_t);
+	argp->repl.data = bp;	/* points into recbuf; the bytes are not copied */
+	bp += argp->repl.size;
+	memcpy(&argp->prefix, bp, sizeof(argp->prefix));
+	bp += sizeof(argp->prefix);
+	memcpy(&argp->suffix, bp, sizeof(argp->suffix));
+	bp += sizeof(argp->suffix);
+	*argpp = argp;
+	return (0);
+}
+
+/*
  * PUBLIC: int __bam_init_print __P((DB_ENV *));
  */
 int
@@ -1241,6 +1467,9 @@ __bam_init_print(dbenv)
 	if ((ret = __db_add_recovery(dbenv,
 	    __bam_cdel_print, DB_bam_cdel)) != 0)
 		return (ret);
+	if ((ret = __db_add_recovery(dbenv,
+	    __bam_repl_print, DB_bam_repl)) != 0)
+		return (ret);
 	return (0);
 }
 
@@ -1274,6 +1503,9 @@ __bam_init_recover(dbenv)
 	if ((ret = __db_add_recovery(dbenv,
 	    __bam_cdel_recover, DB_bam_cdel)) != 0)
 		return (ret);
+	if ((ret = __db_add_recovery(dbenv,
+	    __bam_repl_recover, DB_bam_repl)) != 0)
+		return (ret);
 	return (0);
 }
 
diff --git a/db2/common/db_appinit.c b/db2/common/db_appinit.c
index 51d9262859..74ba9ff426 100644
--- a/db2/common/db_appinit.c
+++ b/db2/common/db_appinit.c
@@ -8,7 +8,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)db_appinit.c	10.33 (Sleepycat) 8/28/97";
+static const char sccsid[] = "@(#)db_appinit.c	10.36 (Sleepycat) 10/28/97";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -93,6 +93,10 @@ db_appinit(db_home, db_config, dbenv, flags)
 	    LF_ISSET(RECOVERY_FLAGS) != RECOVERY_FLAGS)
 		return (__db_ferr(dbenv, "db_appinit", 1));
 
+	/* Convert the db_appinit(3) flags. */
+	if (LF_ISSET(DB_THREAD))
+		F_SET(dbenv, DB_ENV_THREAD);
+
 	fp = NULL;
 
 	/* Set the database home. */
@@ -126,7 +130,7 @@ db_appinit(db_home, db_config, dbenv, flags)
 		goto err;
 
 	/* Indicate that the path names have been set. */
-	F_SET(dbenv, DB_APP_INIT);
+	F_SET(dbenv, DB_ENV_APPINIT);
 
 	/*
 	 * If we are doing recovery, remove all the regions.
@@ -300,7 +304,8 @@ __db_appname(dbenv, appname, dir, file, fdp, namep)
 	 * return.
 	 */
 	if (file != NULL && __db_abspath(file))
-		return ((*namep = (char *)strdup(file)) == NULL ? ENOMEM : 0);
+		return ((*namep =
+		    (char *)__db_strdup(file)) == NULL ? ENOMEM : 0);
 	if (dir != NULL && __db_abspath(dir)) {
 		a = dir;
 		goto done;
@@ -335,7 +340,7 @@ __db_appname(dbenv, appname, dir, file, fdp, namep)
 	 */
 retry:	switch (appname) {
 	case DB_APP_NONE:
-		if (dbenv == NULL || !F_ISSET(dbenv, DB_APP_INIT)) {
+		if (dbenv == NULL || !F_ISSET(dbenv, DB_ENV_APPINIT)) {
 			if (dir == NULL)
 				goto tmp;
 			a = dir;
@@ -355,7 +360,7 @@ retry:	switch (appname) {
 			tmp_create = 1;
 			goto tmp;
 		}
-		if (dbenv == NULL || !F_ISSET(dbenv, DB_APP_INIT))
+		if (dbenv == NULL || !F_ISSET(dbenv, DB_ENV_APPINIT))
 			a = PATH_DOT;
 		else {
 			a = dbenv->db_home;
@@ -367,7 +372,7 @@ retry:	switch (appname) {
 		}
 		break;
 	case DB_APP_LOG:
-		if (dbenv == NULL || !F_ISSET(dbenv, DB_APP_INIT)) {
+		if (dbenv == NULL || !F_ISSET(dbenv, DB_ENV_APPINIT)) {
 			if (dir == NULL)
 				goto tmp;
 			a = dir;
@@ -385,7 +390,7 @@ retry:	switch (appname) {
 		}
 
 		tmp_create = 1;
-		if (dbenv == NULL || !F_ISSET(dbenv, DB_APP_INIT))
+		if (dbenv == NULL || !F_ISSET(dbenv, DB_ENV_APPINIT))
 			goto tmp;
 		else {
 			a = dbenv->db_home;
@@ -396,7 +401,7 @@ retry:	switch (appname) {
 
 	/* Reference a file from the appropriate temporary directory. */
 	if (0) {
-tmp:		if (dbenv == NULL || !F_ISSET(dbenv, DB_APP_INIT)) {
+tmp:		if (dbenv == NULL || !F_ISSET(dbenv, DB_ENV_APPINIT)) {
 			memset(&etmp, 0, sizeof(etmp));
 			if ((ret = __db_tmp_dir(&etmp, DB_USE_ENVIRON)) != 0)
 				return (ret);
@@ -412,7 +417,7 @@ done:	len =
 	    (c == NULL ? 0 : strlen(c) + 1) +
 	    (file == NULL ? 0 : strlen(file) + 1);
 
-	if ((start = (char *)malloc(len)) == NULL) {
+	if ((start = (char *)__db_malloc(len)) == NULL) {
 		__db_err(dbenv, "%s", strerror(ENOMEM));
 		if (tmp_free)
 			FREES(etmp.db_tmp_dir);
@@ -484,7 +489,7 @@ __db_home(dbenv, db_home, flags)
 	if (p == NULL)
 		return (0);
 
-	if ((dbenv->db_home = (char *)strdup(p)) == NULL) {
+	if ((dbenv->db_home = (char *)__db_strdup(p)) == NULL) {
 		__db_err(dbenv, "%s", strerror(ENOMEM));
 		return (ENOMEM);
 	}
@@ -509,7 +514,7 @@ __db_parse(dbenv, s)
 	 * We need to strdup the argument in case the caller passed us
 	 * static data.
 	 */
-	if ((local_s = (char *)strdup(s)) == NULL)
+	if ((local_s = (char *)__db_strdup(s)) == NULL)
 		return (ENOMEM);
 
 	tp = local_s;
@@ -526,14 +531,15 @@ illegal:	ret = EINVAL;
 #define	DATA_INIT_CNT	20			/* Start with 20 data slots. */
 	if (!strcmp(name, "DB_DATA_DIR")) {
 		if (dbenv->db_data_dir == NULL) {
-			if ((dbenv->db_data_dir = (char **)calloc(DATA_INIT_CNT,
+			if ((dbenv->db_data_dir =
+			    (char **)__db_calloc(DATA_INIT_CNT,
 			    sizeof(char **))) == NULL)
 				goto nomem;
 			dbenv->data_cnt = DATA_INIT_CNT;
 		} else if (dbenv->data_next == dbenv->data_cnt - 1) {
 			dbenv->data_cnt *= 2;
 			if ((dbenv->db_data_dir =
-			    (char **)realloc(dbenv->db_data_dir,
+			    (char **)__db_realloc(dbenv->db_data_dir,
 			    dbenv->data_cnt * sizeof(char **))) == NULL)
 				goto nomem;
 		}
@@ -549,7 +555,7 @@ illegal:	ret = EINVAL;
 	} else
 		goto err;
 
-	if ((*p = (char *)strdup(value)) == NULL) {
+	if ((*p = (char *)__db_strdup(value)) == NULL) {
 nomem:		ret = ENOMEM;
 		__db_err(dbenv, "%s", strerror(ENOMEM));
 	}
@@ -623,7 +629,7 @@ __db_tmp_dir(dbenv, flags)
 		if (!Special2FSSpec(kTemporaryFolderType,
 		    kOnSystemDisk, 0, &spec)) {
 			p = FSp2FullPath(&spec);
-			sTempFolder = malloc(strlen(p) + 1);
+			sTempFolder = __db_malloc(strlen(p) + 1);
 			strcpy(sTempFolder, p);
 			p = sTempFolder;
 		}
@@ -639,7 +645,7 @@ __db_tmp_dir(dbenv, flags)
 	if (p == NULL)
 		return (0);
 
-	if ((dbenv->db_tmp_dir = (char *)strdup(p)) == NULL) {
+	if ((dbenv->db_tmp_dir = (char *)__db_strdup(p)) == NULL) {
 		__db_err(dbenv, "%s", strerror(ENOMEM));
 		return (ENOMEM);
 	}
@@ -722,7 +728,7 @@ __db_tmp_open(dbenv, dir, fdp)
 		(void)sigprocmask(SIG_BLOCK, &set, &oset);
 #endif
 #define	DB_TEMPOPEN	DB_CREATE | DB_EXCL | DB_TEMPORARY
-		if ((ret = __db_fdopen(buf,
+		if ((ret = __db_open(buf,
 		    DB_TEMPOPEN, DB_TEMPOPEN, S_IRUSR | S_IWUSR, fdp)) == 0) {
 #ifdef HAVE_SIGFILLSET
 			(void)sigprocmask(SIG_SETMASK, &oset, NULL);
diff --git a/db2/common/db_apprec.c b/db2/common/db_apprec.c
index 2e94673731..ac0176d70f 100644
--- a/db2/common/db_apprec.c
+++ b/db2/common/db_apprec.c
@@ -11,7 +11,7 @@
 static const char copyright[] =
 "@(#) Copyright (c) 1997\n\
 	Sleepycat Software Inc.  All rights reserved.\n";
-static const char sccsid[] = "@(#)db_apprec.c	10.16 (Sleepycat) 8/27/97";
+static const char sccsid[] = "@(#)db_apprec.c	10.18 (Sleepycat) 9/30/97";
 #endif
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -31,12 +31,6 @@ static const char sccsid[] = "@(#)db_apprec.c	10.16 (Sleepycat) 8/27/97";
 #include "txn.h"
 #include "common_ext.h"
 
-#define	FREE_DBT(L, D) {						\
-	if (F_ISSET((L), DB_AM_THREAD) && (D).data != NULL)		\
-		free((D).data);						\
-		(D).data = NULL;					\
-	}								\
-
 /*
  * __db_apprec --
  *	Perform recovery.
@@ -52,34 +46,41 @@ __db_apprec(dbenv, flags)
 	DB_LOG *lp;
 	DB_LSN ckp_lsn, first_lsn, lsn, tmp_lsn;
 	time_t now;
-	int first_flag, ret;
+	int first_flag, is_thread, ret;
 	void *txninfo;
 
+	lp = dbenv->lg_info;
+
 	/* Initialize the transaction list. */
 	if ((ret = __db_txnlist_init(&txninfo)) != 0)
 		return (ret);
 
 	/*
+	 * Save the state of the thread flag -- we don't need it on at the
+	 * moment because we're single-threaded until recovery is complete.
+	 */
+	is_thread = F_ISSET(lp, DB_AM_THREAD);
+	F_CLR(lp, DB_AM_THREAD);
+
+	/*
 	 * Read forward through the log, opening the appropriate files so that
 	 * we can call recovery routines.  In general, we start at the last
 	 * checkpoint prior to the last checkpointed LSN.  For catastrophic
 	 * recovery, we begin at the first LSN that appears in any log file
 	 * (log_get figures this out for us when we pass it the DB_FIRST flag).
 	 */
-	lp = dbenv->lg_info;
 	if (LF_ISSET(DB_RECOVER_FATAL))
 		first_flag = DB_FIRST;
 	else {
-		if ((ret = __log_findckp(lp, &lsn)) == DB_NOTFOUND)
+		if ((ret = __log_findckp(lp, &lsn)) == DB_NOTFOUND) {
+			F_SET(lp, is_thread);
 			return (0);
+		}
 		first_flag = DB_SET;
 	}
 
 	/* If we're a threaded application, we have to allocate space. */
 	memset(&data, 0, sizeof(data));
-	if (F_ISSET(lp, DB_AM_THREAD))
-		F_SET(&data, DB_DBT_MALLOC);
-
 	if ((ret = log_get(lp, &lsn, &data, first_flag)) != 0) {
 		__db_err(dbenv, "Failure: unable to get log record");
 		if (first_flag == DB_SET)
@@ -93,7 +94,6 @@ __db_apprec(dbenv, flags)
 	first_lsn = lsn;
 	for (;;) {
 		ret = __db_dispatch(lp, &data, &lsn, TXN_OPENFILES, txninfo);
-		FREE_DBT(lp, data);
 		if (ret != 0 && ret != DB_TXN_CKP)
 			goto msgerr;
 		if ((ret =
@@ -103,7 +103,6 @@ __db_apprec(dbenv, flags)
 			break;
 		}
 	}
-	FREE_DBT(lp, data);
 
 	/*
 	 * Initialize the ckp_lsn to 0,0.  If we never find a valid
@@ -116,7 +115,6 @@ __db_apprec(dbenv, flags)
 		tmp_lsn = lsn;
 		ret = __db_dispatch(lp,
 		    &data, &lsn, TXN_BACKWARD_ROLL, txninfo);
-		FREE_DBT(lp, data);
 		if (ret == DB_TXN_CKP) {
 			if (IS_ZERO_LSN(ckp_lsn))
 				ckp_lsn = tmp_lsn;
@@ -124,20 +122,17 @@ __db_apprec(dbenv, flags)
 		} else if (ret != 0)
 			goto msgerr;
 	}
-	FREE_DBT(lp, data);
 	if (ret != 0 && ret != DB_NOTFOUND)
 		goto err;
 
 	for (ret = log_get(lp, &lsn, &data, DB_NEXT);
 	    ret == 0; ret = log_get(lp, &lsn, &data, DB_NEXT)) {
 		ret = __db_dispatch(lp, &data, &lsn, TXN_FORWARD_ROLL, txninfo);
-		FREE_DBT(lp, data);
 		if (ret == DB_TXN_CKP)
 			ret = 0;
 		else if (ret != 0)
 			goto msgerr;
 	}
-	FREE_DBT(lp, data);
 	if (ret != DB_NOTFOUND)
 		goto err;
 
@@ -165,11 +160,12 @@ __db_apprec(dbenv, flags)
 		    (u_long)dbenv->tx_info->region->last_ckp.offset);
 	}
 
+	F_SET(lp, is_thread);
 	return (0);
 
 msgerr:	__db_err(dbenv, "Recovery function for LSN %lu %lu failed",
 	    (u_long)lsn.file, (u_long)lsn.offset);
 
-err:	FREE_DBT(lp, data);
+err:	F_SET(lp, is_thread);
 	return (ret);
 }
diff --git a/db2/common/db_byteorder.c b/db2/common/db_byteorder.c
index a8d7715455..e486132073 100644
--- a/db2/common/db_byteorder.c
+++ b/db2/common/db_byteorder.c
@@ -8,20 +8,20 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)db_byteorder.c	10.3 (Sleepycat) 6/21/97";
+static const char sccsid[] = "@(#)db_byteorder.c	10.4 (Sleepycat) 9/4/97";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
 #include <sys/types.h>
 
-#include <errno.h>
+#ifdef HAVE_ENDIAN_H
+#include <endian.h>
+#if BYTE_ORDER == BIG_ENDIAN
+#define	WORDS_BIGENDIAN	1
+#endif
 #endif
 
-#ifdef HAVE_ENDIAN_H
-# include <endian.h>
-# if BYTE_ORDER == BIG_ENDIAN
-#  define WORDS_BIGENDIAN 1
-# endif
+#include <errno.h>
 #endif
 
 #include "db_int.h"
diff --git a/db2/common/db_region.c b/db2/common/db_region.c
index 86d79a8148..3e8cd2dc66 100644
--- a/db2/common/db_region.c
+++ b/db2/common/db_region.c
@@ -43,7 +43,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)db_region.c	10.13 (Sleepycat) 8/27/97";
+static const char sccsid[] = "@(#)db_region.c	10.15 (Sleepycat) 10/25/97";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -114,7 +114,7 @@ __db_rcreate(dbenv, appname, path, file, mode, size, fdp, retp)
 	 * attempts to create the region will return failure in one of the
 	 * attempts.
 	 */
-	if (fd == -1 && (ret = __db_fdopen(name,
+	if (fd == -1 && (ret = __db_open(name,
 	    DB_CREATE | DB_EXCL, DB_CREATE | DB_EXCL, mode, &fd)) != 0) {
 		if (ret != EEXIST)
 			__db_err(dbenv,
@@ -131,6 +131,42 @@ __db_rcreate(dbenv, appname, path, file, mode, size, fdp, retp)
 	if ((ret = __db_rmap(dbenv, fd, size, &rp)) != 0)
 		goto err;
 
+	/* Initialize the region. */
+	if ((ret = __db_rinit(dbenv, rp, fd, size, 1)) != 0)
+		goto err;
+
+	if (name != NULL)
+		FREES(name);
+
+	*(void **)retp = rp;
+	return (0);
+
+err:	if (fd != -1) {
+		if (rp != NULL)
+			(void)__db_unmap(rp, rp->size);
+		(void)__db_unlink(name);
+		(void)__db_close(fd);
+	}
+	if (name != NULL)
+		FREES(name);
+	return (ret);
+}
+
+/*
+ * __db_rinit --
+ *	Initialize the region.
+ *
+ * PUBLIC: int __db_rinit __P((DB_ENV *, RLAYOUT *, int, size_t, int));
+ */
+int
+__db_rinit(dbenv, rp, fd, size, lock_region)
+	DB_ENV *dbenv;
+	RLAYOUT *rp;
+	size_t size;
+	int fd, lock_region;
+{
+	int ret;
+
 	/*
 	 * Initialize the common information.
 	 *
@@ -141,9 +177,12 @@ __db_rcreate(dbenv, appname, path, file, mode, size, fdp, retp)
 	 * file permissions games, but we can't because WNT filesystems won't
 	 * open a file mode 0.
 	 *
-	 * So, the process that's creating the region always acquires the lock
-	 * before the setting the version number.  Any process joining always
-	 * checks the version number before attempting to acquire the lock.
+	 * If the lock_region flag is set, the process creating the region
+	 * acquires the lock before setting the version number.  Any
+	 * process joining the region checks the version number before
+	 * attempting to acquire the lock.  (The lock_region flag may not be
+	 * set -- the mpool code sometimes malloc's private regions but still
+	 * needs to initialize them, specifically, the mutex for threads.)
 	 *
 	 * We have to check the version number first, because if the version
 	 * number has not been written, it's possible that the mutex has not
@@ -151,30 +190,16 @@ __db_rcreate(dbenv, appname, path, file, mode, size, fdp, retp)
 	 * random behavior.  If the version number isn't there (the file size
 	 * is too small) or it's 0, we know that the region is being created.
 	 */
-	(void)__db_mutex_init(&rp->lock, MUTEX_LOCK_OFFSET(rp, &rp->lock));
-	(void)__db_mutex_lock(&rp->lock,
-	    fd, dbenv == NULL ? NULL : dbenv->db_yield);
+	__db_mutex_init(&rp->lock, MUTEX_LOCK_OFFSET(rp, &rp->lock));
+	if (lock_region && (ret = __db_mutex_lock(&rp->lock, fd)) != 0)
+		return (ret);
 
 	rp->refcnt = 1;
 	rp->size = size;
 	rp->flags = 0;
 	db_version(&rp->majver, &rp->minver, &rp->patch);
 
-	if (name != NULL)
-		FREES(name);
-
-	*(void **)retp = rp;
 	return (0);
-
-err:	if (fd != -1) {
-		if (rp != NULL)
-			(void)__db_munmap(rp, rp->size);
-		(void)__db_unlink(name);
-		(void)__db_close(fd);
-	}
-	if (name != NULL)
-		FREES(name);
-	return (ret);
 }
 
 /*
@@ -205,7 +230,7 @@ __db_ropen(dbenv, appname, path, file, flags, fdp, retp)
 		return (ret);
 
 	/* Open the file. */
-	if ((ret = __db_fdopen(name, flags, DB_MUTEXDEBUG, 0, &fd)) != 0) {
+	if ((ret = __db_open(name, flags, DB_MUTEXDEBUG, 0, &fd)) != 0) {
 		__db_err(dbenv, "region open: %s: %s", name, strerror(ret));
 		goto err2;
 	}
@@ -225,8 +250,10 @@ __db_ropen(dbenv, appname, path, file, flags, fdp, retp)
 	 * flatly impossible.  Hope that mmap fails if the file is too large.
 	 *
 	 */
-	if ((ret = __db_stat(dbenv, name, fd, &size1, NULL)) != 0)
+	if ((ret = __db_ioinfo(name, fd, &size1, NULL)) != 0) {
+		__db_err(dbenv, "%s: %s", name, strerror(ret));
 		goto err2;
+	}
 
 	/* Check to make sure the first block has been written. */
 	if ((size_t)size1 < sizeof(RLAYOUT)) {
@@ -249,16 +276,17 @@ __db_ropen(dbenv, appname, path, file, flags, fdp, retp)
 
 	/* Get the region lock. */
 	if (!LF_ISSET(DB_MUTEXDEBUG))
-		(void)__db_mutex_lock(&rp->lock,
-		    fd, dbenv == NULL ? NULL : dbenv->db_yield);
+		(void)__db_mutex_lock(&rp->lock, fd);
 
 	/*
 	 * The file may have been half-written if we were descheduled between
 	 * getting the size of the file and checking the major version.  Check
 	 * to make sure we got the entire file.
 	 */
-	if ((ret = __db_stat(dbenv, name, fd, &size2, NULL)) != 0)
+	if ((ret = __db_ioinfo(name, fd, &size2, NULL)) != 0) {
+		__db_err(dbenv, "%s: %s", name, strerror(ret));
 		goto err1;
+	}
 	if (size1 != size2) {
 		ret = EAGAIN;
 		goto err1;
@@ -285,7 +313,7 @@ __db_ropen(dbenv, appname, path, file, flags, fdp, retp)
 err1:	if (!LF_ISSET(DB_MUTEXDEBUG))
 		(void)__db_mutex_unlock(&rp->lock, fd);
 err2:	if (rp != NULL)
-		(void)__db_munmap(rp, rp->size);
+		(void)__db_unmap(rp, rp->size);
 	if (fd != -1)
 		(void)__db_close(fd);
 	FREES(name);
@@ -312,8 +340,7 @@ __db_rclose(dbenv, fd, ptr)
 	fail = NULL;
 
 	/* Get the lock. */
-	if ((ret = __db_mutex_lock(&rp->lock,
-	    fd, dbenv == NULL ? NULL : dbenv->db_yield)) != 0) {
+	if ((ret = __db_mutex_lock(&rp->lock, fd)) != 0) {
 		fail = "lock get";
 		goto err;
 	}
@@ -328,7 +355,7 @@ __db_rclose(dbenv, fd, ptr)
 	}
 
 	/* Discard the region. */
-	if ((t_ret = __db_munmap(ptr, rp->size)) != 0 && fail == NULL) {
+	if ((t_ret = __db_unmap(ptr, rp->size)) != 0 && fail == NULL) {
 		ret = t_ret;
 		fail = "munmap";
 	}
@@ -392,8 +419,7 @@ __db_runlink(dbenv, appname, path, file, force)
 	/* Open and lock the region. */
 	if ((ret = __db_ropen(dbenv, appname, path, file, 0, &fd, &rp)) != 0)
 		goto err1;
-	(void)__db_mutex_lock(&rp->lock,
-	    fd, dbenv == NULL ? NULL : dbenv->db_yield);
+	(void)__db_mutex_lock(&rp->lock, fd);
 
 	/* If the region is currently being deleted, fail. */
 	if (F_ISSET(rp, DB_R_DELETED)) {
@@ -434,8 +460,7 @@ __db_runlink(dbenv, appname, path, file, force)
 	/* Not a clue.  Try to clear the DB_R_DELETED flag. */
 	if ((ret = __db_ropen(dbenv, appname, path, file, 0, &fd, &rp)) != 0)
 		goto err1;
-	(void)__db_mutex_lock(&rp->lock,
-	    fd, dbenv == NULL ? NULL : dbenv->db_yield);
+	(void)__db_mutex_lock(&rp->lock, fd);
 	F_CLR(rp, DB_R_DELETED);
 	/* FALLTHROUGH */
 
@@ -472,7 +497,7 @@ __db_rgrow(dbenv, fd, incr)
 	char buf[__DB_VMPAGESIZE];
 
 	/* Seek to the end of the region. */
-	if ((ret = __db_lseek(fd, 0, 0, 0, SEEK_END)) != 0)
+	if ((ret = __db_seek(fd, 0, 0, 0, SEEK_END)) != 0)
 		goto err;
 
 	/* Write nuls to the new bytes. */
@@ -500,7 +525,7 @@ __db_rgrow(dbenv, fd, incr)
 	incr -= incr % __DB_VMPAGESIZE;
 
 	/* Write the last page, not the page after the last. */
-	if ((ret = __db_lseek(fd, 0, 0, incr - __DB_VMPAGESIZE, SEEK_CUR)) != 0)
+	if ((ret = __db_seek(fd, 0, 0, incr - __DB_VMPAGESIZE, SEEK_CUR)) != 0)
 		goto err;
 	if ((ret = __db_write(fd, buf, sizeof(buf), &nw)) != 0)
 		goto err;
@@ -531,7 +556,7 @@ __db_rremap(dbenv, ptr, oldsize, newsize, fd, retp)
 {
 	int ret;
 
-	if ((ret = __db_munmap(ptr, oldsize)) != 0) {
+	if ((ret = __db_unmap(ptr, oldsize)) != 0) {
 		__db_err(dbenv, "region remap: munmap: %s", strerror(ret));
 		return (ret);
 	}
@@ -553,7 +578,7 @@ __db_rmap(dbenv, fd, size, retp)
 	RLAYOUT *rp;
 	int ret;
 
-	if ((ret = __db_mmap(fd, size, 0, 0, &rp)) != 0) {
+	if ((ret = __db_map(fd, size, 0, 0, (void **)&rp)) != 0) {
 		__db_err(dbenv, "region map: mmap %s", strerror(ret));
 		return (ret);
 	}
diff --git a/db2/db.h b/db2/db.h
index 6911002ed5..fb2d6bb3da 100644
--- a/db2/db.h
+++ b/db2/db.h
@@ -4,7 +4,7 @@
  * Copyright (c) 1996, 1997
  *	Sleepycat Software.  All rights reserved.
  *
- *	@(#)db.h.src	10.77 (Sleepycat) 9/24/97
+ *	@(#)db.h.src	10.91 (Sleepycat) 11/3/97
  */
 
 #ifndef _DB_H_
@@ -28,9 +28,15 @@
  * XXX
  * Handle function prototypes and the keyword "const".  This steps on name
  * space that DB doesn't control, but all of the other solutions are worse.
+ *
+ * XXX
+ * While Microsoft's compiler is ANSI C compliant, it doesn't have __STDC__
+ * defined by default; you must specify a command line flag or #pragma to turn
+ * it on.  Don't do that, however, because some of Microsoft's own header
+ * files won't compile.
  */
 #undef	__P
-#if defined(__STDC__) || defined(__cplusplus)
+#if defined(__STDC__) || defined(__cplusplus) || defined(_MSC_VER)
 #define	__P(protos)	protos		/* ANSI C prototypes */
 #else
 #define	const
@@ -67,8 +73,8 @@
 
 #define	DB_VERSION_MAJOR	2
 #define	DB_VERSION_MINOR	3
-#define	DB_VERSION_PATCH	10
-#define	DB_VERSION_STRING	"Sleepycat Software: DB 2.3.10: (9/24/97)"
+#define	DB_VERSION_PATCH	12
+#define	DB_VERSION_STRING	"Sleepycat Software: DB 2.3.12: (11/3/97)"
 
 typedef	u_int32_t	db_pgno_t;	/* Page number type. */
 typedef	u_int16_t	db_indx_t;	/* Page offset type. */
@@ -93,6 +99,7 @@ struct __db_lockregion;	typedef struct __db_lockregion DB_LOCKREGION;
 struct __db_lockreq;	typedef struct __db_lockreq DB_LOCKREQ;
 struct __db_locktab;	typedef struct __db_locktab DB_LOCKTAB;
 struct __db_log;	typedef struct __db_log DB_LOG;
+struct __db_log_stat;	typedef struct __db_log_stat DB_LOG_STAT;
 struct __db_lsn;	typedef struct __db_lsn DB_LSN;
 struct __db_mpool;	typedef struct __db_mpool DB_MPOOL;
 struct __db_mpool_fstat;typedef struct __db_mpool_fstat DB_MPOOL_FSTAT;
@@ -122,6 +129,31 @@ struct __db_dbt {
 };
 
 /*
+ * DB configuration.  There is a set of functions that the application
+ * can replace with its own versions.
+ */
+#define	DB_FUNC_CALLOC	 1		/* ANSI C calloc. */
+#define	DB_FUNC_CLOSE	 2		/* POSIX 1003.1 close. */
+#define	DB_FUNC_DIRFREE	 3		/* DB: free directory list. */
+#define	DB_FUNC_DIRLIST	 4		/* DB: create directory list. */
+#define	DB_FUNC_EXISTS	 5		/* DB: return if file exists. */
+#define	DB_FUNC_FREE	 6		/* ANSI C free. */
+#define	DB_FUNC_FSYNC	 7		/* POSIX 1003.1 fsync. */
+#define	DB_FUNC_IOINFO	 8		/* DB: return file I/O information. */
+#define	DB_FUNC_MALLOC	 9		/* ANSI C malloc. */
+#define	DB_FUNC_MAP	10		/* DB: map file into shared memory. */
+#define	DB_FUNC_OPEN	11		/* POSIX 1003.1 open. */
+#define	DB_FUNC_READ	12		/* POSIX 1003.1 read. */
+#define	DB_FUNC_REALLOC	13		/* ANSI C realloc. */
+#define	DB_FUNC_SEEK	14		/* POSIX 1003.1 lseek. */
+#define	DB_FUNC_SLEEP	15		/* DB: sleep secs/usecs. */
+#define	DB_FUNC_STRDUP	16		/* ANSI C strdup. */
+#define	DB_FUNC_UNLINK	17		/* POSIX 1003.1 unlink. */
+#define	DB_FUNC_UNMAP	18		/* DB: unmap shared memory file. */
+#define	DB_FUNC_WRITE	19		/* POSIX 1003.1 write. */
+#define	DB_FUNC_YIELD	20		/* DB: yield thread to scheduler. */
+
+/*
  * Database configuration and initialization.
  */
  /*
@@ -134,21 +166,20 @@ struct __db_dbt {
 /*
  * Flags understood by db_appinit(3).
  *
- * DB_APP_INIT and DB_MUTEXDEBUG are internal only, and not documented.
+ * DB_MUTEXDEBUG is internal only, and not documented.
  */
 /*				0x00007	   COMMON MASK. */
-#define	DB_APP_INIT		0x00008	/* Appinit called, paths initialized. */
-#define	DB_INIT_LOCK		0x00010	/* Initialize locking. */
-#define	DB_INIT_LOG		0x00020	/* Initialize logging. */
-#define	DB_INIT_MPOOL		0x00040	/* Initialize mpool. */
-#define	DB_INIT_TXN		0x00080	/* Initialize transactions. */
-#define	DB_MPOOL_PRIVATE	0x00100	/* Mpool: private memory pool. */
-#define	DB_MUTEXDEBUG		0x00200	/* Do not get/set mutexes in regions. */
-#define	DB_RECOVER		0x00400	/* Run normal recovery. */
-#define	DB_RECOVER_FATAL	0x00800 /* Run catastrophic recovery. */
-#define	DB_TXN_NOSYNC		0x01000	/* Do not sync log on commit. */
-#define	DB_USE_ENVIRON		0x02000	/* Use the environment. */
-#define	DB_USE_ENVIRON_ROOT	0x04000	/* Use the environment if root. */
+#define	DB_INIT_LOCK		0x00008	/* Initialize locking. */
+#define	DB_INIT_LOG		0x00010	/* Initialize logging. */
+#define	DB_INIT_MPOOL		0x00020	/* Initialize mpool. */
+#define	DB_INIT_TXN		0x00040	/* Initialize transactions. */
+#define	DB_MPOOL_PRIVATE	0x00080	/* Mpool: private memory pool. */
+#define	DB_MUTEXDEBUG		0x00100	/* Do not get/set mutexes in regions. */
+#define	DB_RECOVER		0x00200	/* Run normal recovery. */
+#define	DB_RECOVER_FATAL	0x00400 /* Run catastrophic recovery. */
+#define	DB_TXN_NOSYNC		0x00800	/* Do not sync log on commit. */
+#define	DB_USE_ENVIRON		0x01000	/* Use the environment. */
+#define	DB_USE_ENVIRON_ROOT	0x02000	/* Use the environment if root. */
 
 /* CURRENTLY UNUSED LOCK FLAGS. */
 #define	DB_TXN_LOCK_2PL		0x00000	/* Two-phase locking. */
@@ -209,7 +240,6 @@ struct __db_env {
 	int		 lk_modes;	/* Number of lock modes in table. */
 	unsigned int	 lk_max;	/* Maximum number of locks. */
 	u_int32_t	 lk_detect;	/* Deadlock detect on every conflict. */
-	int (*db_yield) __P((void));	/* Yield function for threads. */
 
 	/* Logging. */
 	DB_LOG		*lg_info;	/* Return from log_open(). */
@@ -226,6 +256,9 @@ struct __db_env {
 	int (*tx_recover)		/* Dispatch function for recovery. */
 	    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
 
+#define	DB_ENV_APPINIT		0x01	/* Paths initialized by db_appinit(). */
+#define	DB_ENV_STANDALONE	0x02	/* Test: freestanding environment. */
+#define	DB_ENV_THREAD		0x04	/* DB_ENV is multi-threaded. */
 	u_int32_t	 flags;		/* Flags. */
 };
 
@@ -301,7 +334,7 @@ struct __db_info {
 #define	DB_CURRENT	0x000010	/* c_get(), c_put(), log_get() */
 #define	DB_FIRST	0x000020	/* c_get(), log_get() */
 #define	DB_FLUSH	0x000040	/* log_put() */
-#define	DB_GET_RECNO	0x000080	/* c_get() */
+#define	DB_GET_RECNO	0x000080	/* get(), c_get() */
 #define	DB_KEYFIRST	0x000100	/* c_put() */
 #define	DB_KEYLAST	0x000200	/* c_put() */
 #define	DB_LAST		0x000400	/* c_get(), log_get() */
@@ -312,7 +345,7 @@ struct __db_info {
 #define	DB_RECORDCOUNT	0x008000	/* stat() */
 #define	DB_SET		0x010000	/* c_get(), log_get() */
 #define	DB_SET_RANGE	0x020000	/* c_get() */
-#define	DB_SET_RECNO	0x040000	/* get(), c_get() */
+#define	DB_SET_RECNO	0x040000	/* c_get() */
 
 /* DB (user visible) error return codes. */
 #define	DB_INCOMPLETE		( -1)	/* Sync didn't finish. */
@@ -472,6 +505,8 @@ struct __db_bt_stat {
 	u_int32_t bt_get;		/* Items retrieved. */
 	u_int32_t bt_cache_hit;		/* Hits in fast-insert code. */
 	u_int32_t bt_cache_miss;	/* Misses in fast-insert code. */
+	u_int32_t bt_magic;		/* Magic number. */
+	u_int32_t bt_version;		/* Version number. */
 };
 
 #if defined(__cplusplus)
@@ -479,6 +514,7 @@ extern "C" {
 #endif
 int   db_appinit __P((const char *, char * const *, DB_ENV *, int));
 int   db_appexit __P((DB_ENV *));
+int   db_jump_set __P((void *, int));
 int   db_open __P((const char *, DBTYPE, int, int, DB_ENV *, DB_INFO *, DB **));
 char *db_version __P((int *, int *, int *));
 #if defined(__cplusplus)
@@ -576,6 +612,22 @@ struct __db_lsn {
 	u_int32_t	offset;		/* File offset. */
 };
 
+/* Log statistics structure. */
+struct __db_log_stat {
+	u_int32_t st_magic;		/* Log file magic number. */
+	u_int32_t st_version;		/* Log file version number. */
+	int st_mode;			/* Log file mode. */
+	u_int32_t st_lg_max;		/* Maximum log file size. */
+	u_int32_t st_w_bytes;		/* Bytes to log. */
+	u_int32_t st_w_mbytes;		/* Megabytes to log. */
+	u_int32_t st_wc_bytes;		/* Bytes to log since checkpoint. */
+	u_int32_t st_wc_mbytes;		/* Megabytes to log since checkpoint. */
+	u_int32_t st_wcount;		/* Total writes to the log. */
+	u_int32_t st_scount;		/* Total syncs to the log. */
+	u_int32_t st_region_wait;	/* Region lock granted after wait. */
+	u_int32_t st_region_nowait;	/* Region lock granted without wait. */
+};
+
 #if defined(__cplusplus)
 extern "C" {
 #endif
@@ -588,6 +640,7 @@ int	 log_get __P((DB_LOG *, DB_LSN *, DBT *, int));
 int	 log_open __P((const char *, int, int, DB_ENV *, DB_LOG **));
 int	 log_put __P((DB_LOG *, DB_LSN *, const DBT *, int));
 int	 log_register __P((DB_LOG *, DB *, const char *, DBTYPE, u_int32_t *));
+int	 log_stat __P((DB_LOG *, DB_LOG_STAT **, void *(*)(size_t)));
 int	 log_unlink __P((const char *, int, DB_ENV *));
 int	 log_unregister __P((DB_LOG *, u_int32_t));
 #if defined(__cplusplus)
@@ -610,30 +663,35 @@ int	 log_unregister __P((DB_LOG *, u_int32_t));
 /* Mpool statistics structure. */
 struct __db_mpool_stat {
 	size_t st_cachesize;		/* Cache size. */
-	unsigned long st_cache_hit;	/* Pages found in the cache. */
-	unsigned long st_cache_miss;	/* Pages not found in the cache. */
-	unsigned long st_map;		/* Pages from mapped files. */
-	unsigned long st_page_create;	/* Pages created in the cache. */
-	unsigned long st_page_in;	/* Pages read in. */
-	unsigned long st_page_out;	/* Pages written out. */
-	unsigned long st_ro_evict;	/* Read-only pages evicted. */
-	unsigned long st_rw_evict;	/* Read-write pages evicted. */
-	unsigned long st_hash_buckets;	/* Number of hash buckets. */
-	unsigned long st_hash_searches;	/* Total hash chain searches. */
-	unsigned long st_hash_longest;	/* Longest hash chain searched. */
-	unsigned long st_hash_examined;	/* Total hash entries searched. */
+	u_int32_t st_cache_hit;		/* Pages found in the cache. */
+	u_int32_t st_cache_miss;	/* Pages not found in the cache. */
+	u_int32_t st_map;		/* Pages from mapped files. */
+	u_int32_t st_page_create;	/* Pages created in the cache. */
+	u_int32_t st_page_in;		/* Pages read in. */
+	u_int32_t st_page_out;		/* Pages written out. */
+	u_int32_t st_ro_evict;		/* Clean pages forced from the cache. */
+	u_int32_t st_rw_evict;		/* Dirty pages forced from the cache. */
+	u_int32_t st_hash_buckets;	/* Number of hash buckets. */
+	u_int32_t st_hash_searches;	/* Total hash chain searches. */
+	u_int32_t st_hash_longest;	/* Longest hash chain searched. */
+	u_int32_t st_hash_examined;	/* Total hash entries searched. */
+	u_int32_t st_page_clean;	/* Clean pages. */
+	u_int32_t st_page_dirty;	/* Dirty pages. */
+	u_int32_t st_page_trickle;	/* Pages written by memp_trickle. */
+	u_int32_t st_region_wait;	/* Region lock granted after wait. */
+	u_int32_t st_region_nowait;	/* Region lock granted without wait. */
 };
 
 /* Mpool file statistics structure. */
 struct __db_mpool_fstat {
 	char *file_name;		/* File name. */
 	size_t st_pagesize;		/* Page size. */
-	unsigned long st_cache_hit;	/* Pages found in the cache. */
-	unsigned long st_cache_miss;	/* Pages not found in the cache. */
-	unsigned long st_map;		/* Pages from mapped files. */
-	unsigned long st_page_create;	/* Pages created in the cache. */
-	unsigned long st_page_in;	/* Pages read in. */
-	unsigned long st_page_out;	/* Pages written out. */
+	u_int32_t st_cache_hit;		/* Pages found in the cache. */
+	u_int32_t st_cache_miss;	/* Pages not found in the cache. */
+	u_int32_t st_map;		/* Pages from mapped files. */
+	u_int32_t st_page_create;	/* Pages created in the cache. */
+	u_int32_t st_page_in;		/* Pages read in. */
+	u_int32_t st_page_out;		/* Pages written out. */
 };
 
 #if defined(__cplusplus)
@@ -654,6 +712,7 @@ int	memp_register __P((DB_MPOOL *, int,
 int	memp_stat __P((DB_MPOOL *,
 	    DB_MPOOL_STAT **, DB_MPOOL_FSTAT ***, void *(*)(size_t)));
 int	memp_sync __P((DB_MPOOL *, DB_LSN *));
+int	memp_trickle __P((DB_MPOOL *, int, int *));
 int	memp_unlink __P((const char *, int, DB_ENV *));
 #if defined(__cplusplus)
 };
diff --git a/db2/db/db.c b/db2/db/db.c
index 9ebe73cf6e..50b14eba7c 100644
--- a/db2/db/db.c
+++ b/db2/db/db.c
@@ -44,7 +44,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)db.c	10.41 (Sleepycat) 9/23/97";
+static const char sccsid[] = "@(#)db.c	10.44 (Sleepycat) 10/25/97";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -125,13 +125,19 @@ db_open(fname, type, flags, mode, dbenv, dbinfo, dbpp)
 	if ((ret = __db_fchk(dbenv, "db_open", flags, OKFLAGS)) != 0)
 		return (ret);
 
+	if (dbenv != NULL &&
+	    LF_ISSET(DB_THREAD) && !F_ISSET(dbenv, DB_ENV_THREAD)) {
+		__db_err(dbenv, "environment not created using DB_THREAD");
+		return (EINVAL);
+	}
+
 	/* Initialize for error return. */
 	fd = -1;
 	need_fileid = 1;
 	real_name = NULL;
 
 	/* Allocate the DB structure, reference the DB_ENV structure. */
-	if ((dbp = (DB *)calloc(1, sizeof(DB))) == NULL) {
+	if ((dbp = (DB *)__db_calloc(1, sizeof(DB))) == NULL) {
 		__db_err(dbenv, "%s", strerror(ENOMEM));
 		return (ENOMEM);
 	}
@@ -239,7 +245,7 @@ db_open(fname, type, flags, mode, dbenv, dbinfo, dbpp)
 		 */
 		retry_cnt = 0;
 open_retry:	if (LF_ISSET(DB_CREATE)) {
-			if ((ret = __db_fdopen(real_name, flags | DB_EXCL,
+			if ((ret = __db_open(real_name, flags | DB_EXCL,
 			    OKFLAGS | DB_EXCL, mode, &fd)) != 0)
 				if (ret == EEXIST) {
 					LF_CLR(DB_CREATE);
@@ -250,7 +256,7 @@ open_retry:	if (LF_ISSET(DB_CREATE)) {
 					goto err;
 				}
 		} else
-			if ((ret = __db_fdopen(real_name,
+			if ((ret = __db_open(real_name,
 			    flags, OKFLAGS, mode, &fd)) != 0) {
 				__db_err(dbenv, "%s: %s", fname, strerror(ret));
 				goto err;
@@ -264,8 +270,11 @@ open_retry:	if (LF_ISSET(DB_CREATE)) {
 		 */
 		if (dbp->pgsize == 0) {
 			if ((ret =
-			    __db_stat(dbenv, real_name, fd, NULL, &io)) != 0)
+			    __db_ioinfo(real_name, fd, NULL, &io)) != 0) {
+				__db_err(dbenv,
+				    "%s: %s", real_name, strerror(ret));
 				goto err;
+			}
 			if (io < 512)
 				io = 512;
 			if (io > 16 * 1024)
@@ -477,7 +486,7 @@ empty:	/*
 
 		if (dbenv == NULL) {
 			if ((dbp->mp_dbenv =
-			    (DB_ENV *)calloc(sizeof(DB_ENV), 1)) == NULL) {
+			    (DB_ENV *)__db_calloc(sizeof(DB_ENV), 1)) == NULL) {
 				ret = ENOMEM;
 				goto err;
 			}
@@ -491,9 +500,9 @@ empty:	/*
 			restore = 1;
 		}
 		envp->mp_size = cachesize;
-		F_SET(envp, DB_MPOOL_PRIVATE);
-		if ((ret = memp_open(NULL,
-		    DB_CREATE, S_IRUSR | S_IWUSR, envp, &dbp->mp)) != 0)
+		if ((ret = memp_open(NULL, DB_CREATE | DB_MPOOL_PRIVATE |
+		    (F_ISSET(dbp, DB_AM_THREAD) ? DB_THREAD : 0),
+		    S_IRUSR | S_IWUSR, envp, &dbp->mp)) != 0)
 			goto err;
 		if (restore)
 			*dbenv = t_dbenv;
@@ -725,7 +734,8 @@ db_close(dbp, flags)
 	}
 
 	/* Sync the memory pool. */
-	if ((t_ret = memp_fsync(dbp->mpf)) != 0 && ret == 0)
+	if ((t_ret = memp_fsync(dbp->mpf)) != 0 &&
+	    t_ret != DB_INCOMPLETE && ret == 0)
 		ret = t_ret;
 
 	/* Close the memory pool file. */
diff --git a/db2/db/db.src b/db2/db/db.src
index a3e2f7b75c..07d98123ac 100644
--- a/db2/db/db.src
+++ b/db2/db/db.src
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 1996, 1997
  *	Sleepycat Software.  All rights reserved.
- *	@(#)db.src	10.3 (Sleepycat) 8/18/97
+ *	@(#)db.src	10.4 (Sleepycat) 11/2/97
  */
 #include "config.h"
 
@@ -81,15 +81,17 @@ POINTER	nextlsn		DB_LSN *	lu
 END
 
 /*
- * ovref -- Handles increment of overflow page reference count.
+ * ovref -- Handles increment/decrement of overflow page reference count.
  *
  * fileid:	identifies the file being modified.
- * pgno:	page number being incremented.
- * lsn		the page's original lsn.
+ * pgno:	page number whose ref count is being incremented/decremented.
+ * adjust:	the adjustment being made.
+ * lsn:		the page's original lsn.
  */
 BEGIN ovref
 ARG	fileid		u_int32_t	lu
 ARG	pgno		db_pgno_t	lu
+ARG	adjust		int32_t		ld
 POINTER	lsn		DB_LSN *	lu
 END
 
diff --git a/db2/db/db_auto.c b/db2/db/db_auto.c
index 6922504383..d40d964542 100644
--- a/db2/db/db_auto.c
+++ b/db2/db/db_auto.c
@@ -62,7 +62,7 @@ int __db_addrem_log(logp, txnid, ret_lsnp, flags,
 	    + sizeof(u_int32_t) + (hdr == NULL ? 0 : hdr->size)
 	    + sizeof(u_int32_t) + (dbt == NULL ? 0 : dbt->size)
 	    + sizeof(*pagelsn);
-	if ((logrec.data = (void *)malloc(logrec.size)) == NULL)
+	if ((logrec.data = (void *)__db_malloc(logrec.size)) == NULL)
 		return (ENOMEM);
 
 	bp = logrec.data;
@@ -114,7 +114,7 @@ int __db_addrem_log(logp, txnid, ret_lsnp, flags,
 	ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags);
 	if (txnid != NULL)
 		txnid->last_lsn = *ret_lsnp;
-	free(logrec.data);
+	__db_free(logrec.data);
 	return (ret);
 }
 
@@ -176,7 +176,7 @@ __db_addrem_print(notused1, dbtp, lsnp, notused3, notused4)
 	printf("\tpagelsn: [%lu][%lu]\n",
 	    (u_long)argp->pagelsn.file, (u_long)argp->pagelsn.offset);
 	printf("\n");
-	free(argp);
+	__db_free(argp);
 	return (0);
 }
 
@@ -191,7 +191,7 @@ __db_addrem_read(recbuf, argpp)
 	__db_addrem_args *argp;
 	u_int8_t *bp;
 
-	argp = (__db_addrem_args *)malloc(sizeof(__db_addrem_args) +
+	argp = (__db_addrem_args *)__db_malloc(sizeof(__db_addrem_args) +
 	    sizeof(DB_TXN));
 	if (argp == NULL)
 		return (ENOMEM);
@@ -266,7 +266,7 @@ int __db_split_log(logp, txnid, ret_lsnp, flags,
 	    + sizeof(pgno)
 	    + sizeof(u_int32_t) + (pageimage == NULL ? 0 : pageimage->size)
 	    + sizeof(*pagelsn);
-	if ((logrec.data = (void *)malloc(logrec.size)) == NULL)
+	if ((logrec.data = (void *)__db_malloc(logrec.size)) == NULL)
 		return (ENOMEM);
 
 	bp = logrec.data;
@@ -304,7 +304,7 @@ int __db_split_log(logp, txnid, ret_lsnp, flags,
 	ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags);
 	if (txnid != NULL)
 		txnid->last_lsn = *ret_lsnp;
-	free(logrec.data);
+	__db_free(logrec.data);
 	return (ret);
 }
 
@@ -355,7 +355,7 @@ __db_split_print(notused1, dbtp, lsnp, notused3, notused4)
 	printf("\tpagelsn: [%lu][%lu]\n",
 	    (u_long)argp->pagelsn.file, (u_long)argp->pagelsn.offset);
 	printf("\n");
-	free(argp);
+	__db_free(argp);
 	return (0);
 }
 
@@ -370,7 +370,7 @@ __db_split_read(recbuf, argpp)
 	__db_split_args *argp;
 	u_int8_t *bp;
 
-	argp = (__db_split_args *)malloc(sizeof(__db_split_args) +
+	argp = (__db_split_args *)__db_malloc(sizeof(__db_split_args) +
 	    sizeof(DB_TXN));
 	if (argp == NULL)
 		return (ENOMEM);
@@ -447,7 +447,7 @@ int __db_big_log(logp, txnid, ret_lsnp, flags,
 	    + sizeof(*pagelsn)
 	    + sizeof(*prevlsn)
 	    + sizeof(*nextlsn);
-	if ((logrec.data = (void *)malloc(logrec.size)) == NULL)
+	if ((logrec.data = (void *)__db_malloc(logrec.size)) == NULL)
 		return (ENOMEM);
 
 	bp = logrec.data;
@@ -499,7 +499,7 @@ int __db_big_log(logp, txnid, ret_lsnp, flags,
 	ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags);
 	if (txnid != NULL)
 		txnid->last_lsn = *ret_lsnp;
-	free(logrec.data);
+	__db_free(logrec.data);
 	return (ret);
 }
 
@@ -556,7 +556,7 @@ __db_big_print(notused1, dbtp, lsnp, notused3, notused4)
 	printf("\tnextlsn: [%lu][%lu]\n",
 	    (u_long)argp->nextlsn.file, (u_long)argp->nextlsn.offset);
 	printf("\n");
-	free(argp);
+	__db_free(argp);
 	return (0);
 }
 
@@ -571,7 +571,7 @@ __db_big_read(recbuf, argpp)
 	__db_big_args *argp;
 	u_int8_t *bp;
 
-	argp = (__db_big_args *)malloc(sizeof(__db_big_args) +
+	argp = (__db_big_args *)__db_malloc(sizeof(__db_big_args) +
 	    sizeof(DB_TXN));
 	if (argp == NULL)
 		return (ENOMEM);
@@ -610,16 +610,17 @@ __db_big_read(recbuf, argpp)
 /*
  * PUBLIC: int __db_ovref_log
  * PUBLIC:     __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
- * PUBLIC:     u_int32_t, db_pgno_t, DB_LSN *));
+ * PUBLIC:     u_int32_t, db_pgno_t, int32_t, DB_LSN *));
  */
 int __db_ovref_log(logp, txnid, ret_lsnp, flags,
-	fileid, pgno, lsn)
+	fileid, pgno, adjust, lsn)
 	DB_LOG *logp;
 	DB_TXN *txnid;
 	DB_LSN *ret_lsnp;
 	u_int32_t flags;
 	u_int32_t fileid;
 	db_pgno_t pgno;
+	int32_t adjust;
 	DB_LSN * lsn;
 {
 	DBT logrec;
@@ -639,8 +640,9 @@ int __db_ovref_log(logp, txnid, ret_lsnp, flags,
 	logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN)
 	    + sizeof(fileid)
 	    + sizeof(pgno)
+	    + sizeof(adjust)
 	    + sizeof(*lsn);
-	if ((logrec.data = (void *)malloc(logrec.size)) == NULL)
+	if ((logrec.data = (void *)__db_malloc(logrec.size)) == NULL)
 		return (ENOMEM);
 
 	bp = logrec.data;
@@ -654,6 +656,8 @@ int __db_ovref_log(logp, txnid, ret_lsnp, flags,
 	bp += sizeof(fileid);
 	memcpy(bp, &pgno, sizeof(pgno));
 	bp += sizeof(pgno);
+	memcpy(bp, &adjust, sizeof(adjust));
+	bp += sizeof(adjust);
 	if (lsn != NULL)
 		memcpy(bp, lsn, sizeof(*lsn));
 	else
@@ -666,7 +670,7 @@ int __db_ovref_log(logp, txnid, ret_lsnp, flags,
 	ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags);
 	if (txnid != NULL)
 		txnid->last_lsn = *ret_lsnp;
-	free(logrec.data);
+	__db_free(logrec.data);
 	return (ret);
 }
 
@@ -704,10 +708,11 @@ __db_ovref_print(notused1, dbtp, lsnp, notused3, notused4)
 	    (u_long)argp->prev_lsn.offset);
 	printf("\tfileid: %lu\n", (u_long)argp->fileid);
 	printf("\tpgno: %lu\n", (u_long)argp->pgno);
+	printf("\tadjust: %ld\n", (long)argp->adjust);
 	printf("\tlsn: [%lu][%lu]\n",
 	    (u_long)argp->lsn.file, (u_long)argp->lsn.offset);
 	printf("\n");
-	free(argp);
+	__db_free(argp);
 	return (0);
 }
 
@@ -722,7 +727,7 @@ __db_ovref_read(recbuf, argpp)
 	__db_ovref_args *argp;
 	u_int8_t *bp;
 
-	argp = (__db_ovref_args *)malloc(sizeof(__db_ovref_args) +
+	argp = (__db_ovref_args *)__db_malloc(sizeof(__db_ovref_args) +
 	    sizeof(DB_TXN));
 	if (argp == NULL)
 		return (ENOMEM);
@@ -738,6 +743,8 @@ __db_ovref_read(recbuf, argpp)
 	bp += sizeof(argp->fileid);
 	memcpy(&argp->pgno, bp, sizeof(argp->pgno));
 	bp += sizeof(argp->pgno);
+	memcpy(&argp->adjust, bp, sizeof(argp->adjust));
+	bp += sizeof(argp->adjust);
 	memcpy(&argp->lsn, bp,  sizeof(argp->lsn));
 	bp += sizeof(argp->lsn);
 	*argpp = argp;
@@ -787,7 +794,7 @@ int __db_relink_log(logp, txnid, ret_lsnp, flags,
 	    + sizeof(*lsn_prev)
 	    + sizeof(next)
 	    + sizeof(*lsn_next);
-	if ((logrec.data = (void *)malloc(logrec.size)) == NULL)
+	if ((logrec.data = (void *)__db_malloc(logrec.size)) == NULL)
 		return (ENOMEM);
 
 	bp = logrec.data;
@@ -827,7 +834,7 @@ int __db_relink_log(logp, txnid, ret_lsnp, flags,
 	ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags);
 	if (txnid != NULL)
 		txnid->last_lsn = *ret_lsnp;
-	free(logrec.data);
+	__db_free(logrec.data);
 	return (ret);
 }
 
@@ -874,7 +881,7 @@ __db_relink_print(notused1, dbtp, lsnp, notused3, notused4)
 	printf("\tlsn_next: [%lu][%lu]\n",
 	    (u_long)argp->lsn_next.file, (u_long)argp->lsn_next.offset);
 	printf("\n");
-	free(argp);
+	__db_free(argp);
 	return (0);
 }
 
@@ -889,7 +896,7 @@ __db_relink_read(recbuf, argpp)
 	__db_relink_args *argp;
 	u_int8_t *bp;
 
-	argp = (__db_relink_args *)malloc(sizeof(__db_relink_args) +
+	argp = (__db_relink_args *)__db_malloc(sizeof(__db_relink_args) +
 	    sizeof(DB_TXN));
 	if (argp == NULL)
 		return (ENOMEM);
@@ -957,7 +964,7 @@ int __db_addpage_log(logp, txnid, ret_lsnp, flags,
 	    + sizeof(*lsn)
 	    + sizeof(nextpgno)
 	    + sizeof(*nextlsn);
-	if ((logrec.data = (void *)malloc(logrec.size)) == NULL)
+	if ((logrec.data = (void *)__db_malloc(logrec.size)) == NULL)
 		return (ENOMEM);
 
 	bp = logrec.data;
@@ -990,7 +997,7 @@ int __db_addpage_log(logp, txnid, ret_lsnp, flags,
 	ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags);
 	if (txnid != NULL)
 		txnid->last_lsn = *ret_lsnp;
-	free(logrec.data);
+	__db_free(logrec.data);
 	return (ret);
 }
 
@@ -1034,7 +1041,7 @@ __db_addpage_print(notused1, dbtp, lsnp, notused3, notused4)
 	printf("\tnextlsn: [%lu][%lu]\n",
 	    (u_long)argp->nextlsn.file, (u_long)argp->nextlsn.offset);
 	printf("\n");
-	free(argp);
+	__db_free(argp);
 	return (0);
 }
 
@@ -1049,7 +1056,7 @@ __db_addpage_read(recbuf, argpp)
 	__db_addpage_args *argp;
 	u_int8_t *bp;
 
-	argp = (__db_addpage_args *)malloc(sizeof(__db_addpage_args) +
+	argp = (__db_addpage_args *)__db_malloc(sizeof(__db_addpage_args) +
 	    sizeof(DB_TXN));
 	if (argp == NULL)
 		return (ENOMEM);
@@ -1114,7 +1121,7 @@ int __db_debug_log(logp, txnid, ret_lsnp, flags,
 	    + sizeof(u_int32_t) + (key == NULL ? 0 : key->size)
 	    + sizeof(u_int32_t) + (data == NULL ? 0 : data->size)
 	    + sizeof(arg_flags);
-	if ((logrec.data = (void *)malloc(logrec.size)) == NULL)
+	if ((logrec.data = (void *)__db_malloc(logrec.size)) == NULL)
 		return (ENOMEM);
 
 	bp = logrec.data;
@@ -1165,7 +1172,7 @@ int __db_debug_log(logp, txnid, ret_lsnp, flags,
 	ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags);
 	if (txnid != NULL)
 		txnid->last_lsn = *ret_lsnp;
-	free(logrec.data);
+	__db_free(logrec.data);
 	return (ret);
 }
 
@@ -1231,7 +1238,7 @@ __db_debug_print(notused1, dbtp, lsnp, notused3, notused4)
 	printf("\n");
 	printf("\targ_flags: %lu\n", (u_long)argp->arg_flags);
 	printf("\n");
-	free(argp);
+	__db_free(argp);
 	return (0);
 }
 
@@ -1246,7 +1253,7 @@ __db_debug_read(recbuf, argpp)
 	__db_debug_args *argp;
 	u_int8_t *bp;
 
-	argp = (__db_debug_args *)malloc(sizeof(__db_debug_args) +
+	argp = (__db_debug_args *)__db_malloc(sizeof(__db_debug_args) +
 	    sizeof(DB_TXN));
 	if (argp == NULL)
 		return (ENOMEM);
@@ -1303,7 +1310,7 @@ int __db_noop_log(logp, txnid, ret_lsnp, flags)
 	} else
 		lsnp = &txnid->last_lsn;
 	logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN);
-	if ((logrec.data = (void *)malloc(logrec.size)) == NULL)
+	if ((logrec.data = (void *)__db_malloc(logrec.size)) == NULL)
 		return (ENOMEM);
 
 	bp = logrec.data;
@@ -1320,7 +1327,7 @@ int __db_noop_log(logp, txnid, ret_lsnp, flags)
 	ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags);
 	if (txnid != NULL)
 		txnid->last_lsn = *ret_lsnp;
-	free(logrec.data);
+	__db_free(logrec.data);
 	return (ret);
 }
 
@@ -1357,7 +1364,7 @@ __db_noop_print(notused1, dbtp, lsnp, notused3, notused4)
 	    (u_long)argp->prev_lsn.file,
 	    (u_long)argp->prev_lsn.offset);
 	printf("\n");
-	free(argp);
+	__db_free(argp);
 	return (0);
 }
 
@@ -1372,7 +1379,7 @@ __db_noop_read(recbuf, argpp)
 	__db_noop_args *argp;
 	u_int8_t *bp;
 
-	argp = (__db_noop_args *)malloc(sizeof(__db_noop_args) +
+	argp = (__db_noop_args *)__db_malloc(sizeof(__db_noop_args) +
 	    sizeof(DB_TXN));
 	if (argp == NULL)
 		return (ENOMEM);
diff --git a/db2/db/db_dispatch.c b/db2/db/db_dispatch.c
index 3d7b162d75..a4bcdb7628 100644
--- a/db2/db/db_dispatch.c
+++ b/db2/db/db_dispatch.c
@@ -43,7 +43,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)db_dispatch.c	10.5 (Sleepycat) 7/2/97";
+static const char sccsid[] = "@(#)db_dispatch.c	10.6 (Sleepycat) 10/25/97";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -155,12 +155,12 @@ __db_add_recovery(dbenv, func, ndx)
 	if (ndx >= dispatch_size) {
 		if (dispatch_table == NULL)
 			dispatch_table = (int (**)
-			    __P((DB_LOG *, DBT *, DB_LSN *, int, void *)))
-			    malloc(DB_user_BEGIN * sizeof(dispatch_table[0]));
+			 __P((DB_LOG *, DBT *, DB_LSN *, int, void *)))
+			 __db_malloc(DB_user_BEGIN * sizeof(dispatch_table[0]));
 		else
 			dispatch_table = (int (**)
 			    __P((DB_LOG *, DBT *, DB_LSN *, int, void *)))
-			    realloc(dispatch_table, (DB_user_BEGIN +
+			    __db_realloc(dispatch_table, (DB_user_BEGIN +
 			    dispatch_size) * sizeof(dispatch_table[0]));
 		if (dispatch_table == NULL) {
 			__db_err(dbenv, "%s", strerror(ENOMEM));
@@ -187,8 +187,8 @@ __db_txnlist_init(retp)
 {
 	__db_txnhead *headp;
 
-	if ((headp =
-	    (struct __db_txnhead *)malloc(sizeof(struct __db_txnhead))) == NULL)
+	if ((headp = (struct __db_txnhead *)
+	    __db_malloc(sizeof(struct __db_txnhead))) == NULL)
 		return (ENOMEM);
 
 	LIST_INIT(&headp->head);
@@ -212,7 +212,7 @@ __db_txnlist_add(listp, txnid)
 	__db_txnhead *hp;
 	__db_txnlist *elp;
 
-	if ((elp = (__db_txnlist *)malloc(sizeof(__db_txnlist))) == NULL)
+	if ((elp = (__db_txnlist *)__db_malloc(sizeof(__db_txnlist))) == NULL)
 		return (ENOMEM);
 
 	elp->txnid = txnid;
diff --git a/db2/db/db_dup.c b/db2/db/db_dup.c
index 66c6c2616a..faeefa0744 100644
--- a/db2/db/db_dup.c
+++ b/db2/db/db_dup.c
@@ -8,7 +8,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)db_dup.c	10.9 (Sleepycat) 9/3/97";
+static const char sccsid[] = "@(#)db_dup.c	10.10 (Sleepycat) 10/25/97";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -217,7 +217,7 @@ __db_dsplit(dbp, hp, indxp, size, newfunc)
 	indx = *indxp;
 
 	/* Create a temporary page to do compaction onto. */
-	if ((tp = (PAGE *)malloc(dbp->pgsize)) == NULL)
+	if ((tp = (PAGE *)__db_malloc(dbp->pgsize)) == NULL)
 		return (ENOMEM);
 #ifdef DEBUG
 	memset(tp, 0xff, dbp->pgsize);
diff --git a/db2/db/db_overflow.c b/db2/db/db_overflow.c
index 2340e9e358..8c6619f228 100644
--- a/db2/db/db_overflow.c
+++ b/db2/db/db_overflow.c
@@ -47,7 +47,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)db_overflow.c	10.4 (Sleepycat) 7/2/97";
+static const char sccsid[] = "@(#)db_overflow.c	10.7 (Sleepycat) 11/2/97";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -121,14 +121,14 @@ __db_goff(dbp, dbt, tlen, pgno, bpp, bpsz)
 		}
 	} else if (F_ISSET(dbt, DB_DBT_MALLOC)) {
 		dbt->data = dbp->db_malloc == NULL ?
-		    (void *)malloc(needed + 1) :
+		    (void *)__db_malloc(needed + 1) :
 		    (void *)dbp->db_malloc(needed + 1);
 		if (dbt->data == NULL)
 			return (ENOMEM);
 	} else if (*bpsz == 0 || *bpsz < needed) {
 		*bpp = (*bpp == NULL ?
-		    (void *)malloc(needed + 1) :
-		    (void *)realloc(*bpp, needed + 1));
+		    (void *)__db_malloc(needed + 1) :
+		    (void *)__db_realloc(*bpp, needed + 1));
 		if (*bpp == NULL)
 			return (ENOMEM);
 		*bpsz = needed + 1;
@@ -256,15 +256,16 @@ __db_poff(dbp, dbt, pgnop, newfunc)
 }
 
 /*
- * __db_ioff --
- *	Increment the reference count on an overflow page.
+ * __db_ovref --
+ *	Increment/decrement the reference count on an overflow page.
  *
- * PUBLIC: int __db_ioff __P((DB *, db_pgno_t));
+ * PUBLIC: int __db_ovref __P((DB *, db_pgno_t, int));
  */
 int
-__db_ioff(dbp, pgno)
+__db_ovref(dbp, pgno, adjust)
 	DB *dbp;
 	db_pgno_t pgno;
+	int adjust;
 {
 	PAGE *h;
 	int ret;
@@ -274,10 +275,12 @@ __db_ioff(dbp, pgno)
 		return (ret);
 	}
 
-	++OV_REF(h);
-	if (DB_LOGGING(dbp) && (ret = __db_ovref_log(dbp->dbenv->lg_info,
-	    dbp->txn, &LSN(h), 0, dbp->log_fileid, h->pgno, &LSN(h))) != 0)
-		return (ret);
+	if (DB_LOGGING(dbp))
+		if ((ret = __db_ovref_log(dbp->dbenv->lg_info, dbp->txn,
+		    &LSN(h), 0, dbp->log_fileid, h->pgno, (int32_t)adjust,
+		    &LSN(h))) != 0)
+			return (ret);
+	OV_REF(h) += adjust;
 
 	(void)memp_fput(dbp->mpf, h, DB_MPOOL_DIRTY);
 	return (0);
@@ -311,9 +314,8 @@ __db_doff(dbp, pgno, freefunc)
 		 * one key/data item, decrement the reference count and return.
 		 */
 		if (TYPE(pagep) == P_OVERFLOW && OV_REF(pagep) > 1) {
-			--OV_REF(pagep);
-			(void)memp_fput(dbp->mpf, pagep, DB_MPOOL_DIRTY);
-			return (0);
+			(void)memp_fput(dbp->mpf, pagep, 0);
+			return (__db_ovref(dbp, pgno, -1));
 		}
 
 		if (DB_LOGGING(dbp)) {
diff --git a/db2/db/db_pr.c b/db2/db/db_pr.c
index 09d8057da4..6b6171a13c 100644
--- a/db2/db/db_pr.c
+++ b/db2/db/db_pr.c
@@ -8,7 +8,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)db_pr.c	10.17 (Sleepycat) 9/15/97";
+static const char sccsid[] = "@(#)db_pr.c	10.19 (Sleepycat) 11/2/97";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -183,7 +183,6 @@ __db_prbtree(dbp)
 	};
 	BTMETA *mp;
 	BTREE *t;
-	DB_LOCK lock;
 	EPG *epg;
 	FILE *fp;
 	RECNO *rp;
@@ -195,8 +194,6 @@ __db_prbtree(dbp)
 
 	(void)fprintf(fp, "%s\nOn-page metadata:\n", DB_LINE);
 	i = PGNO_METADATA;
-	if ((ret = __bam_lget(dbp, 0, PGNO_METADATA, DB_LOCK_READ, &lock)) != 0)
-		return (ret);
 
 	if ((ret = __bam_pget(dbp, (PAGE **)&mp, &i, 0)) != 0)
 		return (ret);
@@ -211,7 +208,6 @@ __db_prbtree(dbp)
 	__db_prflags(mp->flags, mfn);
 	(void)fprintf(fp, "\n");
 	(void)memp_fput(dbp->mpf, mp, 0);
-	(void)__bam_lput(dbp, lock);
 
 	(void)fprintf(fp, "%s\nDB_INFO:\n", DB_LINE);
 	(void)fprintf(fp, "bt_maxkey: %lu bt_minkey: %lu\n",
@@ -416,7 +412,8 @@ __db_prpage(h, all)
 	    (TYPE(h) == P_LRECNO && h->pgno == PGNO_ROOT))
 		fprintf(fp, " total records: %4lu", (u_long)RE_NREC(h));
 	fprintf(fp, "\n");
-	if (TYPE(h) == P_LBTREE || TYPE(h) == P_LRECNO)
+	if (TYPE(h) == P_LBTREE || TYPE(h) == P_LRECNO ||
+	    TYPE(h) == P_DUPLICATE || TYPE(h) == P_OVERFLOW)
 		fprintf(fp, "    prev: %4lu next: %4lu",
 		    (u_long)PREV_PGNO(h), (u_long)NEXT_PGNO(h));
 	if (TYPE(h) == P_IBTREE || TYPE(h) == P_LBTREE)
diff --git a/db2/db/db_rec.c b/db2/db/db_rec.c
index 900b0ed579..2c9ca9abe0 100644
--- a/db2/db/db_rec.c
+++ b/db2/db/db_rec.c
@@ -8,7 +8,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)db_rec.c	10.8 (Sleepycat) 8/22/97";
+static const char sccsid[] = "@(#)db_rec.c	10.10 (Sleepycat) 11/2/97";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -330,7 +330,7 @@ out:	REC_CLOSE;
 
 /*
  * __db_ovref_recover --
- *	Recovery function for __db_ioff().
+ *	Recovery function for __db_ovref().
  *
  * PUBLIC: int __db_ovref_recover __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
  */
@@ -357,22 +357,21 @@ __db_ovref_recover(logp, dbtp, lsnp, redo, info)
 	}
 
 	modified = 0;
-	if (log_compare(lsnp, &argp->lsn) == 0 && redo) {
+	if (log_compare(&LSN(pagep), &argp->lsn) == 0 && redo) {
 		/* Need to redo update described. */
-		++OV_REF(pagep);
+		OV_REF(pagep) += argp->adjust;
 
 		pagep->lsn = *lsnp;
 		modified = 1;
 	} else if (log_compare(lsnp, &LSN(pagep)) == 0 && !redo) {
 		/* Need to undo update described. */
-		--OV_REF(pagep);
+		OV_REF(pagep) -= argp->adjust;
 
 		pagep->lsn = argp->lsn;
 		modified = 1;
 	}
-	ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0);
-
-	*lsnp = argp->prev_lsn;
+	if ((ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) == 0)
+		*lsnp = argp->prev_lsn;
 
 out:	REC_CLOSE;
 }
@@ -413,7 +412,7 @@ __db_relink_recover(logp, dbtp, lsnp, redo, info)
 		goto next;
 	}
 	modified = 0;
-	if (log_compare(lsnp, &argp->lsn) == 0 && redo) {
+	if (log_compare(&LSN(pagep), &argp->lsn) == 0 && redo) {
 		/* Redo the relink. */
 		pagep->lsn = *lsnp;
 		modified = 1;
@@ -438,7 +437,7 @@ next:	if ((ret = memp_fget(mpf, &argp->next, 0, &pagep)) != 0) {
 		goto prev;
 	}
 	modified = 0;
-	if (log_compare(lsnp, &argp->lsn_next) == 0 && redo) {
+	if (log_compare(&LSN(pagep), &argp->lsn_next) == 0 && redo) {
 		/* Redo the relink. */
 		pagep->prev_pgno = argp->prev;
 
@@ -464,7 +463,7 @@ prev:	if ((ret = memp_fget(mpf, &argp->prev, 0, &pagep)) != 0) {
 		goto done;
 	}
 	modified = 0;
-	if (log_compare(lsnp, &argp->lsn_prev) == 0 && redo) {
+	if (log_compare(&LSN(pagep), &argp->lsn_prev) == 0 && redo) {
 		/* Redo the relink. */
 		pagep->next_pgno = argp->next;
 
diff --git a/db2/db/db_ret.c b/db2/db/db_ret.c
index ee2bc82f87..bcec308b95 100644
--- a/db2/db/db_ret.c
+++ b/db2/db/db_ret.c
@@ -8,7 +8,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)db_ret.c	10.7 (Sleepycat) 9/15/97";
+static const char sccsid[] = "@(#)db_ret.c	10.8 (Sleepycat) 10/25/97";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -122,7 +122,7 @@ __db_retcopy(dbt, data, len, memp, memsize, db_malloc)
 	 */
 	if (F_ISSET(dbt, DB_DBT_MALLOC)) {
 		dbt->data = db_malloc == NULL ?
-		    (void *)malloc(len + 1) :
+		    (void *)__db_malloc(len + 1) :
 		    (void *)db_malloc(len + 1);
 		if (dbt->data == NULL)
 			return (ENOMEM);
@@ -134,8 +134,8 @@ __db_retcopy(dbt, data, len, memp, memsize, db_malloc)
 	} else {
 		if (*memsize == 0 || *memsize < len) {
 			*memp = *memp == NULL ?
-			    (void *)malloc(len + 1) :
-			    (void *)realloc(*memp, len + 1);
+			    (void *)__db_malloc(len + 1) :
+			    (void *)__db_realloc(*memp, len + 1);
 			if (*memp == NULL) {
 				*memsize = 0;
 				return (ENOMEM);
diff --git a/db2/db/db_thread.c b/db2/db/db_thread.c
index 170baf5345..d9086918dd 100644
--- a/db2/db/db_thread.c
+++ b/db2/db/db_thread.c
@@ -8,7 +8,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)db_thread.c	8.12 (Sleepycat) 9/23/97";
+static const char sccsid[] = "@(#)db_thread.c	8.13 (Sleepycat) 10/25/97";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -42,8 +42,7 @@ __db_gethandle(dbp, am_func, dbpp)
 	DB *ret_dbp;
 	int ret, t_ret;
 
-	if ((ret = __db_mutex_lock((db_mutex_t *)dbp->mutexp, -1,
-	    dbp->dbenv == NULL ? NULL : dbp->dbenv->db_yield)) != 0)
+	if ((ret = __db_mutex_lock((db_mutex_t *)dbp->mutexp, -1)) != 0)
 		return (ret);
 
 	if ((ret_dbp = LIST_FIRST(&dbp->handleq)) != NULL)
@@ -51,7 +50,7 @@ __db_gethandle(dbp, am_func, dbpp)
 		LIST_REMOVE(ret_dbp, links);
 	else {
 		/* Allocate a new handle. */
-		if ((ret_dbp = (DB *)malloc(sizeof(*dbp))) == NULL) {
+		if ((ret_dbp = (DB *)__db_malloc(sizeof(*dbp))) == NULL) {
 			ret = ENOMEM;
 			goto err;
 		}
@@ -94,8 +93,7 @@ __db_puthandle(dbp)
 	int ret;
 
 	master = dbp->master;
-	if ((ret = __db_mutex_lock((db_mutex_t *)master->mutexp, -1,
-	    dbp->dbenv == NULL ? NULL : dbp->dbenv->db_yield)) != 0)
+	if ((ret = __db_mutex_lock((db_mutex_t *)master->mutexp, -1)) != 0)
 		return (ret);
 
 	LIST_INSERT_HEAD(&master->handleq, dbp, links);
diff --git a/db2/db185/db185.c b/db2/db185/db185.c
index bf5e37edcb..1affdcdf0d 100644
--- a/db2/db185/db185.c
+++ b/db2/db185/db185.c
@@ -11,7 +11,7 @@
 static const char copyright[] =
 "@(#) Copyright (c) 1997\n\
 	Sleepycat Software Inc.  All rights reserved.\n";
-static const char sccsid[] = "@(#)db185.c	8.13 (Sleepycat) 8/24/97";
+static const char sccsid[] = "@(#)db185.c	8.14 (Sleepycat) 10/25/97";
 #endif
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -52,7 +52,7 @@ __dbopen(file, oflags, mode, type, openinfo)
 	DB_INFO dbinfo, *dbinfop;
 	int s_errno;
 
-	if ((db185p = (DB185 *)calloc(1, sizeof(DB185))) == NULL)
+	if ((db185p = (DB185 *)__db_calloc(1, sizeof(DB185))) == NULL)
 		return (NULL);
 	dbinfop = NULL;
 	memset(&dbinfo, 0, sizeof(dbinfo));
@@ -119,7 +119,7 @@ __dbopen(file, oflags, mode, type, openinfo)
 		 */
 		if (file != NULL) {
 			if (oflags & O_CREAT && __db_exists(file, NULL) != 0)
-				(void)close(open(file, oflags, mode));
+				(void)__os_close(open(file, oflags, mode));
 			dbinfop->re_source = (char *)file;
 			file = NULL;
 		}
@@ -131,7 +131,7 @@ __dbopen(file, oflags, mode, type, openinfo)
 			 */
 #define	BFMSG	"DB: DB 1.85's recno bfname field is not supported.\n"
 			if (ri->bfname != NULL) {
-				(void)write(2, BFMSG, sizeof(BFMSG) - 1);
+				(void)__os_write(2, BFMSG, sizeof(BFMSG) - 1);
 				goto einval;
 			}
 
@@ -183,7 +183,7 @@ __dbopen(file, oflags, mode, type, openinfo)
 	 */
 	if ((__set_errno(db_open(file,
 	    type, __db_oflags(oflags), mode, NULL, dbinfop, &dbp))) != 0) {
-		free(db185p);
+		__db_free(db185p);
 		return (NULL);
 	}
 
@@ -192,7 +192,7 @@ __dbopen(file, oflags, mode, type, openinfo)
 	    != 0) {
 		s_errno = errno;
 		(void)dbp->close(dbp, 0);
-		free(db185p);
+		__db_free(db185p);
 		__set_errno(s_errno);
 		return (NULL);
 	}
@@ -200,7 +200,7 @@ __dbopen(file, oflags, mode, type, openinfo)
 	db185p->internal = dbp;
 	return (db185p);
 
-einval:	free(db185p);
+einval:	__db_free(db185p);
 	__set_errno(EINVAL);
 	return (NULL);
 }
@@ -216,7 +216,7 @@ db185_close(db185p)
 
 	__set_errno(dbp->close(dbp, 0));
 
-	free(db185p);
+	__db_free(db185p);
 
 	return (errno == 0 ? 0 : -1);
 }
@@ -461,7 +461,7 @@ db185_sync(db185p, flags)
 		 * We can't support the R_RECNOSYNC flag.
 		 */
 #define	RSMSG	"DB: DB 1.85's R_RECNOSYNC sync flag is not supported.\n"
-		(void)write(2, RSMSG, sizeof(RSMSG) - 1);
+		(void)__os_write(2, RSMSG, sizeof(RSMSG) - 1);
 		goto einval;
 	default:
 		goto einval;
diff --git a/db2/db_int.h b/db2/db_int.h
index 56dfddb73f..1f6c790345 100644
--- a/db2/db_int.h
+++ b/db2/db_int.h
@@ -4,7 +4,7 @@
  * Copyright (c) 1996, 1997
  *	Sleepycat Software.  All rights reserved.
  *
- *	@(#)db_int.h.src	10.30 (Sleepycat) 9/23/97
+ *	@(#)db_int.h.src	10.36 (Sleepycat) 10/31/97
  */
 
 #ifndef _DB_INTERNAL_H_
@@ -12,6 +12,7 @@
 
 #include "db.h"				/* Standard DB include file. */
 #include "queue.h"
+#include "os_func.h"
 #include "os_ext.h"
 
 /*******************************************************
@@ -64,12 +65,16 @@
 #undef	SSZA
 #define SSZA(name, field)	((int)&(((name *)0)->field[0]))
 
+/* Macros to return per-process address, offsets based on shared regions. */
+#define	R_ADDR(base, offset)	((void *)((u_int8_t *)((base)->addr) + offset))
+#define	R_OFFSET(base, p)	((u_int8_t *)(p) - (u_int8_t *)(base)->addr)
+
 /* Free and free-string macros that overwrite memory during debugging. */
 #ifdef DEBUG
 #undef	FREE
 #define	FREE(p, len) {							\
 	memset(p, 0xff, len);						\
-	free(p);							\
+	__db_free(p);							\
 }
 #undef	FREES
 #define	FREES(p) {							\
@@ -78,17 +83,17 @@
 #else
 #undef	FREE
 #define	FREE(p, len) {							\
-	free(p);							\
+	__db_free(p);							\
 }
 #undef	FREES
 #define	FREES(p) {							\
-	free(p);							\
+	__db_free(p);							\
 }
 #endif
 
 /* Structure used to print flag values. */
 typedef struct __fn {
-	u_int32_t   mask;		/* Flag value. */
+	u_int32_t mask;			/* Flag value. */
 	const char *name;		/* Flag name. */
 } FN;
 
@@ -163,10 +168,8 @@ typedef struct _db_mutex_t {
 	off_t	off;			/* Backing file offset. */
 	u_long	pid;			/* Lock holder: 0 or process pid. */
 #endif
-#ifdef MUTEX_STATISTICS
-	u_long	mutex_set_wait;		/* Blocking mutex: required waiting. */
-	u_long	mutex_set_nowait;	/* Blocking mutex: without waiting. */
-#endif
+	u_int32_t mutex_set_wait;	/* Granted after wait. */
+	u_int32_t mutex_set_nowait;	/* Granted without waiting. */
 } db_mutex_t;
 
 #include "mutex_ext.h"
@@ -177,11 +180,10 @@ typedef struct _db_mutex_t {
 /* Lock/unlock a DB thread. */
 #define	DB_THREAD_LOCK(dbp)						\
 	(F_ISSET(dbp, DB_AM_THREAD) ?					\
-	    __db_mutex_lock((db_mutex_t *)(dbp)->mutexp,  -1,		\
-	        (dbp)->dbenv == NULL ? NULL : (dbp)->dbenv->db_yield) : 0)
+	    __db_mutex_lock((db_mutex_t *)(dbp)->mutexp, -1) : 0)
 #define	DB_THREAD_UNLOCK(dbp)						\
 	(F_ISSET(dbp, DB_AM_THREAD) ?					\
-	    __db_mutex_unlock((db_mutex_t *)(dbp)->mutexp,  -1) : 0)
+	    __db_mutex_unlock((db_mutex_t *)(dbp)->mutexp, -1) : 0)
 
 /* Btree/recno local statistics structure. */
 struct __db_bt_lstat;	typedef struct __db_bt_lstat DB_BTREE_LSTAT;
@@ -260,7 +262,7 @@ typedef struct __dbpginfo {
 #define	IS_ZERO_LSN(LSN)	((LSN).file == 0)
 
 /* Test if we need to log a change. */
-#define	DB_LOGGING(dbp) \
+#define	DB_LOGGING(dbp)							\
 	(F_ISSET(dbp, DB_AM_LOGGING) && !F_ISSET(dbp, DB_AM_RECOVER))
 
 #ifdef DEBUG
diff --git a/db2/hash/hash.c b/db2/hash/hash.c
index d986e08087..c08495378e 100644
--- a/db2/hash/hash.c
+++ b/db2/hash/hash.c
@@ -47,7 +47,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)hash.c	10.27 (Sleepycat) 9/15/97";
+static const char sccsid[] = "@(#)hash.c	10.33 (Sleepycat) 11/2/97";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -79,7 +79,7 @@ static int  __ham_cursor __P((DB *, DB_TXN *, DBC **));
 static int  __ham_delete __P((DB *, DB_TXN *, DBT *, int));
 static int  __ham_dup_return __P((HTAB *, HASH_CURSOR *, DBT *, int));
 static int  __ham_get __P((DB *, DB_TXN *, DBT *, DBT *, int));
-static void __ham_init_htab __P((HTAB *));
+static void __ham_init_htab __P((HTAB *, u_int));
 static int  __ham_lookup __P((HTAB *,
 		HASH_CURSOR *, const DBT *, u_int32_t, db_lockmode_t));
 static int  __ham_overwrite __P((HTAB *, HASH_CURSOR *, DBT *));
@@ -106,7 +106,7 @@ __ham_open(dbp, dbinfo)
 
 	dbenv = dbp->dbenv;
 
-	if ((hashp = (HTAB *)calloc(1, sizeof(HTAB))) == NULL)
+	if ((hashp = (HTAB *)__db_calloc(1, sizeof(HTAB))) == NULL)
 		return (ENOMEM);
 	hashp->dbp = dbp;
 
@@ -175,10 +175,9 @@ __ham_open(dbp, dbinfo)
 			goto out;
 		}
 
-		hashp->hdr->nelem = dbinfo != NULL ? dbinfo->h_nelem : 0;
 		hashp->hdr->ffactor =
 		    dbinfo != NULL && dbinfo->h_ffactor ? dbinfo->h_ffactor : 0;
-		__ham_init_htab(hashp);
+		__ham_init_htab(hashp, dbinfo != NULL ? dbinfo->h_nelem : 0);
 		if (F_ISSET(dbp, DB_AM_DUP))
 			F_SET(hashp->hdr, DB_HASH_DUP);
 		if ((ret = __ham_dirty_page(hashp, (PAGE *)hashp->hdr)) != 0)
@@ -190,7 +189,7 @@ __ham_open(dbp, dbinfo)
 	TAILQ_INSERT_TAIL(&dbp->curs_queue, curs, links);
 
 	/* Allocate memory for our split buffer. */
-	if ((hashp->split_buf = (PAGE *)malloc(dbp->pgsize)) == NULL) {
+	if ((hashp->split_buf = (PAGE *)__db_malloc(dbp->pgsize)) == NULL) {
 		ret = ENOMEM;
 		goto out;
 	}
@@ -265,13 +264,13 @@ __ham_close(dbp)
  * Returns 0 on No Error
  */
 static void
-__ham_init_htab(hashp)
+__ham_init_htab(hashp, nelem)
 	HTAB *hashp;
+	u_int nelem;
 {
-	u_int32_t nelem;
 	int32_t l2, nbuckets;
 
-	nelem = hashp->hdr->nelem;
+	hashp->hdr->nelem = 0;
 	hashp->hdr->pagesize = hashp->dbp->pgsize;
 	ZERO_LSN(hashp->hdr->lsn);
 	hashp->hdr->magic = DB_HASHMAGIC;
@@ -502,11 +501,11 @@ __ham_c_init(dbp, txnid, dbcp)
 	DBC *db_curs;
 	HASH_CURSOR *new_curs;
 
-	if ((db_curs = (DBC *)calloc(sizeof(DBC), 1)) == NULL)
+	if ((db_curs = (DBC *)__db_calloc(sizeof(DBC), 1)) == NULL)
 		return (ENOMEM);
 
 	if ((new_curs =
-	    (HASH_CURSOR *)calloc(sizeof(struct cursor_t), 1)) == NULL) {
+	    (HASH_CURSOR *)__db_calloc(sizeof(struct cursor_t), 1)) == NULL) {
 		FREE(db_curs, sizeof(DBC));
 		return (ENOMEM);
 	}
@@ -555,7 +554,7 @@ __ham_delete(dbp, txn, key, flags)
 	hashp->hash_accesses++;
 	if ((ret = __ham_lookup(hashp, hcp, key, 0, DB_LOCK_WRITE)) == 0)
 		if (F_ISSET(hcp, H_OK))
-			ret = __ham_del_pair(hashp, hcp);
+			ret = __ham_del_pair(hashp, hcp, 1);
 		else
 			ret = DB_NOTFOUND;
 
@@ -669,30 +668,41 @@ __ham_c_del(cursor, flags)
 	if ((ret = __ham_get_cpage(hashp, hcp, DB_LOCK_WRITE)) != 0)
 		goto out;
 	if (F_ISSET(hcp, H_ISDUP) && hcp->dpgno != PGNO_INVALID) {
-		ppgno = PREV_PGNO(hcp->dpagep);
-
-		/* Remove item from duplicate page. */
-		chg_pgno = hcp->dpgno;
-		if ((ret = __db_drem(hashp->dbp,
-		    &hcp->dpagep, hcp->dndx, __ham_del_page)) != 0)
-			goto out;
-
 		/*
+		 * We are about to remove a duplicate from offpage.
+		 *
 		 * There are 4 cases.
-		 * 1. We removed an item on a page, but nothing else changed.
-		 * 2. We removed the last item on a page, but there is a
+		 * 1. We will remove an item on a page, but there are more
+		 *    items on that page.
+		 * 2. We will remove the last item on a page, but there is a
 		 *    following page of duplicates.
-		 * 3. We removed the last item on a page, this page was the
+		 * 3. We will remove the last item on a page, this page was the
 		 *    last page in a duplicate set, but there were dups before
 		 *    it.
-		 * 4. We removed the last item on a page, removing the last
+		 * 4. We will remove the last item on a page, removing the last
 		 *    duplicate.
 		 * In case 1 hcp->dpagep is unchanged.
 		 * In case 2 hcp->dpagep comes back pointing to the next dup
 		 *     page.
 		 * In case 3 hcp->dpagep comes back NULL.
 		 * In case 4 hcp->dpagep comes back NULL.
+		 *
+		 * Case 4 results in deleting the pair off the master page.
+		 * The normal code for doing this knows how to delete the
+		 * duplicates, so we will handle this case in the normal code.
 		 */
+		ppgno = PREV_PGNO(hcp->dpagep);
+		if (ppgno == PGNO_INVALID &&
+		    NEXT_PGNO(hcp->dpagep) == PGNO_INVALID &&
+		    NUM_ENT(hcp->dpagep) == 1)
+			goto normal;
+
+		/* Remove item from duplicate page. */
+		chg_pgno = hcp->dpgno;
+		if ((ret = __db_drem(hashp->dbp,
+		    &hcp->dpagep, hcp->dndx, __ham_del_page)) != 0)
+			goto out;
+
 		if (hcp->dpagep == NULL) {
 			if (ppgno != PGNO_INVALID) {		/* Case 3 */
 				hcp->dpgno = ppgno;
@@ -702,7 +712,7 @@ __ham_c_del(cursor, flags)
 				hcp->dndx = NUM_ENT(hcp->dpagep);
 				F_SET(hcp, H_DELETED);
 			} else {				/* Case 4 */
-				ret = __ham_del_pair(hashp, hcp);
+				ret = __ham_del_pair(hashp, hcp, 1);
 				hcp->dpgno = PGNO_INVALID;
 				/*
 				 * Delpair updated the cursor queue, so we
@@ -718,14 +728,14 @@ __ham_c_del(cursor, flags)
 				    H_DATAINDEX(hcp->bndx))),
 				    &hcp->dpgno, sizeof(db_pgno_t));
 			F_SET(hcp, H_DELETED);
-		} else					/* Case 1 */
+		} else						/* Case 1 */
 			F_SET(hcp, H_DELETED);
 		if (chg_pgno != PGNO_INVALID)
 			__ham_c_update(hashp, hcp, chg_pgno, 0, 0, 1);
 	} else if (F_ISSET(hcp, H_ISDUP)) {			/* on page */
 		if (hcp->dup_off == 0 && DUP_SIZE(hcp->dup_len) ==
 		    LEN_HDATA(hcp->pagep, hashp->hdr->pagesize, hcp->bndx))
-			ret = __ham_del_pair(hashp, hcp);
+			ret = __ham_del_pair(hashp, hcp, 1);
 		else {
 			DBT repldbt;
 
@@ -736,14 +746,14 @@ __ham_c_del(cursor, flags)
 			repldbt.size = 0;
 			ret = __ham_replpair(hashp, hcp, &repldbt, 0);
 			hcp->dup_tlen -= DUP_SIZE(hcp->dup_len);
+			F_SET(hcp, H_DELETED);
 			__ham_c_update(hashp, hcp, hcp->pgno,
 			    DUP_SIZE(hcp->dup_len), 0, 1);
-			F_SET(hcp, H_DELETED);
 		}
 
 	} else
 		/* Not a duplicate */
-		ret = __ham_del_pair(hashp, hcp);
+normal:		ret = __ham_del_pair(hashp, hcp, 1);
 
 out:	if ((t_ret = __ham_item_done(hashp, hcp, ret == 0)) != 0 && ret == 0)
 		t_ret = ret;
@@ -975,8 +985,8 @@ int
 __ham_expand_table(hashp)
 	HTAB *hashp;
 {
-	u_int32_t old_bucket, new_bucket;
-	u_int32_t spare_ndx;
+	DB_LSN new_lsn;
+	u_int32_t old_bucket, new_bucket, spare_ndx;
 	int ret;
 
 	ret = 0;
@@ -984,9 +994,30 @@ __ham_expand_table(hashp)
 	if (ret)
 		return (ret);
 
-	if (DB_LOGGING(hashp->dbp)) {
-		DB_LSN new_lsn;
+	/*
+	 * If the split point is about to increase, make sure that we
+	 * have enough extra pages.  The calculation here is weird.
+	 * We'd like to do this after we've upped max_bucket, but it's
+	 * too late then because we've logged the meta-data split.  What
+	 * we'll do between then and now is increment max bucket and then
+	 * see what the log of one greater than that is; here we have to
+	 * look at the log of max + 2.  VERY NASTY STUFF.
+	 */
+	if (__db_log2(hashp->hdr->max_bucket + 2) > hashp->hdr->ovfl_point) {
+		/*
+		 * We are about to shift the split point.  Make sure that
+		 * if the next doubling is going to be big (more than 8
+		 * pages), we have some extra pages around.
+		 */
+		if (hashp->hdr->max_bucket + 1 >= 8 && 
+		    hashp->hdr->spares[hashp->hdr->ovfl_point] <
+		    hashp->hdr->spares[hashp->hdr->ovfl_point - 1] + 
+		    hashp->hdr->ovfl_point + 1)
+			__ham_init_ovflpages(hashp);
+	}
 
+	/* Now we can log the meta-data split. */
+	if (DB_LOGGING(hashp->dbp)) {
 		if ((ret = __ham_splitmeta_log(hashp->dbp->dbenv->lg_info,
 		    (DB_TXN *)hashp->dbp->txn, &new_lsn, 0,
 		    hashp->dbp->log_fileid,
@@ -1003,22 +1034,11 @@ __ham_expand_table(hashp)
 	old_bucket = (hashp->hdr->max_bucket & hashp->hdr->low_mask);
 
 	/*
-	 * If the split point is increasing (hdr.max_bucket's log base 2
-	 * increases), max sure that we have enough extra pages, then
-	 * copy the current contents of the spare split bucket to the
-	 * next bucket.
+	 * If the split point is increasing, copy the current contents
+	 * of the spare split bucket to the next bucket.
 	 */
 	spare_ndx = __db_log2(hashp->hdr->max_bucket + 1);
 	if (spare_ndx > hashp->hdr->ovfl_point) {
-		/*
-		 * We are about to shift the split point.  Make sure that
-		 * if the next doubling is going to be big (more than 8
-		 * pages), we have some extra pages around.
-		 */
-		if (hashp->hdr->spares[hashp->hdr->ovfl_point] == 0 &&
-		    new_bucket >= 8)
-			__ham_init_ovflpages(hashp);
-
 		hashp->hdr->spares[spare_ndx] =
 		    hashp->hdr->spares[hashp->hdr->ovfl_point];
 		hashp->hdr->ovfl_point = spare_ndx;
@@ -1306,7 +1326,7 @@ __ham_init_dbt(dbt, size, bufp, sizep)
 	memset(dbt, 0, sizeof(*dbt));
 	if (*sizep < size) {
 		if ((*bufp = (void *)(*bufp == NULL ?
-		    malloc(size) : realloc(*bufp, size))) == NULL) {
+		    __db_malloc(size) : __db_realloc(*bufp, size))) == NULL) {
 			*sizep = 0;
 			return (ENOMEM);
 		}
@@ -1352,9 +1372,20 @@ __ham_c_update(hashp, hcp, chg_pgno, len, add, dup)
 	if (!dup && add)
 		return;
 
-	page_deleted = chg_pgno != PGNO_INVALID &&
-	    ((!dup && chg_pgno != hcp->pgno) ||
-	    (dup && chg_pgno != hcp->dpgno));
+	/*
+	 * Determine if a page was deleted.  If this is a regular update
+	 * (i.e., not dup) then the deleted page's number will be that in
+	 * chg_pgno, and the pgno in the cursor will be different.  If this
+	 * was an onpage-duplicate, then the same conditions apply.  If this
+	 * was an off-page duplicate, then we need to check whether hcp->dpgno
+	 * is the same as (no delete) or different from (delete) chg_pgno.
+	 */
+	if (!dup || hcp->dpgno == PGNO_INVALID)
+		page_deleted =
+		    chg_pgno != PGNO_INVALID && chg_pgno != hcp->pgno;
+	else
+		page_deleted =
+		    chg_pgno != PGNO_INVALID && chg_pgno != hcp->dpgno;
 
 	hp = hcp->db_cursor->dbp->master->internal;
 	DB_THREAD_LOCK(hp->dbp);
@@ -1432,7 +1463,7 @@ __ham_hdup(orig, new)
 	DBC *curs;
 	int ret;
 
-	if ((hashp = (HTAB *)malloc(sizeof(HTAB))) == NULL)
+	if ((hashp = (HTAB *)__db_malloc(sizeof(HTAB))) == NULL)
 		return (ENOMEM);
 
 	new->internal = hashp;
@@ -1441,7 +1472,7 @@ __ham_hdup(orig, new)
 	hashp->hlock = 0;
 	hashp->hdr = NULL;
 	hashp->hash = ((HTAB *)orig->internal)->hash;
-	if ((hashp->split_buf = (PAGE *)malloc(orig->pgsize)) == NULL)
+	if ((hashp->split_buf = (PAGE *)__db_malloc(orig->pgsize)) == NULL)
 		return (ENOMEM);
 	hashp->local_errno = 0;
 	hashp->hash_accesses = 0;
diff --git a/db2/hash/hash.src b/db2/hash/hash.src
index 04a98d3cb3..8cbcee73f7 100644
--- a/db2/hash/hash.src
+++ b/db2/hash/hash.src
@@ -43,7 +43,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- *	@(#)hash.src	10.1 (Sleepycat) 4/12/97
+ *	@(#)hash.src	10.2 (Sleepycat) 11/2/97
  */
 
 #include "config.h"
@@ -207,5 +207,27 @@ ARG	fileid		u_int32_t	lu
 ARG	start_pgno	db_pgno_t	lu
 ARG	npages		u_int32_t	lu
 ARG	free_pgno	db_pgno_t	lu
+ARG	ovflpoint	u_int32_t	lu
 POINTER	metalsn		DB_LSN *	lu
 END
+
+/*
+ * Used when we empty the first page in a bucket and there are pages
+ * after it.  The page after it gets copied into the bucket page (since
+ * bucket pages have to be in fixed locations).
+ * pgno, pagelsn: the bucket page and the old LSN on it
+ * next_pgno, nextlsn: the next page in the chain and the old LSN on it
+ * nnext_pgno: page after next_pgno (may need to change its prev)
+ * nnextlsn: the LSN of nnext_pgno.
+ * page: image of the next_pgno page that is copied into the bucket page.
+ */
+BEGIN copypage
+ARG	fileid		u_int32_t	lu
+ARG	pgno		db_pgno_t	lu
+POINTER	pagelsn		DB_LSN *	lu
+ARG	next_pgno	db_pgno_t	lu
+POINTER	nextlsn		DB_LSN *	lu
+ARG	nnext_pgno	db_pgno_t	lu
+POINTER	nnextlsn	DB_LSN *	lu
+DBT	page		DBT		s
+END
diff --git a/db2/hash/hash_auto.c b/db2/hash/hash_auto.c
index 2279de9668..4820eb8611 100644
--- a/db2/hash/hash_auto.c
+++ b/db2/hash/hash_auto.c
@@ -61,7 +61,7 @@ int __ham_insdel_log(logp, txnid, ret_lsnp, flags,
 	    + sizeof(*pagelsn)
 	    + sizeof(u_int32_t) + (key == NULL ? 0 : key->size)
 	    + sizeof(u_int32_t) + (data == NULL ? 0 : data->size);
-	if ((logrec.data = (void *)malloc(logrec.size)) == NULL)
+	if ((logrec.data = (void *)__db_malloc(logrec.size)) == NULL)
 		return (ENOMEM);
 
 	bp = logrec.data;
@@ -111,7 +111,7 @@ int __ham_insdel_log(logp, txnid, ret_lsnp, flags,
 	ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags);
 	if (txnid != NULL)
 		txnid->last_lsn = *ret_lsnp;
-	free(logrec.data);
+	__db_free(logrec.data);
 	return (ret);
 }
 
@@ -172,7 +172,7 @@ __ham_insdel_print(notused1, dbtp, lsnp, notused3, notused4)
 	}
 	printf("\n");
 	printf("\n");
-	free(argp);
+	__db_free(argp);
 	return (0);
 }
 
@@ -187,7 +187,7 @@ __ham_insdel_read(recbuf, argpp)
 	__ham_insdel_args *argp;
 	u_int8_t *bp;
 
-	argp = (__ham_insdel_args *)malloc(sizeof(__ham_insdel_args) +
+	argp = (__ham_insdel_args *)__db_malloc(sizeof(__ham_insdel_args) +
 	    sizeof(DB_TXN));
 	if (argp == NULL)
 		return (ENOMEM);
@@ -266,7 +266,7 @@ int __ham_newpage_log(logp, txnid, ret_lsnp, flags,
 	    + sizeof(*pagelsn)
 	    + sizeof(next_pgno)
 	    + sizeof(*nextlsn);
-	if ((logrec.data = (void *)malloc(logrec.size)) == NULL)
+	if ((logrec.data = (void *)__db_malloc(logrec.size)) == NULL)
 		return (ENOMEM);
 
 	bp = logrec.data;
@@ -308,7 +308,7 @@ int __ham_newpage_log(logp, txnid, ret_lsnp, flags,
 	ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags);
 	if (txnid != NULL)
 		txnid->last_lsn = *ret_lsnp;
-	free(logrec.data);
+	__db_free(logrec.data);
 	return (ret);
 }
 
@@ -356,7 +356,7 @@ __ham_newpage_print(notused1, dbtp, lsnp, notused3, notused4)
 	printf("\tnextlsn: [%lu][%lu]\n",
 	    (u_long)argp->nextlsn.file, (u_long)argp->nextlsn.offset);
 	printf("\n");
-	free(argp);
+	__db_free(argp);
 	return (0);
 }
 
@@ -371,7 +371,7 @@ __ham_newpage_read(recbuf, argpp)
 	__ham_newpage_args *argp;
 	u_int8_t *bp;
 
-	argp = (__ham_newpage_args *)malloc(sizeof(__ham_newpage_args) +
+	argp = (__ham_newpage_args *)__db_malloc(sizeof(__ham_newpage_args) +
 	    sizeof(DB_TXN));
 	if (argp == NULL)
 		return (ENOMEM);
@@ -441,7 +441,7 @@ int __ham_splitmeta_log(logp, txnid, ret_lsnp, flags,
 	    + sizeof(ovflpoint)
 	    + sizeof(spares)
 	    + sizeof(*metalsn);
-	if ((logrec.data = (void *)malloc(logrec.size)) == NULL)
+	if ((logrec.data = (void *)__db_malloc(logrec.size)) == NULL)
 		return (ENOMEM);
 
 	bp = logrec.data;
@@ -471,7 +471,7 @@ int __ham_splitmeta_log(logp, txnid, ret_lsnp, flags,
 	ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags);
 	if (txnid != NULL)
 		txnid->last_lsn = *ret_lsnp;
-	free(logrec.data);
+	__db_free(logrec.data);
 	return (ret);
 }
 
@@ -514,7 +514,7 @@ __ham_splitmeta_print(notused1, dbtp, lsnp, notused3, notused4)
 	printf("\tmetalsn: [%lu][%lu]\n",
 	    (u_long)argp->metalsn.file, (u_long)argp->metalsn.offset);
 	printf("\n");
-	free(argp);
+	__db_free(argp);
 	return (0);
 }
 
@@ -529,7 +529,7 @@ __ham_splitmeta_read(recbuf, argpp)
 	__ham_splitmeta_args *argp;
 	u_int8_t *bp;
 
-	argp = (__ham_splitmeta_args *)malloc(sizeof(__ham_splitmeta_args) +
+	argp = (__ham_splitmeta_args *)__db_malloc(sizeof(__ham_splitmeta_args) +
 	    sizeof(DB_TXN));
 	if (argp == NULL)
 		return (ENOMEM);
@@ -594,7 +594,7 @@ int __ham_splitdata_log(logp, txnid, ret_lsnp, flags,
 	    + sizeof(pgno)
 	    + sizeof(u_int32_t) + (pageimage == NULL ? 0 : pageimage->size)
 	    + sizeof(*pagelsn);
-	if ((logrec.data = (void *)malloc(logrec.size)) == NULL)
+	if ((logrec.data = (void *)__db_malloc(logrec.size)) == NULL)
 		return (ENOMEM);
 
 	bp = logrec.data;
@@ -632,7 +632,7 @@ int __ham_splitdata_log(logp, txnid, ret_lsnp, flags,
 	ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags);
 	if (txnid != NULL)
 		txnid->last_lsn = *ret_lsnp;
-	free(logrec.data);
+	__db_free(logrec.data);
 	return (ret);
 }
 
@@ -683,7 +683,7 @@ __ham_splitdata_print(notused1, dbtp, lsnp, notused3, notused4)
 	printf("\tpagelsn: [%lu][%lu]\n",
 	    (u_long)argp->pagelsn.file, (u_long)argp->pagelsn.offset);
 	printf("\n");
-	free(argp);
+	__db_free(argp);
 	return (0);
 }
 
@@ -698,7 +698,7 @@ __ham_splitdata_read(recbuf, argpp)
 	__ham_splitdata_args *argp;
 	u_int8_t *bp;
 
-	argp = (__ham_splitdata_args *)malloc(sizeof(__ham_splitdata_args) +
+	argp = (__ham_splitdata_args *)__db_malloc(sizeof(__ham_splitdata_args) +
 	    sizeof(DB_TXN));
 	if (argp == NULL)
 		return (ENOMEM);
@@ -772,7 +772,7 @@ int __ham_replace_log(logp, txnid, ret_lsnp, flags,
 	    + sizeof(u_int32_t) + (olditem == NULL ? 0 : olditem->size)
 	    + sizeof(u_int32_t) + (newitem == NULL ? 0 : newitem->size)
 	    + sizeof(makedup);
-	if ((logrec.data = (void *)malloc(logrec.size)) == NULL)
+	if ((logrec.data = (void *)__db_malloc(logrec.size)) == NULL)
 		return (ENOMEM);
 
 	bp = logrec.data;
@@ -824,7 +824,7 @@ int __ham_replace_log(logp, txnid, ret_lsnp, flags,
 	ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags);
 	if (txnid != NULL)
 		txnid->last_lsn = *ret_lsnp;
-	free(logrec.data);
+	__db_free(logrec.data);
 	return (ret);
 }
 
@@ -886,7 +886,7 @@ __ham_replace_print(notused1, dbtp, lsnp, notused3, notused4)
 	printf("\n");
 	printf("\tmakedup: %lu\n", (u_long)argp->makedup);
 	printf("\n");
-	free(argp);
+	__db_free(argp);
 	return (0);
 }
 
@@ -901,7 +901,7 @@ __ham_replace_read(recbuf, argpp)
 	__ham_replace_args *argp;
 	u_int8_t *bp;
 
-	argp = (__ham_replace_args *)malloc(sizeof(__ham_replace_args) +
+	argp = (__ham_replace_args *)__db_malloc(sizeof(__ham_replace_args) +
 	    sizeof(DB_TXN));
 	if (argp == NULL)
 		return (ENOMEM);
@@ -985,7 +985,7 @@ int __ham_newpgno_log(logp, txnid, ret_lsnp, flags,
 	    + sizeof(new_type)
 	    + sizeof(*pagelsn)
 	    + sizeof(*metalsn);
-	if ((logrec.data = (void *)malloc(logrec.size)) == NULL)
+	if ((logrec.data = (void *)__db_malloc(logrec.size)) == NULL)
 		return (ENOMEM);
 
 	bp = logrec.data;
@@ -1026,7 +1026,7 @@ int __ham_newpgno_log(logp, txnid, ret_lsnp, flags,
 	ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags);
 	if (txnid != NULL)
 		txnid->last_lsn = *ret_lsnp;
-	free(logrec.data);
+	__db_free(logrec.data);
 	return (ret);
 }
 
@@ -1074,7 +1074,7 @@ __ham_newpgno_print(notused1, dbtp, lsnp, notused3, notused4)
 	printf("\tmetalsn: [%lu][%lu]\n",
 	    (u_long)argp->metalsn.file, (u_long)argp->metalsn.offset);
 	printf("\n");
-	free(argp);
+	__db_free(argp);
 	return (0);
 }
 
@@ -1089,7 +1089,7 @@ __ham_newpgno_read(recbuf, argpp)
 	__ham_newpgno_args *argp;
 	u_int8_t *bp;
 
-	argp = (__ham_newpgno_args *)malloc(sizeof(__ham_newpgno_args) +
+	argp = (__ham_newpgno_args *)__db_malloc(sizeof(__ham_newpgno_args) +
 	    sizeof(DB_TXN));
 	if (argp == NULL)
 		return (ENOMEM);
@@ -1127,10 +1127,10 @@ __ham_newpgno_read(recbuf, argpp)
  * PUBLIC: int __ham_ovfl_log
  * PUBLIC:     __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
  * PUBLIC:     u_int32_t, db_pgno_t, u_int32_t, db_pgno_t,
- * PUBLIC:     DB_LSN *));
+ * PUBLIC:     u_int32_t, DB_LSN *));
  */
 int __ham_ovfl_log(logp, txnid, ret_lsnp, flags,
-	fileid, start_pgno, npages, free_pgno, metalsn)
+	fileid, start_pgno, npages, free_pgno, ovflpoint, metalsn)
 	DB_LOG *logp;
 	DB_TXN *txnid;
 	DB_LSN *ret_lsnp;
@@ -1139,6 +1139,7 @@ int __ham_ovfl_log(logp, txnid, ret_lsnp, flags,
 	db_pgno_t start_pgno;
 	u_int32_t npages;
 	db_pgno_t free_pgno;
+	u_int32_t ovflpoint;
 	DB_LSN * metalsn;
 {
 	DBT logrec;
@@ -1160,8 +1161,9 @@ int __ham_ovfl_log(logp, txnid, ret_lsnp, flags,
 	    + sizeof(start_pgno)
 	    + sizeof(npages)
 	    + sizeof(free_pgno)
+	    + sizeof(ovflpoint)
 	    + sizeof(*metalsn);
-	if ((logrec.data = (void *)malloc(logrec.size)) == NULL)
+	if ((logrec.data = (void *)__db_malloc(logrec.size)) == NULL)
 		return (ENOMEM);
 
 	bp = logrec.data;
@@ -1179,6 +1181,8 @@ int __ham_ovfl_log(logp, txnid, ret_lsnp, flags,
 	bp += sizeof(npages);
 	memcpy(bp, &free_pgno, sizeof(free_pgno));
 	bp += sizeof(free_pgno);
+	memcpy(bp, &ovflpoint, sizeof(ovflpoint));
+	bp += sizeof(ovflpoint);
 	if (metalsn != NULL)
 		memcpy(bp, metalsn, sizeof(*metalsn));
 	else
@@ -1191,7 +1195,7 @@ int __ham_ovfl_log(logp, txnid, ret_lsnp, flags,
 	ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags);
 	if (txnid != NULL)
 		txnid->last_lsn = *ret_lsnp;
-	free(logrec.data);
+	__db_free(logrec.data);
 	return (ret);
 }
 
@@ -1231,10 +1235,11 @@ __ham_ovfl_print(notused1, dbtp, lsnp, notused3, notused4)
 	printf("\tstart_pgno: %lu\n", (u_long)argp->start_pgno);
 	printf("\tnpages: %lu\n", (u_long)argp->npages);
 	printf("\tfree_pgno: %lu\n", (u_long)argp->free_pgno);
+	printf("\tovflpoint: %lu\n", (u_long)argp->ovflpoint);
 	printf("\tmetalsn: [%lu][%lu]\n",
 	    (u_long)argp->metalsn.file, (u_long)argp->metalsn.offset);
 	printf("\n");
-	free(argp);
+	__db_free(argp);
 	return (0);
 }
 
@@ -1249,7 +1254,7 @@ __ham_ovfl_read(recbuf, argpp)
 	__ham_ovfl_args *argp;
 	u_int8_t *bp;
 
-	argp = (__ham_ovfl_args *)malloc(sizeof(__ham_ovfl_args) +
+	argp = (__ham_ovfl_args *)__db_malloc(sizeof(__ham_ovfl_args) +
 	    sizeof(DB_TXN));
 	if (argp == NULL)
 		return (ENOMEM);
@@ -1269,6 +1274,8 @@ __ham_ovfl_read(recbuf, argpp)
 	bp += sizeof(argp->npages);
 	memcpy(&argp->free_pgno, bp, sizeof(argp->free_pgno));
 	bp += sizeof(argp->free_pgno);
+	memcpy(&argp->ovflpoint, bp, sizeof(argp->ovflpoint));
+	bp += sizeof(argp->ovflpoint);
 	memcpy(&argp->metalsn, bp,  sizeof(argp->metalsn));
 	bp += sizeof(argp->metalsn);
 	*argpp = argp;
@@ -1276,6 +1283,207 @@ __ham_ovfl_read(recbuf, argpp)
 }
 
 /*
+ * PUBLIC: int __ham_copypage_log
+ * PUBLIC:     __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
+ * PUBLIC:     u_int32_t, db_pgno_t, DB_LSN *, db_pgno_t,
+ * PUBLIC:     DB_LSN *, db_pgno_t, DB_LSN *, DBT *));
+ */
+int __ham_copypage_log(logp, txnid, ret_lsnp, flags,
+	fileid, pgno, pagelsn, next_pgno, nextlsn, nnext_pgno,
+	nnextlsn, page)
+	DB_LOG *logp;
+	DB_TXN *txnid;
+	DB_LSN *ret_lsnp;
+	u_int32_t flags;
+	u_int32_t fileid;
+	db_pgno_t pgno;
+	DB_LSN * pagelsn;
+	db_pgno_t next_pgno;
+	DB_LSN * nextlsn;
+	db_pgno_t nnext_pgno;
+	DB_LSN * nnextlsn;
+	DBT *page;
+{
+	DBT logrec;
+	DB_LSN *lsnp, null_lsn;
+	u_int32_t zero;
+	u_int32_t rectype, txn_num;
+	int ret;
+	u_int8_t *bp;
+	/* Build a DB_ham_copypage log record and append it via log_put(). */
+	rectype = DB_ham_copypage;
+	txn_num = txnid == NULL ? 0 : txnid->txnid;
+	if (txnid == NULL) {		/* no txn: record a zero previous LSN */
+		null_lsn.file = 0;
+		null_lsn.offset = 0;
+		lsnp = &null_lsn;
+	} else
+		lsnp = &txnid->last_lsn;
+	logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN)
+	    + sizeof(fileid)
+	    + sizeof(pgno)
+	    + sizeof(*pagelsn)
+	    + sizeof(next_pgno)
+	    + sizeof(*nextlsn)
+	    + sizeof(nnext_pgno)
+	    + sizeof(*nnextlsn)
+	    + sizeof(u_int32_t) + (page == NULL ? 0 : page->size);
+	if ((logrec.data = (void *)__db_malloc(logrec.size)) == NULL)
+		return (ENOMEM);
+	/* Pack the fields in the order __ham_copypage_read unpacks them. */
+	bp = logrec.data;
+	memcpy(bp, &rectype, sizeof(rectype));
+	bp += sizeof(rectype);
+	memcpy(bp, &txn_num, sizeof(txn_num));
+	bp += sizeof(txn_num);
+	memcpy(bp, lsnp, sizeof(DB_LSN));
+	bp += sizeof(DB_LSN);
+	memcpy(bp, &fileid, sizeof(fileid));
+	bp += sizeof(fileid);
+	memcpy(bp, &pgno, sizeof(pgno));
+	bp += sizeof(pgno);
+	if (pagelsn != NULL)
+		memcpy(bp, pagelsn, sizeof(*pagelsn));
+	else
+		memset(bp, 0, sizeof(*pagelsn));	/* NULL LSN -> zeros */
+	bp += sizeof(*pagelsn);
+	memcpy(bp, &next_pgno, sizeof(next_pgno));
+	bp += sizeof(next_pgno);
+	if (nextlsn != NULL)
+		memcpy(bp, nextlsn, sizeof(*nextlsn));
+	else
+		memset(bp, 0, sizeof(*nextlsn));	/* NULL LSN -> zeros */
+	bp += sizeof(*nextlsn);
+	memcpy(bp, &nnext_pgno, sizeof(nnext_pgno));
+	bp += sizeof(nnext_pgno);
+	if (nnextlsn != NULL)
+		memcpy(bp, nnextlsn, sizeof(*nnextlsn));
+	else
+		memset(bp, 0, sizeof(*nnextlsn));	/* NULL LSN -> zeros */
+	bp += sizeof(*nnextlsn);
+	if (page == NULL) {		/* no page image: length 0, no data */
+		zero = 0;
+		memcpy(bp, &zero, sizeof(u_int32_t));
+		bp += sizeof(u_int32_t);
+	} else {			/* length word, then the raw bytes */
+		memcpy(bp, &page->size, sizeof(page->size));
+		bp += sizeof(page->size);
+		memcpy(bp, page->data, page->size);
+		bp += page->size;
+	}
+#ifdef DEBUG
+	if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size)
+		fprintf(stderr, "Error in log record length");
+#endif
+	ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags);
+	if (txnid != NULL)	/* NOTE(review): done even if log_put failed */
+		txnid->last_lsn = *ret_lsnp;
+	__db_free(logrec.data);
+	return (ret);		/* log_put()'s result */
+}
+
+/*
+ * PUBLIC: int __ham_copypage_print
+ * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+ */
+/* Print the ham_copypage log record held in dbtp->data to stdout. */
+int
+__ham_copypage_print(notused1, dbtp, lsnp, notused3, notused4)
+	DB_LOG *notused1;
+	DBT *dbtp;
+	DB_LSN *lsnp;
+	int notused3;
+	void *notused4;
+{
+	__ham_copypage_args *argp;
+	u_int32_t i;
+	int c, ret;
+	/* NOTE(review): presumably assigned only to quiet unused warnings. */
+	i = 0;
+	c = 0;
+	notused1 = NULL;
+	notused3 = 0;
+	notused4 = NULL;
+	/* Decode the record; a decode failure is returned to the caller. */
+	if ((ret = __ham_copypage_read(dbtp->data, &argp)) != 0)
+		return (ret);
+	printf("[%lu][%lu]ham_copypage: rec: %lu txnid %lx prevlsn [%lu][%lu]\n",
+	    (u_long)lsnp->file,
+	    (u_long)lsnp->offset,
+	    (u_long)argp->type,
+	    (u_long)argp->txnid->txnid,
+	    (u_long)argp->prev_lsn.file,
+	    (u_long)argp->prev_lsn.offset);
+	printf("\tfileid: %lu\n", (u_long)argp->fileid);
+	printf("\tpgno: %lu\n", (u_long)argp->pgno);
+	printf("\tpagelsn: [%lu][%lu]\n",
+	    (u_long)argp->pagelsn.file, (u_long)argp->pagelsn.offset);
+	printf("\tnext_pgno: %lu\n", (u_long)argp->next_pgno);
+	printf("\tnextlsn: [%lu][%lu]\n",
+	    (u_long)argp->nextlsn.file, (u_long)argp->nextlsn.offset);
+	printf("\tnnext_pgno: %lu\n", (u_long)argp->nnext_pgno);
+	printf("\tnnextlsn: [%lu][%lu]\n",
+	    (u_long)argp->nnextlsn.file, (u_long)argp->nnextlsn.offset);
+	printf("\tpage: ");
+	for (i = 0; i < argp->page.size; i++) {
+		c = ((u_int8_t *)argp->page.data)[i];	/* 0..255 for isprint */
+		if (isprint(c) || c == 0xa)	/* printable or newline */
+			putchar(c);
+		else				/* everything else as hex */
+			printf("%#x ", c);
+	}
+	printf("\n");
+	printf("\n");
+	__db_free(argp);	/* allocated by __ham_copypage_read */
+	return (0);
+}
+
+/*
+ * PUBLIC: int __ham_copypage_read __P((void *, __ham_copypage_args **));
+ */
+int
+__ham_copypage_read(recbuf, argpp)
+	void *recbuf;
+	__ham_copypage_args **argpp;
+{
+	__ham_copypage_args *argp;
+	u_int8_t *bp;
+	/* Decode a ham_copypage record from recbuf into a new *argpp. */
+	argp = (__ham_copypage_args *)__db_malloc(sizeof(__ham_copypage_args) +
+	    sizeof(DB_TXN));		/* args plus a trailing DB_TXN */
+	if (argp == NULL)
+		return (ENOMEM);
+	argp->txnid = (DB_TXN *)&argp[1];	/* the DB_TXN just past *argp */
+	bp = recbuf;		/* NOTE(review): no length check on recbuf */
+	memcpy(&argp->type, bp, sizeof(argp->type));
+	bp += sizeof(argp->type);
+	memcpy(&argp->txnid->txnid,  bp, sizeof(argp->txnid->txnid));
+	bp += sizeof(argp->txnid->txnid);
+	memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN));
+	bp += sizeof(DB_LSN);
+	memcpy(&argp->fileid, bp, sizeof(argp->fileid));
+	bp += sizeof(argp->fileid);
+	memcpy(&argp->pgno, bp, sizeof(argp->pgno));
+	bp += sizeof(argp->pgno);
+	memcpy(&argp->pagelsn, bp,  sizeof(argp->pagelsn));
+	bp += sizeof(argp->pagelsn);
+	memcpy(&argp->next_pgno, bp, sizeof(argp->next_pgno));
+	bp += sizeof(argp->next_pgno);
+	memcpy(&argp->nextlsn, bp,  sizeof(argp->nextlsn));
+	bp += sizeof(argp->nextlsn);
+	memcpy(&argp->nnext_pgno, bp, sizeof(argp->nnext_pgno));
+	bp += sizeof(argp->nnext_pgno);
+	memcpy(&argp->nnextlsn, bp,  sizeof(argp->nnextlsn));
+	bp += sizeof(argp->nnextlsn);
+	memcpy(&argp->page.size, bp, sizeof(u_int32_t));
+	bp += sizeof(u_int32_t);
+	argp->page.data = bp;	/* points into recbuf; bytes are not copied */
+	bp += argp->page.size;
+	*argpp = argp;		/* caller frees, cf. __ham_copypage_print */
+	return (0);
+}
+
+/*
  * PUBLIC: int __ham_init_print __P((DB_ENV *));
  */
 int
@@ -1305,6 +1513,9 @@ __ham_init_print(dbenv)
 	if ((ret = __db_add_recovery(dbenv,
 	    __ham_ovfl_print, DB_ham_ovfl)) != 0)
 		return (ret);
+	if ((ret = __db_add_recovery(dbenv,
+	    __ham_copypage_print, DB_ham_copypage)) != 0)
+		return (ret);
 	return (0);
 }
 
@@ -1338,6 +1549,9 @@ __ham_init_recover(dbenv)
 	if ((ret = __db_add_recovery(dbenv,
 	    __ham_ovfl_recover, DB_ham_ovfl)) != 0)
 		return (ret);
+	if ((ret = __db_add_recovery(dbenv,
+	    __ham_copypage_recover, DB_ham_copypage)) != 0)
+		return (ret);
 	return (0);
 }
 
diff --git a/db2/hash/hash_dup.c b/db2/hash/hash_dup.c
index 71bd1c5eb0..22444e4966 100644
--- a/db2/hash/hash_dup.c
+++ b/db2/hash/hash_dup.c
@@ -42,7 +42,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)hash_dup.c	10.7 (Sleepycat) 9/15/97";
+static const char sccsid[] = "@(#)hash_dup.c	10.8 (Sleepycat) 10/14/97";
 #endif /* not lint */
 
 /*
@@ -480,7 +480,7 @@ __ham_check_move(hashp, hcp, add_len)
 	__ham_copy_item(hashp, hcp->pagep, H_DATAINDEX(hcp->bndx), next_pagep);
 
 	/* Now delete the pair from the current page. */
-	ret = __ham_del_pair(hashp, hcp);
+	ret = __ham_del_pair(hashp, hcp, 0);
 
 	(void)__ham_put_page(hashp->dbp, hcp->pagep, 1);
 	hcp->pagep = next_pagep;
diff --git a/db2/hash/hash_page.c b/db2/hash/hash_page.c
index 8ba42da1a4..0a12c14546 100644
--- a/db2/hash/hash_page.c
+++ b/db2/hash/hash_page.c
@@ -47,7 +47,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)hash_page.c	10.24 (Sleepycat) 9/17/97";
+static const char sccsid[] = "@(#)hash_page.c	10.29 (Sleepycat) 11/2/97";
 #endif /* not lint */
 
 /*
@@ -489,19 +489,20 @@ __ham_putitem(p, dbt, type)
 
 
 /*
- * PUBLIC: int __ham_del_pair __P((HTAB *, HASH_CURSOR *));
+ * PUBLIC: int __ham_del_pair __P((HTAB *, HASH_CURSOR *, int));
  * XXX TODO: if the item is an offdup, delete the other pages and
  * then remove the pair. If the offpage page is 0, then you can
  * just remove the pair.
  */
 int
-__ham_del_pair(hashp, cursorp)
+__ham_del_pair(hashp, cursorp, reclaim_page)
 	HTAB *hashp;
 	HASH_CURSOR *cursorp;
+	int reclaim_page;
 {
 	DBT data_dbt, key_dbt;
 	DB_ENV *dbenv;
-	DB_LSN new_lsn, *n_lsn;
+	DB_LSN new_lsn, *n_lsn, tmp_lsn;
 	PAGE *p;
 	db_indx_t ndx;
 	db_pgno_t chg_pgno, pgno;
@@ -542,6 +543,15 @@ __ham_del_pair(hashp, cursorp)
 			    HOFFDUP_PGNO(P_ENTRY(p, H_DATAINDEX(ndx))),
 			    sizeof(db_pgno_t));
 			ret = __db_ddup(hashp->dbp, pgno, __ham_del_page);
+			F_CLR(cursorp, H_ISDUP);
+			break;
+		case H_DUPLICATE:
+			/*
+			 * If we delete a pair that is/was a duplicate, then
+			 * we had better clear the flag so that we update the
+			 * cursor appropriately.
+			 */
+			F_CLR(cursorp, H_ISDUP);
 			break;
 		}
 
@@ -578,13 +588,13 @@ __ham_del_pair(hashp, cursorp)
 		--hashp->hdr->nelem;
 
 	/*
-	 * Check if the page is empty.  There are two cases.  If it's
-	 * empty and it's not the first chain in the bucket (i.e., the
-	 * bucket page) then we can simply remove it. If it is the first
-	 * chain in the bucket, then we need to copy the second page into
-	 * it and remove the second page.
+	 * If we need to reclaim the page, then check if the page is empty.
+	 * There are two cases.  If it's empty and it's not the first page
+	 * in the bucket (i.e., the bucket page) then we can simply remove
+	 * it.  If it is the first page in the bucket, then we need to copy
+	 * the second page into it and remove the second page.
 	 */
-	if (NUM_ENT(p) == 0 && PREV_PGNO(p) == PGNO_INVALID &&
+	if (reclaim_page && NUM_ENT(p) == 0 && PREV_PGNO(p) == PGNO_INVALID &&
 	    NEXT_PGNO(p) != PGNO_INVALID) {
 		PAGE *n_pagep, *nn_pagep;
 		db_pgno_t tmp_pgno;
@@ -592,7 +602,6 @@ __ham_del_pair(hashp, cursorp)
 		/*
 		 * First page in chain is empty and we know that there
 		 * are more pages in the chain.
-		 * XXX Need to log this.
 		 */
 		if ((ret =
 		    __ham_get_page(hashp->dbp, NEXT_PGNO(p), &n_pagep)) != 0)
@@ -605,13 +614,35 @@ __ham_del_pair(hashp, cursorp)
 				(void) __ham_put_page(hashp->dbp, n_pagep, 0);
 				return (ret);
 			}
+		}
+
+		if (DB_LOGGING(hashp->dbp)) {
+			key_dbt.data = n_pagep;
+			key_dbt.size = hashp->hdr->pagesize;
+			if ((ret = __ham_copypage_log(dbenv->lg_info,
+			    (DB_TXN *)hashp->dbp->txn, &new_lsn, 0,
+			    hashp->dbp->log_fileid, PGNO(p), &LSN(p),
+			    PGNO(n_pagep), &LSN(n_pagep), NEXT_PGNO(n_pagep),
+			    NEXT_PGNO(n_pagep) == PGNO_INVALID ? NULL :
+			    &LSN(nn_pagep), &key_dbt)) != 0)
+				return (ret);
+
+			/* Move lsn onto page. */
+			LSN(p) = new_lsn;	/* Structure assignment. */
+			LSN(n_pagep) = new_lsn;
+			if (NEXT_PGNO(n_pagep) != PGNO_INVALID)
+				LSN(nn_pagep) = new_lsn;
+		}
+		if (NEXT_PGNO(n_pagep) != PGNO_INVALID) {
 			PREV_PGNO(nn_pagep) = PGNO(p);
 			(void)__ham_put_page(hashp->dbp, nn_pagep, 1);
 		}
 
 		tmp_pgno = PGNO(p);
+		tmp_lsn = LSN(p);
 		memcpy(p, n_pagep, hashp->hdr->pagesize);
 		PGNO(p) = tmp_pgno;
+		LSN(p) = tmp_lsn;
 		PREV_PGNO(p) = PGNO_INVALID;
 
 		/*
@@ -623,7 +654,8 @@ __ham_del_pair(hashp, cursorp)
 		if ((ret = __ham_dirty_page(hashp, p)) != 0 ||
 		    (ret = __ham_del_page(hashp->dbp, n_pagep)) != 0)
 			return (ret);
-	} else if (NUM_ENT(p) == 0 && PREV_PGNO(p) != PGNO_INVALID) {
+	} else if (reclaim_page &&
+	    NUM_ENT(p) == 0 && PREV_PGNO(p) != PGNO_INVALID) {
 		PAGE *n_pagep, *p_pagep;
 
 		if ((ret =
@@ -690,13 +722,22 @@ __ham_del_pair(hashp, cursorp)
 	}
 	__ham_c_update(hashp, cursorp, chg_pgno, 0, 0, 0);
 
+	/*
+	 * Since we just deleted a pair from the master page, anything
+	 * in cursorp->dpgno should be cleared.
+	 */
+	cursorp->dpgno = PGNO_INVALID;
+
 	F_CLR(cursorp, H_OK);
 	return (ret);
 }
+
 /*
+ * __ham_replpair --
+ *	Given the key data indicated by the cursor, replace part/all of it
+ *	according to the fields in the dbt.
+ *
  * PUBLIC: int __ham_replpair __P((HTAB *, HASH_CURSOR *, DBT *, u_int32_t));
- * Given the key data indicated by the cursor, replace part/all of it
- * according to the fields in the dbt.
  */
 int
 __ham_replpair(hashp, hcp, dbt, make_dup)
@@ -768,7 +809,7 @@ __ham_replpair(hashp, hcp, dbt, make_dup)
 			return (ret);
 
 		if (dbt->doff == 0 && dbt->dlen == len) {
-			ret = __ham_del_pair(hashp, hcp);
+			ret = __ham_del_pair(hashp, hcp, 0);
 			if (ret == 0)
 			    ret = __ham_add_el(hashp,
 			        hcp, &tmp, dbt, H_KEYDATA);
@@ -784,15 +825,15 @@ __ham_replpair(hashp, hcp, dbt, make_dup)
 				goto err;
 
 			/* Now we can delete the item. */
-			if ((ret = __ham_del_pair(hashp, hcp)) != 0) {
-				free(tdata.data);
+			if ((ret = __ham_del_pair(hashp, hcp, 0)) != 0) {
+				__db_free(tdata.data);
 				goto err;
 			}
 
 			/* Now shift old data around to make room for new. */
 			if (change > 0) {
-				tdata.data = (void *)
-				    realloc(tdata.data, tdata.size + change);
+				tdata.data = (void *)__db_realloc(tdata.data,
+				    tdata.size + change);
 				memset((u_int8_t *)tdata.data + tdata.size,
 				    0, change);
 			}
@@ -812,9 +853,9 @@ __ham_replpair(hashp, hcp, dbt, make_dup)
 
 			/* Now add the pair. */
 			ret = __ham_add_el(hashp, hcp, &tmp, &tdata, type);
-			free(tdata.data);
+			__db_free(tdata.data);
 		}
-err:		free(tmp.data);
+err:		__db_free(tmp.data);
 		return (ret);
 	}
 
@@ -1025,7 +1066,7 @@ __ham_split_page(hashp, obucket, nbucket)
 		}
 	}
 	if (big_buf != NULL)
-		free(big_buf);
+		__db_free(big_buf);
 
 	/*
 	 * If the original bucket spanned multiple pages, then we've got
@@ -1549,17 +1590,20 @@ __ham_init_ovflpages(hp)
 {
 	DB_LSN new_lsn;
 	PAGE *p;
-	db_pgno_t last_pgno;
-	u_int32_t i, numpages;
+	db_pgno_t last_pgno, new_pgno;
+	u_int32_t i, curpages, numpages;
 
-	numpages = hp->hdr->ovfl_point + 1;
+	curpages = hp->hdr->spares[hp->hdr->ovfl_point] - 
+	    hp->hdr->spares[hp->hdr->ovfl_point - 1];
+	numpages = hp->hdr->ovfl_point + 1 - curpages;
 
 	last_pgno = hp->hdr->last_freed;
+	new_pgno = PGNO_OF(hp, hp->hdr->ovfl_point, curpages + 1);
 	if (DB_LOGGING(hp->dbp)) {
 		(void)__ham_ovfl_log(hp->dbp->dbenv->lg_info,
 		    (DB_TXN *)hp->dbp->txn, &new_lsn, 0,
-		    hp->dbp->log_fileid, PGNO_OF(hp, hp->hdr->ovfl_point, 1),
-		    numpages, last_pgno, &hp->hdr->lsn);
+		    hp->dbp->log_fileid, new_pgno,
+		    numpages, last_pgno, hp->hdr->ovfl_point, &hp->hdr->lsn);
 		hp->hdr->lsn = new_lsn;
 	} else
 		ZERO_LSN(new_lsn);
@@ -1567,7 +1611,8 @@ __ham_init_ovflpages(hp)
 	hp->hdr->spares[hp->hdr->ovfl_point] += numpages;
 	for (i = numpages; i > 0; i--) {
 		if (__ham_new_page(hp,
-		    PGNO_OF(hp, hp->hdr->ovfl_point, i), P_INVALID, &p) != 0)
+		    PGNO_OF(hp, hp->hdr->ovfl_point, curpages + i),
+		    P_INVALID, &p) != 0)
 			break;
 		LSN(p) = new_lsn;
 		NEXT_PGNO(p) = last_pgno;
diff --git a/db2/hash/hash_rec.c b/db2/hash/hash_rec.c
index 1b30be337d..d239e3d0df 100644
--- a/db2/hash/hash_rec.c
+++ b/db2/hash/hash_rec.c
@@ -47,7 +47,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)hash_rec.c	10.13 (Sleepycat) 9/15/97";
+static const char sccsid[] = "@(#)hash_rec.c	10.14 (Sleepycat) 11/2/97";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -756,7 +756,6 @@ __ham_ovfl_recover(logp, dbtp, lsnp, redo, info)
 	hashp = (HTAB *)file_dbp->internal;
 	GET_META(file_dbp, hashp);
 	getmeta = 1;
-	file_dbp = NULL;
 
 	cmp_n = log_compare(lsnp, &hashp->hdr->lsn);
 	cmp_p = log_compare(&hashp->hdr->lsn, &argp->metalsn);
@@ -764,12 +763,12 @@ __ham_ovfl_recover(logp, dbtp, lsnp, redo, info)
 	if (cmp_p == 0 && redo) {
 		/* Redo the allocation. */
 		hashp->hdr->last_freed = argp->start_pgno;
-		hashp->hdr->spares[argp->npages  - 1] += argp->npages;
+		hashp->hdr->spares[argp->ovflpoint] += argp->npages;
 		hashp->hdr->lsn = *lsnp;
 		F_SET(file_dbp, DB_HS_DIRTYMETA);
 	} else if (cmp_n == 0 && !redo) {
 		hashp->hdr->last_freed = argp->free_pgno;
-		hashp->hdr->spares[argp->npages  - 1] -= argp->npages;
+		hashp->hdr->spares[argp->ovflpoint] -= argp->npages;
 		hashp->hdr->lsn = argp->metalsn;
 		F_SET(file_dbp, DB_HS_DIRTYMETA);
 	}
@@ -808,3 +807,142 @@ out:	if (getmeta)
 		RELEASE_META(file_dbp, hashp);
 	REC_CLOSE;
 }
+
+/*
+ * __ham_copypage_recover --
+ *	Recovery function for copypage.
+ * 
+ * PUBLIC: int __ham_copypage_recover
+ * PUBLIC:   __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+ */
+int
+__ham_copypage_recover(logp, dbtp, lsnp, redo, info)
+	DB_LOG *logp;
+	DBT *dbtp;
+	DB_LSN *lsnp;
+	int redo;
+	void *info;
+{
+	__ham_copypage_args *argp;
+	DB *file_dbp, *mdbp;
+	DB_MPOOLFILE *mpf;
+	HTAB *hashp;
+	PAGE *pagep;
+	int cmp_n, cmp_p, getmeta, modified, ret;
+
+	getmeta = 0;
+	hashp = NULL;				/* XXX: shut the compiler up. */
+	REC_PRINT(__ham_copypage_print);
+	REC_INTRO(__ham_copypage_read);
+
+	hashp = (HTAB *)file_dbp->internal;
+	GET_META(file_dbp, hashp);
+	getmeta = 1;
+	modified = 0;
+
+	/* This is the bucket page. */
+	ret = memp_fget(mpf, &argp->pgno, 0, &pagep);
+	if (ret != 0)
+		if (!redo) {
+			/*
+			 * We are undoing and the page doesn't exist.  That
+			 * is equivalent to having a pagelsn of 0, so we
+			 * would not have to undo anything.  In this case,
+			 * don't bother creating a page.
+			 */
+			ret = 0;
+			goto donext;
+		} else if ((ret = memp_fget(mpf, &argp->pgno,
+		    DB_MPOOL_CREATE, &pagep)) != 0)
+			goto out;
+
+	cmp_n = log_compare(lsnp, &LSN(pagep));
+	cmp_p = log_compare(&LSN(pagep), &argp->pagelsn);
+
+	if (cmp_p == 0 && redo) {
+		/* Need to redo update described. */
+		memcpy(pagep, argp->page.data, argp->page.size);
+		LSN(pagep) = *lsnp;
+		modified = 1;
+	} else if (cmp_n == 0 && !redo) {
+		/* Need to undo update described. */
+		P_INIT(pagep, hashp->hdr->pagesize, argp->pgno, PGNO_INVALID,
+		    argp->next_pgno, 0, P_HASH);
+		LSN(pagep) = argp->pagelsn;
+		modified = 1;
+	}
+	if ((ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0)
+		goto out;
+
+	/* Now fix up the "next" page. */
+donext:	ret = memp_fget(mpf, &argp->next_pgno, 0, &pagep);
+	if (ret != 0)
+		if (!redo) {
+			/*
+			 * We are undoing and the page doesn't exist.  That
+			 * is equivalent to having a pagelsn of 0, so we
+			 * would not have to undo anything.  In this case,
+			 * don't bother creating a page.
+			 */
+			ret = 0;
+			goto do_nn;
+		} else if ((ret = memp_fget(mpf, &argp->next_pgno,
+		    DB_MPOOL_CREATE, &pagep)) != 0)
+			goto out;
+
+	/* There is nothing to do in the REDO case; only UNDO. */
+
+	cmp_n = log_compare(lsnp, &LSN(pagep));
+	if (cmp_n == 0 && !redo) {
+		/* Need to undo update described. */
+		memcpy(pagep, argp->page.data, argp->page.size);
+		modified = 1;
+	}
+	if ((ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0)
+		goto out;
+
+	/* Now fix up the next's next page. */
+do_nn:	if (argp->nnext_pgno == PGNO_INVALID) {
+		*lsnp = argp->prev_lsn;
+		goto out;
+	}
+
+	ret = memp_fget(mpf, &argp->nnext_pgno, 0, &pagep);
+	if (ret != 0)
+		if (!redo) {
+			/*
+			 * We are undoing and the page doesn't exist.  That
+			 * is equivalent to having a pagelsn of 0, so we
+			 * would not have to undo anything.  In this case,
+			 * don't bother creating a page.
+			 */
+			ret = 0;
+			*lsnp = argp->prev_lsn;
+			goto out;
+		} else if ((ret = memp_fget(mpf, &argp->nnext_pgno,
+		    DB_MPOOL_CREATE, &pagep)) != 0)
+			goto out;
+
+	cmp_n = log_compare(lsnp, &LSN(pagep));
+	cmp_p = log_compare(&LSN(pagep), &argp->nnextlsn);
+
+	if (cmp_p == 0 && redo) {
+		/* Need to redo update described. */
+		PREV_PGNO(pagep) = argp->pgno;
+		LSN(pagep) = *lsnp;
+		modified = 1;
+	} else if (cmp_n == 0 && !redo) {
+		/* Need to undo update described. */
+		PREV_PGNO(pagep) = argp->next_pgno;
+		LSN(pagep) = argp->nnextlsn;
+		modified = 1;
+	}
+	if ((ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0)
+		goto out;
+
+	*lsnp = argp->prev_lsn;
+
+out:	if (getmeta)
+		RELEASE_META(file_dbp, hashp);
+	REC_CLOSE;
+}
diff --git a/db2/include/btree_auto.h b/db2/include/btree_auto.h
index b422e1db1b..041b80f196 100644
--- a/db2/include/btree_auto.h
+++ b/db2/include/btree_auto.h
@@ -58,6 +58,7 @@ typedef struct _bam_rsplit_args {
 	u_int32_t	fileid;
 	db_pgno_t	pgno;
 	DBT	pgdbt;
+	db_pgno_t	nrec;
 	DBT	rootent;
 	DB_LSN 	rootlsn;
 } __bam_rsplit_args;
@@ -105,4 +106,22 @@ typedef struct _bam_cdel_args {
 	u_int32_t	indx;
 } __bam_cdel_args;
 
+
+#define	DB_bam_repl	(DB_bam_BEGIN + 8)
+
+typedef struct _bam_repl_args {
+	u_int32_t type;
+	DB_TXN *txnid;
+	DB_LSN prev_lsn;
+	u_int32_t	fileid;
+	db_pgno_t	pgno;
+	DB_LSN 	lsn;
+	u_int32_t	indx;
+	u_int32_t	isdeleted;
+	DBT	orig;
+	DBT	repl;
+	u_int32_t	prefix;
+	u_int32_t	suffix;
+} __bam_repl_args;
+
 #endif
diff --git a/db2/include/btree_ext.h b/db2/include/btree_ext.h
index 9133c58c6b..bbe0d971b2 100644
--- a/db2/include/btree_ext.h
+++ b/db2/include/btree_ext.h
@@ -1,4 +1,4 @@
-/* Do not edit: automatically built by dist/distrib. */
+/* DO NOT EDIT: automatically built by dist/distrib. */
 int __bam_close __P((DB *));
 int __bam_sync __P((DB *, int));
 int __bam_cmp __P((DB *, const DBT *, EPG *));
@@ -35,6 +35,7 @@ int __bam_pget __P((DB *, PAGE **, db_pgno_t *, int));
 int __bam_put __P((DB *, DB_TXN *, DBT *, DBT *, int));
 int __bam_iitem __P((DB *,
    PAGE **, db_indx_t *, DBT *, DBT *, int, int));
+int __bam_ritem __P((DB *, PAGE *, u_int32_t, DBT *));
 int __bam_pg_alloc_recover
   __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
 int __bam_pg_free_recover
@@ -49,6 +50,8 @@ int __bam_cadjust_recover
   __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
 int __bam_cdel_recover
   __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+int __bam_repl_recover
+  __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
 int __ram_open __P((DB *, DBTYPE, DB_INFO *));
 int __ram_cursor __P((DB *, DB_TXN *, DBC **));
 int __ram_close __P((DB *));
@@ -94,8 +97,8 @@ int __bam_split_print
 int __bam_split_read __P((void *, __bam_split_args **));
 int __bam_rsplit_log
     __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
-    u_int32_t, db_pgno_t, DBT *, DBT *,
-    DB_LSN *));
+    u_int32_t, db_pgno_t, DBT *, db_pgno_t,
+    DBT *, DB_LSN *));
 int __bam_rsplit_print
    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
 int __bam_rsplit_read __P((void *, __bam_rsplit_args **));
@@ -119,5 +122,13 @@ int __bam_cdel_log
 int __bam_cdel_print
    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
 int __bam_cdel_read __P((void *, __bam_cdel_args **));
+int __bam_repl_log
+    __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
+    u_int32_t, db_pgno_t, DB_LSN *, u_int32_t,
+    u_int32_t, DBT *, DBT *, u_int32_t,
+    u_int32_t));
+int __bam_repl_print
+   __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+int __bam_repl_read __P((void *, __bam_repl_args **));
 int __bam_init_print __P((DB_ENV *));
 int __bam_init_recover __P((DB_ENV *));
diff --git a/db2/include/clib_ext.h b/db2/include/clib_ext.h
index 8ccd2b559f..91e4a13fa5 100644
--- a/db2/include/clib_ext.h
+++ b/db2/include/clib_ext.h
@@ -1,4 +1,4 @@
-/* Do not edit: automatically built by dist/distrib. */
+/* DO NOT EDIT: automatically built by dist/distrib. */
 #ifdef __STDC__
 void err __P((int eval, const char *, ...));
 #else
diff --git a/db2/include/common_ext.h b/db2/include/common_ext.h
index 9840162a12..b814582abd 100644
--- a/db2/include/common_ext.h
+++ b/db2/include/common_ext.h
@@ -1,4 +1,4 @@
-/* Do not edit: automatically built by dist/distrib. */
+/* DO NOT EDIT: automatically built by dist/distrib. */
 int __db_appname __P((DB_ENV *,
    APPNAME, const char *, const char *, int *, char **));
 int __db_apprec __P((DB_ENV *, int));
@@ -24,6 +24,7 @@ int __db_ferr __P((const DB_ENV *, const char *, int));
 u_int32_t __db_log2 __P((u_int32_t));
 int __db_rcreate __P((DB_ENV *, APPNAME,
    const char *, const char *, int, size_t, int *, void *));
+int __db_rinit __P((DB_ENV *, RLAYOUT *, int, size_t, int));
 int __db_ropen __P((DB_ENV *,
    APPNAME, const char *, const char *, int, int *, void *));
 int __db_rclose __P((DB_ENV *, int, void *));
diff --git a/db2/include/db.h.src b/db2/include/db.h.src
index 63d9603dba..3cc2bfd4fc 100644
--- a/db2/include/db.h.src
+++ b/db2/include/db.h.src
@@ -4,7 +4,7 @@
  * Copyright (c) 1996, 1997
  *	Sleepycat Software.  All rights reserved.
  *
- *	@(#)db.h.src	10.77 (Sleepycat) 9/24/97
+ *	@(#)db.h.src	10.91 (Sleepycat) 11/3/97
  */
 
 #ifndef _DB_H_
@@ -28,9 +28,15 @@
  * XXX
  * Handle function prototypes and the keyword "const".  This steps on name
  * space that DB doesn't control, but all of the other solutions are worse.
+ *
+ * XXX
+ * While Microsoft's compiler is ANSI C compliant, it doesn't define
+ * __STDC__ by default; you must specify a command line flag or #pragma to
+ * turn it on.  Don't do that, however, because some of Microsoft's own
+ * header files won't compile.
  */
 #undef	__P
-#if defined(__STDC__) || defined(__cplusplus)
+#if defined(__STDC__) || defined(__cplusplus) || defined(_MSC_VER)
 #define	__P(protos)	protos		/* ANSI C prototypes */
 #else
 #define	const
@@ -67,8 +73,8 @@
 
 #define	DB_VERSION_MAJOR	2
 #define	DB_VERSION_MINOR	3
-#define	DB_VERSION_PATCH	10
-#define	DB_VERSION_STRING	"Sleepycat Software: DB 2.3.10: (9/24/97)"
+#define	DB_VERSION_PATCH	12
+#define	DB_VERSION_STRING	"Sleepycat Software: DB 2.3.12: (11/3/97)"
 
 typedef	u_int32_t	db_pgno_t;	/* Page number type. */
 typedef	u_int16_t	db_indx_t;	/* Page offset type. */
@@ -93,6 +99,7 @@ struct __db_lockregion;	typedef struct __db_lockregion DB_LOCKREGION;
 struct __db_lockreq;	typedef struct __db_lockreq DB_LOCKREQ;
 struct __db_locktab;	typedef struct __db_locktab DB_LOCKTAB;
 struct __db_log;	typedef struct __db_log DB_LOG;
+struct __db_log_stat;	typedef struct __db_log_stat DB_LOG_STAT;
 struct __db_lsn;	typedef struct __db_lsn DB_LSN;
 struct __db_mpool;	typedef struct __db_mpool DB_MPOOL;
 struct __db_mpool_fstat;typedef struct __db_mpool_fstat DB_MPOOL_FSTAT;
@@ -122,6 +129,31 @@ struct __db_dbt {
 };
 
 /*
+ * DB configuration.  There is a set of functions that the application
+ * can replace with its own versions.
+ */
+#define	DB_FUNC_CALLOC	 1		/* ANSI C calloc. */
+#define	DB_FUNC_CLOSE	 2		/* POSIX 1003.1 close. */
+#define	DB_FUNC_DIRFREE	 3		/* DB: free directory list. */
+#define	DB_FUNC_DIRLIST	 4		/* DB: create directory list. */
+#define	DB_FUNC_EXISTS	 5		/* DB: return if file exists. */
+#define	DB_FUNC_FREE	 6		/* ANSI C free. */
+#define	DB_FUNC_FSYNC	 7		/* POSIX 1003.1 fsync. */
+#define	DB_FUNC_IOINFO	 8		/* DB: return file I/O information. */
+#define	DB_FUNC_MALLOC	 9		/* ANSI C malloc. */
+#define	DB_FUNC_MAP	10		/* DB: map file into shared memory. */
+#define	DB_FUNC_OPEN	11		/* POSIX 1003.1 open. */
+#define	DB_FUNC_READ	12		/* POSIX 1003.1 read. */
+#define	DB_FUNC_REALLOC	13		/* ANSI C realloc. */
+#define	DB_FUNC_SEEK	14		/* POSIX 1003.1 lseek. */
+#define	DB_FUNC_SLEEP	15		/* DB: sleep secs/usecs. */
+#define	DB_FUNC_STRDUP	16		/* ANSI C strdup. */
+#define	DB_FUNC_UNLINK	17		/* POSIX 1003.1 unlink. */
+#define	DB_FUNC_UNMAP	18		/* DB: unmap shared memory file. */
+#define	DB_FUNC_WRITE	19		/* POSIX 1003.1 write. */
+#define	DB_FUNC_YIELD	20		/* DB: yield thread to scheduler. */
+
+/*
  * Database configuration and initialization.
  */
  /*
@@ -134,21 +166,20 @@ struct __db_dbt {
 /*
  * Flags understood by db_appinit(3).
  *
- * DB_APP_INIT and DB_MUTEXDEBUG are internal only, and not documented.
+ * DB_MUTEXDEBUG is internal only, and not documented.
  */
 /*				0x00007	   COMMON MASK. */
-#define	DB_APP_INIT		0x00008	/* Appinit called, paths initialized. */
-#define	DB_INIT_LOCK		0x00010	/* Initialize locking. */
-#define	DB_INIT_LOG		0x00020	/* Initialize logging. */
-#define	DB_INIT_MPOOL		0x00040	/* Initialize mpool. */
-#define	DB_INIT_TXN		0x00080	/* Initialize transactions. */
-#define	DB_MPOOL_PRIVATE	0x00100	/* Mpool: private memory pool. */
-#define	DB_MUTEXDEBUG		0x00200	/* Do not get/set mutexes in regions. */
-#define	DB_RECOVER		0x00400	/* Run normal recovery. */
-#define	DB_RECOVER_FATAL	0x00800 /* Run catastrophic recovery. */
-#define	DB_TXN_NOSYNC		0x01000	/* Do not sync log on commit. */
-#define	DB_USE_ENVIRON		0x02000	/* Use the environment. */
-#define	DB_USE_ENVIRON_ROOT	0x04000	/* Use the environment if root. */
+#define	DB_INIT_LOCK		0x00008	/* Initialize locking. */
+#define	DB_INIT_LOG		0x00010	/* Initialize logging. */
+#define	DB_INIT_MPOOL		0x00020	/* Initialize mpool. */
+#define	DB_INIT_TXN		0x00040	/* Initialize transactions. */
+#define	DB_MPOOL_PRIVATE	0x00080	/* Mpool: private memory pool. */
+#define	DB_MUTEXDEBUG		0x00100	/* Do not get/set mutexes in regions. */
+#define	DB_RECOVER		0x00200	/* Run normal recovery. */
+#define	DB_RECOVER_FATAL	0x00400 /* Run catastrophic recovery. */
+#define	DB_TXN_NOSYNC		0x00800	/* Do not sync log on commit. */
+#define	DB_USE_ENVIRON		0x01000	/* Use the environment. */
+#define	DB_USE_ENVIRON_ROOT	0x02000	/* Use the environment if root. */
 
 /* CURRENTLY UNUSED LOCK FLAGS. */
 #define	DB_TXN_LOCK_2PL		0x00000	/* Two-phase locking. */
@@ -209,7 +240,6 @@ struct __db_env {
 	int		 lk_modes;	/* Number of lock modes in table. */
 	unsigned int	 lk_max;	/* Maximum number of locks. */
 	u_int32_t	 lk_detect;	/* Deadlock detect on every conflict. */
-	int (*db_yield) __P((void));	/* Yield function for threads. */
 
 	/* Logging. */
 	DB_LOG		*lg_info;	/* Return from log_open(). */
@@ -226,6 +256,9 @@ struct __db_env {
 	int (*tx_recover)		/* Dispatch function for recovery. */
 	    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
 
+#define	DB_ENV_APPINIT		0x01	/* Paths initialized by db_appinit(). */
+#define	DB_ENV_STANDALONE	0x02	/* Test: freestanding environment. */
+#define	DB_ENV_THREAD		0x04	/* DB_ENV is multi-threaded. */
 	u_int32_t	 flags;		/* Flags. */
 };
 
@@ -301,7 +334,7 @@ struct __db_info {
 #define	DB_CURRENT	0x000010	/* c_get(), c_put(), log_get() */
 #define	DB_FIRST	0x000020	/* c_get(), log_get() */
 #define	DB_FLUSH	0x000040	/* log_put() */
-#define	DB_GET_RECNO	0x000080	/* c_get() */
+#define	DB_GET_RECNO	0x000080	/* get(), c_get() */
 #define	DB_KEYFIRST	0x000100	/* c_put() */
 #define	DB_KEYLAST	0x000200	/* c_put() */
 #define	DB_LAST		0x000400	/* c_get(), log_get() */
@@ -312,7 +345,7 @@ struct __db_info {
 #define	DB_RECORDCOUNT	0x008000	/* stat() */
 #define	DB_SET		0x010000	/* c_get(), log_get() */
 #define	DB_SET_RANGE	0x020000	/* c_get() */
-#define	DB_SET_RECNO	0x040000	/* get(), c_get() */
+#define	DB_SET_RECNO	0x040000	/* c_get() */
 
 /* DB (user visible) error return codes. */
 #define	DB_INCOMPLETE		( -1)	/* Sync didn't finish. */
@@ -472,6 +505,8 @@ struct __db_bt_stat {
 	u_int32_t bt_get;		/* Items retrieved. */
 	u_int32_t bt_cache_hit;		/* Hits in fast-insert code. */
 	u_int32_t bt_cache_miss;	/* Misses in fast-insert code. */
+	u_int32_t bt_magic;		/* Magic number. */
+	u_int32_t bt_version;		/* Version number. */
 };
 
 #if defined(__cplusplus)
@@ -479,6 +514,7 @@ extern "C" {
 #endif
 int   db_appinit __P((const char *, char * const *, DB_ENV *, int));
 int   db_appexit __P((DB_ENV *));
+int   db_jump_set __P((void *, int));
 int   db_open __P((const char *, DBTYPE, int, int, DB_ENV *, DB_INFO *, DB **));
 char *db_version __P((int *, int *, int *));
 #if defined(__cplusplus)
@@ -576,6 +612,22 @@ struct __db_lsn {
 	u_int32_t	offset;		/* File offset. */
 };
 
+/* Log statistics structure. */
+struct __db_log_stat {
+	u_int32_t st_magic;		/* Log file magic number. */
+	u_int32_t st_version;		/* Log file version number. */
+	int st_mode;			/* Log file mode. */
+	u_int32_t st_lg_max;		/* Maximum log file size. */
+	u_int32_t st_w_bytes;		/* Bytes to log. */
+	u_int32_t st_w_mbytes;		/* Megabytes to log. */
+	u_int32_t st_wc_bytes;		/* Bytes to log since checkpoint. */
+	u_int32_t st_wc_mbytes;		/* Megabytes to log since checkpoint. */
+	u_int32_t st_wcount;		/* Total writes to the log. */
+	u_int32_t st_scount;		/* Total syncs to the log. */
+	u_int32_t st_region_wait;	/* Region lock granted after wait. */
+	u_int32_t st_region_nowait;	/* Region lock granted without wait. */
+};
+
 #if defined(__cplusplus)
 extern "C" {
 #endif
@@ -588,6 +640,7 @@ int	 log_get __P((DB_LOG *, DB_LSN *, DBT *, int));
 int	 log_open __P((const char *, int, int, DB_ENV *, DB_LOG **));
 int	 log_put __P((DB_LOG *, DB_LSN *, const DBT *, int));
 int	 log_register __P((DB_LOG *, DB *, const char *, DBTYPE, u_int32_t *));
+int	 log_stat __P((DB_LOG *, DB_LOG_STAT **, void *(*)(size_t)));
 int	 log_unlink __P((const char *, int, DB_ENV *));
 int	 log_unregister __P((DB_LOG *, u_int32_t));
 #if defined(__cplusplus)
@@ -610,30 +663,35 @@ int	 log_unregister __P((DB_LOG *, u_int32_t));
 /* Mpool statistics structure. */
 struct __db_mpool_stat {
 	size_t st_cachesize;		/* Cache size. */
-	unsigned long st_cache_hit;	/* Pages found in the cache. */
-	unsigned long st_cache_miss;	/* Pages not found in the cache. */
-	unsigned long st_map;		/* Pages from mapped files. */
-	unsigned long st_page_create;	/* Pages created in the cache. */
-	unsigned long st_page_in;	/* Pages read in. */
-	unsigned long st_page_out;	/* Pages written out. */
-	unsigned long st_ro_evict;	/* Read-only pages evicted. */
-	unsigned long st_rw_evict;	/* Read-write pages evicted. */
-	unsigned long st_hash_buckets;	/* Number of hash buckets. */
-	unsigned long st_hash_searches;	/* Total hash chain searches. */
-	unsigned long st_hash_longest;	/* Longest hash chain searched. */
-	unsigned long st_hash_examined;	/* Total hash entries searched. */
+	u_int32_t st_cache_hit;		/* Pages found in the cache. */
+	u_int32_t st_cache_miss;	/* Pages not found in the cache. */
+	u_int32_t st_map;		/* Pages from mapped files. */
+	u_int32_t st_page_create;	/* Pages created in the cache. */
+	u_int32_t st_page_in;		/* Pages read in. */
+	u_int32_t st_page_out;		/* Pages written out. */
+	u_int32_t st_ro_evict;		/* Clean pages forced from the cache. */
+	u_int32_t st_rw_evict;		/* Dirty pages forced from the cache. */
+	u_int32_t st_hash_buckets;	/* Number of hash buckets. */
+	u_int32_t st_hash_searches;	/* Total hash chain searches. */
+	u_int32_t st_hash_longest;	/* Longest hash chain searched. */
+	u_int32_t st_hash_examined;	/* Total hash entries searched. */
+	u_int32_t st_page_clean;	/* Clean pages. */
+	u_int32_t st_page_dirty;	/* Dirty pages. */
+	u_int32_t st_page_trickle;	/* Pages written by memp_trickle. */
+	u_int32_t st_region_wait;	/* Region lock granted after wait. */
+	u_int32_t st_region_nowait;	/* Region lock granted without wait. */
 };
 
 /* Mpool file statistics structure. */
 struct __db_mpool_fstat {
 	char *file_name;		/* File name. */
 	size_t st_pagesize;		/* Page size. */
-	unsigned long st_cache_hit;	/* Pages found in the cache. */
-	unsigned long st_cache_miss;	/* Pages not found in the cache. */
-	unsigned long st_map;		/* Pages from mapped files. */
-	unsigned long st_page_create;	/* Pages created in the cache. */
-	unsigned long st_page_in;	/* Pages read in. */
-	unsigned long st_page_out;	/* Pages written out. */
+	u_int32_t st_cache_hit;		/* Pages found in the cache. */
+	u_int32_t st_cache_miss;	/* Pages not found in the cache. */
+	u_int32_t st_map;		/* Pages from mapped files. */
+	u_int32_t st_page_create;	/* Pages created in the cache. */
+	u_int32_t st_page_in;		/* Pages read in. */
+	u_int32_t st_page_out;		/* Pages written out. */
 };
 
 #if defined(__cplusplus)
@@ -654,6 +712,7 @@ int	memp_register __P((DB_MPOOL *, int,
 int	memp_stat __P((DB_MPOOL *,
 	    DB_MPOOL_STAT **, DB_MPOOL_FSTAT ***, void *(*)(size_t)));
 int	memp_sync __P((DB_MPOOL *, DB_LSN *));
+int	memp_trickle __P((DB_MPOOL *, int, int *));
 int	memp_unlink __P((const char *, int, DB_ENV *));
 #if defined(__cplusplus)
 };
diff --git a/db2/include/db_am.h b/db2/include/db_am.h
index 5814ff88c3..0ea24be667 100644
--- a/db2/include/db_am.h
+++ b/db2/include/db_am.h
@@ -4,7 +4,7 @@
  * Copyright (c) 1996, 1997
  *	Sleepycat Software.  All rights reserved.
  *
- *	@(#)db_am.h	10.6 (Sleepycat) 8/27/97
+ *	@(#)db_am.h	10.7 (Sleepycat) 10/25/97
  */
 #ifndef _DB_AM_H
 #define _DB_AM_H
@@ -49,7 +49,7 @@
 }
 #define	REC_CLOSE {							\
 	if (argp != NULL)						\
-		free (argp);						\
+		__db_free(argp);					\
 	if (file_dbp != NULL) {						\
 		F_CLR(file_dbp, DB_AM_RECOVER);				\
 		if (F_ISSET(file_dbp, DB_AM_THREAD))			\
@@ -67,7 +67,7 @@
 }
 #define	REC_NOOP_CLOSE {						\
 	if (argp != NULL)						\
-		free (argp);						\
+		__db_free(argp);					\
 	return (ret);							\
 }
 
diff --git a/db2/include/db_auto.h b/db2/include/db_auto.h
index 7478173740..4c7b4da970 100644
--- a/db2/include/db_auto.h
+++ b/db2/include/db_auto.h
@@ -59,6 +59,7 @@ typedef struct _db_ovref_args {
 	DB_LSN prev_lsn;
 	u_int32_t	fileid;
 	db_pgno_t	pgno;
+	int32_t	adjust;
 	DB_LSN 	lsn;
 } __db_ovref_args;
 
diff --git a/db2/include/db_cxx.h b/db2/include/db_cxx.h
index 611d967ef9..01d1231092 100644
--- a/db2/include/db_cxx.h
+++ b/db2/include/db_cxx.h
@@ -4,12 +4,11 @@
  * Copyright (c) 1997
  *	Sleepycat Software.  All rights reserved.
  *
- *	@(#)db_cxx.h	10.8 (Sleepycat) 9/20/97
+ *	@(#)db_cxx.h	10.12 (Sleepycat) 10/25/97
  */
 
 #ifndef _DB_CXX_H_
 #define _DB_CXX_H_
-
 //
 // C++ assumptions:
 //
@@ -264,7 +263,7 @@ public:
     // Normally these would be called register and unregister to
     // parallel the C interface, but "register" is a reserved word.
     //
-    int db_register(Db *dbp, const char *name, u_int32_t *fidp);
+    int db_register(Db *dbp, const char *name, DBTYPE type, u_int32_t *fidp);
     int db_unregister(u_int32_t fid);
 
     // Create or remove new log files
@@ -353,6 +352,7 @@ public:
     int stat(DB_MPOOL_STAT **gsp, DB_MPOOL_FSTAT ***fsp,
              void *(*db_malloc)(size_t));
     int sync(DbLsn *lsn);
+    int trickle(int pct, int *nwrotep);
 
     // Create or remove new mpool files
     //
@@ -598,6 +598,11 @@ public:
     //
     int appinit(const char *homeDir, char *const *db_config, int flags);
 
+    // Called automatically when DbEnv is destroyed, or can be
+    // called at any time to shut down Db.
+    //
+    int appexit();
+
     ////////////////////////////////////////////////////////////////
     // simple get/set access methods
     //
@@ -675,11 +680,6 @@ public:
     u_int32_t get_lk_detect() const;
     void set_lk_detect(u_int32_t);
 
-    // Yield function for threads.
-    typedef int (*db_yield_fcn) (void);
-    db_yield_fcn get_yield() const;
-    void set_yield(db_yield_fcn);
-
 
     ////////////////////////////////////////////////////////////////
     // Logging.
@@ -783,7 +783,7 @@ class _exported Db
 public:
     int close(int flags);
     int cursor(DbTxn *txnid, Dbc **cursorp);
-    int del(Dbt *key, DbTxn *txnid);
+    int del(DbTxn *txnid, Dbt *key, int flags);
     int fd(int *fdp);
     int get(DbTxn *txnid, Dbt *key, Dbt *data, int flags);
     int put(DbTxn *txnid, Dbt *key, Dbt *data, int flags);
@@ -884,5 +884,4 @@ private:
     Dbc(const Dbc &);
     Dbc &operator = (const Dbc &);
 };
-
 #endif /* !_DB_CXX_H_ */
diff --git a/db2/include/db_ext.h b/db2/include/db_ext.h
index b18b10ff7f..f9b3b3a214 100644
--- a/db2/include/db_ext.h
+++ b/db2/include/db_ext.h
@@ -1,4 +1,4 @@
-/* Do not edit: automatically built by dist/distrib. */
+/* DO NOT EDIT: automatically built by dist/distrib. */
 int __db_pgerr __P((DB *, db_pgno_t));
 int __db_pgfmt __P((DB *, db_pgno_t));
 int __db_addrem_log
@@ -25,7 +25,7 @@ int __db_big_print
 int __db_big_read __P((void *, __db_big_args **));
 int __db_ovref_log
     __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
-    u_int32_t, db_pgno_t, DB_LSN *));
+    u_int32_t, db_pgno_t, int32_t, DB_LSN *));
 int __db_ovref_print
    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
 int __db_ovref_read __P((void *, __db_ovref_args **));
@@ -79,7 +79,7 @@ int __db_goff __P((DB *, DBT *,
     u_int32_t, db_pgno_t, void **, u_int32_t *));
 int __db_poff __P((DB *, const DBT *, db_pgno_t *,
     int (*)(DB *, u_int32_t, PAGE **)));
-int __db_ioff __P((DB *, db_pgno_t));
+int __db_ovref __P((DB *, db_pgno_t, int));
 int __db_doff __P((DB *, db_pgno_t, int (*)(DB *, PAGE *)));
 int __db_moff __P((DB *, const DBT *, db_pgno_t));
 void __db_loadme __P((void));
diff --git a/db2/include/db_int.h.src b/db2/include/db_int.h.src
index ebadb35d36..abd93a6e8e 100644
--- a/db2/include/db_int.h.src
+++ b/db2/include/db_int.h.src
@@ -4,7 +4,7 @@
  * Copyright (c) 1996, 1997
  *	Sleepycat Software.  All rights reserved.
  *
- *	@(#)db_int.h.src	10.30 (Sleepycat) 9/23/97
+ *	@(#)db_int.h.src	10.36 (Sleepycat) 10/31/97
  */
 
 #ifndef _DB_INTERNAL_H_
@@ -12,6 +12,7 @@
 
 #include "db.h"				/* Standard DB include file. */
 #include "queue.h"
+#include "os_func.h"
 #include "os_ext.h"
 
 /*******************************************************
@@ -64,12 +65,16 @@
 #undef	SSZA
 #define SSZA(name, field)	((int)&(((name *)0)->field[0]))
 
+/* Macros to return per-process address, offsets based on shared regions. */
+#define	R_ADDR(base, offset)	((void *)((u_int8_t *)((base)->addr) + offset))
+#define	R_OFFSET(base, p)	((u_int8_t *)(p) - (u_int8_t *)(base)->addr)
+
 /* Free and free-string macros that overwrite memory during debugging. */
 #ifdef DEBUG
 #undef	FREE
 #define	FREE(p, len) {							\
 	memset(p, 0xff, len);						\
-	free(p);							\
+	__db_free(p);							\
 }
 #undef	FREES
 #define	FREES(p) {							\
@@ -78,18 +83,18 @@
 #else
 #undef	FREE
 #define	FREE(p, len) {							\
-	free(p);							\
+	__db_free(p);							\
 }
 #undef	FREES
 #define	FREES(p) {							\
-	free(p);							\
+	__db_free(p);							\
 }
 #endif
 
 /* Structure used to print flag values. */
 typedef struct __fn {
 	u_int32_t mask;			/* Flag value. */
-	char	 *name;			/* Flag name. */
+	const char *name;		/* Flag name. */
 } FN;
 
 /* Set, clear and test flags. */
@@ -163,10 +168,8 @@ typedef struct _db_mutex_t {
 	off_t	off;			/* Backing file offset. */
 	u_long	pid;			/* Lock holder: 0 or process pid. */
 #endif
-#ifdef MUTEX_STATISTICS
-	u_long	mutex_set_wait;		/* Blocking mutex: required waiting. */
-	u_long	mutex_set_nowait;	/* Blocking mutex: without waiting. */
-#endif
+	u_int32_t mutex_set_wait;	/* Granted after wait. */
+	u_int32_t mutex_set_nowait;	/* Granted without waiting. */
 } db_mutex_t;
 
 #include "mutex_ext.h"
@@ -177,11 +180,10 @@ typedef struct _db_mutex_t {
 /* Lock/unlock a DB thread. */
 #define	DB_THREAD_LOCK(dbp)						\
 	(F_ISSET(dbp, DB_AM_THREAD) ?					\
-	    __db_mutex_lock((db_mutex_t *)(dbp)->mutexp,  -1,		\
-	        (dbp)->dbenv == NULL ? NULL : (dbp)->dbenv->db_yield) : 0)
+	    __db_mutex_lock((db_mutex_t *)(dbp)->mutexp, -1) : 0)
 #define	DB_THREAD_UNLOCK(dbp)						\
 	(F_ISSET(dbp, DB_AM_THREAD) ?					\
-	    __db_mutex_unlock((db_mutex_t *)(dbp)->mutexp,  -1) : 0)
+	    __db_mutex_unlock((db_mutex_t *)(dbp)->mutexp, -1) : 0)
 
 /* Btree/recno local statistics structure. */
 struct __db_bt_lstat;	typedef struct __db_bt_lstat DB_BTREE_LSTAT;
@@ -260,7 +262,7 @@ typedef struct __dbpginfo {
 #define	IS_ZERO_LSN(LSN)	((LSN).file == 0)
 
 /* Test if we need to log a change. */
-#define	DB_LOGGING(dbp) \
+#define	DB_LOGGING(dbp)							\
 	(F_ISSET(dbp, DB_AM_LOGGING) && !F_ISSET(dbp, DB_AM_RECOVER))
 
 #ifdef DEBUG
diff --git a/db2/include/hash.h b/db2/include/hash.h
index cb8ea350f5..ae6d3843c6 100644
--- a/db2/include/hash.h
+++ b/db2/include/hash.h
@@ -43,7 +43,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- *	@(#)hash.h	10.6 (Sleepycat) 8/18/97
+ *	@(#)hash.h	10.7 (Sleepycat) 11/1/97
  */
 
 /* Cursor structure definitions. */
@@ -179,8 +179,8 @@ typedef struct htab {		/* Memory resident data structure. */
 /* Constraints about number of pages and how much data goes on a page. */
 
 #define	MAX_PAGES(H)	UINT32_T_MAX
-#define	MINFILL		0.25
-#define	ISBIG(H, N)	(((N) > ((H)->hdr->pagesize * MINFILL)) ? 1 : 0)
+#define	MINFILL		4
+#define	ISBIG(H, N)	(((N) > ((H)->hdr->pagesize / MINFILL)) ? 1 : 0)
 
 /* Shorthands for accessing structure */
 #define	NDX_INVALID	0xFFFF
diff --git a/db2/include/hash_auto.h b/db2/include/hash_auto.h
index 5ff1229115..2b8aea8d86 100644
--- a/db2/include/hash_auto.h
+++ b/db2/include/hash_auto.h
@@ -108,7 +108,25 @@ typedef struct _ham_ovfl_args {
 	db_pgno_t	start_pgno;
 	u_int32_t	npages;
 	db_pgno_t	free_pgno;
+	u_int32_t	ovflpoint;
 	DB_LSN 	metalsn;
 } __ham_ovfl_args;
 
+
+#define	DB_ham_copypage	(DB_ham_BEGIN + 8)
+
+typedef struct _ham_copypage_args {
+	u_int32_t type;
+	DB_TXN *txnid;
+	DB_LSN prev_lsn;
+	u_int32_t	fileid;
+	db_pgno_t	pgno;
+	DB_LSN 	pagelsn;
+	db_pgno_t	next_pgno;
+	DB_LSN 	nextlsn;
+	db_pgno_t	nnext_pgno;
+	DB_LSN 	nnextlsn;
+	DBT	page;
+} __ham_copypage_args;
+
 #endif
diff --git a/db2/include/hash_ext.h b/db2/include/hash_ext.h
index 32788c7b8a..5abbb274f0 100644
--- a/db2/include/hash_ext.h
+++ b/db2/include/hash_ext.h
@@ -1,4 +1,4 @@
-/* Do not edit: automatically built by dist/distrib. */
+/* DO NOT EDIT: automatically built by dist/distrib. */
 int __ham_open __P((DB *, DB_INFO *));
 int  __ham_close __P((DB *));
 int __ham_c_iclose __P((DB *, DBC *));
@@ -54,10 +54,17 @@ int __ham_newpgno_read __P((void *, __ham_newpgno_args **));
 int __ham_ovfl_log
     __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
     u_int32_t, db_pgno_t, u_int32_t, db_pgno_t,
-    DB_LSN *));
+    u_int32_t, DB_LSN *));
 int __ham_ovfl_print
    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
 int __ham_ovfl_read __P((void *, __ham_ovfl_args **));
+int __ham_copypage_log
+    __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
+    u_int32_t, db_pgno_t, DB_LSN *, db_pgno_t,
+    DB_LSN *, db_pgno_t, DB_LSN *, DBT *));
+int __ham_copypage_print
+   __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+int __ham_copypage_read __P((void *, __ham_copypage_args **));
 int __ham_init_print __P((DB_ENV *));
 int __ham_init_recover __P((DB_ENV *));
 int __ham_pgin __P((db_pgno_t, void *, DBT *));
@@ -81,7 +88,7 @@ int __ham_item_first __P((HTAB *, HASH_CURSOR *, db_lockmode_t));
 int __ham_item_prev __P((HTAB *, HASH_CURSOR *, db_lockmode_t));
 int __ham_item_next __P((HTAB *, HASH_CURSOR *, db_lockmode_t));
 void __ham_putitem __P((PAGE *p, const DBT *, int));
-int __ham_del_pair __P((HTAB *, HASH_CURSOR *));
+int __ham_del_pair __P((HTAB *, HASH_CURSOR *, int));
 int __ham_replpair __P((HTAB *, HASH_CURSOR *, DBT *, u_int32_t));
 void __ham_onpage_replace __P((PAGE *, size_t, u_int32_t, int32_t,
     int32_t,  DBT *));
@@ -118,4 +125,6 @@ int __ham_splitdata_recover
    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
 int __ham_ovfl_recover
     __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+int __ham_copypage_recover
+  __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
 int __ham_stat __P((DB *, FILE *));
diff --git a/db2/include/lock.h b/db2/include/lock.h
index 8f9e81c0fa..8a927f076e 100644
--- a/db2/include/lock.h
+++ b/db2/include/lock.h
@@ -4,7 +4,7 @@
  * Copyright (c) 1996, 1997
  *	Sleepycat Software.  All rights reserved.
  *
- *	@(#)lock.h	10.8 (Sleepycat) 9/23/97
+ *	@(#)lock.h	10.9 (Sleepycat) 10/25/97
  */
 
 typedef struct __db_lockobj	DB_LOCKOBJ;
@@ -54,8 +54,7 @@ struct __db_lockregion {
 
 /* Macros to lock/unlock the region. */
 #define	LOCK_LOCKREGION(lt)						\
-	(void)__db_mutex_lock(&(lt)->region->hdr.lock,(lt)->fd,		\
-	    (lt)->dbenv == NULL ? NULL : (lt)->dbenv->db_yield)
+	(void)__db_mutex_lock(&(lt)->region->hdr.lock, (lt)->fd)
 #define	UNLOCK_LOCKREGION(lt)						\
 	(void)__db_mutex_unlock(&(lt)->region->hdr.lock, (lt)->fd)
 
diff --git a/db2/include/lock_ext.h b/db2/include/lock_ext.h
index 59d5072bc4..0d0ba148b6 100644
--- a/db2/include/lock_ext.h
+++ b/db2/include/lock_ext.h
@@ -1,4 +1,4 @@
-/* Do not edit: automatically built by dist/distrib. */
+/* DO NOT EDIT: automatically built by dist/distrib. */
 int __lock_getobj  __P((DB_LOCKTAB *,
     u_int32_t, DBT *, u_int32_t type, DB_LOCKOBJ **));
 int __lock_cmp __P((DBT *, DB_LOCKOBJ *));
diff --git a/db2/include/log.h b/db2/include/log.h
index a9c82fa04d..a192a38136 100644
--- a/db2/include/log.h
+++ b/db2/include/log.h
@@ -4,7 +4,7 @@
  * Copyright (c) 1996, 1997
  *	Sleepycat Software.  All rights reserved.
  *
- *	@(#)log.h	10.9 (Sleepycat) 9/23/97
+ *	@(#)log.h	10.15 (Sleepycat) 11/2/97
  */
 
 #ifndef _LOG_H_
@@ -15,6 +15,8 @@ struct __hdr;		typedef struct __hdr HDR;
 struct __log;		typedef struct __log LOG;
 struct __log_persist;	typedef struct __log_persist LOGP;
 
+#define	MEGABYTE	(1024 * 1024)
+
 #define	MAXLFNAME	99999		/* Maximum log file name. */
 #define	LFNAME		"log.%05d"	/* Log file name template. */
 
@@ -23,21 +25,15 @@ struct __log_persist;	typedef struct __log_persist LOGP;
 
 #define	DEFAULT_MAX	(10 * 1048576)	/* 10 Mb. */
 
-/* Macros to return per-process address, offsets. */
-#define	ADDR(base, offset)	((void *)((u_int8_t *)((base)->addr) + offset))
-#define	OFFSET(base, p)		((u_int8_t *)(p) - (u_int8_t *)(base)->addr)
-
 /* Macros to lock/unlock the region and threads. */
 #define	LOCK_LOGTHREAD(dblp)						\
 	if (F_ISSET(dblp, DB_AM_THREAD))				\
-		(void)__db_mutex_lock((dblp)->mutexp, -1,		\
-		    (dblp)->dbenv == NULL ? NULL : (dblp)->dbenv->db_yield)
+		(void)__db_mutex_lock((dblp)->mutexp, -1)
 #define	UNLOCK_LOGTHREAD(dblp)						\
 	if (F_ISSET(dblp, DB_AM_THREAD))				\
 		(void)__db_mutex_unlock((dblp)->mutexp, -1);
 #define	LOCK_LOGREGION(dblp)						\
-	(void)__db_mutex_lock(&((RLAYOUT *)(dblp)->lp)->lock,		\
-	    (dblp)->fd, (dblp)->dbenv == NULL ? NULL : (dblp)->dbenv->db_yield)
+	(void)__db_mutex_lock(&((RLAYOUT *)(dblp)->lp)->lock, (dblp)->fd)
 #define	UNLOCK_LOGREGION(dblp)						\
 	(void)__db_mutex_unlock(&((RLAYOUT *)(dblp)->lp)->lock, (dblp)->fd)
 
@@ -124,7 +120,7 @@ struct __log {
 	DB_LSN	  lsn;			/* LSN at current file offset. */
 	DB_LSN	  c_lsn;		/* LSN of the last checkpoint. */
 	DB_LSN	  s_lsn;		/* LSN of the last sync. */
-	DB_LSN	  span_lsn;		/* LSN spanning buffer write. */
+	DB_LSN	  uw_lsn;		/* LSN of 1st rec not fully on disk. */
 
 	u_int32_t len;			/* Length of the last record. */
 
@@ -132,7 +128,8 @@ struct __log {
 	u_int32_t w_off;		/* Current write offset in the file. */
 
 	time_t	  chkpt;		/* Time of the last checkpoint. */
-	u_int32_t written;		/* Bytes written since checkpoint. */
+
+	DB_LOG_STAT stat;		/* Log statistics. */
 
 	u_int8_t buf[4 * 1024];		/* Log buffer. */
 };
diff --git a/db2/include/log_ext.h b/db2/include/log_ext.h
index bc63d9dac8..c32d1d6af6 100644
--- a/db2/include/log_ext.h
+++ b/db2/include/log_ext.h
@@ -1,4 +1,4 @@
-/* Do not edit: automatically built by dist/distrib. */
+/* DO NOT EDIT: automatically built by dist/distrib. */
 int __log_find __P((DB_LOG *, int *));
 int __log_valid __P((DB_LOG *, LOG *, int));
 int __log_register_log
diff --git a/db2/include/mp.h b/db2/include/mp.h
index 3b71774484..f68f42b144 100644
--- a/db2/include/mp.h
+++ b/db2/include/mp.h
@@ -4,7 +4,7 @@
  * Copyright (c) 1996, 1997
  *	Sleepycat Software.  All rights reserved.
  *
- *	@(#)mp.h	10.16 (Sleepycat) 9/23/97
+ *	@(#)mp.h	10.19 (Sleepycat) 10/25/97
  */
 
 struct __bh;		typedef struct __bh BH;
@@ -22,30 +22,36 @@ struct __mpoolfile;	typedef struct __mpoolfile MPOOLFILE;
 #define	DB_CACHESIZE_DEF	(128 * 1024)
 #define	DB_CACHESIZE_MIN	( 20 * 1024)
 
-/* Macro to return per-process address, offsets. */
-#define	ADDR(base, offset)	((void *)((u_int8_t *)((base)->addr) + offset))
-#define	OFFSET(base, p)		((u_int8_t *)(p) - (u_int8_t *)(base)->addr)
-
 #define	INVALID		0		/* Invalid shared memory offset. */
 #define	TEMPORARY	"<tmp>"		/* Temporary file name. */
 
 /*
- * There are two kinds of locks in the mpool code.  The first is the region
- * lock, used to serialize modifications to all data structures.  The second
- * is a per-buffer header lock.  The locking order is as follows:
+ * There are three ways we do locking in the mpool code:
+ *
+ * Locking a handle mutex to provide concurrency for DB_THREAD operations.
+ * Locking the region mutex to provide mutual exclusion while reading and
+ *    writing structures in the shared region.
+ * Locking buffer header mutexes during I/O.
+ *
+ * The first will not be further described here.  We use the shared mpool
+ * region lock to provide mutual exclusion while reading/modifying all of
+ * the data structures, including the buffer headers.  We use a per-buffer
+ * header lock to wait on buffer I/O.  The order of locking is as follows:
  *
- * Process searching for a buffer:
+ * Searching for a buffer:
  *	Acquire the region lock.
  *	Find the buffer header.
  *	Increment the reference count (guarantee the buffer stays).
- *	If the BH_LOCKED flag is set:
+ *	If the BH_LOCKED flag is set (I/O is going on):
  *		Release the region lock.
+ *		Request the buffer lock.
+ *		The I/O will complete...
  *		Acquire the buffer lock.
  *		Release the buffer lock.
  *		Acquire the region lock.
  *	Return the buffer.
  *
- * Process reading/writing a buffer:
+ * Reading/writing a buffer:
  *	Acquire the region lock.
  *	Find/create the buffer header.
  *	If reading, increment the reference count (guarantee the buffer stays).
@@ -69,8 +75,7 @@ struct __mpoolfile;	typedef struct __mpoolfile MPOOLFILE;
 
 #define	LOCKHANDLE(dbmp, mutexp)					\
 	if (F_ISSET(dbmp, MP_LOCKHANDLE))				\
-		(void)__db_mutex_lock(mutexp, (dbmp)->fd,		\
-		(dbmp)->dbenv == NULL ? NULL : (dbmp)->dbenv->db_yield)
+		(void)__db_mutex_lock(mutexp, (dbmp)->fd)
 #define	UNLOCKHANDLE(dbmp, mutexp)					\
 	if (F_ISSET(dbmp, MP_LOCKHANDLE))				\
 		(void)__db_mutex_unlock(mutexp, (dbmp)->fd)
@@ -78,8 +83,7 @@ struct __mpoolfile;	typedef struct __mpoolfile MPOOLFILE;
 #define	LOCKREGION(dbmp)						\
 	if (F_ISSET(dbmp, MP_LOCKREGION))				\
 		(void)__db_mutex_lock(&((RLAYOUT *)(dbmp)->mp)->lock,	\
-		    (dbmp)->fd,						\
-		    (dbmp)->dbenv == NULL ? NULL : (dbmp)->dbenv->db_yield)
+		    (dbmp)->fd)
 #define	UNLOCKREGION(dbmp)						\
 	if (F_ISSET(dbmp, MP_LOCKREGION))				\
 		(void)__db_mutex_unlock(&((RLAYOUT *)(dbmp)->mp)->lock,	\
@@ -87,8 +91,7 @@ struct __mpoolfile;	typedef struct __mpoolfile MPOOLFILE;
 
 #define	LOCKBUFFER(dbmp, bhp)						\
 	if (F_ISSET(dbmp, MP_LOCKREGION))				\
-		(void)__db_mutex_lock(&(bhp)->mutex, (dbmp)->fd,	\
-		    (dbmp)->dbenv == NULL ? NULL : (dbmp)->dbenv->db_yield)
+		(void)__db_mutex_lock(&(bhp)->mutex, (dbmp)->fd)
 #define	UNLOCKBUFFER(dbmp, bhp)						\
 	if (F_ISSET(dbmp, MP_LOCKREGION))				\
 		(void)__db_mutex_unlock(&(bhp)->mutex, (dbmp)->fd)
@@ -250,8 +253,8 @@ struct __bh {
 #define	BH_WRITE	0x020		/* Page scheduled for writing. */
 	u_int16_t  flags;
 
-	SH_TAILQ_ENTRY	q;		/* LRU list of bucket headers. */
-	SH_TAILQ_ENTRY	mq;		/* MPOOLFILE list of bucket headers. */
+	SH_TAILQ_ENTRY	q;		/* LRU queue. */
+	SH_TAILQ_ENTRY	hq;		/* MPOOL hash bucket queue. */
 
 	db_pgno_t pgno;			/* Underlying MPOOLFILE page number. */
 	size_t	  mf_offset;		/* Associated MPOOLFILE offset. */
diff --git a/db2/include/mp_ext.h b/db2/include/mp_ext.h
index 3934c130a8..49d86ba2e5 100644
--- a/db2/include/mp_ext.h
+++ b/db2/include/mp_ext.h
@@ -1,4 +1,4 @@
-/* Do not edit: automatically built by dist/distrib. */
+/* DO NOT EDIT: automatically built by dist/distrib. */
 int __memp_bhwrite
     __P((DB_MPOOL *, MPOOLFILE *, BH *, int *, int *));
 int __memp_pgread __P((DB_MPOOLFILE *, BH *, int));
diff --git a/db2/include/mutex_ext.h b/db2/include/mutex_ext.h
index ff46b6a404..cb2d4886af 100644
--- a/db2/include/mutex_ext.h
+++ b/db2/include/mutex_ext.h
@@ -1,4 +1,4 @@
-/* Do not edit: automatically built by dist/distrib. */
+/* DO NOT EDIT: automatically built by dist/distrib. */
 void __db_mutex_init __P((db_mutex_t *, off_t));
-int __db_mutex_lock __P((db_mutex_t *, int, int (*)(void)));
+int __db_mutex_lock __P((db_mutex_t *, int));
 int __db_mutex_unlock __P((db_mutex_t *, int));
diff --git a/db2/include/os_ext.h b/db2/include/os_ext.h
index 59d72acf12..e48a1e9407 100644
--- a/db2/include/os_ext.h
+++ b/db2/include/os_ext.h
@@ -1,19 +1,19 @@
-/* Do not edit: automatically built by dist/distrib. */
+/* DO NOT EDIT: automatically built by dist/distrib. */
 int __db_abspath __P((const char *));
-char *__db_rpath __P((const char *));
-int __db_dir __P((DB_ENV *, const char *, char ***, int *));
-void __db_dirf __P((DB_ENV *, char **, int));
+int __os_dirlist __P((const char *, char ***, int *));
+void __os_dirfree __P((char **, int));
 int __db_fileid __P((DB_ENV *, const char *, int, u_int8_t *));
-int __db_lseek __P((int, size_t, db_pgno_t, u_long, int));
-int __db_mmap __P((int, size_t, int, int, void *));
-int __db_munmap __P((void *, size_t));
-int __db_oflags __P((int));
-int __db_fdopen __P((const char *, int, int, int, int *));
 int __db_fsync __P((int));
+int __os_map __P((int, size_t, int, int, void **));
+int __os_unmap __P((void *, size_t));
+int __db_oflags __P((int));
+int __db_open __P((const char *, int, int, int, int *));
 int __db_close __P((int));
+char *__db_rpath __P((const char *));
 int __db_read __P((int, void *, size_t, ssize_t *));
 int __db_write __P((int, void *, size_t, ssize_t *));
-int __db_sleep __P((u_long, u_long));
-int __db_exists __P((const char *, int *));
-int __db_stat __P((DB_ENV *, const char *, int, off_t *, off_t *));
+int __os_seek __P((int, size_t, db_pgno_t, u_long, int));
+int __os_sleep __P((u_long, u_long));
+int __os_exists __P((const char *, int *));
+int __os_ioinfo __P((const char *, int, off_t *, off_t *));
 int __db_unlink __P((const char *));
diff --git a/db2/include/os_func.h b/db2/include/os_func.h
new file mode 100644
index 0000000000..0a72942903
--- /dev/null
+++ b/db2/include/os_func.h
@@ -0,0 +1,76 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1997
+ *	Sleepycat Software.  All rights reserved.
+ *
+ *	@(#)os_func.h	10.2 (Sleepycat) 10/28/97
+ */
+
+/* Calls which can be replaced by the application. */
+struct __db_jumptab {
+	void   *(*db_calloc) __P((size_t, size_t));	/* DB_FUNC_CALLOC */
+	int	(*db_close) __P((int));			/* DB_FUNC_CLOSE */
+	void	(*db_dirfree) __P((char **, int));	/* DB_FUNC_DIRFREE */
+	int	(*db_dirlist)				/* DB_FUNC_DIRLIST */
+		    __P((const char *, char ***, int *));
+	int	(*db_exists)				/* DB_FUNC_EXISTS */
+		    __P((const char *, int *));
+	void	(*db_free) __P((void *));		/* DB_FUNC_FREE */
+	int	(*db_fsync) __P((int));			/* DB_FUNC_FSYNC */
+	int	(*db_ioinfo)				/* DB_FUNC_IOINFO */
+		    __P((const char *, int, off_t *, off_t *));
+	void   *(*db_malloc) __P((size_t));		/* DB_FUNC_MALLOC */
+	int	(*db_map)				/* DB_FUNC_MAP */
+		    __P((int, size_t, int, int, void **));
+	int	(*db_open)				/* DB_FUNC_OPEN */
+		    __P((const char *, int, ...));
+	ssize_t	(*db_read) __P((int, void *, size_t));	/* DB_FUNC_READ */
+	void   *(*db_realloc) __P((void *, size_t));	/* DB_FUNC_REALLOC */
+	int	(*db_seek)				/* DB_FUNC_SEEK */
+		    __P((int, size_t, db_pgno_t, u_long, int));
+	int	(*db_sleep) __P((u_long, u_long));	/* DB_FUNC_SLEEP */
+	char   *(*db_strdup) __P((const char *));	/* DB_FUNC_STRDUP */
+	int	(*db_unlink) __P((const char *));	/* DB_FUNC_UNLINK */
+	int	(*db_unmap) __P((void *, size_t));	/* DB_FUNC_UNMAP */
+	ssize_t	(*db_write)				/* DB_FUNC_WRITE */
+		    __P((int, const void *, size_t));
+	int	(*db_yield) __P((void));		/* DB_FUNC_YIELD */
+};
+
+extern struct __db_jumptab __db_jump;
+
+/*
+ * Names used by DB to call through the jump table.
+ *
+ * The naming scheme goes like this: if the functionality the application can
+ * replace is the same as the DB functionality, e.g., calloc, or dirlist, then
+ * we use the name __db_XXX, and the application is expected to replace the
+ * complete functionality, which may or may not map directly to an ANSI C or
+ * POSIX 1003.1 interface.  If the functionality that the application replaces
+ * only underlies what the DB os directory exports to other parts of DB, e.g.,
+ * read, then the name __os_XXX is used, and the application can only replace
+ * the underlying functionality.  Under most circumstances, the os directory
+ * part of DB is the only code that should use the __os_XXX names; all other
+ * parts of DB should be calling __db_XXX functions.
+ */
+#define	__db_calloc	__db_jump.db_calloc
+#define	__os_close	__db_jump.db_close	/* __db_close is a wrapper. */
+#define	__db_dirfree	__db_jump.db_dirfree
+#define	__db_dirlist	__db_jump.db_dirlist
+#define	__db_exists	__db_jump.db_exists
+#define	__db_free	__db_jump.db_free
+#define	__os_fsync	__db_jump.db_fsync	/* __db_fsync is a wrapper. */
+#define	__db_ioinfo	__db_jump.db_ioinfo
+#define	__db_malloc	__db_jump.db_malloc
+#define	__db_map	__db_jump.db_map
+#define	__os_open	__db_jump.db_open	/* __db_open is a wrapper. */
+#define	__os_read	__db_jump.db_read	/* __db_read is a wrapper. */
+#define	__db_realloc	__db_jump.db_realloc
+#define	__db_seek	__db_jump.db_seek
+#define	__db_sleep	__db_jump.db_sleep
+#define	__db_strdup	__db_jump.db_strdup
+#define	__os_unlink	__db_jump.db_unlink	/* __db_unlink is a wrapper. */
+#define	__db_unmap	__db_jump.db_unmap
+#define	__os_write	__db_jump.db_write	/* __db_write is a wrapper. */
+#define	__db_yield	__db_jump.db_yield
diff --git a/db2/include/txn.h b/db2/include/txn.h
index 8bb3976c1c..c64ac3fc52 100644
--- a/db2/include/txn.h
+++ b/db2/include/txn.h
@@ -4,7 +4,7 @@
  * Copyright (c) 1996, 1997
  *	Sleepycat Software.  All rights reserved.
  *
- *	@(#)txn.h	10.10 (Sleepycat) 9/23/97
+ *	@(#)txn.h	10.11 (Sleepycat) 10/25/97
  */
 #ifndef	_TXN_H_
 #define	_TXN_H_
@@ -96,15 +96,13 @@ struct __db_txnregion {
 /* Macros to lock/unlock the region and threads. */
 #define	LOCK_TXNTHREAD(tmgrp)						\
 	if (F_ISSET(tmgrp, DB_THREAD))					\
-		(void)__db_mutex_lock((tmgrp)->mutexp, -1,		\
-		    (tmgrp)->dbenv == NULL ? NULL : (tmgrp)->dbenv->db_yield)
+		(void)__db_mutex_lock((tmgrp)->mutexp, -1)
 #define	UNLOCK_TXNTHREAD(tmgrp)						\
 	if (F_ISSET(tmgrp, DB_THREAD))					\
 		(void)__db_mutex_unlock((tmgrp)->mutexp, -1)
 
 #define	LOCK_TXNREGION(tmgrp)						\
-	(void)__db_mutex_lock(&(tmgrp)->region->hdr.lock,(tmgrp)->fd,	\
-	    (tmgrp)->dbenv == NULL ? NULL : (tmgrp)->dbenv->db_yield)
+	(void)__db_mutex_lock(&(tmgrp)->region->hdr.lock, (tmgrp)->fd)
 #define	UNLOCK_TXNREGION(tmgrp)						\
 	(void)__db_mutex_unlock(&(tmgrp)->region->hdr.lock, (tmgrp)->fd)
 
diff --git a/db2/include/txn_ext.h b/db2/include/txn_ext.h
index 8ba0b0c44e..9b617bb68c 100644
--- a/db2/include/txn_ext.h
+++ b/db2/include/txn_ext.h
@@ -1,4 +1,4 @@
-/* Do not edit: automatically built by dist/distrib. */
+/* DO NOT EDIT: automatically built by dist/distrib. */
 int __txn_regop_log
     __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
     u_int32_t));
diff --git a/db2/lock/lock.c b/db2/lock/lock.c
index a2a3b208f2..f1223a9fa6 100644
--- a/db2/lock/lock.c
+++ b/db2/lock/lock.c
@@ -8,7 +8,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)lock.c	10.36 (Sleepycat) 9/24/97";
+static const char sccsid[] = "@(#)lock.c	10.38 (Sleepycat) 10/25/97";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -200,7 +200,7 @@ lock_open(path, flags, mode, dbenv, ltp)
 	/*
 	 * Create the lock table structure.
 	 */
-	if ((lt = (DB_LOCKTAB *)calloc(1, sizeof(DB_LOCKTAB))) == NULL) {
+	if ((lt = (DB_LOCKTAB *)__db_calloc(1, sizeof(DB_LOCKTAB))) == NULL) {
 		__db_err(dbenv, "%s", strerror(ENOMEM));
 		return (ENOMEM);
 	}
@@ -269,7 +269,7 @@ out:	if (lt->region != NULL)
 		(void)__db_rclose(lt->dbenv, lt->fd, lt->region);
 	if (LF_ISSET(DB_CREATE))
 		(void)lock_unlink(path, 1, lt->dbenv);
-	free(lt);
+	__db_free(lt);
 	return (ret);
 }
 
@@ -505,7 +505,7 @@ lock_close(lt)
 		return (ret);
 
 	/* Free lock table. */
-	free(lt);
+	__db_free(lt);
 	return (0);
 }
 
@@ -728,8 +728,7 @@ __lock_get_internal(lt, locker, flags, obj, lock_mode, lockp)
 	 */
 	(void)__db_mutex_init(&newl->mutex,
 	    MUTEX_LOCK_OFFSET(lt->region, &newl->mutex));
-	(void)__db_mutex_lock(&newl->mutex, lt->fd,
-	    lt->dbenv == NULL ? NULL : lt->dbenv->db_yield);
+	(void)__db_mutex_lock(&newl->mutex, lt->fd);
 
 	/*
 	 * Now, insert the lock onto its locker's list.
@@ -760,8 +759,7 @@ __lock_get_internal(lt, locker, flags, obj, lock_mode, lockp)
 		if (lrp->detect != DB_LOCK_NORUN)
 			ret = lock_detect(lt, 0, lrp->detect);
 
-		(void)__db_mutex_lock(&newl->mutex,
-		    lt->fd, lt->dbenv == NULL ? NULL : lt->dbenv->db_yield);
+		(void)__db_mutex_lock(&newl->mutex, lt->fd);
 
 		LOCK_LOCKREGION(lt);
 		if (newl->status != DB_LSTAT_PENDING) {
@@ -975,11 +973,9 @@ __lock_dump_region(lt, flags)
 #ifndef HAVE_SPINLOCKS
 	printf("Mutex: off %lu", (u_long)lrp->hdr.lock.off);
 #endif
-#ifdef MUTEX_STATISTICS
 	printf(" waits %lu nowaits %lu",
 	    (u_long)lrp->hdr.lock.mutex_set_wait,
 	    (u_long)lrp->hdr.lock.mutex_set_nowait);
-#endif
 	printf("\n%s:%lu\t%s:%lu\t%s:%lu\t%s:%lu\n",
 	    "nconflicts ", (u_long)lrp->nconflicts,
 	    "nrequests  ", (u_long)lrp->nrequests,
diff --git a/db2/lock/lock_deadlock.c b/db2/lock/lock_deadlock.c
index f947f901c3..566021fe89 100644
--- a/db2/lock/lock_deadlock.c
+++ b/db2/lock/lock_deadlock.c
@@ -11,7 +11,7 @@
 static const char copyright[] =
 "@(#) Copyright (c) 1997\n\
 	Sleepycat Software Inc.  All rights reserved.\n";
-static const char sccsid[] = "@(#)lock_deadlock.c	10.21 (Sleepycat) 9/6/97";
+static const char sccsid[] = "@(#)lock_deadlock.c	10.25 (Sleepycat) 11/1/97";
 #endif
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -50,16 +50,19 @@ typedef struct {
 	int		valid;
 	u_int32_t	id;
 	DB_LOCK		last_lock;
+	db_pgno_t	pgno;
 } locker_info;
 
 static int  __dd_abort __P((DB_ENV *, locker_info *));
-static int  __dd_build __P((DB_ENV *, u_int32_t **, int *, locker_info **));
-#ifdef DEBUG
-static void __dd_debug __P((DB_ENV *, locker_info *, u_int32_t *, int));
-#endif
+static int  __dd_build
+	__P((DB_ENV *, u_int32_t **, u_int32_t *, locker_info **));
 static u_int32_t
 	   *__dd_find __P((u_int32_t *, locker_info *, u_int32_t));
 
+#ifdef DEBUG
+static void __dd_debug __P((DB_ENV *, locker_info *, u_int32_t *, u_int32_t));
+#endif
+
 int
 lock_detect(lt, flags, atype)
 	DB_LOCKTAB *lt;
@@ -68,8 +71,8 @@ lock_detect(lt, flags, atype)
 {
 	DB_ENV *dbenv;
 	locker_info *idmap;
-	u_int32_t *bitmap, *deadlock, killid;
-	int do_pass, i, nlockers, nentries, ret;
+	u_int32_t *bitmap, *deadlock, i, killid, nentries, nlockers;
+	int do_pass, ret;
 
 	/* Validate arguments. */
 	if ((ret =
@@ -77,17 +80,16 @@ lock_detect(lt, flags, atype)
 		return (ret);
 
 	/* Check if a detector run is necessary. */
-	do_pass = 1;
 	dbenv = lt->dbenv;
 	if (LF_ISSET(DB_LOCK_CONFLICT)) {
 		/* Make a pass every time a lock waits. */
 		LOCK_LOCKREGION(lt);
 		do_pass = dbenv->lk_info->region->need_dd != 0;
 		UNLOCK_LOCKREGION(lt);
-	}
 
-	if (!do_pass)
-		return (0);
+		if (!do_pass)
+			return (0);
+	}
 
 	/* Build the waits-for bitmap. */
 	if ((ret = __dd_build(dbenv, &bitmap, &nlockers, &idmap)) != 0)
@@ -118,8 +120,7 @@ lock_detect(lt, flags, atype)
 
 			if (killid == BAD_KILLID) {
 				__db_err(dbenv,
-				    "warning: could not find %s",
-				    "locker to abort");
+				    "warning: could not find locker to abort");
 				break;
 			}
 
@@ -137,11 +138,8 @@ lock_detect(lt, flags, atype)
 			/*
 			 * We are trying to calculate the id of the
 			 * locker whose entry is indicated by deadlock.
-			 * We know that this is less than nlockers, so
-			 * the cast below is valid.
 			 */
-			killid =
-			    (u_int32_t)((deadlock - bitmap) / nentries);
+			killid = (deadlock - bitmap) / nentries;
 			break;
 		case DB_LOCK_YOUNGEST:
 			/*
@@ -155,8 +153,7 @@ lock_detect(lt, flags, atype)
 
 			if (killid == BAD_KILLID) {
 				__db_err(dbenv,
-				    "warning: could not find %s",
-				    "locker to abort");
+				    "warning: could not find locker to abort");
 				break;
 			}
 			/*
@@ -184,8 +181,8 @@ lock_detect(lt, flags, atype)
 			    "warning: unable to abort locker %lx",
 			    (u_long)idmap[killid].id);
 	}
-	free(bitmap);
-	free(idmap);
+	__db_free(bitmap);
+	__db_free(idmap);
 
 	return (ret);
 }
@@ -197,15 +194,15 @@ lock_detect(lt, flags, atype)
 static int
 __dd_build(dbenv, bmp, nlockers, idmap)
 	DB_ENV *dbenv;
-	u_int32_t **bmp;
-	int *nlockers;
+	u_int32_t **bmp, *nlockers;
 	locker_info **idmap;
 {
-	DB_LOCKTAB *lt;
-	DB_LOCKOBJ *op, *lockerp;
 	struct __db_lock *lp;
-	u_int32_t *bitmap, count, *entryp, i, id, nentries, *tmpmap;
+	DB_LOCKTAB *lt;
+	DB_LOCKOBJ *op, *lo, *lockerp;
+	u_int8_t *pptr;
 	locker_info *id_array;
+	u_int32_t *bitmap, count, *entryp, i, id, nentries, *tmpmap;
 	int is_first, ret;
 
 	lt = dbenv->lk_info;
@@ -238,24 +235,24 @@ retry:	count = lt->region->nlockers;
 	 * We can probably save the malloc's between iterations just
 	 * reallocing if necessary because count grew by too much.
 	 */
-	if ((bitmap = (u_int32_t *)calloc((size_t)count,
+	if ((bitmap = (u_int32_t *)__db_calloc((size_t)count,
 	    sizeof(u_int32_t) * nentries)) == NULL) {
 		__db_err(dbenv, "%s", strerror(ENOMEM));
 		return (ENOMEM);
 	}
 
 	if ((tmpmap =
-	    (u_int32_t *)calloc(sizeof(u_int32_t), nentries)) == NULL) {
+	    (u_int32_t *)__db_calloc(sizeof(u_int32_t), nentries)) == NULL) {
 		__db_err(dbenv, "%s", strerror(ENOMEM));
-		free(bitmap);
+		__db_free(bitmap);
 		return (ENOMEM);
 	}
 
-	if ((id_array = (locker_info *)calloc((size_t)count,
+	if ((id_array = (locker_info *)__db_calloc((size_t)count,
 	    sizeof(locker_info))) == NULL) {
 		__db_err(dbenv, "%s", strerror(ENOMEM));
-		free(bitmap);
-		free(tmpmap);
+		__db_free(bitmap);
+		__db_free(tmpmap);
 		return (ENOMEM);
 	}
 
@@ -264,9 +261,9 @@ retry:	count = lt->region->nlockers;
 	 */
 	LOCK_LOCKREGION(lt);
 	if (lt->region->nlockers > count) {
-		free(bitmap);
-		free(tmpmap);
-		free(id_array);
+		__db_free(bitmap);
+		__db_free(tmpmap);
+		__db_free(id_array);
 		goto retry;
 	}
 
@@ -326,9 +323,8 @@ retry:	count = lt->region->nlockers;
 			    lp != NULL;
 			    is_first = 0,
 			    lp = SH_TAILQ_NEXT(lp, links, __db_lock)) {
-				if ((ret = __lock_getobj(lt,
-				    lp->holder, NULL, DB_LOCK_LOCKER, &lockerp))
-				    != 0) {
+				if ((ret = __lock_getobj(lt, lp->holder,
+				    NULL, DB_LOCK_LOCKER, &lockerp)) != 0) {
 					__db_err(dbenv,
 					    "warning unable to find object");
 					continue;
@@ -369,8 +365,16 @@ retry:	count = lt->region->nlockers;
 			continue;
 		}
 		lp = SH_LIST_FIRST(&lockerp->heldby, __db_lock);
-		if (lp != NULL)
+		if (lp != NULL) {
 			id_array[id].last_lock = LOCK_TO_OFFSET(lt, lp);
+			lo = (DB_LOCKOBJ *)((u_int8_t *)lp + lp->obj);
+			pptr = SH_DBT_PTR(&lo->lockobj);
+			if (lo->lockobj.size >= sizeof(db_pgno_t))
+				memcpy(&id_array[id].pgno, pptr,
+				    sizeof(db_pgno_t));
+			else
+				id_array[id].pgno = 0;
+		}
 	}
 
 	/* Pass complete, reset the deadlock detector bit. */
@@ -384,21 +388,20 @@ retry:	count = lt->region->nlockers;
 	*nlockers = id;
 	*idmap = id_array;
 	*bmp = bitmap;
-	free(tmpmap);
+	__db_free(tmpmap);
 	return (0);
 }
 
 static u_int32_t *
 __dd_find(bmp, idmap, nlockers)
-	u_int32_t *bmp;
+	u_int32_t *bmp, nlockers;
 	locker_info *idmap;
-	u_int32_t nlockers;
 {
 	u_int32_t i, j, nentries, *mymap, *tmpmap;
 
 	/*
-	 * For each locker, or in the bits from the lockers
-	 * on which that locker is waiting.
+	 * For each locker, OR in the bits from the lockers on which that
+	 * locker is waiting.
 	 */
 	nentries = ALIGN(nlockers, 32) / 32;
 	for (mymap = bmp, i = 0; i < nlockers; i++, mymap += nentries) {
@@ -422,9 +425,9 @@ __dd_abort(dbenv, info)
 	DB_ENV *dbenv;
 	locker_info *info;
 {
+	struct __db_lock *lockp;
 	DB_LOCKTAB *lt;
 	DB_LOCKOBJ *lockerp, *sh_obj;
-	struct __db_lock *lockp;
 	int ret;
 
 	lt = dbenv->lk_info;
@@ -459,19 +462,17 @@ static void
 __dd_debug(dbenv, idmap, bitmap, nlockers)
 	DB_ENV *dbenv;
 	locker_info *idmap;
-	u_int32_t *bitmap;
-	int nlockers;
+	u_int32_t *bitmap, nlockers;
 {
-	u_int32_t *mymap;
-	int i, j, nentries;
+	u_int32_t i, j, *mymap, nentries;
 	char *msgbuf;
 
 	__db_err(dbenv, "Waitsfor array");
 	__db_err(dbenv, "waiter\twaiting on");
 	/*
-	 * Alloc space to print 10 bytes per item waited on.
+	 * Allocate space to print 10 bytes per item waited on.
 	 */
-	if ((msgbuf = (char *)malloc((nlockers + 1) * 10 + 64)) == NULL) {
+	if ((msgbuf = (char *)__db_malloc((nlockers + 1) * 10 + 64)) == NULL) {
 		__db_err(dbenv, "%s", strerror(ENOMEM));
 		return;
 	}
@@ -480,7 +481,8 @@ __dd_debug(dbenv, idmap, bitmap, nlockers)
 	for (mymap = bitmap, i = 0; i < nlockers; i++, mymap += nentries) {
 		if (!idmap[i].valid)
 			continue;
-		sprintf(msgbuf, "%lx\t\t", (u_long)idmap[i].id);/* Waiter. */
+		sprintf(msgbuf,					/* Waiter. */
+		    "%lx/%lu:\t", (u_long)idmap[i].id, (u_long)idmap[i].pgno);
 		for (j = 0; j < nlockers; j++)
 			if (ISSET_MAP(mymap, j))
 				sprintf(msgbuf, "%s %lx", msgbuf,
@@ -490,6 +492,6 @@ __dd_debug(dbenv, idmap, bitmap, nlockers)
 		__db_err(dbenv, msgbuf);
 	}
 
-	free(msgbuf);
+	__db_free(msgbuf);
 }
 #endif
diff --git a/db2/log/log.c b/db2/log/log.c
index 893c1ee402..17681f8e0f 100644
--- a/db2/log/log.c
+++ b/db2/log/log.c
@@ -7,7 +7,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)log.c	10.27 (Sleepycat) 9/23/97";
+static const char sccsid[] = "@(#)log.c	10.33 (Sleepycat) 11/2/97";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -67,11 +67,11 @@ log_open(path, flags, mode, dbenv, lpp)
 	}
 
 	/* Create and initialize the DB_LOG structure. */
-	if ((dblp = (DB_LOG *)calloc(1, sizeof(DB_LOG))) == NULL)
+	if ((dblp = (DB_LOG *)__db_calloc(1, sizeof(DB_LOG))) == NULL)
 		return (ENOMEM);
 
-	if (path != NULL && (dblp->dir = strdup(path)) == NULL) {
-		free(dblp);
+	if (path != NULL && (dblp->dir = __db_strdup(path)) == NULL) {
+		__db_free(dblp);
 		return (ENOMEM);
 	}
 
@@ -329,10 +329,12 @@ __log_find(dblp, valp)
 	}
 
 	/* Get the list of file names. */
-	ret = __db_dir(dblp->dbenv, dir, &names, &fcnt);
+	ret = __db_dirlist(dir, &names, &fcnt);
 	FREES(p);
-	if (ret != 0)
+	if (ret != 0) {
+		__db_err(dblp->dbenv, "%s: %s", dir, strerror(ret));
 		return (ret);
+	}
 
 	/*
 	 * Search for a valid log file name, return a value of 0 on
@@ -350,7 +352,7 @@ __log_find(dblp, valp)
 		}
 
 	/* Discard the list. */
-	__db_dirf(dblp->dbenv, names, fcnt);
+	__db_dirfree(names, fcnt);
 
 	return (ret);
 }
@@ -376,10 +378,10 @@ __log_valid(dblp, lp, cnt)
 		return (ret);
 
 	fd = -1;
-	if ((ret = __db_fdopen(p,
+	if ((ret = __db_open(p,
 	    DB_RDONLY | DB_SEQUENTIAL,
 	    DB_RDONLY | DB_SEQUENTIAL, 0, &fd)) != 0 ||
-	    (ret = __db_lseek(fd, 0, 0, sizeof(HDR), SEEK_SET)) != 0 ||
+	    (ret = __db_seek(fd, 0, 0, sizeof(HDR), SEEK_SET)) != 0 ||
 	    (ret = __db_read(fd, &persist, sizeof(LOGP), &nw)) != 0 ||
 	    nw != sizeof(LOGP)) {
 		if (ret == 0)
@@ -474,3 +476,39 @@ log_unlink(path, force, dbenv)
 	return (__db_runlink(dbenv,
 	    DB_APP_LOG, path, DB_DEFAULT_LOG_FILE, force));
 }
+
+/*
+ * log_stat --
+ *	Return LOG statistics.
+ */
+int
+log_stat(dblp, gspp, db_malloc)
+	DB_LOG *dblp;
+	DB_LOG_STAT **gspp;
+	void *(*db_malloc) __P((size_t));
+{
+	LOG *lp;
+
+	*gspp = NULL;
+	lp = dblp->lp;
+
+	if ((*gspp = db_malloc == NULL ?
+	    (DB_LOG_STAT *)__db_malloc(sizeof(**gspp)) :
+	    (DB_LOG_STAT *)db_malloc(sizeof(**gspp))) == NULL)
+		return (ENOMEM);
+
+	/* Copy out the global statistics. */
+	LOCK_LOGREGION(dblp);
+	**gspp = lp->stat;
+
+	(*gspp)->st_magic = lp->persist.magic;
+	(*gspp)->st_version = lp->persist.version;
+	(*gspp)->st_mode = lp->persist.mode;
+	(*gspp)->st_lg_max = lp->persist.lg_max;
+
+	(*gspp)->st_region_nowait = lp->rlayout.lock.mutex_set_nowait;
+	(*gspp)->st_region_wait = lp->rlayout.lock.mutex_set_wait;
+	UNLOCK_LOGREGION(dblp);
+
+	return (0);
+}
diff --git a/db2/log/log_archive.c b/db2/log/log_archive.c
index 6904a2c726..140ea31fd1 100644
--- a/db2/log/log_archive.c
+++ b/db2/log/log_archive.c
@@ -8,7 +8,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)log_archive.c	10.26 (Sleepycat) 9/23/97";
+static const char sccsid[] = "@(#)log_archive.c	10.28 (Sleepycat) 10/28/97";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -68,7 +68,7 @@ log_archive(dblp, listp, flags, db_malloc)
 	 * but that's just not possible.
 	 */
 	if (LF_ISSET(DB_ARCH_ABS)) {
-		__set_errno (0);
+		errno = 0;
 		if ((pref = getcwd(buf, sizeof(buf))) == NULL)
 			return (errno == 0 ? ENOMEM : errno);
 	} else
@@ -84,7 +84,7 @@ log_archive(dblp, listp, flags, db_malloc)
 		if ((ret = log_get(dblp, &stable_lsn, &rec, DB_LAST)) != 0)
 			return (ret);
 		if (F_ISSET(dblp, DB_AM_THREAD))
-			free(rec.data);
+			__db_free(rec.data);
 		fnum = stable_lsn.file;
 		break;
 	case 0:
@@ -102,7 +102,7 @@ log_archive(dblp, listp, flags, db_malloc)
 #define	LIST_INCREMENT	64
 	/* Get some initial space. */
 	if ((array =
-	    (char **)malloc(sizeof(char *) * (array_size = 10))) == NULL)
+	    (char **)__db_malloc(sizeof(char *) * (array_size = 10))) == NULL)
 		return (ENOMEM);
 	array[0] = NULL;
 
@@ -115,7 +115,7 @@ log_archive(dblp, listp, flags, db_malloc)
 
 		if (n >= array_size - 1) {
 			array_size += LIST_INCREMENT;
-			if ((array = (char **)realloc(array,
+			if ((array = (char **)__db_realloc(array,
 			    sizeof(char *) * array_size)) == NULL) {
 				ret = ENOMEM;
 				goto err;
@@ -127,7 +127,7 @@ log_archive(dblp, listp, flags, db_malloc)
 				goto err;
 			FREES(name);
 		} else if ((p = __db_rpath(name)) != NULL) {
-			if ((array[n] = (char *)strdup(p + 1)) == NULL) {
+			if ((array[n] = (char *)__db_strdup(p + 1)) == NULL) {
 				ret = ENOMEM;
 				goto err;
 			}
@@ -158,7 +158,7 @@ log_archive(dblp, listp, flags, db_malloc)
 err:	if (array != NULL) {
 		for (arrayp = array; *arrayp != NULL; ++arrayp)
 			FREES(*arrayp);
-		free(array);
+		__db_free(array);
 	}
 	return (ret);
 }
@@ -182,7 +182,7 @@ __build_data(dblp, pref, listp, db_malloc)
 
 	/* Get some initial space. */
 	if ((array =
-	    (char **)malloc(sizeof(char *) * (array_size = 10))) == NULL)
+	    (char **)__db_malloc(sizeof(char *) * (array_size = 10))) == NULL)
 		return (ENOMEM);
 	array[0] = NULL;
 
@@ -200,7 +200,7 @@ __build_data(dblp, pref, listp, db_malloc)
 		memcpy(&rectype, rec.data, sizeof(rectype));
 		if (rectype != DB_log_register) {
 			if (F_ISSET(dblp, DB_AM_THREAD)) {
-				free(rec.data);
+				__db_free(rec.data);
 				rec.data = NULL;
 			}
 			continue;
@@ -214,25 +214,25 @@ __build_data(dblp, pref, listp, db_malloc)
 
 		if (n >= array_size - 1) {
 			array_size += LIST_INCREMENT;
-			if ((array = (char **)realloc(array,
+			if ((array = (char **)__db_realloc(array,
 			    sizeof(char *) * array_size)) == NULL) {
 				ret = ENOMEM;
 				goto lg_free;
 			}
 		}
 
-		if ((array[n] = (char *)strdup(argp->name.data)) == NULL) {
+		if ((array[n] = (char *)__db_strdup(argp->name.data)) == NULL) {
 			ret = ENOMEM;
 lg_free:		if (F_ISSET(&rec, DB_DBT_MALLOC) && rec.data != NULL)
-				free(rec.data);
+				__db_free(rec.data);
 			goto err1;
 		}
 
 		array[++n] = NULL;
-		free(argp);
+		__db_free(argp);
 
 		if (F_ISSET(dblp, DB_AM_THREAD)) {
-			free(rec.data);
+			__db_free(rec.data);
 			rec.data = NULL;
 		}
 	}
@@ -289,7 +289,7 @@ lg_free:		if (F_ISSET(&rec, DB_DBT_MALLOC) && rec.data != NULL)
 			if (ret != 0)
 				goto err2;
 		} else if ((p = __db_rpath(real_name)) != NULL) {
-			array[last] = (char *)strdup(p + 1);
+			array[last] = (char *)__db_strdup(p + 1);
 			FREES(real_name);
 			if (array[last] == NULL)
 				goto err2;
@@ -321,7 +321,7 @@ err2:	/*
 err1:	if (array != NULL) {
 		for (arrayp = array; *arrayp != NULL; ++arrayp)
 			FREES(*arrayp);
-		free(array);
+		__db_free(array);
 	}
 	return (ret);
 }
@@ -341,7 +341,7 @@ __absname(pref, name, newnamep)
 	l_name = strlen(name);
 
 	/* Malloc space for concatenating the two. */
-	if ((newname = (char *)malloc(l_pref + l_name + 2)) == NULL)
+	if ((newname = (char *)__db_malloc(l_pref + l_name + 2)) == NULL)
 		return (ENOMEM);
 
 	/* Build the name. */
@@ -379,7 +379,7 @@ __usermem(listp, func)
 	 * Don't simplify this expression, SunOS compilers don't like it.
 	 */
 	if (func == NULL)
-		array = (char **)malloc(len);
+		array = (char **)__db_malloc(len);
 	else
 		array = (char **)func(len);
 	if (array == NULL)
@@ -399,7 +399,7 @@ __usermem(listp, func)
 	/* NULL-terminate the list. */
 	*arrayp = NULL;
 
-	free(*listp);
+	__db_free(*listp);
 	*listp = array;
 
 	return (0);
diff --git a/db2/log/log_auto.c b/db2/log/log_auto.c
index ea88a7bff9..d5dbfe1f5f 100644
--- a/db2/log/log_auto.c
+++ b/db2/log/log_auto.c
@@ -53,7 +53,7 @@ int __log_register_log(logp, txnid, ret_lsnp, flags,
 	    + sizeof(u_int32_t) + (uid == NULL ? 0 : uid->size)
 	    + sizeof(id)
 	    + sizeof(ftype);
-	if ((logrec.data = (void *)malloc(logrec.size)) == NULL)
+	if ((logrec.data = (void *)__db_malloc(logrec.size)) == NULL)
 		return (ENOMEM);
 
 	bp = logrec.data;
@@ -94,7 +94,7 @@ int __log_register_log(logp, txnid, ret_lsnp, flags,
 	ret = __log_put(logp, ret_lsnp, (DBT *)&logrec, flags);
 	if (txnid != NULL)
 		txnid->last_lsn = *ret_lsnp;
-	free(logrec.data);
+	__db_free(logrec.data);
 	return (ret);
 }
 
@@ -151,7 +151,7 @@ __log_register_print(notused1, dbtp, lsnp, notused3, notused4)
 	printf("\tid: %lu\n", (u_long)argp->id);
 	printf("\tftype: 0x%lx\n", (u_long)argp->ftype);
 	printf("\n");
-	free(argp);
+	__db_free(argp);
 	return (0);
 }
 
@@ -166,7 +166,7 @@ __log_register_read(recbuf, argpp)
 	__log_register_args *argp;
 	u_int8_t *bp;
 
-	argp = (__log_register_args *)malloc(sizeof(__log_register_args) +
+	argp = (__log_register_args *)__db_malloc(sizeof(__log_register_args) +
 	    sizeof(DB_TXN));
 	if (argp == NULL)
 		return (ENOMEM);
@@ -223,7 +223,7 @@ int __log_unregister_log(logp, txnid, ret_lsnp, flags,
 		lsnp = &txnid->last_lsn;
 	logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN)
 	    + sizeof(id);
-	if ((logrec.data = (void *)malloc(logrec.size)) == NULL)
+	if ((logrec.data = (void *)__db_malloc(logrec.size)) == NULL)
 		return (ENOMEM);
 
 	bp = logrec.data;
@@ -242,7 +242,7 @@ int __log_unregister_log(logp, txnid, ret_lsnp, flags,
 	ret = __log_put(logp, ret_lsnp, (DBT *)&logrec, flags);
 	if (txnid != NULL)
 		txnid->last_lsn = *ret_lsnp;
-	free(logrec.data);
+	__db_free(logrec.data);
 	return (ret);
 }
 
@@ -280,7 +280,7 @@ __log_unregister_print(notused1, dbtp, lsnp, notused3, notused4)
 	    (u_long)argp->prev_lsn.offset);
 	printf("\tid: %lu\n", (u_long)argp->id);
 	printf("\n");
-	free(argp);
+	__db_free(argp);
 	return (0);
 }
 
@@ -295,7 +295,7 @@ __log_unregister_read(recbuf, argpp)
 	__log_unregister_args *argp;
 	u_int8_t *bp;
 
-	argp = (__log_unregister_args *)malloc(sizeof(__log_unregister_args) +
+	argp = (__log_unregister_args *)__db_malloc(sizeof(__log_unregister_args) +
 	    sizeof(DB_TXN));
 	if (argp == NULL)
 		return (ENOMEM);
diff --git a/db2/log/log_findckp.c b/db2/log/log_findckp.c
index df75e20e03..115a00e8aa 100644
--- a/db2/log/log_findckp.c
+++ b/db2/log/log_findckp.c
@@ -8,7 +8,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)log_findckp.c	10.11 (Sleepycat) 8/27/97";
+static const char sccsid[] = "@(#)log_findckp.c	10.12 (Sleepycat) 10/25/97";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -77,13 +77,13 @@ __log_findckp(lp, lsnp)
 	next_lsn = last_ckp;
 	do {
 		if (F_ISSET(lp, DB_AM_THREAD))
-			free(data.data);
+			__db_free(data.data);
 
 		if ((ret = log_get(lp, &next_lsn, &data, DB_SET)) != 0)
 			return (ret);
 		if ((ret = __txn_ckp_read(data.data, &ckp_args)) != 0) {
 			if (F_ISSET(lp, DB_AM_THREAD))
-				free(data.data);
+				__db_free(data.data);
 			return (ret);
 		}
 		if (IS_ZERO_LSN(ckp_lsn))
@@ -100,12 +100,12 @@ __log_findckp(lp, lsnp)
 		}
 		last_ckp = next_lsn;
 		next_lsn = ckp_args->last_ckp;
-		free(ckp_args);
+		__db_free(ckp_args);
 	} while (!IS_ZERO_LSN(next_lsn) &&
 	    log_compare(&last_ckp, &ckp_lsn) > 0);
 
 	if (F_ISSET(lp, DB_AM_THREAD))
-		free(data.data);
+		__db_free(data.data);
 
 	/*
 	 * At this point, either, next_lsn is ZERO or ckp_lsn is the
@@ -118,7 +118,7 @@ __log_findckp(lp, lsnp)
 		if ((ret = log_get(lp, &last_ckp, &data, DB_FIRST)) != 0)
 			return (ret);
 		if (F_ISSET(lp, DB_AM_THREAD))
-			free(data.data);
+			__db_free(data.data);
 	}
 	*lsnp = last_ckp;
 
diff --git a/db2/log/log_get.c b/db2/log/log_get.c
index 3f6df6c33c..ed35d57f82 100644
--- a/db2/log/log_get.c
+++ b/db2/log/log_get.c
@@ -7,7 +7,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)log_get.c	10.19 (Sleepycat) 9/23/97";
+static const char sccsid[] = "@(#)log_get.c	10.21 (Sleepycat) 10/25/97";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -170,7 +170,8 @@ __log_get(dblp, alsn, dbt, flags, silent)
 		if (!IS_ZERO_LSN(nlsn)) {
 			/* If at start-of-file, move to the previous file. */
 			if (nlsn.offset == 0) {
-				if (nlsn.file == 1)
+				if (nlsn.file == 1 ||
+				    __log_valid(dblp, NULL, nlsn.file - 1) != 0)
 					return (DB_NOTFOUND);
 
 				--nlsn.file;
@@ -215,27 +216,21 @@ retry:
 		goto cksum;
 	}
 
-	/*
-	 * Move the file descriptor to the page that has the hdr.  We dealt
-	 * with moving to a previous log file in the flags switch code, but
-	 * we don't yet know if we'll need to move to a subsequent file.
-	 *
-	 * Acquire a file descriptor.
-	 */
+	/* Acquire a file descriptor. */
 	if (dblp->c_fd == -1) {
 		if ((ret = __log_name(dblp, nlsn.file, &np)) != 0)
 			goto err1;
-		if ((ret = __db_fdopen(np, DB_RDONLY | DB_SEQUENTIAL,
+		if ((ret = __db_open(np, DB_RDONLY | DB_SEQUENTIAL,
 		    DB_RDONLY | DB_SEQUENTIAL, 0, &dblp->c_fd)) != 0) {
 			fail = np;
 			goto err1;
 		}
-		free(np);
+		__db_free(np);
 		np = NULL;
 	}
 
 	/* Seek to the header offset and read the header. */
-	if ((ret = __db_lseek(dblp->c_fd, 0, 0, nlsn.offset, SEEK_SET)) != 0) {
+	if ((ret = __db_seek(dblp->c_fd, 0, 0, nlsn.offset, SEEK_SET)) != 0) {
 		fail = "seek";
 		goto err1;
 	}
@@ -289,7 +284,7 @@ retry:
 	}
 
 	/* Allocate temporary memory to hold the record. */
-	if ((tbuf = (char *)malloc(len)) == NULL) {
+	if ((tbuf = (char *)__db_malloc(len)) == NULL) {
 		ret = ENOMEM;
 		goto err1;
 	}
@@ -318,7 +313,7 @@ retry:
 	if ((ret = __db_retcopy(dbt, tbuf, len,
 	    &dblp->c_dbt.data, &dblp->c_dbt.ulen, NULL)) != 0)
 		goto err1;
-	free(tbuf);
+	__db_free(tbuf);
 	tbuf = NULL;
 
 cksum:	if (hdr.cksum != __ham_func4(dbt->data, dbt->size)) {
@@ -349,8 +344,8 @@ err1:	if (!silent)
 			__db_err(dblp->dbenv,
 			    "log_get: %s: %s", fail, strerror(ret));
 err2:	if (np != NULL)
-		free(np);
+		__db_free(np);
 	if (tbuf != NULL)
-		free(tbuf);
+		__db_free(tbuf);
 	return (ret);
 }
diff --git a/db2/log/log_put.c b/db2/log/log_put.c
index 225595f33e..92d9563301 100644
--- a/db2/log/log_put.c
+++ b/db2/log/log_put.c
@@ -7,7 +7,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)log_put.c	10.14 (Sleepycat) 9/23/97";
+static const char sccsid[] = "@(#)log_put.c	10.20 (Sleepycat) 11/2/97";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -29,9 +29,10 @@ static const char sccsid[] = "@(#)log_put.c	10.14 (Sleepycat) 9/23/97";
 #include "common_ext.h"
 
 static int __log_fill __P((DB_LOG *, void *, u_int32_t));
+static int __log_flush __P((DB_LOG *, const DB_LSN *));
 static int __log_newfd __P((DB_LOG *));
-static int __log_write __P((DB_LOG *, void *, u_int32_t));
 static int __log_putr __P((DB_LOG *, const DBT *, u_int32_t));
+static int __log_write __P((DB_LOG *, void *, u_int32_t));
 
 /*
  * log_put --
@@ -63,11 +64,8 @@ log_put(dblp, lsn, dbt, flags)
 	}
 
 	LOCK_LOGREGION(dblp);
-
 	ret = __log_put(dblp, lsn, dbt, flags);
-
 	UNLOCK_LOGREGION(dblp);
-
 	return (ret);
 }
 
@@ -102,14 +100,10 @@ __log_put(dblp, lsn, dbt, flags)
 			    "log_put: record larger than maximum file size");
 			return (EINVAL);
 		}
-		if (lp->b_off != 0) {
-			if ((ret = __log_write(dblp, lp->buf, lp->b_off)) != 0)
-				return (ret);
-			if ((ret = __db_fsync(dblp->lfd)) != 0)
-				return (ret);
-			lp->s_lsn.file = lp->lsn.file;
-			lp->s_lsn.offset = lp->lsn.offset - 1;
-		}
+
+		/* Flush the log. */
+		if ((ret = __log_flush(dblp, NULL)) != 0)
+			return (ret);
 
 		/*
 		 * Save the last known offset from the previous file, we'll
@@ -117,9 +111,15 @@ __log_put(dblp, lsn, dbt, flags)
 		 */
 		lastoff = lp->lsn.offset;
 
+		/* Point the current LSN to the new file. */
 		++lp->lsn.file;
 		lp->lsn.offset = 0;
+
+		/* Reset the file write offset. */
 		lp->w_off = 0;
+
+		/* Reset the first-unwritten LSN for the buffer. */
+		lp->uw_lsn = lp->lsn;
 	} else
 		lastoff = 0;
 
@@ -149,56 +149,54 @@ __log_put(dblp, lsn, dbt, flags)
 	 *	Put out the checkpoint record (above).
 	 *	Save the LSN of the checkpoint in the shared region.
 	 *	Append the set of file name information into the log.
-	 *	Flush the current buffer contents to disk.
-	 *	Sync the log to disk.
-	 *	Save the time the checkpoint was written.
-	 *	Reset the bytes written since the last checkpoint.
 	 */
 	if (flags == DB_CHECKPOINT) {
 		lp->c_lsn = *lsn;
 
 		for (fnp = SH_TAILQ_FIRST(&dblp->lp->fq, __fname);
 		    fnp != NULL; fnp = SH_TAILQ_NEXT(fnp, q, __fname)) {
-			t.data = ADDR(dblp, fnp->name_off);
+			t.data = R_ADDR(dblp, fnp->name_off);
 			t.size = strlen(t.data) + 1;
 			memset(&fid_dbt, 0, sizeof(fid_dbt));
-			fid_dbt.data = ADDR(dblp, fnp->fileid_off);
+			fid_dbt.data = R_ADDR(dblp, fnp->fileid_off);
 			fid_dbt.size = DB_FILE_ID_LEN;
 			if ((ret = __log_register_log(dblp, NULL, &r_unused,
 			    0, &t, &fid_dbt, fnp->id, fnp->s_type)) != 0)
 				return (ret);
 		}
-		if (lp->b_off != 0 &&
-		    (ret = __log_write(dblp, lp->buf, lp->b_off)) != 0)
-			return (ret);
-		(void)time(&lp->chkpt);
-		lp->written = 0;
-
-		if ((ret = __db_fsync(dblp->lfd)) != 0)
-			return (ret);
-		lp->s_lsn.file = lp->lsn.file;
-		lp->s_lsn.offset = lp->lsn.offset - 1;
 	}
 
-	/* We always flush on a checkpoint. */
-	if (flags == DB_FLUSH || flags == DB_CHECKPOINT) {
-		if (lp->b_off != 0 &&
-		    (ret = __log_write(dblp, lp->buf, lp->b_off)) != 0)
+	/*
+	 * On a checkpoint or when flush is requested, we:
+	 *	Flush the current buffer contents to disk.
+	 *	Sync the log to disk.
+	 */
+	if (flags == DB_FLUSH || flags == DB_CHECKPOINT)
+		if ((ret = __log_flush(dblp, NULL)) != 0)
 			return (ret);
 
-		if ((ret = __db_fsync(dblp->lfd)) != 0)
-			return (ret);
-		lp->s_lsn.file = lp->lsn.file;
-		lp->s_lsn.offset = lp->lsn.offset - 1;
+	/*
+	 * On a checkpoint, we:
+	 *	Save the time the checkpoint was written.
+	 *	Reset the bytes written since the last checkpoint.
+	 */
+	if (flags == DB_CHECKPOINT) {
+		(void)time(&lp->chkpt);
+		lp->stat.st_wc_bytes = lp->stat.st_wc_mbytes = 0;
 	}
 
 	/*
-	 * If we just did I/O, i.e., this LSN could have spanned the start of
-	 * the in-core buffer, we remember it so that we can flush correctly
-	 * during a sync.
+	 * When an application calls the log_flush routine, we need to figure
+	 * out if the current buffer needs to be flushed.  The problem is that
+	 * if a record spans buffers, it's possible for the record continued
+	 * in the current buffer to have begun in a previous buffer.  Each time
+	 * we write a buffer, we update the first-unwritten LSN to point to the
+	 * first LSN after that written buffer.  If we have a spanning record,
+	 * correct that value to be the LSN that started it all, here.
 	 */
 	if (lsn->offset < lp->w_off && lsn->offset + lp->len > lp->w_off)
-		lp->span_lsn = *lsn;
+		lp->uw_lsn = *lsn;
+
 	return (0);
 }
 
@@ -248,6 +246,24 @@ log_flush(dblp, lsn)
 	DB_LOG *dblp;
 	const DB_LSN *lsn;
 {
+	int ret;
+
+	LOCK_LOGREGION(dblp);
+	ret = __log_flush(dblp, lsn);
+	UNLOCK_LOGREGION(dblp);
+	return (ret);
+}
+
+/*
+ * __log_flush --
+ *	Write all records less than or equal to the specified LSN; internal
+ *	version.
+ */
+static int
+__log_flush(dblp, lsn)
+	DB_LOG *dblp;
+	const DB_LSN *lsn;
+{
 	DB_LSN t_lsn;
 	LOG *lp;
 	int ret;
@@ -255,60 +271,64 @@ log_flush(dblp, lsn)
 	ret = 0;
 	lp = dblp->lp;
 
-	LOCK_LOGREGION(dblp);
-
-	/* If no LSN specified, flush the entire log. */
+	/*
+	 * If no LSN specified, flush the entire log by setting the flush LSN
+	 * to the last LSN written in the log.  Otherwise, check that the LSN
+	 * isn't a non-existent record for the log.
+	 */
 	if (lsn == NULL) {
 		t_lsn.file = lp->lsn.file;
 		t_lsn.offset = lp->lsn.offset - lp->len;
 		lsn = &t_lsn;
-	}
-
-	/* If it's a non-existent record, it's an error. */
-	if (lsn->file > lp->lsn.file ||
-	    (lsn->file == lp->lsn.file && lsn->offset > lp->lsn.offset)) {
-		__db_err(dblp->dbenv, "log_flush: LSN past current end-of-log");
-		ret = EINVAL;
-		goto ret1;
-	}
-
-	/*
-	 * If it's from a previous file, we're done because we sync each
-	 * file when we move to a new one.
-	 */
-	if (lsn->file < lp->lsn.file)
-		goto ret1;
+	} else
+		if (lsn->file > lp->lsn.file ||
+		    (lsn->file == lp->lsn.file &&
+		    lsn->offset > lp->lsn.offset - lp->len)) {
+			__db_err(dblp->dbenv,
+			    "log_flush: LSN past current end-of-log");
+			return (EINVAL);
+		}
 
 	/*
-	 * If it's less than the last-sync'd offset, we've already sync'd
-	 * this LSN.
+	 * If the LSN is less than the last-sync'd LSN, we're done.  Note,
+	 * the last-sync LSN saved in s_lsn is the LSN of the first byte 
+	 * that has not yet been written to disk, so the test is <, not <=.
 	 */
-	if (lsn->offset <= lp->s_lsn.offset)
-		goto ret1;
+	if (lsn->file < lp->s_lsn.file ||
+	    (lsn->file == lp->s_lsn.file && lsn->offset < lp->s_lsn.offset))
+		return (0);
 
 	/*
 	 * We may need to write the current buffer.  We have to write the
-	 * current buffer if the sync LSN is greater than or equal to the
-	 * saved spanning-LSN.
+	 * current buffer if the flush LSN is greater than or equal to the
+	 * first-unwritten LSN (uw_lsn).  If we write the buffer, then we
+	 * update the first-unwritten LSN.
 	 */
-	if (lsn->file >= lp->span_lsn.file &&
-	    lsn->offset >= lp->span_lsn.offset)
+	if (lp->b_off != 0 &&
+	    lsn->file >= lp->uw_lsn.file && lsn->offset >= lp->uw_lsn.offset)
 		if ((ret = __log_write(dblp, lp->buf, lp->b_off)) != 0)
-			goto ret1;
+			return (ret);
 
-	/* Acquire a file descriptor if we don't have one. */
-	if (dblp->lfname != dblp->lp->lsn.file &&
-	    (ret = __log_newfd(dblp)) != 0)
-		goto ret1;
+	/*
+	 * It's possible that this thread may never have written to this log
+	 * file.  Acquire a file descriptor if we don't already have one.
+	 */
+	if (dblp->lfname != dblp->lp->lsn.file)
+		if ((ret = __log_newfd(dblp)) != 0)
+			return (ret);
 
+	/* Sync all writes to disk. */
 	if ((ret = __db_fsync(dblp->lfd)) != 0)
-		goto ret1;
+		return (ret);
+	++lp->stat.st_scount;
 
-	lp->s_lsn.file = lp->lsn.file;
-	lp->s_lsn.offset = lsn->offset;
+	/*
+	 * Set the last-synced LSN, the first LSN after the last record
+	 * that we know is on disk.
+	 */
+	lp->s_lsn = lp->uw_lsn;
 
-ret1:	UNLOCK_LOGREGION(dblp);
-	return (ret);
+	return (0);
 }
 
 /*
@@ -385,17 +405,32 @@ __log_write(dblp, addr, len)
 	 * Seek to the offset in the file (someone may have written it
 	 * since we last did).
 	 */
-	if ((ret = __db_lseek(dblp->lfd, 0, 0, lp->w_off, SEEK_SET)) != 0)
+	if ((ret = __db_seek(dblp->lfd, 0, 0, lp->w_off, SEEK_SET)) != 0)
 		return (ret);
 	if ((ret = __db_write(dblp->lfd, addr, len, &nw)) != 0)
 		return (ret);
 	if (nw != (int32_t)len)
 		return (EIO);
 
-	/* Update the seek offset and reset the buffer offset. */
+	/*
+	 * Reset the buffer offset, update the seek offset, and update the
+	 * first-unwritten LSN.
+	 */
 	lp->b_off = 0;
 	lp->w_off += len;
-	lp->written += len;
+	lp->uw_lsn.file = lp->lsn.file;
+	lp->uw_lsn.offset = lp->w_off;
+
+	/* Update written statistics. */
+	if ((lp->stat.st_w_bytes += len) >= MEGABYTE) {
+		lp->stat.st_w_bytes -= MEGABYTE;
+		++lp->stat.st_w_mbytes;
+	}
+	if ((lp->stat.st_wc_bytes += len) >= MEGABYTE) {
+		lp->stat.st_wc_bytes -= MEGABYTE;
+		++lp->stat.st_wc_mbytes;
+	}
+	++lp->stat.st_wcount;
 
 	return (0);
 }
@@ -415,11 +450,8 @@ log_file(dblp, lsn, namep, len)
 	char *p;
 
 	LOCK_LOGREGION(dblp);
-
 	ret = __log_name(dblp, lsn->file, &p);
-
 	UNLOCK_LOGREGION(dblp);
-
 	if (ret != 0)
 		return (ret);
 
@@ -429,7 +461,7 @@ log_file(dblp, lsn, namep, len)
 		return (ENOMEM);
 	}
 	(void)strcpy(namep, p);
-	free(p);
+	__db_free(p);
 
 	return (0);
 }
@@ -455,7 +487,7 @@ __log_newfd(dblp)
 	dblp->lfname = dblp->lp->lsn.file;
 	if ((ret = __log_name(dblp, dblp->lfname, &p)) != 0)
 		return (ret);
-	if ((ret = __db_fdopen(p,
+	if ((ret = __db_open(p,
 	    DB_CREATE | DB_SEQUENTIAL,
 	    DB_CREATE | DB_SEQUENTIAL,
 	    dblp->lp->persist.mode, &dblp->lfd)) != 0)
@@ -472,14 +504,14 @@ __log_newfd(dblp)
  * PUBLIC: int __log_name __P((DB_LOG *, int, char **));
  */
 int
-__log_name(dblp, fileno, namep)
+__log_name(dblp, filenumber, namep)
 	DB_LOG *dblp;
 	char **namep;
-	int fileno;
+	int filenumber;
 {
 	char name[sizeof(LFNAME) + 10];
 
-	(void)snprintf(name, sizeof(name), LFNAME, fileno);
+	(void)snprintf(name, sizeof(name), LFNAME, filenumber);
 	return (__db_appname(dblp->dbenv,
 	    DB_APP_LOG, dblp->dir, name, NULL, namep));
 }
diff --git a/db2/log/log_rec.c b/db2/log/log_rec.c
index f49a7f16ea..92b8203990 100644
--- a/db2/log/log_rec.c
+++ b/db2/log/log_rec.c
@@ -40,7 +40,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)log_rec.c	10.13 (Sleepycat) 8/27/97";
+static const char sccsid[] = "@(#)log_rec.c	10.14 (Sleepycat) 10/25/97";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -99,7 +99,7 @@ __log_register_recover(logp, dbtp, lsnp, redo, info)
 
 out:	F_CLR(logp, DB_AM_RECOVER);
 	if (argp != NULL)
-		free(argp);
+		__db_free(argp);
 	return (ret);
 }
 
@@ -150,7 +150,7 @@ __log_unregister_recover(logp, dbtp, lsnp, redo, info)
 
 out:	F_CLR(logp, DB_AM_RECOVER);
 	if (argp != NULL)
-		free(argp);
+		__db_free(argp);
 	return (ret);
 }
 
@@ -227,14 +227,14 @@ __log_add_logid(logp, dbp, ndx)
 	 */
 	if (logp->dbentry_cnt <= ndx) {
 		if (logp->dbentry_cnt == 0) {
-			logp->dbentry =
-			    (DB_ENTRY *)malloc(DB_GROW_SIZE * sizeof(DB_ENTRY));
+			logp->dbentry = (DB_ENTRY *)
+			    __db_malloc(DB_GROW_SIZE * sizeof(DB_ENTRY));
 			if (logp->dbentry == NULL) {
 				ret = ENOMEM;
 				goto err;
 			}
 		} else {
-			temp_entryp = (DB_ENTRY *)realloc(logp->dbentry,
+			temp_entryp = (DB_ENTRY *)__db_realloc(logp->dbentry,
 			    (DB_GROW_SIZE + logp->dbentry_cnt) *
 			    sizeof(DB_ENTRY));
 			if (temp_entryp == NULL) {
diff --git a/db2/log/log_register.c b/db2/log/log_register.c
index 859b1e5bcb..2dab361616 100644
--- a/db2/log/log_register.c
+++ b/db2/log/log_register.c
@@ -7,7 +7,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)log_register.c	10.11 (Sleepycat) 9/15/97";
+static const char sccsid[] = "@(#)log_register.c	10.12 (Sleepycat) 9/29/97";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -72,7 +72,7 @@ log_register(dblp, dbp, name, type, idp)
 		if (fid <= fnp->id)
 			fid = fnp->id + 1;
 		if (!memcmp(dbp->lock.fileid,
-		    ADDR(dblp, fnp->fileid_off), DB_FILE_ID_LEN)) {
+		    R_ADDR(dblp, fnp->fileid_off), DB_FILE_ID_LEN)) {
 			++fnp->ref;
 			fid = fnp->id;
 			if (!F_ISSET(dblp, DB_AM_RECOVER) &&
@@ -95,13 +95,13 @@ log_register(dblp, dbp, name, type, idp)
 	 * XXX Now that uids are fixed size, we can put them in the fnp
 	 * structure.
 	 */
-	fnp->fileid_off = OFFSET(dblp, fidp);
+	fnp->fileid_off = R_OFFSET(dblp, fidp);
 	memcpy(fidp, dbp->lock.fileid, DB_FILE_ID_LEN);
 
 	len = strlen(name) + 1;
 	if ((ret = __db_shalloc(dblp->addr, len, 0, &namep)) != 0)
 		goto err;
-	fnp->name_off = OFFSET(dblp, namep);
+	fnp->name_off = R_OFFSET(dblp, namep);
 	memcpy(namep, name, len);
 
 	SH_TAILQ_INSERT_HEAD(&dblp->lp->fq, fnp, q, __fname);
@@ -185,8 +185,8 @@ log_unregister(dblp, fid)
 	}
 
 	/* Free the unique file information, name and structure. */
-	__db_shalloc_free(dblp->addr, ADDR(dblp, fnp->fileid_off));
-	__db_shalloc_free(dblp->addr, ADDR(dblp, fnp->name_off));
+	__db_shalloc_free(dblp->addr, R_ADDR(dblp, fnp->fileid_off));
+	__db_shalloc_free(dblp->addr, R_ADDR(dblp, fnp->name_off));
 	SH_TAILQ_REMOVE(&dblp->lp->fq, fnp, q, __fname);
 	__db_shalloc_free(dblp->addr, fnp);
 
diff --git a/db2/mp/mp_bh.c b/db2/mp/mp_bh.c
index fb6bc96ae7..a707603eec 100644
--- a/db2/mp/mp_bh.c
+++ b/db2/mp/mp_bh.c
@@ -7,7 +7,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)mp_bh.c	10.16 (Sleepycat) 9/23/97";
+static const char sccsid[] = "@(#)mp_bh.c	10.21 (Sleepycat) 10/25/97";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -94,10 +94,10 @@ __memp_bhwrite(dbmp, mfp, bhp, restartp, wrotep)
 	 * files that we have previously tried (and failed) to open.
 	 */
 	dbt.size = mfp->pgcookie_len;
-	dbt.data = ADDR(dbmp, mfp->pgcookie_off);
-	if (__memp_fopen(dbmp, ADDR(dbmp, mfp->path_off),
+	dbt.data = R_ADDR(dbmp, mfp->pgcookie_off);
+	if (__memp_fopen(dbmp, R_ADDR(dbmp, mfp->path_off),
 	    mfp->ftype, 0, 0, mfp->stat.st_pagesize,
-	    mfp->lsn_off, &dbt, ADDR(dbmp, mfp->fileid_off), 0, &dbmfp) != 0)
+	    mfp->lsn_off, &dbt, R_ADDR(dbmp, mfp->fileid_off), 0, &dbmfp) != 0)
 		return (0);
 
 found:	return (__memp_pgwrite(dbmfp, bhp, restartp, wrotep));
@@ -137,7 +137,7 @@ __memp_pgread(dbmfp, bhp, can_create)
 	ret = 0;
 	LOCKHANDLE(dbmp, dbmfp->mutexp);
 	if (dbmfp->fd == -1 || (ret =
-	    __db_lseek(dbmfp->fd, pagesize, bhp->pgno, 0, SEEK_SET)) != 0) {
+	    __db_seek(dbmfp->fd, pagesize, bhp->pgno, 0, SEEK_SET)) != 0) {
 		if (!can_create) {
 			if (dbmfp->fd == -1)
 				ret = EINVAL;
@@ -230,6 +230,7 @@ __memp_pgwrite(dbmfp, bhp, restartp, wrotep)
 
 	dbmp = dbmfp->dbmp;
 	dbenv = dbmp->dbenv;
+	mp = dbmp->mp;
 	mfp = dbmfp->mfp;
 
 	if (restartp != NULL)
@@ -277,8 +278,7 @@ __memp_pgwrite(dbmfp, bhp, restartp, wrotep)
 	}
 
 	/* Write the page out. */
-	if ((ret =
-	    __db_lseek(dbmfp->fd, pagesize, bhp->pgno, 0, SEEK_SET)) != 0)
+	if ((ret = __db_seek(dbmfp->fd, pagesize, bhp->pgno, 0, SEEK_SET)) != 0)
 		fail = "seek";
 	else if ((ret = __db_write(dbmfp->fd, bhp->buf, pagesize, &nw)) != 0)
 		fail = "write";
@@ -309,15 +309,23 @@ __memp_pgwrite(dbmfp, bhp, restartp, wrotep)
 	/* Clean up the flags based on a successful write. */
 	F_SET(bhp, BH_CALLPGIN);
 	F_CLR(bhp, BH_DIRTY | BH_LOCKED);
+
+	++mp->stat.st_page_clean;
+	--mp->stat.st_page_dirty;
+
 	UNLOCKBUFFER(dbmp, bhp);
 
 	/*
-	 * If we wrote a buffer which a checkpoint is waiting for, update
+	 * If we write a buffer for which a checkpoint is waiting, update
 	 * the count of pending buffers (both in the mpool as a whole and
 	 * for this file).  If the count for this file goes to zero, flush
 	 * the writes.
 	 *
 	 * XXX:
+	 * Don't lock the region around the sync, fsync(2) has no atomicity
+	 * issues.
+	 *
+	 * XXX:
 	 * We ignore errors from the sync -- it makes no sense to return an
 	 * error to the calling process, so set a flag causing the sync to
 	 * be retried later.
@@ -325,21 +333,15 @@ __memp_pgwrite(dbmfp, bhp, restartp, wrotep)
 	 * If the buffer we wrote has a LSN larger than the current largest
 	 * we've written for this checkpoint, update the saved value.
 	 */
-	mp = dbmp->mp;
 	if (F_ISSET(bhp, BH_WRITE)) {
 		if (log_compare(&lsn, &mp->lsn) > 0)
 			mp->lsn = lsn;
 		F_CLR(bhp, BH_WRITE);
 
 		--mp->lsn_cnt;
-		if (--mfp->lsn_cnt == 0) {
-			/*
-			 * Don't lock -- there are no atomicity issues for
-			 * fsync(2).
-			 */
-			if (__db_fsync(dbmfp->fd) != 0)
-				F_SET(mp, MP_LSN_RETRY);
-		}
+
+		if (--mfp->lsn_cnt == 0 && __db_fsync(dbmfp->fd) != 0)
+			F_SET(mp, MP_LSN_RETRY);
 	}
 
 	/* Update I/O statistics. */
@@ -391,7 +393,7 @@ __memp_pg(dbmfp, bhp, is_pgin)
 			dbtp = NULL;
 		else {
 			dbt.size = mfp->pgcookie_len;
-			dbt.data = ADDR(dbmp, mfp->pgcookie_off);
+			dbt.data = R_ADDR(dbmp, mfp->pgcookie_off);
 			dbtp = &dbt;
 		}
 		UNLOCKHANDLE(dbmp, dbmp->mutexp);
@@ -433,19 +435,21 @@ __memp_bhfree(dbmp, mfp, bhp, free_mem)
 {
 	size_t off;
 
-	/* Delete the buffer header from the MPOOL hash list. */
-	off = BUCKET(dbmp->mp, OFFSET(dbmp, mfp), bhp->pgno);
-	SH_TAILQ_REMOVE(&dbmp->htab[off], bhp, mq, __bh);
+	/* Delete the buffer header from the hash bucket queue. */
+	off = BUCKET(dbmp->mp, R_OFFSET(dbmp, mfp), bhp->pgno);
+	SH_TAILQ_REMOVE(&dbmp->htab[off], bhp, hq, __bh);
 
-	/* Delete the buffer header from the LRU chain. */
+	/* Delete the buffer header from the LRU queue. */
 	SH_TAILQ_REMOVE(&dbmp->mp->bhq, bhp, q, __bh);
 
 	/*
 	 * If we're not reusing it immediately, free the buffer header
 	 * and data for real.
 	 */
-	if (free_mem)
+	if (free_mem) {
 		__db_shalloc_free(dbmp->addr, bhp);
+		--dbmp->mp->stat.st_page_clean;
+	}
 }
 
 /*
@@ -474,13 +478,13 @@ __memp_upgrade(dbmp, dbmfp, mfp)
 		return (1);
 
 	/* Try the open. */
-	if (__db_fdopen(ADDR(dbmp, mfp->path_off), 0, 0, 0, &fd) != 0) {
+	if (__db_open(R_ADDR(dbmp, mfp->path_off), 0, 0, 0, &fd) != 0) {
 		F_SET(dbmfp, MP_UPGRADE_FAIL);
 		return (1);
 	}
 
 	/* Swap the descriptors and set the upgrade flag. */
-	(void)close(dbmfp->fd);
+	(void)__db_close(dbmfp->fd);
 	dbmfp->fd = fd;
 	F_SET(dbmfp, MP_UPGRADE);
 
diff --git a/db2/mp/mp_fget.c b/db2/mp/mp_fget.c
index a0364e92c3..3f99e60505 100644
--- a/db2/mp/mp_fget.c
+++ b/db2/mp/mp_fget.c
@@ -7,7 +7,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)mp_fget.c	10.25 (Sleepycat) 9/23/97";
+static const char sccsid[] = "@(#)mp_fget.c	10.30 (Sleepycat) 10/25/97";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -87,14 +87,14 @@ memp_fget(dbmfp, pgnoaddr, flags, addrp)
 	 * We want to switch threads as often as possible.  Sleep every time
 	 * we get a new page to make it more likely.
 	 */
-	if (__sleep_on_every_page_get && (dbmp->dbenv == NULL ||
-	    dbmp->dbenv->db_yield == NULL || dbmp->dbenv->db_yield() != 0))
+	if (__sleep_on_every_page_get &&
+	    (__db_yield == NULL || __db_yield() != 0))
 		__db_sleep(0, 1);
 #endif
 
 	mp = dbmp->mp;
 	mfp = dbmfp->mfp;
-	mf_offset = OFFSET(dbmp, mfp);
+	mf_offset = R_OFFSET(dbmp, mfp);
 	addr = NULL;
 	bhp = NULL;
 	b_incr = b_inserted = readonly_alloc = ret = 0;
@@ -137,7 +137,7 @@ memp_fget(dbmfp, pgnoaddr, flags, addrp)
 			}
 		}
 		if (!readonly_alloc) {
-			addr = ADDR(dbmfp, *pgnoaddr * mfp->stat.st_pagesize);
+			addr = R_ADDR(dbmfp, *pgnoaddr * mfp->stat.st_pagesize);
 
 			++mp->stat.st_map;
 			++mfp->stat.st_map;
@@ -159,9 +159,12 @@ memp_fget(dbmfp, pgnoaddr, flags, addrp)
 		 */
 		if (dbmfp->fd == -1)
 			size = 0;
-		else if ((ret = __db_stat(dbmp->dbenv,
-		    dbmfp->path, dbmfp->fd, &size, NULL)) != 0)
+		else if ((ret =
+		    __db_ioinfo(dbmfp->path, dbmfp->fd, &size, NULL)) != 0) {
+			__db_err(dbmp->dbenv,
+			    "%s: %s", dbmfp->path, strerror(ret));
 			goto err;
+		}
 
 		*pgnoaddr = size == 0 ? 0 : (size - 1) / mfp->stat.st_pagesize;
 
@@ -190,26 +193,29 @@ memp_fget(dbmfp, pgnoaddr, flags, addrp)
 		goto found;
 	}
 
-	/* If we haven't checked the BH list yet, do the search. */
+	/* If we haven't checked the BH hash bucket queue, do the search. */
 	if (!LF_ISSET(DB_MPOOL_LAST | DB_MPOOL_NEW)) {
-		++mp->stat.st_hash_searches;
 		bucket = BUCKET(mp, mf_offset, *pgnoaddr);
 		for (cnt = 0,
 		    bhp = SH_TAILQ_FIRST(&dbmp->htab[bucket], __bh);
-		    bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, mq, __bh)) {
+		    bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, hq, __bh)) {
 			++cnt;
 			if (bhp->pgno == *pgnoaddr &&
 			    bhp->mf_offset == mf_offset) {
 				addr = bhp->buf;
+				++mp->stat.st_hash_searches;
 				if (cnt > mp->stat.st_hash_longest)
 					mp->stat.st_hash_longest = cnt;
 				mp->stat.st_hash_examined += cnt;
 				goto found;
 			}
 		}
-		if (cnt > mp->stat.st_hash_longest)
-			mp->stat.st_hash_longest = cnt;
-		mp->stat.st_hash_examined += cnt;
+		if (cnt != 0) {
+			++mp->stat.st_hash_searches;
+			if (cnt > mp->stat.st_hash_longest)
+				mp->stat.st_hash_longest = cnt;
+			mp->stat.st_hash_examined += cnt;
+		}
 	}
 
 	/*
@@ -239,8 +245,9 @@ memp_fget(dbmfp, pgnoaddr, flags, addrp)
 	 * our region lock without screwing up the world.
 	 */
 	bucket = BUCKET(mp, mf_offset, *pgnoaddr);
-	SH_TAILQ_INSERT_HEAD(&dbmp->htab[bucket], bhp, mq, __bh);
+	SH_TAILQ_INSERT_HEAD(&dbmp->htab[bucket], bhp, hq, __bh);
 	SH_TAILQ_INSERT_TAIL(&mp->bhq, bhp, q);
+	++mp->stat.st_page_clean;
 	b_inserted = 1;
 
 	/* Set the page number, and associated MPOOLFILE. */
@@ -281,7 +288,8 @@ reread:		if ((ret = __memp_pgread(dbmfp,
 		 * !!!
 		 * The __memp_pgread call discarded and reacquired the region
 		 * lock.  Because the buffer reference count was incremented
-		 * before the region lock was discarded the buffer didn't move.
+		 * before the region lock was discarded the buffer can't move
+		 * and its contents can't change.
 		 */
 		++mp->stat.st_cache_miss;
 		++mfp->stat.st_cache_miss;
@@ -305,7 +313,8 @@ found:		/* Increment the reference count. */
 		 * BH_LOCKED --
 		 * I/O in progress, wait for it to finish.  Because the buffer
 		 * reference count was incremented before the region lock was
-		 * discarded we know the buffer didn't move.
+		 * discarded we know the buffer can't move and its contents
+		 * can't change.
 		 */
 		if (F_ISSET(bhp, BH_LOCKED)) {
 			UNLOCKREGION(dbmp);
diff --git a/db2/mp/mp_fopen.c b/db2/mp/mp_fopen.c
index 5ab807701c..de59c9ea9b 100644
--- a/db2/mp/mp_fopen.c
+++ b/db2/mp/mp_fopen.c
@@ -7,7 +7,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)mp_fopen.c	10.27 (Sleepycat) 9/23/97";
+static const char sccsid[] = "@(#)mp_fopen.c	10.30 (Sleepycat) 10/25/97";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -92,7 +92,7 @@ __memp_fopen(dbmp, path,
 
 	/* Allocate and initialize the per-process structure. */
 	if ((dbmfp =
-	    (DB_MPOOLFILE *)calloc(1, sizeof(DB_MPOOLFILE))) == NULL) {
+	    (DB_MPOOLFILE *)__db_calloc(1, sizeof(DB_MPOOLFILE))) == NULL) {
 		__db_err(dbenv, "%s: %s",
 		    path == NULL ? TEMPORARY : path, strerror(ENOMEM));
 		return (ENOMEM);
@@ -120,7 +120,7 @@ __memp_fopen(dbmp, path,
 
 
 		/* Open the file. */
-		if ((ret = __db_fdopen(dbmfp->path,
+		if ((ret = __db_open(dbmfp->path,
 		    LF_ISSET(DB_CREATE | DB_RDONLY), DB_CREATE | DB_RDONLY,
 		    mode, &dbmfp->fd)) != 0) {
 			__db_err(dbenv, "%s: %s", dbmfp->path, strerror(ret));
@@ -128,9 +128,11 @@ __memp_fopen(dbmp, path,
 		}
 
 		/* Don't permit files that aren't a multiple of the pagesize. */
-		if ((ret = __db_stat(dbenv,
-		     dbmfp->path, dbmfp->fd, &size, NULL)) != 0)
+		if ((ret =
+		    __db_ioinfo(dbmfp->path, dbmfp->fd, &size, NULL)) != 0) {
+			__db_err(dbenv, "%s: %s", dbmfp->path, strerror(ret));
 			goto err;
+		}
 		if (size % pagesize) {
 			__db_err(dbenv,
 			    "%s: file size not a multiple of the pagesize",
@@ -198,7 +200,7 @@ __memp_fopen(dbmp, path,
 	dbmfp->addr = NULL;
 	if (mfp->can_mmap) {
 		dbmfp->len = size;
-		if (__db_mmap(dbmfp->fd, dbmfp->len, 1, 1, &dbmfp->addr) != 0) {
+		if (__db_map(dbmfp->fd, dbmfp->len, 1, 1, &dbmfp->addr) != 0) {
 			mfp->can_mmap = 0;
 			dbmfp->addr = NULL;
 		}
@@ -264,7 +266,7 @@ __memp_mf_open(dbmp, dbmfp,
 	for (mfp = SH_TAILQ_FIRST(&dbmp->mp->mpfq, __mpoolfile);
 	    mfp != NULL; mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile))
 		if (!memcmp(fileid,
-		    ADDR(dbmp, mfp->fileid_off), DB_FILE_ID_LEN)) {
+		    R_ADDR(dbmp, mfp->fileid_off), DB_FILE_ID_LEN)) {
 			if (ftype != mfp->ftype ||
 			    pagesize != mfp->stat.st_pagesize) {
 				__db_err(dbmp->dbenv,
@@ -325,10 +327,10 @@ alloc:	if ((ret = __memp_ralloc(dbmp, sizeof(MPOOLFILE), NULL, &mfp)) != 0)
 	if (0) {
 err:		if (mfp->path_off != 0)
 			__db_shalloc_free(dbmp->addr,
-			    ADDR(dbmp, mfp->path_off));
+			    R_ADDR(dbmp, mfp->path_off));
 		if (!istemp)
 			__db_shalloc_free(dbmp->addr,
-			    ADDR(dbmp, mfp->fileid_off));
+			    R_ADDR(dbmp, mfp->fileid_off));
 		if (mfp != NULL)
 			__db_shalloc_free(dbmp->addr, mfp);
 		mfp = NULL;
@@ -367,7 +369,7 @@ memp_fclose(dbmfp)
 
 	/* Discard any mmap information. */
 	if (dbmfp->addr != NULL &&
-	    (ret = __db_munmap(dbmfp->addr, dbmfp->len)) != 0)
+	    (ret = __db_unmap(dbmfp->addr, dbmfp->len)) != 0)
 		__db_err(dbmp->dbenv, "%s: %s", dbmfp->path, strerror(ret));
 
 	/* Close the file; temporary files may not yet have been created. */
@@ -423,7 +425,7 @@ __memp_mf_close(dbmp, dbmfp)
 	 * fairly expensive to reintegrate the buffers back into the region for
 	 * no purpose.
 	 */
-	mf_offset = OFFSET(dbmp, mfp);
+	mf_offset = R_OFFSET(dbmp, mfp);
 	for (bhp = SH_TAILQ_FIRST(&mp->bhq, __bh); bhp != NULL; bhp = nbhp) {
 		nbhp = SH_TAILQ_NEXT(bhp, q, __bh);
 
@@ -436,6 +438,10 @@ __memp_mf_close(dbmp, dbmfp)
 #endif
 
 		if (bhp->mf_offset == mf_offset) {
+			if (F_ISSET(bhp, BH_DIRTY)) {
+				++mp->stat.st_page_clean;
+				--mp->stat.st_page_dirty;
+			}
 			__memp_bhfree(dbmp, mfp, bhp, 0);
 			SH_TAILQ_INSERT_HEAD(&mp->bhfq, bhp, q, __bh);
 		}
@@ -446,11 +452,11 @@ __memp_mf_close(dbmp, dbmfp)
 
 	/* Free the space. */
 	__db_shalloc_free(dbmp->addr, mfp);
-	__db_shalloc_free(dbmp->addr, ADDR(dbmp, mfp->path_off));
+	__db_shalloc_free(dbmp->addr, R_ADDR(dbmp, mfp->path_off));
 	if (mfp->fileid_off != 0)
-		__db_shalloc_free(dbmp->addr, ADDR(dbmp, mfp->fileid_off));
+		__db_shalloc_free(dbmp->addr, R_ADDR(dbmp, mfp->fileid_off));
 	if (mfp->pgcookie_off != 0)
-		__db_shalloc_free(dbmp->addr, ADDR(dbmp, mfp->pgcookie_off));
+		__db_shalloc_free(dbmp->addr, R_ADDR(dbmp, mfp->pgcookie_off));
 
 ret1:	UNLOCKREGION(dbmp);
 	return (0);
diff --git a/db2/mp/mp_fput.c b/db2/mp/mp_fput.c
index 9ea7cd9d0d..892f179d3a 100644
--- a/db2/mp/mp_fput.c
+++ b/db2/mp/mp_fput.c
@@ -7,7 +7,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)mp_fput.c	10.12 (Sleepycat) 9/23/97";
+static const char sccsid[] = "@(#)mp_fput.c	10.14 (Sleepycat) 10/5/97";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -35,10 +35,12 @@ memp_fput(dbmfp, pgaddr, flags)
 {
 	BH *bhp;
 	DB_MPOOL *dbmp;
+	MPOOL *mp;
 	MPOOLFILE *mfp;
 	int wrote, ret;
 
 	dbmp = dbmfp->dbmp;
+	mp = dbmp->mp;
 
 	/* Validate arguments. */
 	if (flags) {
@@ -82,10 +84,16 @@ memp_fput(dbmfp, pgaddr, flags)
 	LOCKREGION(dbmp);
 
 	/* Set/clear the page bits. */
-	if (LF_ISSET(DB_MPOOL_CLEAN))
+	if (LF_ISSET(DB_MPOOL_CLEAN) && F_ISSET(bhp, BH_DIRTY)) {
+		++mp->stat.st_page_clean;
+		--mp->stat.st_page_dirty;
 		F_CLR(bhp, BH_DIRTY);
-	if (LF_ISSET(DB_MPOOL_DIRTY))
+	}
+	if (LF_ISSET(DB_MPOOL_DIRTY) && !F_ISSET(bhp, BH_DIRTY)) {
+		--mp->stat.st_page_clean;
+		++mp->stat.st_page_dirty;
 		F_SET(bhp, BH_DIRTY);
+	}
 	if (LF_ISSET(DB_MPOOL_DISCARD))
 		F_SET(bhp, BH_DISCARD);
 
@@ -108,11 +116,11 @@ memp_fput(dbmfp, pgaddr, flags)
 	}
 
 	/* Move the buffer to the head/tail of the LRU chain. */
-	SH_TAILQ_REMOVE(&dbmp->mp->bhq, bhp, q, __bh);
+	SH_TAILQ_REMOVE(&mp->bhq, bhp, q, __bh);
 	if (F_ISSET(bhp, BH_DISCARD))
-		SH_TAILQ_INSERT_HEAD(&dbmp->mp->bhq, bhp, q, __bh);
+		SH_TAILQ_INSERT_HEAD(&mp->bhq, bhp, q, __bh);
 	else
-		SH_TAILQ_INSERT_TAIL(&dbmp->mp->bhq, bhp, q);
+		SH_TAILQ_INSERT_TAIL(&mp->bhq, bhp, q);
 
 	/*
 	 * If this buffer is scheduled for writing because of a checkpoint,
@@ -125,14 +133,14 @@ memp_fput(dbmfp, pgaddr, flags)
 		if (F_ISSET(bhp, BH_DIRTY)) {
 			if (__memp_bhwrite(dbmp,
 			    dbmfp->mfp, bhp, NULL, &wrote) != 0 || !wrote)
-				F_SET(dbmp->mp, MP_LSN_RETRY);
+				F_SET(mp, MP_LSN_RETRY);
 		} else {
 			F_CLR(bhp, BH_WRITE);
 
-			mfp = ADDR(dbmp, bhp->mf_offset);
+			mfp = R_ADDR(dbmp, bhp->mf_offset);
 			--mfp->lsn_cnt;
 
-			--dbmp->mp->lsn_cnt;
+			--mp->lsn_cnt;
 		}
 
 	UNLOCKREGION(dbmp);
diff --git a/db2/mp/mp_fset.c b/db2/mp/mp_fset.c
index a3a3dcef9c..a7d2706008 100644
--- a/db2/mp/mp_fset.c
+++ b/db2/mp/mp_fset.c
@@ -7,7 +7,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)mp_fset.c	10.9 (Sleepycat) 9/20/97";
+static const char sccsid[] = "@(#)mp_fset.c	10.10 (Sleepycat) 10/5/97";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -34,9 +34,13 @@ memp_fset(dbmfp, pgaddr, flags)
 {
 	BH *bhp;
 	DB_MPOOL *dbmp;
+	MPOOL *mp;
+	MPOOLFILE *mfp;
 	int ret;
 
 	dbmp = dbmfp->dbmp;
+	mfp = dbmfp->mfp;
+	mp = dbmp->mp;
 
 	/* Validate arguments. */
 	if (flags != 0) {
@@ -60,10 +64,16 @@ memp_fset(dbmfp, pgaddr, flags)
 
 	LOCKREGION(dbmp);
 
-	if (LF_ISSET(DB_MPOOL_DIRTY))
-		F_SET(bhp, BH_DIRTY);
-	if (LF_ISSET(DB_MPOOL_CLEAN))
+	if (LF_ISSET(DB_MPOOL_CLEAN) && F_ISSET(bhp, BH_DIRTY)) {
+		++mp->stat.st_page_clean;
+		--mp->stat.st_page_dirty;
 		F_CLR(bhp, BH_DIRTY);
+	}
+	if (LF_ISSET(DB_MPOOL_DIRTY) && !F_ISSET(bhp, BH_DIRTY)) {
+		--mp->stat.st_page_clean;
+		++mp->stat.st_page_dirty;
+		F_SET(bhp, BH_DIRTY);
+	}
 	if (LF_ISSET(DB_MPOOL_DISCARD))
 		F_SET(bhp, BH_DISCARD);
 
diff --git a/db2/mp/mp_open.c b/db2/mp/mp_open.c
index f622b1ed26..4c19739ebd 100644
--- a/db2/mp/mp_open.c
+++ b/db2/mp/mp_open.c
@@ -7,7 +7,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)mp_open.c	10.13 (Sleepycat) 9/23/97";
+static const char sccsid[] = "@(#)mp_open.c	10.15 (Sleepycat) 10/25/97";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -54,7 +54,7 @@ memp_open(path, flags, mode, dbenv, retp)
 	cachesize = dbenv == NULL ? 0 : dbenv->mp_size;
 
 	/* Create and initialize the DB_MPOOL structure. */
-	if ((dbmp = (DB_MPOOL *)calloc(1, sizeof(DB_MPOOL))) == NULL)
+	if ((dbmp = (DB_MPOOL *)__db_calloc(1, sizeof(DB_MPOOL))) == NULL)
 		return (ENOMEM);
 	LIST_INIT(&dbmp->dbregq);
 	TAILQ_INIT(&dbmp->dbmfq);
@@ -62,8 +62,7 @@ memp_open(path, flags, mode, dbenv, retp)
 	dbmp->dbenv = dbenv;
 
 	/* Decide if it's possible for anyone else to access the pool. */
-	if ((dbenv == NULL && path == NULL) ||
-	    (dbenv != NULL && F_ISSET(dbenv, DB_MPOOL_PRIVATE)))
+	if ((dbenv == NULL && path == NULL) || LF_ISSET(DB_MPOOL_PRIVATE))
 		F_SET(dbmp, MP_ISPRIVATE);
 
 	/*
@@ -183,7 +182,7 @@ memp_register(dbmp, ftype, pgin, pgout)
 {
 	DB_MPREG *mpr;
 
-	if ((mpr = (DB_MPREG *)malloc(sizeof(DB_MPREG))) == NULL)
+	if ((mpr = (DB_MPREG *)__db_malloc(sizeof(DB_MPREG))) == NULL)
 		return (ENOMEM);
 
 	mpr->ftype = ftype;
diff --git a/db2/mp/mp_pr.c b/db2/mp/mp_pr.c
index 7794cfa7f3..01f0920df4 100644
--- a/db2/mp/mp_pr.c
+++ b/db2/mp/mp_pr.c
@@ -7,7 +7,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)mp_pr.c	10.13 (Sleepycat) 8/27/97";
+static const char sccsid[] = "@(#)mp_pr.c	10.18 (Sleepycat) 11/1/97";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -53,7 +53,7 @@ memp_stat(dbmp, gspp, fspp, db_malloc)
 		*gspp = NULL;
 
 		if ((*gspp = db_malloc == NULL ?
-		    (DB_MPOOL_STAT *)malloc(sizeof(**gspp)) :
+		    (DB_MPOOL_STAT *)__db_malloc(sizeof(**gspp)) :
 		    (DB_MPOOL_STAT *)db_malloc(sizeof(**gspp))) == NULL)
 			return (ENOMEM);
 
@@ -62,6 +62,10 @@ memp_stat(dbmp, gspp, fspp, db_malloc)
 		/* Copy out the global statistics. */
 		**gspp = dbmp->mp->stat;
 		(*gspp)->st_hash_buckets = dbmp->mp->htab_buckets;
+		(*gspp)->st_region_wait =
+		    dbmp->mp->rlayout.lock.mutex_set_wait;
+		(*gspp)->st_region_nowait =
+		    dbmp->mp->rlayout.lock.mutex_set_nowait;
 
 		UNLOCKREGION(dbmp);
 	}
@@ -85,7 +89,7 @@ memp_stat(dbmp, gspp, fspp, db_malloc)
 		/* Allocate space for the pointers. */
 		len = (len + 1) * sizeof(DB_MPOOL_FSTAT *);
 		if ((*fspp = db_malloc == NULL ?
-		    (DB_MPOOL_FSTAT **)malloc(len) :
+		    (DB_MPOOL_FSTAT **)__db_malloc(len) :
 		    (DB_MPOOL_FSTAT **)db_malloc(len)) == NULL)
 			return (ENOMEM);
 
@@ -96,11 +100,11 @@ memp_stat(dbmp, gspp, fspp, db_malloc)
 		    mfp = SH_TAILQ_FIRST(&dbmp->mp->mpfq, __mpoolfile);
 		    mfp != NULL;
 		    ++tfsp, mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile)) {
-			name = ADDR(dbmp, mfp->path_off);
+			name = R_ADDR(dbmp, mfp->path_off);
 			nlen = strlen(name);
 			len = sizeof(DB_MPOOL_FSTAT) + nlen + 1;
 			if ((*tfsp = db_malloc == NULL ?
-			    (DB_MPOOL_FSTAT *)malloc(len) :
+			    (DB_MPOOL_FSTAT *)__db_malloc(len) :
 			    (DB_MPOOL_FSTAT *)db_malloc(len)) == NULL)
 				return (ENOMEM);
 			**tfsp = mfp->stat;
@@ -200,18 +204,19 @@ __memp_pmp(fp, dbmp, mp, data)
 	(void)fprintf(fp, "references: %lu; cachesize: %lu\n",
 	    (u_long)mp->rlayout.refcnt, (u_long)mp->stat.st_cachesize);
 	(void)fprintf(fp,
-	    "    %lu pages created\n", mp->stat.st_page_create);
+	    "    %lu pages created\n", (u_long)mp->stat.st_page_create);
 	(void)fprintf(fp,
-	    "    %lu mmap pages returned\n", mp->stat.st_map);
+	    "    %lu mmap pages returned\n", (u_long)mp->stat.st_map);
 	(void)fprintf(fp, "    %lu I/O's (%lu read, %lu written)\n",
-	    mp->stat.st_page_in + mp->stat.st_page_out,
-	    mp->stat.st_page_in, mp->stat.st_page_out);
+	    (u_long)mp->stat.st_page_in + mp->stat.st_page_out,
+	    (u_long)mp->stat.st_page_in, (u_long)mp->stat.st_page_out);
 	if (mp->stat.st_cache_hit + mp->stat.st_cache_miss != 0)
 		(void)fprintf(fp,
 		    "    %.0f%% cache hit rate (%lu hit, %lu miss)\n",
 		    ((double)mp->stat.st_cache_hit /
 	    (mp->stat.st_cache_hit + mp->stat.st_cache_miss)) * 100,
-		    mp->stat.st_cache_hit, mp->stat.st_cache_miss);
+		    (u_long)mp->stat.st_cache_hit,
+		    (u_long)mp->stat.st_cache_miss);
 
 	/* Display the MPOOLFILE structures. */
 	for (cnt = 0, mfp = SH_TAILQ_FIRST(&dbmp->mp->mpfq, __mpoolfile);
@@ -230,17 +235,18 @@ __memp_pmp(fp, dbmp, mp, data)
 	(void)fprintf(fp, "%s\nHASH table of BH's (%lu buckets):\n",
 	    DB_LINE, (u_long)mp->htab_buckets);
 	(void)fprintf(fp,
-	    "longest chain searched %lu\n", mp->stat.st_hash_longest);
+	    "longest chain searched %lu\n", (u_long)mp->stat.st_hash_longest);
 	(void)fprintf(fp, "average chain searched %lu (total/calls: %lu/%lu)\n",
-	    mp->stat.st_hash_examined /
+	    (u_long)mp->stat.st_hash_examined /
 	    (mp->stat.st_hash_searches ? mp->stat.st_hash_searches : 1),
-	    mp->stat.st_hash_examined, mp->stat.st_hash_searches);
+	    (u_long)mp->stat.st_hash_examined,
+	    (u_long)mp->stat.st_hash_searches);
 	for (htabp = dbmp->htab,
 	    bucket = 0; bucket < mp->htab_buckets; ++htabp, ++bucket) {
 		if (SH_TAILQ_FIRST(&dbmp->htab[bucket], __bh) != NULL)
 			(void)fprintf(fp, "%lu:\n", (u_long)bucket);
 		for (bhp = SH_TAILQ_FIRST(&dbmp->htab[bucket], __bh);
-		    bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, mq, __bh))
+		    bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, hq, __bh))
 			__memp_pbh(fp, dbmp, bhp, data);
 	}
 
@@ -249,7 +255,7 @@ __memp_pmp(fp, dbmp, mp, data)
 	for (sep = "\n    ", bhp = SH_TAILQ_FIRST(&dbmp->mp->bhq, __bh);
 	    bhp != NULL; sep = ", ", bhp = SH_TAILQ_NEXT(bhp, q, __bh))
 		(void)fprintf(fp, "%s%lu/%lu", sep,
-		    (u_long)bhp->pgno, (u_long)OFFSET(dbmp, bhp));
+		    (u_long)bhp->pgno, (u_long)R_OFFSET(dbmp, bhp));
 	(void)fprintf(fp, "\n");
 }
 
@@ -263,16 +269,18 @@ __memp_pmf(fp, mfp, data)
 	MPOOLFILE *mfp;
 	int data;
 {
-	(void)fprintf(fp, "    %lu pages created\n", mfp->stat.st_page_create);
+	(void)fprintf(fp, "    %lu pages created\n",
+	    (u_long)mfp->stat.st_page_create);
 	(void)fprintf(fp, "    %lu I/O's (%lu read, %lu written)\n",
-	    mfp->stat.st_page_in + mfp->stat.st_page_out,
-	    mfp->stat.st_page_in, mfp->stat.st_page_out);
+	    (u_long)mfp->stat.st_page_in + mfp->stat.st_page_out,
+	    (u_long)mfp->stat.st_page_in, (u_long)mfp->stat.st_page_out);
 	if (mfp->stat.st_cache_hit + mfp->stat.st_cache_miss != 0)
 		(void)fprintf(fp,
 		    "    %.0f%% cache hit rate (%lu hit, %lu miss)\n",
 		    ((double)mfp->stat.st_cache_hit /
 		    (mfp->stat.st_cache_hit + mfp->stat.st_cache_miss)) * 100,
-		    mfp->stat.st_cache_hit, mfp->stat.st_cache_miss);
+		    (u_long)mfp->stat.st_cache_hit,
+		    (u_long)mfp->stat.st_cache_miss);
 	if (!data)
 		return;
 
@@ -298,7 +306,7 @@ __memp_pbh(fp, dbmp, bhp, data)
 		return;
 
 	(void)fprintf(fp, "    BH @ %lu (mf: %lu): page %lu; ref %lu",
-	    (u_long)OFFSET(dbmp, bhp),
+	    (u_long)R_OFFSET(dbmp, bhp),
 	    (u_long)bhp->mf_offset, (u_long)bhp->pgno, (u_long)bhp->ref);
 	sep = "; ";
 	if (F_ISSET(bhp, BH_DIRTY)) {
diff --git a/db2/mp/mp_region.c b/db2/mp/mp_region.c
index a5c52123b9..6b2f93125c 100644
--- a/db2/mp/mp_region.c
+++ b/db2/mp/mp_region.c
@@ -7,7 +7,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)mp_region.c	10.11 (Sleepycat) 8/2/97";
+static const char sccsid[] = "@(#)mp_region.c	10.16 (Sleepycat) 10/25/97";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -51,7 +51,7 @@ __memp_ralloc(dbmp, len, offsetp, retp)
 	nomore = 0;
 alloc:	if ((ret = __db_shalloc(dbmp->addr, len, MUTEX_ALIGNMENT, &p)) == 0) {
 		if (offsetp != NULL)
-			*offsetp = OFFSET(dbmp, p);
+			*offsetp = R_OFFSET(dbmp, p);
 		*(void **)retp = p;
 		return (0);
 	}
@@ -68,7 +68,7 @@ alloc:	if ((ret = __db_shalloc(dbmp->addr, len, MUTEX_ALIGNMENT, &p)) == 0) {
 		if (__db_shsizeof(bhp) == len) {
 			SH_TAILQ_REMOVE(&mp->bhfq, bhp, q, __bh);
 			if (offsetp != NULL)
-				*offsetp = OFFSET(dbmp, bhp);
+				*offsetp = R_OFFSET(dbmp, bhp);
 			*(void **)retp = bhp;
 			return (0);
 		}
@@ -82,6 +82,7 @@ alloc:	if ((ret = __db_shalloc(dbmp->addr, len, MUTEX_ALIGNMENT, &p)) == 0) {
 
 		SH_TAILQ_REMOVE(&mp->bhfq, bhp, q, __bh);
 		__db_shalloc_free(dbmp->addr, bhp);
+		--mp->stat.st_page_clean;
 
 		/*
 		 * Retry as soon as we've freed up sufficient space.  If we
@@ -104,7 +105,7 @@ retry:	/* Find a buffer we can flush; pure LRU. */
 			continue;
 
 		/* Find the associated MPOOLFILE. */
-		mfp = ADDR(dbmp, bhp->mf_offset);
+		mfp = R_ADDR(dbmp, bhp->mf_offset);
 
 		/*
 		 * Write the page if it's dirty.
@@ -135,8 +136,7 @@ retry:	/* Find a buffer we can flush; pure LRU. */
 			else {
 				if (restart)
 					goto retry;
-				else
-					continue;
+				continue;
 			}
 		} else
 			++mp->stat.st_ro_evict;
@@ -150,7 +150,7 @@ retry:	/* Find a buffer we can flush; pure LRU. */
 			__memp_bhfree(dbmp, mfp, bhp, 0);
 
 			if (offsetp != NULL)
-				*offsetp = OFFSET(dbmp, bhp);
+				*offsetp = R_OFFSET(dbmp, bhp);
 			*(void **)retp = bhp;
 			return (0);
 		}
@@ -225,9 +225,13 @@ retry:	if (LF_ISSET(DB_CREATE)) {
 		 * be possible for DB_THREAD to be set if HAVE_SPINLOCKS aren't
 		 * defined.
 		 */
-		if (F_ISSET(dbmp, MP_ISPRIVATE))
-			ret = (dbmp->maddr = malloc(rlen)) == NULL ? ENOMEM : 0;
-		else
+		if (F_ISSET(dbmp, MP_ISPRIVATE)) {
+			if ((dbmp->maddr = __db_malloc(rlen)) == NULL)
+				ret = ENOMEM;
+			else
+				ret = __db_rinit(dbmp->dbenv,
+				    dbmp->maddr, 0, rlen, 0);
+		} else
 			ret = __db_rcreate(dbmp->dbenv, DB_APP_NONE, path,
 			    DB_DEFAULT_MPOOL_FILE, mode, rlen, &fd,
 			    &dbmp->maddr);
@@ -259,7 +263,10 @@ retry:	if (LF_ISSET(DB_CREATE)) {
 			    0, &dbmp->htab)) != 0)
 				goto err;
 			__db_hashinit(dbmp->htab, mp->htab_buckets);
-			mp->htab = OFFSET(dbmp, dbmp->htab);
+			mp->htab = R_OFFSET(dbmp, dbmp->htab);
+
+			ZERO_LSN(mp->lsn);
+			mp->lsn_cnt = 0;
 
 			memset(&mp->stat, 0, sizeof(mp->stat));
 			mp->stat.st_cachesize = cachesize;
@@ -303,7 +310,7 @@ retry:	if (LF_ISSET(DB_CREATE)) {
 	 * Get the hash table address; it's on the shared page, so we have
 	 * to lock first.
 	 */
-	dbmp->htab = ADDR(dbmp, dbmp->mp->htab);
+	dbmp->htab = R_ADDR(dbmp, dbmp->mp->htab);
 
 	dbmp->fd = fd;
 
@@ -333,7 +340,7 @@ __memp_rclose(dbmp)
 	DB_MPOOL *dbmp;
 {
 	if (F_ISSET(dbmp, MP_ISPRIVATE)) {
-		free(dbmp->maddr);
+		__db_free(dbmp->maddr);
 		return (0);
 	}
 	return (__db_rclose(dbmp->dbenv, dbmp->fd, dbmp->maddr));
diff --git a/db2/mp/mp_sync.c b/db2/mp/mp_sync.c
index 65b2a18267..2f042df9e1 100644
--- a/db2/mp/mp_sync.c
+++ b/db2/mp/mp_sync.c
@@ -7,13 +7,14 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)mp_sync.c	10.9 (Sleepycat) 8/29/97";
+static const char sccsid[] = "@(#)mp_sync.c	10.15 (Sleepycat) 11/1/97";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
 #include <sys/types.h>
 
 #include <errno.h>
+#include <stdlib.h>
 #include <string.h>
 #endif
 
@@ -23,6 +24,8 @@ static const char sccsid[] = "@(#)mp_sync.c	10.9 (Sleepycat) 8/29/97";
 #include "mp.h"
 #include "common_ext.h"
 
+static int __bhcmp __P((const void *, const void *));
+
 /*
  * memp_sync --
  *	Mpool sync function.
@@ -32,27 +35,40 @@ memp_sync(dbmp, lsnp)
 	DB_MPOOL *dbmp;
 	DB_LSN *lsnp;
 {
-	BH *bhp;
+	BH *bhp, **bharray;
 	DB_ENV *dbenv;
 	MPOOL *mp;
 	MPOOLFILE *mfp;
-	int can_write, wrote, lsn_cnt, restart, ret;
+	int ar_cnt, cnt, nalloc, next, notused, ret, wrote;
 
 	dbenv = dbmp->dbenv;
 
-	if (dbmp->dbenv->lg_info == NULL) {
-		__db_err(dbenv, "memp_sync requires logging");
+	if (dbenv->lg_info == NULL) {
+		__db_err(dbenv, "memp_sync: requires logging");
 		return (EINVAL);
 	}
 
+	/*
+	 * We try and write the buffers in page order so that the underlying
+	 * filesystem doesn't have to seek and can write contiguous blocks,
+	 * plus, we don't want to hold the region lock while we write the
+	 * buffers.  Get memory to hold the buffer pointers.  Get a good-size
+	 * block, too, because we realloc while holding the region lock if we
+	 * run out.
+	 */
+	if ((bharray =
+	    (BH **)__db_malloc((nalloc = 1024) * sizeof(BH *))) == NULL)
+		return (ENOMEM);
+
 	LOCKREGION(dbmp);
 
 	/*
-	 * If the application is asking about a previous call, and we haven't
-	 * found any buffers that the application holding the pin couldn't
-	 * write, return yes or no based on the current count.  Note, if the
-	 * application is asking about a LSN *smaller* than one we've already
-	 * handled, then we return based on the count for that LSN.
+	 * If the application is asking about a previous call to memp_sync(),
+	 * and we haven't found any buffers that the application holding the
+	 * pin couldn't write, return yes or no based on the current count.
+	 * Note, if the application is asking about a LSN *smaller* than one
+	 * we've already handled or are currently handling, then we return a
+	 * result based on the count for the larger LSN.
 	 */
 	mp = dbmp->mp;
 	if (!F_ISSET(mp, MP_LSN_RETRY) && log_compare(lsnp, &mp->lsn) <= 0) {
@@ -61,9 +77,7 @@ memp_sync(dbmp, lsnp)
 			ret = 0;
 		} else
 			ret = DB_INCOMPLETE;
-
-		UNLOCKREGION(dbmp);
-		return (ret);
+		goto done;
 	}
 
 	/* Else, it's a new checkpoint. */
@@ -74,7 +88,7 @@ memp_sync(dbmp, lsnp)
 	 * for which we were already doing a checkpoint.  (BTW, I don't expect
 	 * to see multiple LSN's from the same or multiple processes, but You
 	 * Just Never Know.  Responding as if they all called with the largest
-	 * of the LSNs specified makes everything work.
+	 * of the LSNs specified makes everything work.)
 	 *
 	 * We don't currently use the LSN we save.  We could potentially save
 	 * the last-written LSN in each buffer header and use it to determine
@@ -93,64 +107,127 @@ memp_sync(dbmp, lsnp)
 
 	/*
 	 * Walk the list of buffers and mark all dirty buffers to be written
-	 * and all pinned buffers to be potentially written.  We do this in
-	 * single fell swoop while holding the region locked so that processes
-	 * can't make new buffers dirty, causing us to never finish.  Since
-	 * the application may have restarted the sync, clear any BH_WRITE
-	 * flags that appear to be left over.
+	 * and all pinned buffers to be potentially written (we can't know if
+	 * we'll need to write them until the holding process returns them to
+	 * the cache).  We do this in one pass while holding the region locked
+	 * so that processes can't make new buffers dirty, causing us to never
+	 * finish.  Since the application may have restarted the sync, clear
+	 * any BH_WRITE flags that appear to be left over from previous calls.
+	 *
+	 * Keep a count of the total number of buffers we need to write in
+	 * MPOOL->lsn_cnt, and for each file, in MPOOLFILE->lsn_cnt.
 	 */
-	can_write = lsn_cnt = 0;
-	for (lsn_cnt = 0, bhp = SH_TAILQ_FIRST(&mp->bhq, __bh);
+	ar_cnt = 0;
+	for (bhp = SH_TAILQ_FIRST(&mp->bhq, __bh);
 	    bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, q, __bh))
 		if (F_ISSET(bhp, BH_DIRTY) || bhp->ref != 0) {
 			F_SET(bhp, BH_WRITE);
 
-			if (bhp->ref == 0)
-				can_write = 1;
+			++mp->lsn_cnt;
 
-			mfp = ADDR(dbmp, bhp->mf_offset);
+			mfp = R_ADDR(dbmp, bhp->mf_offset);
 			++mfp->lsn_cnt;
 
-			++lsn_cnt;
+			/*
+			 * If the buffer isn't in use, we should be able to
+			 * write it immediately, so save a reference to it.
+			 */
+			if (bhp->ref == 0) {
+				if (ar_cnt == nalloc) {
+					nalloc *= 2;
+					if ((bharray =
+					    (BH **)__db_realloc(bharray,
+					    nalloc * sizeof(BH *))) == NULL) {
+						ret = ENOMEM;
+						goto err;
+					}
+				}
+				bharray[ar_cnt++] = bhp;
+			}
 		} else
 			F_CLR(bhp, BH_WRITE);
 
-	mp->lsn_cnt = lsn_cnt;
-
-	/* If there no buffers we can write, we're done. */
-	if (!can_write) {
-		UNLOCKREGION(dbmp);
-		return (mp->lsn_cnt ? DB_INCOMPLETE : 0);
+	/* If there are no buffers we can write immediately, we're done. */
+	if (ar_cnt == 0) {
+		ret = mp->lsn_cnt ? DB_INCOMPLETE : 0;
+		goto done;
 	}
 
-	/*
-	 * Write any buffers that we can.  Restart the walk after each write,
-	 * __memp_pgwrite() discards and reacquires the region lock during I/O.
-	 */
-retry:	for (bhp = SH_TAILQ_FIRST(&mp->bhq, __bh);
-	    bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, q, __bh)) {
-		/* Ignore pinned or locked buffers. */
-		if (!F_ISSET(bhp, BH_WRITE) ||
-		    bhp->ref != 0 || F_ISSET(bhp, BH_LOCKED))
-			continue;
+	/* Lock down the buffers and their contents. */
+	for (cnt = 0; cnt < ar_cnt; ++cnt)
+		++bharray[cnt]->ref;
 
-		mfp = ADDR(dbmp, bhp->mf_offset);
-		if ((ret =
-		    __memp_bhwrite(dbmp, mfp, bhp, &restart, &wrote)) != 0)
-			goto err;
-		if (wrote) {
-			if (restart)
-				goto retry;
+	UNLOCKREGION(dbmp);
+
+	/* Sort the buffers we're going to write. */
+	qsort(bharray, ar_cnt, sizeof(BH *), __bhcmp);
+
+	LOCKREGION(dbmp);
+
+	/* Walk the array, writing buffers. */
+	for (next = 0; next < ar_cnt; ++next) {
+		/*
+		 * It's possible for a thread to have gotten the buffer since
+		 * we listed it for writing.  If the reference count is still
+		 * 1, we're the only ones using the buffer, go ahead and write.
+		 * If it's >1, then skip the buffer and assume that it will be
+		 * written when it's returned to the cache.
+		 */
+		if (bharray[next]->ref > 1) {
+			--bharray[next]->ref;
 			continue;
 		}
-		__db_err(dbenv, "%s: unable to flush page: %lu",
-		    ADDR(dbmp, mfp->path_off), (u_long)bhp->pgno);
-		ret = EPERM;
-		goto err;
+
+		/* Write the buffer. */
+		mfp = R_ADDR(dbmp, bharray[next]->mf_offset);
+		ret =
+		    __memp_bhwrite(dbmp, mfp, bharray[next], &notused, &wrote);
+
+		/* Release the buffer. */
+		--bharray[next]->ref;
+
+		/* On error, release the rest; cnt marks the failed one. */
+		if (ret != 0 || !wrote) {
+			for (cnt = next; ++next < ar_cnt;)
+				--bharray[next]->ref;
+
+			if (ret != 0)
+				goto err;
+
+			/*
+			 * Any process syncing the shared memory buffer pool
+			 * had better be able to write to any underlying file.
+			 * Be understanding, but firm, on this point.
+			 */
+			if (!wrote) {
+				__db_err(dbenv, "%s: unable to flush page: %lu",
+				    R_ADDR(dbmp, mfp->path_off),
+				    (u_long)bharray[cnt]->pgno);
+				ret = EPERM;
+				goto err;
+			}
+		}
 	}
 	ret = mp->lsn_cnt ? DB_INCOMPLETE : 0;
 
-err:	UNLOCKREGION(dbmp);
+done:
+	if (0) {
+err:		/*
+		 * On error, clear:
+		 *	MPOOL->lsn_cnt (the total sync count)
+		 *	MPOOLFILE->lsn_cnt (the per-file sync count)
+		 *	BH_WRITE flag (the scheduled for writing flag)
+		 */
+		mp->lsn_cnt = 0;
+		for (mfp = SH_TAILQ_FIRST(&dbmp->mp->mpfq, __mpoolfile);
+		    mfp != NULL; mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile))
+			mfp->lsn_cnt = 0;
+		for (bhp = SH_TAILQ_FIRST(&mp->bhq, __bh);
+		    bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, q, __bh))
+			F_CLR(bhp, BH_WRITE);
+	}
+	UNLOCKREGION(dbmp);
+	__db_free(bharray);
 	return (ret);
 }
 
@@ -162,10 +239,10 @@ int
 memp_fsync(dbmfp)
 	DB_MPOOLFILE *dbmfp;
 {
-	BH *bhp;
+	BH *bhp, **bharray;
 	DB_MPOOL *dbmp;
 	size_t mf_offset;
-	int pincnt, restart, ret, wrote;
+	int ar_cnt, cnt, nalloc, next, pincnt, notused, ret, wrote;
 
 	/*
 	 * If this handle doesn't have a file descriptor that's open for
@@ -175,35 +252,205 @@ memp_fsync(dbmfp)
 	if (F_ISSET(dbmfp, MP_READONLY | MP_PATH_TEMP))
 		return (0);
 
-	dbmp = dbmfp->dbmp;
 	ret = 0;
+	dbmp = dbmfp->dbmp;
+	mf_offset = R_OFFSET(dbmp, dbmfp->mfp);
 
-	mf_offset = OFFSET(dbmp, dbmfp->mfp);
+	/*
+	 * We try and write the buffers in page order so that the underlying
+	 * filesystem doesn't have to seek and can write contiguous blocks,
+	 * plus, we don't want to hold the region lock while we write the
+	 * buffers.  Get memory to hold the buffer pointers.  Get a good-size
+	 * block, too, because we realloc while holding the region lock if we
+	 * run out.
+	 */
+	nalloc = 1024;
+	if ((bharray =
+	    (BH **)__db_malloc((size_t)nalloc * sizeof(BH *))) == NULL)
+		return (ENOMEM);
 
 	LOCKREGION(dbmp);
 
 	/*
-	 * Walk the list of buffer headers for the MPOOLFILE, and write out any
-	 * dirty buffers that we can.
+	 * Walk the LRU list of buffer headers, and get a list of buffers to
+	 * write for this MPOOLFILE.
 	 */
-retry:	pincnt = 0;
+	ar_cnt = pincnt = 0;
 	for (bhp = SH_TAILQ_FIRST(&dbmp->mp->bhq, __bh);
-	    bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, q, __bh))
-		if (F_ISSET(bhp, BH_DIRTY) && bhp->mf_offset == mf_offset) {
-			if (bhp->ref != 0 || F_ISSET(bhp, BH_LOCKED)) {
-				++pincnt;
-				continue;
-			}
-			if ((ret =
-			    __memp_pgwrite(dbmfp, bhp, &restart, &wrote)) != 0)
+	    bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, q, __bh)) {
+		if (!F_ISSET(bhp, BH_DIRTY) || bhp->mf_offset != mf_offset)
+			continue;
+		if (bhp->ref != 0 || F_ISSET(bhp, BH_LOCKED)) {
+			++pincnt;
+			continue;
+		}
+
+		if (ar_cnt == nalloc) {
+			nalloc *= 2;
+			if ((bharray = (BH **)__db_realloc(bharray,
+			    nalloc * sizeof(BH *))) == NULL) {
+				ret = ENOMEM;
 				goto err;
-			if (!wrote)
-				++pincnt;
-			if (restart)
-				goto retry;
+			}
+		}
+
+		bharray[ar_cnt++] = bhp;
+	}
+
+	/* Lock down the buffers and their contents. */
+	for (cnt = 0; cnt < ar_cnt; ++cnt)
+		++bharray[cnt]->ref;
+
+	UNLOCKREGION(dbmp);
+
+	/* Sort the buffers we're going to write. */
+	qsort(bharray, ar_cnt, sizeof(BH *), __bhcmp);
+
+	LOCKREGION(dbmp);
+
+	/* Walk the array, writing buffers. */
+	for (next = 0; next < ar_cnt; ++next) {
+		/*
+		 * It's possible for a thread to have gotten the buffer since
+		 * we listed it for writing.  If the reference count is still
+		 * 1, we're the only ones using the buffer, go ahead and write.
+		 * If it's >1, then skip the buffer and assume that it will be
+		 * written when it's returned to the cache.
+		 */
+		if (bharray[next]->ref > 1) {
+			++pincnt;
+
+			--bharray[next]->ref;
+			continue;
 		}
 
+		/* Write the buffer. */
+		ret = __memp_pgwrite(dbmfp, bharray[next], &notused, &wrote);
+
+		/* Release the buffer. */
+		--bharray[next]->ref;
+
+		/* If there's an error, release the rest of the buffers. */
+		if (ret != 0) {
+			while (++next < ar_cnt)
+				--bharray[next]->ref;
+			goto err;
+		}
+		if (!wrote)
+			++pincnt;
+	}
+
+err:	UNLOCKREGION(dbmp);
+
+	__db_free(bharray);
+
+	/*
+	 * Sync the underlying file as the last thing we do, so that the OS
+	 * has maximal opportunity to flush buffers before we request it.
+	 *
+	 * XXX:
+	 * Don't lock the region around the sync, fsync(2) has no atomicity
+	 * issues.
+	 */
+	if (ret == 0)
+		return (pincnt == 0 ? __db_fsync(dbmfp->fd) : DB_INCOMPLETE);
+	return (ret);
+
+}
+
+/*
+ * memp_trickle --
+ *	Keep pct percent of the buffers clean; *nwrotep gets the write count.
+ */
+int
+memp_trickle(dbmp, pct, nwrotep)
+	DB_MPOOL *dbmp;
+	int pct, *nwrotep;
+{
+	BH *bhp;
+	MPOOL *mp;
+	MPOOLFILE *mfp;
+	u_long total;
+	int notused, ret, wrote;
+
+	mp = dbmp->mp;
+	if (nwrotep != NULL)
+		*nwrotep = 0;
+
+	if (pct < 1 || pct > 100)
+		return (EINVAL);
+
+	LOCKREGION(dbmp);
+
+	/*
+	 * If there are sufficient clean buffers, or no buffers or no dirty
+	 * buffers, we're done.
+	 *
+	 * XXX
+	 * Using st_page_clean and st_page_dirty is our only choice at the
+	 * moment, but it's not as correct as we might like in the presence
+	 * of pools with more than one buffer size, as a free 512-byte buffer
+	 * isn't the same as a free 8K buffer.
+	 */
+loop:	total = mp->stat.st_page_clean + mp->stat.st_page_dirty;
+	if (total == 0 || mp->stat.st_page_dirty == 0 ||
+	    (mp->stat.st_page_clean * 100) / total >= (u_long)pct) {
+		UNLOCKREGION(dbmp);
+		return (0);
+	}
+
+	/* Write the first unpinned, unlocked dirty buffer, then recheck. */
+	for (bhp = SH_TAILQ_FIRST(&mp->bhq, __bh);
+	    bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, q, __bh)) {
+		if (bhp->ref != 0 ||
+		    !F_ISSET(bhp, BH_DIRTY) || F_ISSET(bhp, BH_LOCKED))
+			continue;
+
+		mfp = R_ADDR(dbmp, bhp->mf_offset);
+		if ((ret =
+		    __memp_bhwrite(dbmp, mfp, bhp, &notused, &wrote)) != 0)
+			goto err;
+
+		/*
+		 * Any process syncing the shared memory buffer pool
+		 * had better be able to write to any underlying file.
+		 * Be understanding, but firm, on this point.
+		 */
+		if (!wrote) {
+			__db_err(dbmp->dbenv, "%s: unable to flush page: %lu",
+			    R_ADDR(dbmp, mfp->path_off), (u_long)bhp->pgno);
+			ret = EPERM;
+			goto err;
+		}
+
+		++mp->stat.st_page_trickle;
+		if (nwrotep != NULL)
+			++*nwrotep;
+		goto loop;
+	}
+
+	/* No more buffers to write; fall through to unlock the region. */
+	ret = 0;
+
+err:	UNLOCKREGION(dbmp);
+	return (ret);
+}
+
+static int
+__bhcmp(p1, p2)
+	const void *p1, *p2;
+{
+	BH *bhp1, *bhp2;
+
+	bhp1 = *(BH **)p1;
+	bhp2 = *(BH **)p2;
+
+	/* Sort by file (shared memory pool offset). */
+	if (bhp1->mf_offset < bhp2->mf_offset)
+		return (-1);
+	if (bhp1->mf_offset > bhp2->mf_offset)
+		return (1);
 
-	return (ret == 0 ? (pincnt ? DB_INCOMPLETE : 0) : ret);
+	/* Sort by page in file, returning 0 when the page numbers match. */
+	return (bhp1->pgno < bhp2->pgno ? -1 : bhp1->pgno > bhp2->pgno);
 }
diff --git a/db2/mutex/mutex.c b/db2/mutex/mutex.c
index 5315b2d3fe..7c8ea6ebd1 100644
--- a/db2/mutex/mutex.c
+++ b/db2/mutex/mutex.c
@@ -8,7 +8,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)mutex.c	10.25 (Sleepycat) 9/23/97";
+static const char sccsid[] = "@(#)mutex.c	10.28 (Sleepycat) 10/31/97";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -67,6 +67,10 @@ static const char sccsid[] = "@(#)mutex.c	10.25 (Sleepycat) 9/23/97";
 #define	TSL_UNSET(x)	_lock_clear(x)
 #endif
 
+#ifdef HAVE_ASSEM_SCO_CC
+#include "sco.cc"
+#endif
+
 #ifdef HAVE_ASSEM_SPARC_GCC
 #include "sparc.gcc"
 #endif
@@ -138,13 +142,12 @@ __db_mutex_init(mp, off)
  * __db_mutex_lock
  *	Lock on a mutex, logically blocking if necessary.
  *
- * PUBLIC: int __db_mutex_lock __P((db_mutex_t *, int, int (*)(void)));
+ * PUBLIC: int __db_mutex_lock __P((db_mutex_t *, int));
  */
 int
-__db_mutex_lock(mp, fd, yield)
+__db_mutex_lock(mp, fd)
 	db_mutex_t *mp;
 	int fd;
-	int (*yield) __P((void));
 {
 	u_long usecs;
 
@@ -166,17 +169,15 @@ __db_mutex_lock(mp, fd, yield)
 				}
 				mp->pid = getpid();
 #endif
-#ifdef MUTEX_STATISTICS
 				if (usecs == MS(10))
 					++mp->mutex_set_nowait;
 				else
 					++mp->mutex_set_wait;
-#endif
 				return (0);
 			}
 
 		/* Yield the processor; wait 10ms initially, up to 1 second. */
-		if (yield == NULL || yield() != 0) {
+		if (__db_yield == NULL || __db_yield() != 0) {
 			(void)__db_sleep(0, usecs);
 			if ((usecs <<= 1) > SECOND)
 				usecs = SECOND;
@@ -200,7 +201,7 @@ __db_mutex_lock(mp, fd, yield)
 		 * up to 1 second.
 		 */
 		for (usecs = MS(10); mp->pid != 0;)
-			if (yield == NULL || yield() != 0) {
+			if (__db_yield == NULL || __db_yield() != 0) {
 				(void)__db_sleep(0, usecs);
 				if ((usecs <<= 1) > SECOND)
 					usecs = SECOND;
@@ -234,10 +235,6 @@ __db_mutex_lock(mp, fd, yield)
 		if (locked)
 			break;
 	}
-
-#ifdef MUTEX_STATISTICS
-	++mp->mutex_set_wait;
-#endif
 	return (0);
 #endif /* !HAVE_SPINLOCKS */
 }
diff --git a/db2/os/db_os_abs.c b/db2/os/db_os_abs.c
deleted file mode 100644
index 8795205839..0000000000
--- a/db2/os/db_os_abs.c
+++ /dev/null
@@ -1,82 +0,0 @@
-/*-
- * See the file LICENSE for redistribution information.
- *
- * Copyright (c) 1997
- *	Sleepycat Software.  All rights reserved.
- */
-
-#include "config.h"
-
-#ifndef lint
-static const char sccsid[] = "@(#)db_os_abs.c	10.5 (Sleepycat) 7/5/97";
-#endif /* not lint */
-
-#ifndef NO_SYSTEM_INCLUDES
-#include <string.h>
-#endif
-
-#include "db_int.h"
-#include "os_ext.h"
-
-/*
- * __db_abspath --
- *	Return if a path is an absolute path.
- *
- * PUBLIC: int __db_abspath __P((const char *));
- */
-int
-__db_abspath(path)
-	const char *path;
-{
-#ifdef _WIN32
-	/*
-	 * !!!
-	 * Check for drive specifications, e.g., "C:".  In addition, the path
-	 * separator used by the win32 DB (PATH_SEPARATOR) is \; look for both
-	 * / and \ since these are user-input paths.
-	 */
-	if (isalpha(path[0]) && path[1] == ':')
-		path += 2;
-	return (path[0] == '/' || path[0] == '\\');
-#else
-#ifdef macintosh
-	/*
-	 * !!!
-	 * Absolute pathnames always start with a volume name, which must be
-	 * followed by a colon, thus they are of the form:
-	 *	volume: or volume:dir1:dir2:file
-	 *
-	 * Relative pathnames are either a single name without colons or a
-	 * path starting with a colon, thus of the form:
-	 *	file or :file or :dir1:dir2:file
-	 */
-	return (strchr(path, ':') != NULL && path[0] != ':');
-#else
-	return (path[0] == '/');
-#endif
-#endif
-}
-
-/*
- * __db_rpath --
- *	Return the last path separator in the path or NULL if none found.
- *
- * PUBLIC: char *__db_rpath __P((const char *));
- */
-char *
-__db_rpath(path)
-	const char *path;
-{
-	const char *s, *last;
-
-	last = NULL;
-	if (PATH_SEPARATOR[1] != '\0') {
-		for (s = path; s[0] != '\0'; ++s)
-			if (strchr(PATH_SEPARATOR, s[0]) != NULL)
-				last = s;
-	} else
-		for (s = path; s[0] != '\0'; ++s)
-			if (s[0] == PATH_SEPARATOR[0])
-				last = s;
-	return ((char *)last);
-}
diff --git a/db2/os/db_os_dir.c b/db2/os/db_os_dir.c
deleted file mode 100644
index 1206e3faa7..0000000000
--- a/db2/os/db_os_dir.c
+++ /dev/null
@@ -1,138 +0,0 @@
-/*-
- * See the file LICENSE for redistribution information.
- *
- * Copyright (c) 1997
- *	Sleepycat Software.  All rights reserved.
- */
-
-#include "config.h"
-
-#ifndef lint
-static const char sccsid[] = "@(#)db_os_dir.c	10.10 (Sleepycat) 9/17/97";
-#endif /* not lint */
-
-#ifndef NO_SYSTEM_INCLUDES
-#include <sys/types.h>
-
-#if HAVE_DIRENT_H
-# include <dirent.h>
-# define NAMLEN(dirent) strlen((dirent)->d_name)
-#else
-# define dirent direct
-# define NAMLEN(dirent) (dirent)->d_namlen
-# if HAVE_SYS_NDIR_H
-#  include <sys/ndir.h>
-# endif
-# if HAVE_SYS_DIR_H
-#  include <sys/dir.h>
-# endif
-# if HAVE_NDIR_H
-#  include <ndir.h>
-# endif
-#endif
-
-#include <errno.h>
-#include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
-#endif
-
-#include "db_int.h"
-#include "os_ext.h"
-#include "common_ext.h"
-
-/*
- * __db_dir --
- *	Return a list of the files in a directory.
- *
- * PUBLIC: int __db_dir __P((DB_ENV *, const char *, char ***, int *));
- */
-int
-__db_dir(dbenv, dir, namesp, cntp)
-	DB_ENV *dbenv;
-	const char *dir;
-	char ***namesp;
-	int *cntp;
-{
-	int arraysz, cnt;
-	char **names;
-#ifdef _WIN32
-	struct _finddata_t fdata;
-	long dirhandle;
-	int finished;
-	char filespec[MAX_PATH];
-
-	(void)snprintf(filespec, sizeof(filespec), "%s/*", dir);
-	if ((dirhandle = _findfirst(filespec, &fdata)) == -1) {
-		__db_err(dbenv, "%s: %s", filespec, strerror(errno));
-		return (errno);
-	}
-
-	names = NULL;
-	finished = 0;
-	for (arraysz = cnt = 0; finished != 1; ++cnt) {
-		if (cnt >= arraysz) {
-			arraysz += 100;
-			names = (char **)(names == NULL ?
-			    malloc(arraysz * sizeof(names[0])) :
-			    realloc(names, arraysz * sizeof(names[0])));
-			if (names == NULL)
-				goto nomem;
-		}
-		if ((names[cnt] = (char *)strdup(fdata.name)) == NULL)
-			goto nomem;
-		if (_findnext(dirhandle,&fdata) != 0)
-			finished = 1;
-	}
-	_findclose(dirhandle);
-#else /* !_WIN32 */
-	struct dirent *dp;
-	DIR *dirp;
-
-	if ((dirp = opendir(dir)) == NULL) {
-		__db_err(dbenv, "%s: %s", dir, strerror(errno));
-		return (errno);
-	}
-	names = NULL;
-	for (arraysz = cnt = 0; (dp = readdir(dirp)) != NULL; ++cnt) {
-		if (cnt >= arraysz) {
-			arraysz += 100;
-			names = (char **)(names == NULL ?
-			    malloc(arraysz * sizeof(names[0])) :
-			    realloc(names, arraysz * sizeof(names[0])));
-			if (names == NULL)
-				goto nomem;
-		}
-		if ((names[cnt] = (char *)strdup(dp->d_name)) == NULL)
-			goto nomem;
-	}
-	(void)closedir(dirp);
-#endif /* !_WIN32 */
-
-	*namesp = names;
-	*cntp = cnt;
-	return (0);
-
-nomem:	if (names != NULL)
-		__db_dirf(dbenv, names, cnt);
-	__db_err(dbenv, "%s", strerror(ENOMEM));
-	return (ENOMEM);
-}
-
-/*
- * __db_dirf --
- *	Free the list of files.
- *
- * PUBLIC: void __db_dirf __P((DB_ENV *, char **, int));
- */
-void
-__db_dirf(dbenv, names, cnt)
-	DB_ENV *dbenv;
-	char **names;
-	int cnt;
-{
-	dbenv = dbenv;			/* XXX: Shut the compiler up. */
-	while (cnt > 0)
-		free(names[--cnt]);
-	free (names);
-}
diff --git a/db2/os/db_os_lseek.c b/db2/os/db_os_lseek.c
deleted file mode 100644
index cecf0e156b..0000000000
--- a/db2/os/db_os_lseek.c
+++ /dev/null
@@ -1,60 +0,0 @@
-/*-
- * See the file LICENSE for redistribution information.
- *
- * Copyright (c) 1997
- *	Sleepycat Software.  All rights reserved.
- */
-
-#include "config.h"
-
-#ifndef lint
-static const char sccsid[] = "@(#)db_os_lseek.c	10.3 (Sleepycat) 6/28/97";
-#endif /* not lint */
-
-#ifndef NO_SYSTEM_INCLUDES
-#include <sys/types.h>
-
-#include <errno.h>
-#include <unistd.h>
-#endif
-
-#include "db_int.h"
-#include "os_ext.h"
-
-/*
- * __db_lseek --
- *	Seek to a page/byte offset in the file.
- *
- * PUBLIC: int __db_lseek __P((int, size_t, db_pgno_t, u_long, int));
- */
-int
-__db_lseek(fd, pgsize, pageno, relative, whence)
-	int fd;
-	size_t pgsize;
-	db_pgno_t pageno;
-	u_long relative;
-	int whence;
-{
-	/* 64-bit offsets are done differently by different vendors. */
-#undef	__LSEEK_SET
-#ifdef	HAVE_LLSEEK
-#define	__LSEEK_SET
-	offset_t offset;			/* Solaris. */
-
-	offset = pgsize * pageno + relative;
-	return (llseek(fd, offset, whence) == -1 ? errno : 0);
-#endif
-#ifdef	HAVE_LSEEKI
-#define	__LSEEK_SET
-	__int64 offset;				/* WNT */
-
-	offset = pgsize * pageno + relative;
-	return (_lseeki64(fd, offset, whence) == -1 ? errno : 0);
-#endif
-#ifndef	__LSEEK_SET
-	off_t offset;				/* Default. */
-
-	offset = pgsize * pageno + relative;
-	return (lseek(fd, offset, whence) == -1 ? errno : 0);
-#endif
-}
diff --git a/db2/os/db_os_mmap.c b/db2/os/db_os_mmap.c
deleted file mode 100644
index 0cd8fad0b0..0000000000
--- a/db2/os/db_os_mmap.c
+++ /dev/null
@@ -1,106 +0,0 @@
-/*-
- * See the file LICENSE for redistribution information.
- *
- * Copyright (c) 1996, 1997
- *	Sleepycat Software.  All rights reserved.
- */
-
-#include "config.h"
-
-#ifndef lint
-static const char sccsid[] = "@(#)db_os_mmap.c	10.4 (Sleepycat) 6/28/97";
-#endif /* not lint */
-
-#ifndef NO_SYSTEM_INCLUDES
-#include <sys/types.h>
-#include <sys/mman.h>
-
-#include <errno.h>
-#endif
-
-#include "db_int.h"
-#include "os_ext.h"
-
-/*
- * __db_mmap --
- *	Map in some shared memory backed by a file descriptor.
- *
- * PUBLIC: int __db_mmap __P((int, size_t, int, int, void *));
- */
-int
-__db_mmap(fd, len, is_private, rdonly, addr)
-	int fd, is_private, rdonly;
-	size_t len;
-	void *addr;
-{
-#ifdef _WIN32
-	/* We have not implemented copy-on-write here */
-	void * pMemory = 0;
-	HANDLE hFile = (HANDLE)_get_osfhandle(fd);
-	HANDLE hMemory = CreateFileMapping(
-	      hFile,
-	      0,
-	      (rdonly ? PAGE_READONLY : PAGE_READWRITE),
-	      0,
-	      len, /* This code fails if the library is ever compiled on a 64-bit machine */
-	      0
-	      );
-	if (NULL == hMemory)
-	{
-	      return errno;
-	}
-	pMemory = MapViewOfFile(
-	      hMemory,
-	      (rdonly ? FILE_MAP_READ : FILE_MAP_ALL_ACCESS),
-	      0,
-	      0,
-	      len
-	      );
-	CloseHandle(hMemory);
-	*(void **)addr = pMemory;
-	return 0;
-
-#else /* !_WIN32 */
-
-	void *p;
-	int flags, prot;
-
-	flags = is_private ? MAP_PRIVATE : MAP_SHARED;
-#ifdef MAP_HASSEMAPHORE
-	flags += MAP_HASSEMAPHORE;
-#endif
-	prot = PROT_READ | (rdonly ? 0 : PROT_WRITE);
-
-#ifndef MAP_FAILED			/* XXX: Mmap(2) failure return. */
-#define	MAP_FAILED	-1
-#endif
-	if ((p =
-	    mmap(NULL, len, prot, flags, fd, (off_t)0)) == (void *)MAP_FAILED)
-		return (errno);
-
-	*(void **)addr = p;
-	return (0);
-#endif /* _WIN32 */
-}
-
-/*
- * __db_unmap --
- *	Release the specified shared memory.
- *
- * PUBLIC: int __db_munmap __P((void *, size_t));
- */
-int
-__db_munmap(addr, len)
-	void *addr;
-	size_t len;
-{
-	/*
-	 * !!!
-	 * The argument len is always the same length as was mapped.
-	 */
-#ifdef _WIN32
-	return (!UnmapViewOfFile(addr) ? errno : 0);
-#else
-	return (munmap(addr, len) ? errno : 0);
-#endif
-}
diff --git a/db2/os/os_abs.c b/db2/os/os_abs.c
new file mode 100644
index 0000000000..872e46d058
--- /dev/null
+++ b/db2/os/os_abs.c
@@ -0,0 +1,31 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1997
+ *	Sleepycat Software.  All rights reserved.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)os_abs.c	10.7 (Sleepycat) 10/24/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+#endif
+
+#include "db_int.h"
+
+/*
+ * __db_abspath --
+ *	Return non-zero if path starts with '/' (an absolute path), else 0.
+ *
+ * PUBLIC: int __db_abspath __P((const char *));
+ */
+int
+__db_abspath(path)
+	const char *path;
+{
+	return (path[0] == '/');
+}
diff --git a/db2/os/os_dir.c b/db2/os/os_dir.c
new file mode 100644
index 0000000000..10fb8b6739
--- /dev/null
+++ b/db2/os/os_dir.c
@@ -0,0 +1,100 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1997
+ *	Sleepycat Software.  All rights reserved.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)os_dir.c	10.13 (Sleepycat) 10/28/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#if HAVE_DIRENT_H
+# include <dirent.h>
+# define NAMLEN(dirent) strlen((dirent)->d_name)
+#else
+# define dirent direct
+# define NAMLEN(dirent) (dirent)->d_namlen
+# if HAVE_SYS_NDIR_H
+#  include <sys/ndir.h>
+# endif
+# if HAVE_SYS_DIR_H
+#  include <sys/dir.h>
+# endif
+# if HAVE_NDIR_H
+#  include <ndir.h>
+# endif
+#endif
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#endif
+
+#include "db_int.h"
+#include "common_ext.h"
+
+/*
+ * __os_dirlist --
+ *	Return a list of the files in a directory.  On success, *namesp is
+ *	an allocated array of *cntp allocated names and 0 is returned.  On
+ *	failure, errno (opendir) or ENOMEM is returned and nothing is leaked.
+ *
+ * PUBLIC: int __os_dirlist __P((const char *, char ***, int *));
+ */
+int
+__os_dirlist(dir, namesp, cntp)
+	const char *dir;
+	char ***namesp;
+	int *cntp;
+{
+	struct dirent *dp;
+	DIR *dirp;
+	int arraysz, cnt;
+	char **names, **tmp;
+
+	if ((dirp = opendir(dir)) == NULL)
+		return (errno);
+	names = NULL;
+	for (arraysz = cnt = 0; (dp = readdir(dirp)) != NULL; ++cnt) {
+		if (cnt >= arraysz) {
+			/*
+			 * Grow through a temporary: if the allocation fails,
+			 * names must still reference the old array so that
+			 * the error path can free it and its entries.
+			 */
+			arraysz += 100;
+			tmp = (char **)(names == NULL ?
+			    __db_malloc(arraysz * sizeof(names[0])) :
+			    __db_realloc(names, arraysz * sizeof(names[0])));
+			if (tmp == NULL)
+				goto nomem;
+			names = tmp;
+		}
+		if ((names[cnt] = (char *)__db_strdup(dp->d_name)) == NULL)
+			goto nomem;
+	}
+	(void)closedir(dirp);
+
+	*namesp = names;
+	*cntp = cnt;
+	return (0);
+
+	/* Release the directory stream as well as the partial list. */
+nomem:	(void)closedir(dirp);
+	if (names != NULL)
+		__os_dirfree(names, cnt);
+	return (ENOMEM);
+}
+
+/*
+ * __os_dirfree --
+ *	Free the first cnt entries of names, then the names array itself.
+ *
+ * PUBLIC: void __os_dirfree __P((char **, int));
+ */
+void
+__os_dirfree(names, cnt)
+	char **names;
+	int cnt;
+{
+	while (cnt > 0)
+		__db_free(names[--cnt]);
+	__db_free(names);
+}
diff --git a/db2/os/db_os_fid.c b/db2/os/os_fid.c
index 960d580bad..6820b88786 100644
--- a/db2/os/db_os_fid.c
+++ b/db2/os/os_fid.c
@@ -8,7 +8,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)db_os_fid.c	10.8 (Sleepycat) 8/27/97";
+static const char sccsid[] = "@(#)os_fid.c	10.9 (Sleepycat) 10/24/97";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -22,8 +22,6 @@ static const char sccsid[] = "@(#)db_os_fid.c	10.8 (Sleepycat) 8/27/97";
 #endif
 
 #include "db_int.h"
-#include "db_page.h"
-#include "os_ext.h"
 #include "common_ext.h"
 
 /*
@@ -39,58 +37,11 @@ __db_fileid(dbenv, fname, timestamp, fidp)
 	int timestamp;
 	u_int8_t *fidp;
 {
+	struct stat sb;
 	size_t i;
 	time_t now;
 	u_int8_t *p;
 
-#ifdef _WIN32
-	/*
-	 * The documentation for GetFileInformationByHandle() states that the
-	 * inode-type numbers are not constant between processes.  Actually,
-	 * they are, they're the NTFS MFT indexes.  So, this works on NTFS,
-	 * but perhaps not on other platforms, and perhaps not over a network.
-	 * Can't think of a better solution right now.
-	 */
-	int fd = 0;
-	HANDLE fh = 0;
-	BY_HANDLE_FILE_INFORMATION fi;
-	BOOL retval = FALSE;
-
-	/* Clear the buffer. */
-	memset(fidp, 0, DB_FILE_ID_LEN);
-
-	/* first we open the file, because we're not given a handle to it */
-	fd = open(fname,_O_RDONLY,_S_IREAD);
-	if (-1 == fd) {
-		/* If we can't open it, we're in trouble */
-		return (errno);
-	}
-
-	/* File open, get its info */
-	fh = (HANDLE)_get_osfhandle(fd);
-	if ((HANDLE)(-1) != fh) {
-		retval = GetFileInformationByHandle(fh,&fi);
-	}
-	close(fd);
-
-	/*
-	 * We want the three 32-bit words which tell us the volume ID and
-	 * the file ID.  We make a crude attempt to copy the bytes over to
-	 * the callers buffer.
-	 *
-	 * DBDB: really we should ensure that the bytes get packed the same
-	 * way on all compilers, platforms etc.
-	 */
-	if ( ((HANDLE)(-1) != fh) && (TRUE == retval) ) {
-		memcpy(fidp, &fi.nFileIndexLow, sizeof(u_int32_t));
-		fidp += sizeof(u_int32_t);
-		memcpy(fidp, &fi.nFileIndexHigh, sizeof(u_int32_t));
-		fidp += sizeof(u_int32_t);
-		memcpy(fidp, &fi.dwVolumeSerialNumber, sizeof(u_int32_t));
-	}
-#else
-	struct stat sb;
-
 	/* Clear the buffer. */
 	memset(fidp, 0, DB_FILE_ID_LEN);
 
@@ -115,7 +66,7 @@ __db_fileid(dbenv, fname, timestamp, fidp)
 	for (p = (u_int8_t *)&sb.st_dev +
 	    sizeof(sb.st_dev), i = 0; i < sizeof(sb.st_dev); ++i)
 		*fidp++ = *--p;
-#endif
+
 	if (timestamp) {
 		(void)time(&now);
 		for (p = (u_int8_t *)&now +
diff --git a/db2/os/os_fsync.c b/db2/os/os_fsync.c
new file mode 100644
index 0000000000..7b001ceeb0
--- /dev/null
+++ b/db2/os/os_fsync.c
@@ -0,0 +1,34 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1997
+ *	Sleepycat Software.  All rights reserved.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)os_fsync.c	10.3 (Sleepycat) 10/25/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <errno.h>
+#include <unistd.h>
+#endif
+
+#include "db_int.h"
+
+/*
+ * __db_fsync --
+ *	Flush a file descriptor; return 0, or errno if __os_fsync fails.
+ *
+ * PUBLIC: int __db_fsync __P((int));
+ */
+int
+__db_fsync(fd)
+	int fd;
+{
+	return (__os_fsync(fd) ? errno : 0);
+}
diff --git a/db2/os/os_func.c b/db2/os/os_func.c
new file mode 100644
index 0000000000..afd40f4624
--- /dev/null
+++ b/db2/os/os_func.c
@@ -0,0 +1,153 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1997
+ *	Sleepycat Software.  All rights reserved.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)os_func.c	10.4 (Sleepycat) 10/28/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <errno.h>
+#endif
+
+#include "db_int.h"
+
+/*
+ * XXX
+ * We provide our own extern declarations so that we don't collide with
+ * systems that get them wrong, e.g., SunOS.
+ */
+#ifdef _WIN32
+#define fsync		_commit
+#define imported	__declspec(dllimport)
+#else
+#define imported
+#endif
+
+imported extern void    *calloc __P((size_t, size_t));
+imported extern int	 close __P((int));
+imported extern void	 free __P((void *));
+imported extern int	 fsync __P((int));
+imported extern void    *malloc __P((size_t));
+imported extern int	 open __P((const char *, int, ...));
+imported extern ssize_t	 read __P((int, void *, size_t));
+imported extern char	*strdup __P((const char *));
+imported extern void    *realloc __P((void *, size_t));
+imported extern int	 unlink __P((const char *));
+imported extern ssize_t	 write __P((int, const void *, size_t));
+
+/*
+ * __db_jump --
+ *	The list of interfaces that applications can replace.  In some
+ *	cases, the user is permitted to replace the standard ANSI C or
+ *	POSIX 1003.1 call, e.g., calloc or read.  In others, we provide
+ *	a local interface to the functionality, e.g., __os_map.
+ */
+struct __db_jumptab __db_jump = {
+	calloc,				/* DB_FUNC_CALLOC */
+	close,				/* DB_FUNC_CLOSE */
+	__os_dirfree,			/* DB_FUNC_DIRFREE */
+	__os_dirlist,			/* DB_FUNC_DIRLIST */
+	__os_exists,			/* DB_FUNC_EXISTS */
+	free,				/* DB_FUNC_FREE */
+	fsync,				/* DB_FUNC_FSYNC */
+	__os_ioinfo,			/* DB_FUNC_IOINFO */
+	malloc,				/* DB_FUNC_MALLOC */
+	__os_map,			/* DB_FUNC_MAP */
+	open,				/* DB_FUNC_OPEN */
+	read,				/* DB_FUNC_READ */
+	realloc,			/* DB_FUNC_REALLOC */
+	__os_seek,			/* DB_FUNC_SEEK */
+	__os_sleep,			/* DB_FUNC_SLEEP */
+	strdup,				/* DB_FUNC_STRDUP */
+	unlink,				/* DB_FUNC_UNLINK */
+	__os_unmap,			/* DB_FUNC_UNMAP */
+	write,				/* DB_FUNC_WRITE */
+	NULL				/* DB_FUNC_YIELD */
+};
+
+/*
+ * db_jump_set --
+ *	Install func as interface which; return 0, or EINVAL if which is unknown.
+ */
+int
+db_jump_set(func, which)
+	void *func;
+	int which;
+{
+	switch (which) {
+	case DB_FUNC_CALLOC:
+		__db_calloc = (void *(*) __P((size_t, size_t)))func;
+		break;
+	case DB_FUNC_CLOSE:
+		__os_close = (int (*) __P((int)))func;
+		break;
+	case DB_FUNC_DIRFREE:
+		__db_dirfree = (void (*) __P((char **, int)))func;
+		break;
+	case DB_FUNC_DIRLIST:
+		__db_dirlist =
+		    (int (*) __P((const char *, char ***, int *)))func;
+		break;
+	case DB_FUNC_EXISTS:
+		__db_exists = (int (*) __P((const char *, int *)))func;
+		break;
+	case DB_FUNC_FREE:
+		__db_free = (void (*) __P((void *)))func;
+		break;
+	case DB_FUNC_FSYNC:
+		__os_fsync = (int (*) __P((int)))func;
+		break;
+	case DB_FUNC_IOINFO:
+		__db_ioinfo =
+		    (int (*) __P((const char *, int, off_t *, off_t *)))func;
+		break;
+	case DB_FUNC_MALLOC:
+		__db_malloc = (void *(*) __P((size_t)))func;
+		break;
+	case DB_FUNC_MAP:
+		__db_map = (int (*) __P((int, size_t, int, int, void **)))func;
+		break;
+	case DB_FUNC_OPEN:
+		__os_open = (int (*) __P((const char *, int, ...)))func;
+		break;
+	case DB_FUNC_READ:
+		__os_read = (ssize_t (*) __P((int, void *, size_t)))func;
+		break;
+	case DB_FUNC_REALLOC:
+		__db_realloc = (void *(*) __P((void *, size_t)))func;
+		break;
+	case DB_FUNC_SEEK:
+		__db_seek =
+		    (int (*) __P((int, size_t, db_pgno_t, u_long, int)))func;
+		break;
+	case DB_FUNC_SLEEP:
+		__db_sleep = (int (*) __P((u_long, u_long)))func;
+		break;
+	case DB_FUNC_STRDUP:
+		__db_strdup = (char *(*) __P((const char *)))func;
+		break;
+	case DB_FUNC_UNLINK:
+		__os_unlink = (int (*) __P((const char *)))func;
+		break;
+	case DB_FUNC_UNMAP:
+		__db_unmap = (int (*) __P((void *, size_t)))func;
+		break;
+	case DB_FUNC_WRITE:
+		__os_write = (ssize_t (*) __P((int, const void *, size_t)))func;
+		break;
+	case DB_FUNC_YIELD:
+		__db_yield = (int (*) __P((void)))func;
+		break;
+	default:
+		return (EINVAL);
+	}
+	return (0);
+}
diff --git a/db2/os/os_map.c b/db2/os/os_map.c
new file mode 100644
index 0000000000..b1553188dc
--- /dev/null
+++ b/db2/os/os_map.c
@@ -0,0 +1,71 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *	Sleepycat Software.  All rights reserved.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)os_map.c	10.7 (Sleepycat) 10/25/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+#include <sys/mman.h>
+
+#include <errno.h>
+#endif
+
+#include "db_int.h"
+
+/*
+ * __os_map --
+ *	Map len bytes of fd, from offset 0, into *addr; return 0 or errno.
+ *
+ * PUBLIC: int __os_map __P((int, size_t, int, int, void **));
+ */
+int
+__os_map(fd, len, is_private, is_rdonly, addr)
+	int fd, is_private, is_rdonly;
+	size_t len;
+	void **addr;
+{
+	void *p;
+	int flags, prot;
+
+	flags = is_private ? MAP_PRIVATE : MAP_SHARED;
+#ifdef MAP_HASSEMAPHORE
+	flags |= MAP_HASSEMAPHORE;
+#endif
+	prot = PROT_READ | (is_rdonly ? 0 : PROT_WRITE);
+
+#ifndef MAP_FAILED			/* XXX: Mmap(2) failure return. */
+#define	MAP_FAILED	-1
+#endif
+	if ((p =
+	    mmap(NULL, len, prot, flags, fd, (off_t)0)) == (void *)MAP_FAILED)
+		return (errno);
+
+	*addr = p;
+	return (0);
+}
+
+/*
+ * __os_unmap --
+ *	Release a mapping with munmap(2); return 0, or errno on failure.
+ *
+ * PUBLIC: int __os_unmap __P((void *, size_t));
+ */
+int
+__os_unmap(addr, len)
+	void *addr;
+	size_t len;
+{
+	/*
+	 * !!!
+	 * The argument len is always the same length as was mapped.
+	 */
+	return (munmap(addr, len) ? errno : 0);
+}
diff --git a/db2/os/os_oflags.c b/db2/os/os_oflags.c
new file mode 100644
index 0000000000..3656eef1c4
--- /dev/null
+++ b/db2/os/os_oflags.c
@@ -0,0 +1,48 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1997
+ *	Sleepycat Software.  All rights reserved.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)os_oflags.c	10.2 (Sleepycat) 10/24/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <fcntl.h>
+#endif
+
+#include "db_int.h"
+
+/*
+ * __db_oflags --
+ *	Convert open(2) flags to DB_CREATE, DB_RDONLY and DB_TRUNCATE flags.
+ *
+ * PUBLIC: int __db_oflags __P((int));
+ */
+int
+__db_oflags(oflags)
+	int oflags;
+{
+	int dbflags;
+
+	/*
+	 * XXX
+	 * Convert POSIX 1003.1 open(2) flags to DB flags.  Not an exact
+	 * science as most POSIX implementations don't have a flag value
+	 * for O_RDONLY, it's simply the lack of a write flag.
+	 */
+	dbflags = 0;
+	if (oflags & O_CREAT)
+		dbflags |= DB_CREATE;
+	if (!(oflags & (O_RDWR | O_WRONLY)) || oflags & O_RDONLY)
+		dbflags |= DB_RDONLY;
+	if (oflags & O_TRUNC)
+		dbflags |= DB_TRUNCATE;
+	return (dbflags);
+}
diff --git a/db2/os/db_os_open.c b/db2/os/os_open.c
index 1d67ef9508..05784e4810 100644
--- a/db2/os/db_os_open.c
+++ b/db2/os/os_open.c
@@ -8,7 +8,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)db_os_open.c	10.14 (Sleepycat) 7/5/97";
+static const char sccsid[] = "@(#)os_open.c	10.19 (Sleepycat) 10/28/97";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -20,44 +20,15 @@ static const char sccsid[] = "@(#)db_os_open.c	10.14 (Sleepycat) 7/5/97";
 #endif
 
 #include "db_int.h"
-#include "os_ext.h"
 
 /*
- * __db_oflags --
- *	Convert open(2) flags to DB flags.
- *
- * PUBLIC: int __db_oflags __P((int));
- */
-int
-__db_oflags(oflags)
-	int oflags;
-{
-	int dbflags;
-
-	/*
-	 * XXX
-	 * Convert POSIX 1003.1 open(2) flags to DB flags.  Not an exact
-	 * science as most POSIX implementations don't have a flag value
-	 * for O_RDONLY, it's simply the lack of a write flag.
-	 */
-	dbflags = 0;
-	if (oflags & O_CREAT)
-		dbflags |= DB_CREATE;
-	if (!(oflags & (O_RDWR | O_WRONLY)) || oflags & O_RDONLY)
-		dbflags |= DB_RDONLY;
-	if (oflags & O_TRUNC)
-		dbflags |= DB_TRUNCATE;
-	return (dbflags);
-}
-
-/*
- * __db_fdopen --
+ * __db_open --
  *	Open a file descriptor.
  *
- * PUBLIC: int __db_fdopen __P((const char *, int, int, int, int *));
+ * PUBLIC: int __db_open __P((const char *, int, int, int, int *));
  */
 int
-__db_fdopen(name, arg_flags, ok_flags, mode, fdp)
+__db_open(name, arg_flags, ok_flags, mode, fdp)
 	const char *name;
 	int arg_flags, ok_flags, mode, *fdp;
 {
@@ -95,13 +66,13 @@ __db_fdopen(name, arg_flags, ok_flags, mode, fdp)
 		flags |= O_TRUNC;
 
 	/* Open the file. */
-	if ((fd = open(name, flags, mode)) == -1)
+	if ((fd = __os_open(name, flags, mode)) == -1)
 		return (errno);
 
 #ifndef _WIN32
 	/* Delete any temporary file; done for Win32 by _O_TEMPORARY. */
 	if (arg_flags & DB_TEMPORARY)
-		(void)unlink(name);
+		(void)__os_unlink(name);
 #endif
 
 #if !defined(_WIN32) && !defined(macintosh)
@@ -112,7 +83,7 @@ __db_fdopen(name, arg_flags, ok_flags, mode, fdp)
 	if (fcntl(fd, F_SETFD, 1) == -1) {
 		int ret = errno;
 
-		(void)__db_close(fd);
+		(void)__os_close(fd);
 		return (ret);
 	}
 #endif
@@ -121,19 +92,6 @@ __db_fdopen(name, arg_flags, ok_flags, mode, fdp)
 }
 
 /*
- * __db_fsync --
- *	Flush a file descriptor.
- *
- * PUBLIC: int __db_fsync __P((int));
- */
-int
-__db_fsync(fd)
-	int fd;
-{
-	return (fsync(fd) ? errno : 0);
-}
-
-/*
  * __db_close --
  *	Close a file descriptor.
  *
@@ -143,5 +101,5 @@ int
 __db_close(fd)
 	int fd;
 {
-	return (close(fd) ? errno : 0);
+	return (__os_close(fd) ? errno : 0);
 }
diff --git a/db2/os/os_rpath.c b/db2/os/os_rpath.c
new file mode 100644
index 0000000000..44fd4ec9f4
--- /dev/null
+++ b/db2/os/os_rpath.c
@@ -0,0 +1,42 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1997
+ *	Sleepycat Software.  All rights reserved.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)os_rpath.c	10.2 (Sleepycat) 10/24/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <string.h>
+#endif
+
+#include "db_int.h"
+
+/*
+ * __db_rpath --
+ *	Return a pointer to the last PATH_SEPARATOR character in path, or NULL.
+ *
+ * PUBLIC: char *__db_rpath __P((const char *));
+ */
+char *
+__db_rpath(path)
+	const char *path;
+{
+	const char *s, *last;
+
+	last = NULL;
+	if (PATH_SEPARATOR[1] != '\0') {
+		for (s = path; s[0] != '\0'; ++s)
+			if (strchr(PATH_SEPARATOR, s[0]) != NULL)
+				last = s;
+	} else
+		for (s = path; s[0] != '\0'; ++s)
+			if (s[0] == PATH_SEPARATOR[0])
+				last = s;
+	return ((char *)last);
+}
diff --git a/db2/os/db_os_rw.c b/db2/os/os_rw.c
index 5a6c2196fd..48f7fdc5b1 100644
--- a/db2/os/db_os_rw.c
+++ b/db2/os/os_rw.c
@@ -8,7 +8,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)db_os_rw.c	10.4 (Sleepycat) 6/28/97";
+static const char sccsid[] = "@(#)os_rw.c	10.6 (Sleepycat) 10/25/97";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -19,7 +19,6 @@ static const char sccsid[] = "@(#)db_os_rw.c	10.4 (Sleepycat) 6/28/97";
 #endif
 
 #include "db_int.h"
-#include "os_ext.h"
 
 /*
  * __db_read --
@@ -40,7 +39,7 @@ __db_read(fd, addr, len, nrp)
 
 	for (taddr = addr,
 	    offset = 0; offset < len; taddr += nr, offset += nr) {
-		if ((nr = read(fd, taddr, len - offset)) < 0)
+		if ((nr = __os_read(fd, taddr, len - offset)) < 0)
 			return (errno);
 		if (nr == 0)
 			break;
@@ -68,7 +67,7 @@ __db_write(fd, addr, len, nwp)
 
 	for (taddr = addr,
 	    offset = 0; offset < len; taddr += nw, offset += nw)
-		if ((nw = write(fd, taddr, len - offset)) < 0)
+		if ((nw = __os_write(fd, taddr, len - offset)) < 0)
 			return (errno);
 	*nwp = len;
 	return (0);
diff --git a/db2/os/os_seek.c b/db2/os/os_seek.c
new file mode 100644
index 0000000000..e27044b626
--- /dev/null
+++ b/db2/os/os_seek.c
@@ -0,0 +1,42 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1997
+ *	Sleepycat Software.  All rights reserved.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)os_seek.c	10.6 (Sleepycat) 10/25/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <errno.h>
+#include <unistd.h>
+#endif
+
+#include "db_int.h"
+
+/*
+ * __os_seek --
+ *	Seek to pgsize * pageno + relative per whence; return 0 or errno.
+ *
+ * PUBLIC: int __os_seek __P((int, size_t, db_pgno_t, u_long, int));
+ */
+int
+__os_seek(fd, pgsize, pageno, relative, whence)
+	int fd;
+	size_t pgsize;
+	db_pgno_t pageno;
+	u_long relative;
+	int whence;
+{
+	off_t offset;
+
+	/* Multiply in off_t: size_t math can wrap where off_t is wider. */
+	offset = (off_t)pgsize * pageno + relative;
+	return (lseek(fd, offset, whence) == -1 ? errno : 0);
+}
diff --git a/db2/os/db_os_sleep.c b/db2/os/os_sleep.c
index 5591789f51..2d2cb71f6d 100644
--- a/db2/os/db_os_sleep.c
+++ b/db2/os/os_sleep.c
@@ -8,7 +8,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)db_os_sleep.c	10.6 (Sleepycat) 6/28/97";
+static const char sccsid[] = "@(#)os_sleep.c	10.8 (Sleepycat) 10/25/97";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -28,21 +28,18 @@ static const char sccsid[] = "@(#)db_os_sleep.c	10.6 (Sleepycat) 6/28/97";
 #endif
 
 #include "db_int.h"
-#include "os_ext.h"
 
 /*
- * __db_sleep --
+ * __os_sleep --
  *	Yield the processor for a period of time.
  *
- * PUBLIC: int __db_sleep __P((u_long, u_long));
+ * PUBLIC: int __os_sleep __P((u_long, u_long));
  */
 int
-__db_sleep(secs, usecs)
+__os_sleep(secs, usecs)
 	u_long secs, usecs;		/* Seconds and microseconds. */
 {
-#ifndef _WIN32
 	struct timeval t;
-#endif
 
 	/* Don't require that the values be normalized. */
 	for (; usecs >= 1000000; ++secs, usecs -= 1000000);
@@ -51,12 +48,7 @@ __db_sleep(secs, usecs)
 	 * It's important that we yield the processor here so that other
 	 * processes or threads are permitted to run.
 	 */
-#ifdef _WIN32
-	Sleep(secs * 1000 + usecs / 1000);
-	return (0);
-#else
 	t.tv_sec = secs;
 	t.tv_usec = usecs;
 	return (select(0, NULL, NULL, NULL, &t) == -1 ? errno : 0);
-#endif
 }
diff --git a/db2/os/db_os_stat.c b/db2/os/os_stat.c
index 7929b6b754..ee84ab0588 100644
--- a/db2/os/db_os_stat.c
+++ b/db2/os/os_stat.c
@@ -8,7 +8,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)db_os_stat.c	10.6 (Sleepycat) 7/2/97";
+static const char sccsid[] = "@(#)os_stat.c	10.8 (Sleepycat) 10/25/97";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -20,17 +20,16 @@ static const char sccsid[] = "@(#)db_os_stat.c	10.6 (Sleepycat) 7/2/97";
 #endif
 
 #include "db_int.h"
-#include "os_ext.h"
 #include "common_ext.h"
 
 /*
- * __db_exists --
+ * __os_exists --
  *	Return if the file exists.
  *
- * PUBLIC: int __db_exists __P((const char *, int *));
+ * PUBLIC: int __os_exists __P((const char *, int *));
  */
 int
-__db_exists(path, isdirp)
+__os_exists(path, isdirp)
 	const char *path;
 	int *isdirp;
 {
@@ -44,25 +43,22 @@ __db_exists(path, isdirp)
 }
 
 /*
- * __db_stat --
+ * __os_ioinfo --
  *	Return file size and I/O size; abstracted to make it easier
  *	to replace.
  *
- * PUBLIC: int __db_stat __P((DB_ENV *, const char *, int, off_t *, off_t *));
+ * PUBLIC: int __os_ioinfo __P((const char *, int, off_t *, off_t *));
  */
 int
-__db_stat(dbenv, path, fd, sizep, iop)
-	DB_ENV *dbenv;
+__os_ioinfo(path, fd, sizep, iop)
 	const char *path;
 	int fd;
 	off_t *sizep, *iop;
 {
 	struct stat sb;
 
-	if (fstat(fd, &sb) == -1) {
-		__db_err(dbenv, "%s: fstat: %s", path, strerror(errno));
+	if (fstat(fd, &sb) == -1)
 		return (errno);
-	}
 
 	/* Return the size of the file. */
 	if (sizep != NULL)
diff --git a/db2/os/db_os_unlink.c b/db2/os/os_unlink.c
index 872beba3cf..473ce77d39 100644
--- a/db2/os/db_os_unlink.c
+++ b/db2/os/os_unlink.c
@@ -8,7 +8,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)db_os_unlink.c	10.2 (Sleepycat) 6/28/97";
+static const char sccsid[] = "@(#)os_unlink.c	10.4 (Sleepycat) 10/28/97";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -19,7 +19,6 @@ static const char sccsid[] = "@(#)db_os_unlink.c	10.2 (Sleepycat) 6/28/97";
 #endif
 
 #include "db_int.h"
-#include "os_ext.h"
 
 /*
  * __db_unlink --
@@ -31,5 +30,5 @@ int
 __db_unlink(path)
 	const char *path;
 {
-	return (unlink(path) == -1 ? errno : 0);
+	return (__os_unlink(path) == -1 ? errno : 0);
 }
diff --git a/db2/progs/db_deadlock/db_deadlock.c b/db2/progs/db_deadlock/db_deadlock.c
index ec2b53dee7..473e5b9cb2 100644
--- a/db2/progs/db_deadlock/db_deadlock.c
+++ b/db2/progs/db_deadlock/db_deadlock.c
@@ -11,7 +11,7 @@
 static const char copyright[] =
 "@(#) Copyright (c) 1997\n\
 	Sleepycat Software Inc.  All rights reserved.\n";
-static const char sccsid[] = "@(#)db_deadlock.c	10.15 (Sleepycat) 9/4/97";
+static const char sccsid[] = "@(#)db_deadlock.c	10.16 (Sleepycat) 10/14/97";
 #endif
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -53,13 +53,13 @@ main(argc, argv)
 	DB_ENV *dbenv;
 	u_int32_t atype;
 	time_t now;
-	long seconds;
+	long usecs;
 	int ch, flags, verbose;
 	char *home, *logfile;
 
 	atype = DB_LOCK_DEFAULT;
 	home = logfile = NULL;
-	seconds = 0;
+	usecs = 0;
 	flags = verbose = 0;
 	while ((ch = getopt(argc, argv, "a:h:L:t:vw")) != EOF)
 		switch (ch) {
@@ -85,7 +85,8 @@ main(argc, argv)
 			logfile = optarg;
 			break;
 		case 't':
-			get_long(optarg, 1, LONG_MAX, &seconds);
+			get_long(optarg, 1, LONG_MAX, &usecs);
+			usecs *= 1000000;
 			break;
 		case 'v':
 			verbose = 1;
@@ -103,16 +104,17 @@ main(argc, argv)
 	if (argc != 0)
 		usage();
 
-	if (seconds == 0 && !LF_ISSET(DB_LOCK_CONFLICT)) {
+	if (usecs == 0 && !LF_ISSET(DB_LOCK_CONFLICT)) {
 		warnx("at least one of -t and -w must be specified");
 		usage();
 	}
 
 	/*
-	 * We detect every second when we're running in DB_LOCK_CONFLICT mode.
+	 * We detect every 100ms (100000 us) when we're running in
+	 * DB_LOCK_CONFLICT mode.
 	 */
-	if (seconds == 0)
-		seconds = 1;
+	if (usecs == 0)
+		usecs = 100000;
 
 	/* Initialize the deadlock detector by opening the lock manager. */
 	dbenv = db_init(home, verbose);
@@ -125,14 +127,14 @@ main(argc, argv)
 	while (!interrupted) {
 		if (dbenv->db_verbose != 0) {
 			time(&now);
-			__db_err(dbenv, "Running at %s", ctime(&now));
+			__db_err(dbenv, "Running at %.24s", ctime(&now));
 		}
 
 		if ((errno = lock_detect(dbenv->lk_info, flags, atype)) != 0)
 			break;
 
-		/* Make a pass every "seconds" seconds. */
-		(void)__db_sleep(seconds, 0);
+		/* Make a pass every "usecs" usecs. */
+		(void)__db_sleep(0, usecs);
 	}
 
 	if (logfile != NULL)
diff --git a/db2/progs/db_dump/db_dump.c b/db2/progs/db_dump/db_dump.c
index a0f60c69a6..c09719059b 100644
--- a/db2/progs/db_dump/db_dump.c
+++ b/db2/progs/db_dump/db_dump.c
@@ -31,6 +31,8 @@ static const char sccsid[] = "@(#)db_dump.c	10.16 (Sleepycat) 8/27/97";
 #include "hash.h"
 #include "clib_ext.h"
 
+#undef stat
+
 void	configure __P((char *));
 DB_ENV *db_init __P((char *));
 void	dbt_dump __P((DBT *));
diff --git a/db2/progs/db_load/db_load.c b/db2/progs/db_load/db_load.c
index a1ebfa8a11..6597f10e10 100644
--- a/db2/progs/db_load/db_load.c
+++ b/db2/progs/db_load/db_load.c
@@ -11,7 +11,7 @@
 static const char copyright[] =
 "@(#) Copyright (c) 1997\n\
 	Sleepycat Software Inc.  All rights reserved.\n";
-static const char sccsid[] = "@(#)db_load.c	10.13 (Sleepycat) 9/15/97";
+static const char sccsid[] = "@(#)db_load.c	10.14 (Sleepycat) 10/19/97";
 #endif
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -352,7 +352,7 @@ dbt_rprint(dbtp)
 				escape = 1;
 				continue;
 			}
-		if (++len >= dbtp->ulen - 10) {
+		if (len >= dbtp->ulen - 10) {
 			dbtp->ulen *= 2;
 			if ((dbtp->data =
 			    (void *)realloc(dbtp->data, dbtp->ulen)) == NULL) {
@@ -361,6 +361,7 @@ dbt_rprint(dbtp)
 			}
 			p = (u_int8_t *)dbtp->data + len;
 		}
+		++len;
 		*p++ = c1;
 	}
 	dbtp->size = len;
@@ -420,7 +421,7 @@ dbt_rdump(dbtp)
 		}
 		if ((c2 = getchar()) == EOF)
 			err(1, "unexpected end of key/data pair");
-		if (++len >= dbtp->ulen - 10) {
+		if (len >= dbtp->ulen - 10) {
 			dbtp->ulen *= 2;
 			if ((dbtp->data =
 			    (void *)realloc(dbtp->data, dbtp->ulen)) == NULL) {
@@ -429,6 +430,7 @@ dbt_rdump(dbtp)
 			}
 			p = (u_int8_t *)dbtp->data + len;
 		}
+		++len;
 		*p++ = digitize(c1) << 4 | digitize(c2);
 	}
 	dbtp->size = len;
diff --git a/db2/progs/db_recover/db_recover.c b/db2/progs/db_recover/db_recover.c
index 55b9b49a79..5a39d320f8 100644
--- a/db2/progs/db_recover/db_recover.c
+++ b/db2/progs/db_recover/db_recover.c
@@ -11,7 +11,7 @@
 static const char copyright[] =
 "@(#) Copyright (c) 1997\n\
 	Sleepycat Software Inc.  All rights reserved.\n";
-static const char sccsid[] = "@(#)db_recover.c	10.15 (Sleepycat) 9/21/97";
+static const char sccsid[] = "@(#)db_recover.c	10.16 (Sleepycat) 10/28/97";
 #endif
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -82,7 +82,7 @@ main(argc, argv)
 		    (u_long)dbenv->tx_info->region->last_ckp.offset);
 	}
 
-	exit (db_appexit(dbenv));
+	return (db_appexit(dbenv));
 }
 
 DB_ENV *
diff --git a/db2/progs/db_stat/db_stat.c b/db2/progs/db_stat/db_stat.c
index 1a989f4df3..b1f1615fa9 100644
--- a/db2/progs/db_stat/db_stat.c
+++ b/db2/progs/db_stat/db_stat.c
@@ -11,7 +11,7 @@
 static const char copyright[] =
 "@(#) Copyright (c) 1997\n\
 	Sleepycat Software Inc.  All rights reserved.\n";
-static const char sccsid[] = "@(#)db_stat.c	8.20 (Sleepycat) 8/27/97";
+static const char sccsid[] = "@(#)db_stat.c	8.26 (Sleepycat) 11/2/97";
 #endif
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -29,18 +29,22 @@ static const char sccsid[] = "@(#)db_stat.c	8.20 (Sleepycat) 8/27/97";
 #include "db_int.h"
 #include "clib_ext.h"
 
+#undef stat
+
+#define	MB	1048576
 #define	DIVIDER	"=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-="
 
-typedef enum { T_NOTSET, T_DB, T_MPOOL, T_TXN } test_t;
+typedef enum { T_NOTSET, T_DB, T_LOG, T_MPOOL, T_TXN } test_t;
 
-void	bstat __P((DB *));
+void	btree_stats __P((DB *));
 DB_ENV *db_init __P((char *, test_t));
-void	hstat __P((DB *));
+void	hash_stats __P((DB *));
 int	main __P((int, char *[]));
-void	mstat __P((DB_ENV *));
+void	log_stats __P((DB_ENV *));
+void	mpool_stats __P((DB_ENV *));
 void	onint __P((int));
 void	prflags __P((u_int32_t, const FN *));
-void	tstat __P((DB_ENV *));
+void	txn_stats __P((DB_ENV *));
 int	txn_compare __P((const void *, const void *));
 void	usage __P((void));
 
@@ -63,7 +67,7 @@ main(argc, argv)
 
 	ttype = T_NOTSET;
 	db = home = NULL;
-	while ((ch = getopt(argc, argv, "d:h:mt")) != EOF)
+	while ((ch = getopt(argc, argv, "d:h:lmt")) != EOF)
 		switch (ch) {
 		case 'd':
 			db = optarg;
@@ -72,6 +76,9 @@ main(argc, argv)
 		case 'h':
 			home = optarg;
 			break;
+		case 'l':
+			ttype = T_LOG;
+			break;
 		case 'm':
 			ttype = T_MPOOL;
 			break;
@@ -100,10 +107,10 @@ main(argc, argv)
 		switch (dbp->type) {
 		case DB_BTREE:
 		case DB_RECNO:
-			bstat(dbp);
+			btree_stats(dbp);
 			break;
 		case DB_HASH:
-			hstat(dbp);
+			hash_stats(dbp);
 			break;
 		case DB_UNKNOWN:
 			abort();		/* Impossible. */
@@ -111,11 +118,14 @@ main(argc, argv)
 		}
 		(void)dbp->close(dbp, 0);
 		break;
+	case T_LOG:
+		log_stats(dbenv);
+		break;
 	case T_MPOOL:
-		mstat(dbenv);
+		mpool_stats(dbenv);
 		break;
 	case T_TXN:
-		tstat(dbenv);
+		txn_stats(dbenv);
 		break;
 	case T_NOTSET:
 		abort();			/* Impossible. */
@@ -133,11 +143,11 @@ main(argc, argv)
 }
 
 /*
- * bstat --
+ * btree_stats --
  *	Display btree/recno statistics.
  */
 void
-bstat(dbp)
+btree_stats(dbp)
 	DB *dbp;
 {
 	static const FN fn[] = {
@@ -156,6 +166,8 @@ bstat(dbp)
     (t == 0 ? 0 :							\
     (((double)((t * sp->bt_pagesize) - f) / (t * sp->bt_pagesize)) * 100))
 
+	printf("%#lx\tBtree magic number.\n", (u_long)sp->bt_magic);
+	printf("%lu\tBtree version number.\n", (u_long)sp->bt_version);
 	prflags(sp->bt_flags, fn);
 	if (dbp->type == DB_BTREE) {
 #ifdef NOT_IMPLEMENTED
@@ -213,22 +225,56 @@ printf("%lu\tNumber of bytes free in tree overflow pages (%.0f%% ff).\n",
 }
 
 /*
- * hstat --
+ * hash_stats --
  *	Display hash statistics.
  */
 void
-hstat(dbp)
+hash_stats(dbp)
 	DB *dbp;
 {
 	return;
 }
 
 /*
- * mstat --
+ * log_stats --
+ *	Display log statistics.
+ */
+void
+log_stats(dbenv)
+	DB_ENV *dbenv;			/* Environment whose log is reported. */
+{
+	DB_LOG_STAT *sp;		/* Statistics filled in by log_stat(). */
+
+	if ((errno = log_stat(dbenv->lg_info, &sp, NULL)) != 0)
+		err(1, "log_stat");	/* errno holds log_stat's error. */
+
+	printf("%#lx\tLog magic number.\n", (u_long)sp->st_magic);
+	printf("%lu\tLog version number.\n", (u_long)sp->st_version);
+	printf("%#o\tLog file mode.\n", sp->st_mode);
+	if (sp->st_lg_max % MB == 0)	/* Use the largest exact unit. */
+		printf("%luMb\tLog file size.\n", (u_long)sp->st_lg_max / MB);
+	else if (sp->st_lg_max % 1024 == 0)
+		printf("%luKb\tLog file size.\n", (u_long)sp->st_lg_max / 1024);
+	else
+		printf("%lu\tLog file size.\n", (u_long)sp->st_lg_max);
+	printf("%luMb\tLog bytes written (+%lu bytes).\n",
+	    (u_long)sp->st_w_mbytes, (u_long)sp->st_w_bytes);
+	printf("%luMb\tLog bytes written since last checkpoint (+%lu bytes).\n",
+	    (u_long)sp->st_wc_mbytes, (u_long)sp->st_wc_bytes);
+	printf("%lu\tTotal log file writes.\n", (u_long)sp->st_wcount);
+	printf("%lu\tTotal log file flushes.\n", (u_long)sp->st_scount);
+	printf("%lu\tThe number of region locks granted without waiting.\n",
+	    (u_long)sp->st_region_nowait);
+	printf("%lu\tThe number of region locks granted after waiting.\n",
+	    (u_long)sp->st_region_wait);
+}
+
+/*
+ * mpool_stats --
  *	Display mpool statistics.
  */
 void
-mstat(dbenv)
+mpool_stats(dbenv)
 	DB_ENV *dbenv;
 {
 	DB_MPOOL_FSTAT **fsp;
@@ -239,62 +285,75 @@ mstat(dbenv)
 
 	printf("%lu\tCache size (%luK).\n",
 	    (u_long)gsp->st_cachesize, (u_long)gsp->st_cachesize / 1024);
-	printf("%lu\tRequested pages found in the cache", gsp->st_cache_hit);
+	printf("%lu\tRequested pages found in the cache",
+	    (u_long)gsp->st_cache_hit);
 	if (gsp->st_cache_hit + gsp->st_cache_miss != 0)
 		printf(" (%.0f%%)", ((double)gsp->st_cache_hit /
 		    (gsp->st_cache_hit + gsp->st_cache_miss)) * 100);
 	printf(".\n");
 	printf("%lu\tRequested pages mapped into the process' address space.\n",
-	    gsp->st_map);
+	    (u_long)gsp->st_map);
 	printf("%lu\tRequested pages not found in the cache.\n",
-	    gsp->st_cache_miss);
-	printf("%lu\tPages created in the cache.\n", gsp->st_page_create);
-	printf("%lu\tPages read into the cache.\n", gsp->st_page_in);
+	    (u_long)gsp->st_cache_miss);
+	printf("%lu\tPages created in the cache.\n",
+	    (u_long)gsp->st_page_create);
+	printf("%lu\tPages read into the cache.\n", (u_long)gsp->st_page_in);
 	printf("%lu\tPages written from the cache to the backing file.\n",
-	    gsp->st_page_out);
-	printf("%lu\tRead-only pages forced from the cache.\n",
-	    gsp->st_ro_evict);
-	printf("%lu\tRead-write pages forced from the cache.\n",
-	    gsp->st_rw_evict);
+	    (u_long)gsp->st_page_out);
+	printf("%lu\tClean pages forced from the cache.\n",
+	    (u_long)gsp->st_ro_evict);
+	printf("%lu\tDirty pages forced from the cache.\n",
+	    (u_long)gsp->st_rw_evict);
+	printf("%lu\tDirty buffers written by trickle-sync thread.\n",
+	    (u_long)gsp->st_page_trickle);
+	printf("%lu\tCurrent clean buffer count.\n",
+	    (u_long)gsp->st_page_clean);
+	printf("%lu\tCurrent dirty buffer count.\n",
+	    (u_long)gsp->st_page_dirty);
 	printf("%lu\tNumber of hash buckets used for page location.\n",
-	    gsp->st_hash_buckets);
+	    (u_long)gsp->st_hash_buckets);
 	printf("%lu\tTotal number of times hash chains searched for a page.\n",
-	    gsp->st_hash_searches);
+	    (u_long)gsp->st_hash_searches);
 	printf("%lu\tThe longest hash chain searched for a page.\n",
-	    gsp->st_hash_longest);
+	    (u_long)gsp->st_hash_longest);
 	printf(
 	    "%lu\tTotal number of hash buckets examined for page location.\n",
-	    gsp->st_hash_examined);
+	    (u_long)gsp->st_hash_examined);
+	printf("%lu\tThe number of region locks granted without waiting.\n",
+	    (u_long)gsp->st_region_nowait);
+	printf("%lu\tThe number of region locks granted after waiting.\n",
+	    (u_long)gsp->st_region_wait);
 
 	for (; fsp != NULL && *fsp != NULL; ++fsp) {
 		printf("%s\n", DIVIDER);
 		printf("%s\n", (*fsp)->file_name);
 		printf("%lu\tPage size.\n", (u_long)(*fsp)->st_pagesize);
 		printf("%lu\tRequested pages found in the cache",
-		    (*fsp)->st_cache_hit);
+		    (u_long)(*fsp)->st_cache_hit);
 		if ((*fsp)->st_cache_hit + (*fsp)->st_cache_miss != 0)
 			printf(" (%.0f%%)", ((double)(*fsp)->st_cache_hit /
 			    ((*fsp)->st_cache_hit + (*fsp)->st_cache_miss)) *
 			    100);
 		printf(".\n");
 	printf("%lu\tRequested pages mapped into the process' address space.\n",
-		    (*fsp)->st_map);
+		    (u_long)(*fsp)->st_map);
 		printf("%lu\tRequested pages not found in the cache.\n",
-		    (*fsp)->st_cache_miss);
+		    (u_long)(*fsp)->st_cache_miss);
 		printf("%lu\tPages created in the cache.\n",
-		    (*fsp)->st_page_create);
-		printf("%lu\tPages read into the cache.\n", (*fsp)->st_page_in);
+		    (u_long)(*fsp)->st_page_create);
+		printf("%lu\tPages read into the cache.\n",
+		    (u_long)(*fsp)->st_page_in);
 	printf("%lu\tPages written from the cache to the backing file.\n",
-		    (*fsp)->st_page_out);
+		    (u_long)(*fsp)->st_page_out);
 	}
 }
 
 /*
- * tstat --
+ * txn_stats --
  *	Display transaction statistics.
  */
 void
-tstat(dbenv)
+txn_stats(dbenv)
 	DB_ENV *dbenv;
 {
 	DB_TXN_STAT *tstat;
@@ -311,7 +370,7 @@ tstat(dbenv)
 	p = tstat->st_pending_ckp.file == 0 ?
 	    "No pending checkpoint LSN." :
 	    "File/offset for last pending checkpoint LSN.";
-	printf("%lu/%lu\t%s.\n",
+	printf("%lu/%lu\t%s\n",
 	    (u_long)tstat->st_pending_ckp.file,
 	    (u_long)tstat->st_pending_ckp.offset, p);
 	if (tstat->st_time_ckp == 0)
@@ -391,27 +450,58 @@ db_init(home, ttype)
 	DB_ENV *dbenv;
 	int flags;
 
+	if ((dbenv = (DB_ENV *)malloc(sizeof(DB_ENV))) == NULL) {
+		errno = ENOMEM;
+		err(1, NULL);
+	}
+
+	/*
+	 * Try and use the shared regions when reporting statistics on the
+	 * DB databases, so our information is as up-to-date as possible,
+	 * even if the mpool cache hasn't been flushed.  If that fails, we
+	 * turn off the DB_INIT_MPOOL flag and try again.
+	 */
 	flags = DB_USE_ENVIRON;
 	switch (ttype) {
+	case T_DB:
 	case T_MPOOL:
 		flags |= DB_INIT_MPOOL;
 		break;
+	case T_LOG:
+		flags |= DB_INIT_LOG;
+		break;
 	case T_TXN:
 		flags |= DB_INIT_TXN;
 		break;
-	default:
-		break;
+	case T_NOTSET:
+		abort();
+		/* NOTREACHED */
 	}
 
-	if ((dbenv = (DB_ENV *)calloc(sizeof(DB_ENV), 1)) == NULL) {
-		errno = ENOMEM;
-		err(1, NULL);
+	/*
+	 * If it works, we're done.  Set the error output options so that
+	 * future errors are correctly reported.
+	 */
+	memset(dbenv, 0, sizeof(*dbenv));
+	if ((errno = db_appinit(home, NULL, dbenv, flags)) == 0) {
+		dbenv->db_errfile = stderr;
+		dbenv->db_errpfx = progname;
+		return (dbenv);
 	}
+
+	/* Turn off the DB_INIT_MPOOL flag if it's a database. */
+	if (ttype == T_DB)
+		flags &= ~DB_INIT_MPOOL;
+
+	/* Set the error output options -- this time we want a message. */
+	memset(dbenv, 0, sizeof(*dbenv));
 	dbenv->db_errfile = stderr;
 	dbenv->db_errpfx = progname;
 
+	/* Try again, and it's fatal if we fail. */
 	if ((errno = db_appinit(home, NULL, dbenv, flags)) != 0)
 		err(1, "db_appinit");
+
 	return (dbenv);
 }
 
@@ -430,6 +520,6 @@ onint(signo)
 void
 usage()
 {
-	fprintf(stderr, "usage: db_stat [-mt] [-d file] [-h home]\n");
+	fprintf(stderr, "usage: db_stat [-mlt] [-d file] [-h home]\n");
 	exit (1);
 }
diff --git a/db2/txn/txn.c b/db2/txn/txn.c
index 9a0d626c3e..55423f0470 100644
--- a/db2/txn/txn.c
+++ b/db2/txn/txn.c
@@ -43,7 +43,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)txn.c	10.30 (Sleepycat) 9/23/97";
+static const char sccsid[] = "@(#)txn.c	10.35 (Sleepycat) 11/2/97";
 #endif /* not lint */
 
 
@@ -187,7 +187,7 @@ retry1:	if ((ret = __db_ropen(dbenv, DB_APP_NONE, path, DEFAULT_TXN_FILE,
 	}
 
 	/* Now, create the transaction manager structure and set its fields. */
-	if ((tmgrp = (DB_TXNMGR *)malloc(sizeof(DB_TXNMGR))) == NULL) {
+	if ((tmgrp = (DB_TXNMGR *)__db_malloc(sizeof(DB_TXNMGR))) == NULL) {
 		__db_err(dbenv, "txn_open: %s", strerror(ENOMEM));
 		ret = ENOMEM;
 		goto out;
@@ -205,7 +205,7 @@ retry1:	if ((ret = __db_ropen(dbenv, DB_APP_NONE, path, DEFAULT_TXN_FILE,
 	TAILQ_INIT(&tmgrp->txn_chain);
 	if (LF_ISSET(DB_THREAD)) {
 		LOCK_TXNREGION(tmgrp);
-		if ((ret = __db_shalloc(tmgrp->mem, sizeof(db_mutex_t), 
+		if ((ret = __db_shalloc(tmgrp->mem, sizeof(db_mutex_t),
 		    MUTEX_ALIGNMENT, &tmgrp->mutexp)) == 0)
 			__db_mutex_init(tmgrp->mutexp, -1);
 		UNLOCK_TXNREGION(tmgrp);
@@ -225,7 +225,7 @@ out:	if (txn_regionp != NULL)
 			__db_shalloc_free(tmgrp->mem, tmgrp->mutexp);
 			UNLOCK_TXNREGION(tmgrp);
 		}
-		free(tmgrp);
+		__db_free(tmgrp);
 	}
 	return (ret);
 }
@@ -254,7 +254,7 @@ txn_begin(tmgrp, parent, txnpp)
 	if ((ret = __db_shalloc(tmgrp->mem, sizeof(TXN_DETAIL), 0, &txnp)) != 0
 	    && ret == ENOMEM && (ret = __txn_grow_region(tmgrp)) == 0)
 	    	ret = __db_shalloc(tmgrp->mem, sizeof(TXN_DETAIL), 0, &txnp);
-		
+
 	if (ret != 0)
 		goto err;
 
@@ -262,7 +262,7 @@ txn_begin(tmgrp, parent, txnpp)
 	if (tmgrp->region->last_txnid == TXN_INVALID)
 		return (EINVAL);
 
-	if ((retp = (DB_TXN *)malloc(sizeof(DB_TXN))) == NULL) {
+	if ((retp = (DB_TXN *)__db_malloc(sizeof(DB_TXN))) == NULL) {
 		__db_err(tmgrp->dbenv, "txn_begin : %s", strerror(ENOMEM));
 		ret = ENOMEM;
 		goto err1;
@@ -297,7 +297,7 @@ txn_begin(tmgrp, parent, txnpp)
 		    txnp, links, __txn_detail);
 		__db_shalloc_free(tmgrp->mem, txnp);
 		UNLOCK_TXNREGION(tmgrp);
-		free (retp);
+		__db_free(retp);
 		return (ret);
 	}
 
@@ -433,7 +433,7 @@ txn_close(tmgrp)
 		ret = t_ret;
 
 	if (ret == 0)
-		free (tmgrp);
+		__db_free(tmgrp);
 
 	return (ret);
 }
@@ -561,7 +561,7 @@ __txn_undo(txnp)
 			ret =
 			    mgr->recover(logp, &rdbt, &key_lsn, TXN_UNDO, NULL);
 			if (F_ISSET(logp, DB_AM_THREAD) && rdbt.data != NULL) {
-				free(rdbt.data);
+				__db_free(rdbt.data);
 				rdbt.data = NULL;
 			}
 		}
@@ -590,7 +590,7 @@ txn_checkpoint(mgr, kbytes, minutes)
 	TXN_DETAIL *txnp;
 	DB_LSN ckp_lsn, last_ckp;
 	DB_LOG *dblp;
-	u_int32_t bytes_written;
+	u_int32_t kbytes_written;
 	time_t last_ckp_time, now;
 	int ret;
 
@@ -616,10 +616,12 @@ txn_checkpoint(mgr, kbytes, minutes)
 	if (kbytes != 0) {
 		dblp = mgr->dbenv->lg_info;
 		LOCK_LOGREGION(dblp);
-		bytes_written = dblp->lp->written;
+		kbytes_written =
+		    dblp->lp->stat.st_wc_mbytes * 1024 +
+		    dblp->lp->stat.st_wc_bytes / 1024;
 		ckp_lsn = dblp->lp->lsn;
 		UNLOCK_LOGREGION(dblp);
-		if (bytes_written >= (u_int32_t)(kbytes * 1024))
+		if (kbytes_written >= (u_int32_t)kbytes)
 			goto do_ckp;
 	}
 
@@ -726,12 +728,14 @@ __txn_grow_region(tp)
 	DB_TXNMGR *tp;
 {
 	size_t incr;
+	off_t mutex_offset;
 	u_int32_t oldmax;
 	u_int8_t *curaddr;
 	int ret;
 
 	oldmax = tp->region->maxtxns;
 	incr = oldmax * sizeof(DB_TXN);
+	mutex_offset = (u_int8_t *)tp->mutexp - (u_int8_t *)tp->region;
 
 	if ((ret = __db_rgrow(tp->dbenv, tp->fd, incr)) != 0)
 		return (ret);
@@ -744,6 +748,7 @@ __txn_grow_region(tp)
 	curaddr = (u_int8_t *)tp->region + tp->reg_size;
 	tp->mem = &tp->region[1];
 	tp->reg_size += incr;
+	tp->mutexp = (db_mutex_t *)((u_int8_t *)tp->region + mutex_offset);
 
 	*((size_t *)curaddr) = incr - sizeof(size_t);
 	curaddr += sizeof(size_t);
@@ -776,7 +781,7 @@ txn_stat(mgr, statp, db_malloc)
 	 */
 	nbytes = sizeof(DB_TXN_STAT) + sizeof(DB_TXN_ACTIVE) * (nactive + 200);
 	if (db_malloc == NULL)
-		stats = (DB_TXN_STAT *)malloc(nbytes);
+		stats = (DB_TXN_STAT *)__db_malloc(nbytes);
 	else
 		stats = (DB_TXN_STAT *)db_malloc(nbytes);
 
diff --git a/db2/txn/txn_auto.c b/db2/txn/txn_auto.c
index baef7333c7..9edbc03eab 100644
--- a/db2/txn/txn_auto.c
+++ b/db2/txn/txn_auto.c
@@ -46,7 +46,7 @@ int __txn_regop_log(logp, txnid, ret_lsnp, flags,
 		lsnp = &txnid->last_lsn;
 	logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN)
 	    + sizeof(opcode);
-	if ((logrec.data = (void *)malloc(logrec.size)) == NULL)
+	if ((logrec.data = (void *)__db_malloc(logrec.size)) == NULL)
 		return (ENOMEM);
 
 	bp = logrec.data;
@@ -65,7 +65,7 @@ int __txn_regop_log(logp, txnid, ret_lsnp, flags,
 	ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags);
 	if (txnid != NULL)
 		txnid->last_lsn = *ret_lsnp;
-	free(logrec.data);
+	__db_free(logrec.data);
 	return (ret);
 }
 
@@ -103,7 +103,7 @@ __txn_regop_print(notused1, dbtp, lsnp, notused3, notused4)
 	    (u_long)argp->prev_lsn.offset);
 	printf("\topcode: %lu\n", (u_long)argp->opcode);
 	printf("\n");
-	free(argp);
+	__db_free(argp);
 	return (0);
 }
 
@@ -118,7 +118,7 @@ __txn_regop_read(recbuf, argpp)
 	__txn_regop_args *argp;
 	u_int8_t *bp;
 
-	argp = (__txn_regop_args *)malloc(sizeof(__txn_regop_args) +
+	argp = (__txn_regop_args *)__db_malloc(sizeof(__txn_regop_args) +
 	    sizeof(DB_TXN));
 	if (argp == NULL)
 		return (ENOMEM);
@@ -167,7 +167,7 @@ int __txn_ckp_log(logp, txnid, ret_lsnp, flags,
 	logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN)
 	    + sizeof(*ckp_lsn)
 	    + sizeof(*last_ckp);
-	if ((logrec.data = (void *)malloc(logrec.size)) == NULL)
+	if ((logrec.data = (void *)__db_malloc(logrec.size)) == NULL)
 		return (ENOMEM);
 
 	bp = logrec.data;
@@ -194,7 +194,7 @@ int __txn_ckp_log(logp, txnid, ret_lsnp, flags,
 	ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags);
 	if (txnid != NULL)
 		txnid->last_lsn = *ret_lsnp;
-	free(logrec.data);
+	__db_free(logrec.data);
 	return (ret);
 }
 
@@ -235,7 +235,7 @@ __txn_ckp_print(notused1, dbtp, lsnp, notused3, notused4)
 	printf("\tlast_ckp: [%lu][%lu]\n",
 	    (u_long)argp->last_ckp.file, (u_long)argp->last_ckp.offset);
 	printf("\n");
-	free(argp);
+	__db_free(argp);
 	return (0);
 }
 
@@ -250,7 +250,7 @@ __txn_ckp_read(recbuf, argpp)
 	__txn_ckp_args *argp;
 	u_int8_t *bp;
 
-	argp = (__txn_ckp_args *)malloc(sizeof(__txn_ckp_args) +
+	argp = (__txn_ckp_args *)__db_malloc(sizeof(__txn_ckp_args) +
 	    sizeof(DB_TXN));
 	if (argp == NULL)
 		return (ENOMEM);
diff --git a/db2/txn/txn_rec.c b/db2/txn/txn_rec.c
index c172d874d9..679cffb567 100644
--- a/db2/txn/txn_rec.c
+++ b/db2/txn/txn_rec.c
@@ -40,7 +40,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)txn_rec.c	10.5 (Sleepycat) 8/27/97";
+static const char sccsid[] = "@(#)txn_rec.c	10.6 (Sleepycat) 10/25/97";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -97,7 +97,7 @@ __txn_regop_recover(logp, dbtp, lsnp, redo, info)
 	}
 
 	*lsnp = argp->prev_lsn;
-	free (argp);
+	__db_free(argp);
 	return (0);
 }
 
@@ -126,6 +126,6 @@ __txn_ckp_recover(logp, dbtp, lsnp, redo, info)
 		return (ret);
 
 	*lsnp = argp->last_ckp;
-	free(argp);
+	__db_free(argp);
 	return (DB_TXN_CKP);
 }