about summary refs log tree commit diff
path: root/db2
diff options
context:
space:
mode:
Diffstat (limited to 'db2')
-rw-r--r--db2/Makefile2
-rw-r--r--db2/btree/bt_close.c19
-rw-r--r--db2/btree/bt_compare.c23
-rw-r--r--db2/btree/bt_conv.c4
-rw-r--r--db2/btree/bt_cursor.c381
-rw-r--r--db2/btree/bt_delete.c92
-rw-r--r--db2/btree/bt_open.c14
-rw-r--r--db2/btree/bt_page.c28
-rw-r--r--db2/btree/bt_put.c176
-rw-r--r--db2/btree/bt_rec.c15
-rw-r--r--db2/btree/bt_recno.c230
-rw-r--r--db2/btree/bt_rsearch.c98
-rw-r--r--db2/btree/bt_search.c30
-rw-r--r--db2/btree/bt_split.c22
-rw-r--r--db2/btree/bt_stat.c9
-rw-r--r--db2/btree/btree.src10
-rw-r--r--db2/btree/btree_auto.c186
-rw-r--r--db2/clib/getlong.c4
-rw-r--r--db2/common/db_appinit.c183
-rw-r--r--db2/common/db_apprec.c49
-rw-r--r--db2/common/db_byteorder.c4
-rw-r--r--db2/common/db_err.c137
-rw-r--r--db2/common/db_log2.c7
-rw-r--r--db2/common/db_region.c1131
-rw-r--r--db2/common/db_salloc.c41
-rw-r--r--db2/common/db_shash.c82
-rw-r--r--db2/config.h31
-rw-r--r--db2/db.h245
-rw-r--r--db2/db/db.c113
-rw-r--r--db2/db/db.src9
-rw-r--r--db2/db/db_auto.c227
-rw-r--r--db2/db/db_conv.c25
-rw-r--r--db2/db/db_dispatch.c117
-rw-r--r--db2/db/db_dup.c21
-rw-r--r--db2/db/db_overflow.c17
-rw-r--r--db2/db/db_pr.c92
-rw-r--r--db2/db/db_rec.c51
-rw-r--r--db2/db/db_ret.c7
-rw-r--r--db2/db/db_thread.c6
-rw-r--r--db2/db185/db185.c28
-rw-r--r--db2/db185/db185_int.h28
-rw-r--r--db2/db_185.h28
-rw-r--r--db2/db_int.h132
-rw-r--r--db2/dbm/dbm.c67
-rw-r--r--db2/hash/hash.c92
-rw-r--r--db2/hash/hash.src6
-rw-r--r--db2/hash/hash_auto.c186
-rw-r--r--db2/hash/hash_conv.c4
-rw-r--r--db2/hash/hash_debug.c10
-rw-r--r--db2/hash/hash_dup.c19
-rw-r--r--db2/hash/hash_func.c4
-rw-r--r--db2/hash/hash_page.c120
-rw-r--r--db2/hash/hash_rec.c36
-rw-r--r--db2/hash/hash_stat.c5
-rw-r--r--db2/include/btree.h62
-rw-r--r--db2/include/btree_ext.h22
-rw-r--r--db2/include/clib_ext.h3
-rw-r--r--db2/include/common_ext.h47
-rw-r--r--db2/include/cxx_int.h4
-rw-r--r--db2/include/db.h.src245
-rw-r--r--db2/include/db_185.h.src28
-rw-r--r--db2/include/db_am.h4
-rw-r--r--db2/include/db_auto.h3
-rw-r--r--db2/include/db_cxx.h84
-rw-r--r--db2/include/db_dispatch.h24
-rw-r--r--db2/include/db_ext.h14
-rw-r--r--db2/include/db_int.h.src132
-rw-r--r--db2/include/db_page.h12
-rw-r--r--db2/include/db_shash.h4
-rw-r--r--db2/include/db_swap.h6
-rw-r--r--db2/include/hash.h4
-rw-r--r--db2/include/hash_ext.h18
-rw-r--r--db2/include/lock.h52
-rw-r--r--db2/include/lock_ext.h9
-rw-r--r--db2/include/log.h27
-rw-r--r--db2/include/log_ext.h4
-rw-r--r--db2/include/mp.h45
-rw-r--r--db2/include/mp_ext.h10
-rw-r--r--db2/include/mutex_ext.h2
-rw-r--r--db2/include/os_ext.h18
-rw-r--r--db2/include/os_func.h79
-rw-r--r--db2/include/queue.h2
-rw-r--r--db2/include/shqueue.h4
-rw-r--r--db2/include/txn.h20
-rw-r--r--db2/lock/lock.c666
-rw-r--r--db2/lock/lock_conflict.c4
-rw-r--r--db2/lock/lock_deadlock.c30
-rw-r--r--db2/lock/lock_region.c726
-rw-r--r--db2/lock/lock_util.c93
-rw-r--r--db2/log/log.c203
-rw-r--r--db2/log/log.src35
-rw-r--r--db2/log/log_archive.c35
-rw-r--r--db2/log/log_auto.c35
-rw-r--r--db2/log/log_compare.c4
-rw-r--r--db2/log/log_findckp.c24
-rw-r--r--db2/log/log_get.c29
-rw-r--r--db2/log/log_put.c42
-rw-r--r--db2/log/log_rec.c10
-rw-r--r--db2/log/log_register.c29
-rw-r--r--db2/mp/mp_bh.c79
-rw-r--r--db2/mp/mp_fget.c359
-rw-r--r--db2/mp/mp_fopen.c128
-rw-r--r--db2/mp/mp_fput.c64
-rw-r--r--db2/mp/mp_fset.c8
-rw-r--r--db2/mp/mp_open.c41
-rw-r--r--db2/mp/mp_pr.c294
-rw-r--r--db2/mp/mp_region.c229
-rw-r--r--db2/mp/mp_sync.c74
-rw-r--r--db2/mutex/68020.gcc3
-rw-r--r--db2/mutex/mutex.c74
-rw-r--r--db2/mutex/parisc.gcc18
-rw-r--r--db2/mutex/parisc.hp4
-rw-r--r--db2/mutex/sco.cc24
-rw-r--r--db2/os/os_abs.c4
-rw-r--r--db2/os/os_alloc.c44
-rw-r--r--db2/os/os_config.c120
-rw-r--r--db2/os/os_dir.c8
-rw-r--r--db2/os/os_fid.c5
-rw-r--r--db2/os/os_fsync.c19
-rw-r--r--db2/os/os_map.c440
-rw-r--r--db2/os/os_oflags.c56
-rw-r--r--db2/os/os_open.c13
-rw-r--r--db2/os/os_rpath.c4
-rw-r--r--db2/os/os_rw.c4
-rw-r--r--db2/os/os_seek.c16
-rw-r--r--db2/os/os_sleep.c7
-rw-r--r--db2/os/os_spin.c41
-rw-r--r--db2/os/os_stat.c27
-rw-r--r--db2/os/os_unlink.c4
-rw-r--r--db2/progs/db_archive/db_archive.c9
-rw-r--r--db2/progs/db_checkpoint/db_checkpoint.c47
-rw-r--r--db2/progs/db_deadlock/db_deadlock.c12
-rw-r--r--db2/progs/db_dump/db_dump.c79
-rw-r--r--db2/progs/db_dump185/db_dump185.c82
-rw-r--r--db2/progs/db_load/db_load.c193
-rw-r--r--db2/progs/db_printlog/db_printlog.c6
-rw-r--r--db2/progs/db_recover/db_recover.c16
-rw-r--r--db2/progs/db_stat/db_stat.c335
-rw-r--r--db2/txn/txn.c349
-rw-r--r--db2/txn/txn.src8
-rw-r--r--db2/txn/txn_auto.c36
-rw-r--r--db2/txn/txn_rec.c35
142 files changed, 6504 insertions, 4570 deletions
diff --git a/db2/Makefile b/db2/Makefile
index 35c67dadfe..cc530a8a71 100644
--- a/db2/Makefile
+++ b/db2/Makefile
@@ -65,7 +65,7 @@ libdb-routines := bt_close bt_compare bt_conv bt_cursor bt_delete \
 	os_spin db_overflow db_pr db_rec db_region db_ret db_salloc \
 	db_shash db_thread hash hash_auto hash_conv hash_debug \
 	hash_dup hash_func hash_page hash_rec hash_stat lock \
-	lock_conflict lock_deadlock lock_util log log_archive \
+	lock_conflict lock_deadlock lock_region lock_util log log_archive \
 	log_auto log_compare log_findckp log_get log_put log_rec \
 	log_register mp_bh mp_fget mp_fopen mp_fput mp_fset \
 	mp_open mp_pr mp_region mp_sync mutex txn txn_auto \
diff --git a/db2/btree/bt_close.c b/db2/btree/bt_close.c
index ecccc9fe08..9df5c717e6 100644
--- a/db2/btree/bt_close.c
+++ b/db2/btree/bt_close.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 /*
@@ -47,18 +47,13 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)bt_close.c	10.25 (Sleepycat) 1/6/98";
+static const char sccsid[] = "@(#)bt_close.c	10.32 (Sleepycat) 5/6/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
 #include <sys/types.h>
-#include <sys/mman.h>
 
-#include <errno.h>
-#include <stdio.h>
-#include <stdlib.h>
 #include <string.h>
-#include <unistd.h>
 #endif
 
 #include "db_int.h"
@@ -104,12 +99,12 @@ __bam_close(dbp)
  * __bam_sync --
  *	Sync the btree to disk.
  *
- * PUBLIC: int __bam_sync __P((DB *, int));
+ * PUBLIC: int __bam_sync __P((DB *, u_int32_t));
  */
 int
 __bam_sync(argdbp, flags)
 	DB *argdbp;
-	int flags;
+	u_int32_t flags;
 {
 	DB *dbp;
 	int ret;
@@ -146,7 +141,7 @@ __bam_upstat(dbp)
 	BTMETA *meta;
 	DB_LOCK metalock;
 	db_pgno_t pgno;
-	int flags, ret;
+	u_int32_t flags;
 
 	/*
 	 * We use a no-op log call to log the update of the statistics onto the
@@ -166,8 +161,8 @@ __bam_upstat(dbp)
 	if (__bam_pget(dbp, (PAGE **)&meta, &pgno, 0) == 0) {
 		/* Log the change. */
 		if (DB_LOGGING(dbp) &&
-		    (ret = __db_noop_log(dbp->dbenv->lg_info, dbp->txn,
-		    &LSN(meta), 0)) == 0)
+		    __db_noop_log(dbp->dbenv->lg_info, dbp->txn, &LSN(meta), 0,
+		    dbp->log_fileid, PGNO_METADATA, &LSN(meta)) != 0)
 			goto err;
 
 		/* Update the statistics. */
diff --git a/db2/btree/bt_compare.c b/db2/btree/bt_compare.c
index a68b1fa891..5c6d1e38ca 100644
--- a/db2/btree/bt_compare.c
+++ b/db2/btree/bt_compare.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 /*
@@ -47,14 +47,12 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)bt_compare.c	10.4 (Sleepycat) 9/3/97";
+static const char sccsid[] = "@(#)bt_compare.c	10.9 (Sleepycat) 5/6/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
 #include <sys/types.h>
 
-#include <stdio.h>
-#include <stdlib.h>
 #include <string.h>
 #endif
 
@@ -106,7 +104,6 @@ __bam_cmp(dbp, k1, e)
 		if (B_TYPE(bk->type) == B_OVERFLOW)
 			bo = (BOVERFLOW *)bk;
 		else {
-			memset(&k2, 0, sizeof(k2));
 			k2.data = bk->data;
 			k2.size = bk->len;
 		}
@@ -115,7 +112,6 @@ __bam_cmp(dbp, k1, e)
 		if (B_TYPE(bi->type) == B_OVERFLOW)
 			bo = (BOVERFLOW *)(bi->data);
 		else {
-			memset(&k2, 0, sizeof(k2));
 			k2.data = bi->data;
 			k2.size = bi->len;
 		}
@@ -139,10 +135,21 @@ __bam_cmp(dbp, k1, e)
 		 * Otherwise, we need a contiguous record so we can hand it
 		 * to the user's routine.
 		 */
+		memset(&k2, 0, sizeof(k2));
 		if (__db_goff(dbp, &k2, bo->tlen,
-		    bo->pgno, &t->bt_rdata.data, &t->bt_rdata.ulen) != 0)
-			abort();
+		    bo->pgno, &t->bt_rdata.data, &t->bt_rdata.ulen) != 0) {
+			(void)__db_panic(dbp);
+			return (0);
+		}
 	}
+
+	/*
+	 * XXX
+	 * Note, we have not cleared the k2 DBT in this path.  This should
+	 * be okay, because the user's comparison routine had better not be
+	 * looking at any fields other than the data/size.  We don't clear
+	 * it because we go through this path a lot and it's expensive.
+	 */
 	return ((*t->bt_compare)(k1, &k2));
 }
 
diff --git a/db2/btree/bt_conv.c b/db2/btree/bt_conv.c
index c89493cbed..3da4507723 100644
--- a/db2/btree/bt_conv.c
+++ b/db2/btree/bt_conv.c
@@ -1,14 +1,14 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)bt_conv.c	10.5 (Sleepycat) 9/15/97";
+static const char sccsid[] = "@(#)bt_conv.c	10.6 (Sleepycat) 4/10/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
diff --git a/db2/btree/bt_cursor.c b/db2/btree/bt_cursor.c
index f526c965e5..cfa388741e 100644
--- a/db2/btree/bt_cursor.c
+++ b/db2/btree/bt_cursor.c
@@ -1,22 +1,20 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)bt_cursor.c	10.41 (Sleepycat) 1/8/98";
+static const char sccsid[] = "@(#)bt_cursor.c	10.53 (Sleepycat) 5/25/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
 #include <sys/types.h>
 
 #include <errno.h>
-#include <stdio.h>
-#include <stdlib.h>
 #include <string.h>
 #endif
 
@@ -25,24 +23,30 @@ static const char sccsid[] = "@(#)bt_cursor.c	10.41 (Sleepycat) 1/8/98";
 #include "btree.h"
 
 static int __bam_c_close __P((DBC *));
-static int __bam_c_del __P((DBC *, int));
+static int __bam_c_del __P((DBC *, u_int32_t));
 static int __bam_c_first __P((DB *, CURSOR *));
-static int __bam_c_get __P((DBC *, DBT *, DBT *, int));
+static int __bam_c_get __P((DBC *, DBT *, DBT *, u_int32_t));
+static int __bam_c_getstack __P((DB *, CURSOR *));
 static int __bam_c_last __P((DB *, CURSOR *));
 static int __bam_c_next __P((DB *, CURSOR *, int));
 static int __bam_c_physdel __P((DB *, CURSOR *, PAGE *));
 static int __bam_c_prev __P((DB *, CURSOR *));
-static int __bam_c_put __P((DBC *, DBT *, DBT *, int));
-static int __bam_c_rget __P((DB *, CURSOR *, DBT *, int));
-static int __bam_c_search __P((DB *, CURSOR *, const DBT *, u_int, int, int *));
+static int __bam_c_put __P((DBC *, DBT *, DBT *, u_int32_t));
+static int __bam_c_rget __P((DB *, CURSOR *, DBT *, u_int32_t));
+static int __bam_c_search
+	       __P((DB *, CURSOR *, const DBT *, u_int32_t, int, int *));
 
 /* Discard the current page/lock held by a cursor. */
 #undef	DISCARD
 #define	DISCARD(dbp, cp) {						\
-	(void)memp_fput(dbp->mpf, (cp)->page, 0);			\
-	(cp)->page = NULL;						\
-	(void)__BT_TLPUT((dbp), (cp)->lock);				\
-	(cp)->lock = LOCK_INVALID;					\
+	if ((cp)->page != NULL) {					\
+		(void)memp_fput(dbp->mpf, (cp)->page, 0);		\
+		(cp)->page = NULL;					\
+	}								\
+	if ((cp)->lock != LOCK_INVALID) {				\
+		(void)__BT_TLPUT((dbp), (cp)->lock);			\
+		(cp)->lock = LOCK_INVALID;				\
+	}								\
 }
 
 /*
@@ -85,9 +89,9 @@ __bam_cursor(dbp, txn, dbcp)
 	 * All cursors are queued from the master DB structure.  Add the
 	 * cursor to that queue.
 	 */
-	DB_THREAD_LOCK(dbp);
+	CURSOR_SETUP(dbp);
 	TAILQ_INSERT_HEAD(&dbp->curs_queue, dbc, links);
-	DB_THREAD_UNLOCK(dbp);
+	CURSOR_TEARDOWN(dbp);
 
 	*dbcp = dbc;
 	return (0);
@@ -128,13 +132,6 @@ __bam_c_iclose(dbp, dbc)
 	CURSOR *cp;
 	int ret;
 
-	/*
-	 * All cursors are queued from the master DB structure.  For
-	 * now, discard the DB handle which triggered this call, and
-	 * replace it with the cursor's reference.
-	 */
-	dbp = dbc->dbp;
-
 	/* If a cursor key was deleted, perform the actual deletion.  */
 	cp = dbc->internal;
 	ret = F_ISSET(cp, C_DELETED) ? __bam_c_physdel(dbp, cp, NULL) : 0;
@@ -144,9 +141,9 @@ __bam_c_iclose(dbp, dbc)
 		(void)__BT_TLPUT(dbp, cp->lock);
 
 	/* Remove the cursor from the queue. */
-	DB_THREAD_LOCK(dbp);
+	CURSOR_SETUP(dbp);
 	TAILQ_REMOVE(&dbp->curs_queue, dbc, links);
-	DB_THREAD_UNLOCK(dbp);
+	CURSOR_TEARDOWN(dbp);
 
 	/* Discard the structures. */
 	FREE(dbc->internal, sizeof(CURSOR));
@@ -162,8 +159,9 @@ __bam_c_iclose(dbp, dbc)
 static int
 __bam_c_del(dbc, flags)
 	DBC *dbc;
-	int flags;
+	u_int32_t flags;
 {
+	BTREE *t;
 	CURSOR *cp;
 	DB *dbp;
 	DB_LOCK lock;
@@ -175,6 +173,7 @@ __bam_c_del(dbc, flags)
 	DEBUG_LWRITE(dbc->dbp, dbc->txn, "bam_c_del", NULL, NULL, flags);
 
 	cp = dbc->internal;
+	h = NULL;
 
 	/* Check for invalid flags. */
 	if ((ret = __db_cdelchk(dbc->dbp, flags,
@@ -186,6 +185,7 @@ __bam_c_del(dbc, flags)
 		return (DB_KEYEMPTY);
 
 	GETHANDLE(dbc->dbp, dbc->txn, &dbp, ret);
+	t = dbp->internal;
 
 	/*
 	 * We don't physically delete the record until the cursor moves,
@@ -235,8 +235,21 @@ __bam_c_del(dbc, flags)
 	(void)__bam_ca_delete(dbp, pgno, indx, NULL, 0);
 
 	ret = memp_fput(dbp->mpf, h, DB_MPOOL_DIRTY);
+	h = NULL;
+
+	/*
+	 * If it's a btree with record numbers, we have to adjust the
+	 * counts.
+	 */
+	if (F_ISSET(dbp, DB_BT_RECNUM) &&
+	    (ret = __bam_c_getstack(dbp, cp)) == 0) {
+		ret = __bam_adjust(dbp, t, -1);
+		(void)__bam_stkrel(dbp);
+	}
 
-err:	PUTHANDLE(dbp);
+err:	if (h != NULL)
+		(void)memp_fput(dbp->mpf, h, 0);
+	PUTHANDLE(dbp);
 	return (ret);
 }
 
@@ -244,14 +257,14 @@ err:	PUTHANDLE(dbp);
  * __bam_get --
  *	Retrieve a key/data pair from the tree.
  *
- * PUBLIC: int __bam_get __P((DB *, DB_TXN *, DBT *, DBT *, int));
+ * PUBLIC: int __bam_get __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t));
  */
 int
 __bam_get(argdbp, txn, key, data, flags)
 	DB *argdbp;
 	DB_TXN *txn;
 	DBT *key, *data;
-	int flags;
+	u_int32_t flags;
 {
 	DBC dbc;
 	CURSOR cp;
@@ -289,7 +302,7 @@ static int
 __bam_c_get(dbc, key, data, flags)
 	DBC *dbc;
 	DBT *key, *data;
-	int flags;
+	u_int32_t flags;
 {
 	BTREE *t;
 	CURSOR *cp, copy;
@@ -448,7 +461,7 @@ __bam_c_rget(dbp, cp, data, flags)
 	DB *dbp;
 	CURSOR *cp;
 	DBT *data;
-	int flags;
+	u_int32_t flags;
 {
 	BTREE *t;
 	DBT dbt;
@@ -491,7 +504,7 @@ static int
 __bam_c_put(dbc, key, data, flags)
 	DBC *dbc;
 	DBT *key, *data;
-	int flags;
+	u_int32_t flags;
 {
 	BTREE *t;
 	CURSOR *cp, copy;
@@ -499,7 +512,8 @@ __bam_c_put(dbc, key, data, flags)
 	DBT dbt;
 	db_indx_t indx;
 	db_pgno_t pgno;
-	int exact, needkey, ret;
+	u_int32_t iiflags;
+	int exact, needkey, ret, stack;
 	void *arg;
 
 	DEBUG_LWRITE(dbc->dbp, dbc->txn, "bam_c_put",
@@ -524,29 +538,34 @@ __bam_c_put(dbc, key, data, flags)
 	 * To split, we need a valid key for the page.  Since it's a cursor,
 	 * we have to build one.
 	 */
+	stack = 0;
 	if (0) {
-split:		if (needkey) {
+split:		/* Acquire a copy of a key from the page. */
+		if (needkey) {
 			memset(&dbt, 0, sizeof(DBT));
-			ret = __db_ret(dbp, cp->page, indx,
-			    &dbt, &t->bt_rkey.data, &t->bt_rkey.ulen);
-
-			DISCARD(dbp, cp);
-
-			if (ret)
+			if ((ret = __db_ret(dbp, cp->page, indx,
+			    &dbt, &t->bt_rkey.data, &t->bt_rkey.ulen)) != 0)
 				goto err;
 			arg = &dbt;
-		} else {
-			(void)__bam_stkrel(dbp);
+		} else
 			arg = key;
-		}
+
+		/* Discard any pinned pages. */
+		if (stack) {
+			(void)__bam_stkrel(dbp);
+			stack = 0;
+		} else
+			DISCARD(dbp, cp);
+
 		if ((ret = __bam_split(dbp, arg)) != 0)
 			goto err;
 	}
 
-	/* If there's no key supplied, use the cursor. */
-	if (flags == DB_KEYFIRST || flags == DB_KEYLAST)
-		needkey = 0;
-	else {
+	ret = 0;
+	switch (flags) {
+	case DB_AFTER:
+	case DB_BEFORE:
+	case DB_CURRENT:
 		needkey = 1;
 		if (cp->dpgno == PGNO_INVALID) {
 			pgno = cp->pgno;
@@ -555,41 +574,53 @@ split:		if (needkey) {
 			pgno = cp->dpgno;
 			indx = cp->dindx;
 		}
-		/* Acquire the current page. */
-		if ((ret = __bam_lget(dbp,
-		    0, cp->pgno, DB_LOCK_WRITE, &cp->lock)) != 0)
-			goto err;
-		if ((ret = __bam_pget(dbp, &cp->page, &pgno, 0)) != 0)
-			goto err;
-	}
+		/*
+		 * XXX
+		 * This test is right -- we don't currently support duplicates
+		 * in the presence of record numbers, so we don't worry about
+		 * them if DB_BT_RECNUM is set.
+		 */
+		if (F_ISSET(dbp, DB_BT_RECNUM) &&
+		    (flags != DB_CURRENT || F_ISSET(cp, C_DELETED))) {
+			/* Acquire a complete stack. */
+			if ((ret = __bam_c_getstack(dbp, cp)) != 0)
+				goto err;
+			cp->page = t->bt_csp->page;
 
-	ret = 0;
-	switch (flags) {
-	case DB_AFTER:
-	case DB_BEFORE:
-	case DB_CURRENT:
+			stack = 1;
+			iiflags = BI_DOINCR;
+		} else {
+			/* Acquire the current page. */
+			if ((ret = __bam_lget(dbp,
+			    0, cp->pgno, DB_LOCK_WRITE, &cp->lock)) == 0)
+				ret = __bam_pget(dbp, &cp->page, &pgno, 0);
+			if (ret != 0)
+				goto err;
+
+			iiflags = 0;
+		}
 		if ((ret = __bam_iitem(dbp, &cp->page,
-		    &indx, key, data, flags, 0)) == DB_NEEDSPLIT)
+		    &indx, key, data, flags, iiflags)) == DB_NEEDSPLIT)
 			goto split;
 		break;
 	case DB_KEYFIRST:
-		exact = 0;
+		exact = needkey = 0;
 		if ((ret =
 		    __bam_c_search(dbp, cp, key, S_KEYFIRST, 0, &exact)) != 0)
 			goto err;
+		stack = 1;
 
 		indx = cp->dpgno == PGNO_INVALID ? cp->indx : cp->dindx;
 		if ((ret = __bam_iitem(dbp, &cp->page, &indx, key,
 		    data, DB_BEFORE, exact ? 0 : BI_NEWKEY)) == DB_NEEDSPLIT)
 			goto split;
-		if (ret)
-			goto err;
 		break;
 	case DB_KEYLAST:
-		exact = 0;
+		exact = needkey = 0;
 		if ((ret =
 		    __bam_c_search(dbp, cp, key, S_KEYLAST, 0, &exact)) != 0)
 			goto err;
+		stack = 1;
 
 		indx = cp->dpgno == PGNO_INVALID ? cp->indx : cp->dindx;
 		if ((ret = __bam_iitem(dbp, &cp->page, &indx, key,
@@ -623,13 +654,27 @@ split:		if (needkey) {
 	if (copy.lock != LOCK_INVALID)
 		(void)__BT_TLPUT(dbp, copy.lock);
 
-	/* Discard the pinned page. */
-	ret = memp_fput(dbp->mpf, cp->page, 0);
+	/*
+	 * Discard any pages pinned in the tree and their locks, except for
+	 * the leaf page, for which we only discard the pin, not the lock.
+	 *
+	 * Note, the leaf page participated in the stack we acquired, and so
+	 * we have to adjust the stack as necessary.  If there was only a
+	 * single page on the stack, we don't have to free further stack pages.
+	 */
+
+	if (stack && BT_STK_POP(t) != NULL)
+		(void)__bam_stkrel(dbp);
+
+	if ((ret = memp_fput(dbp->mpf, cp->page, 0)) != 0)
+		goto err;
+
 	if (0) {
-err:		if (cp->page != NULL)
-			(void)memp_fput(dbp->mpf, cp->page, 0);
-		if (cp->lock != LOCK_INVALID)
-			(void)__BT_TLPUT(dbp, cp->lock);
+err:		/* Discard any pinned pages. */
+		if (stack)
+			(void)__bam_stkrel(dbp);
+		else
+			DISCARD(dbp, cp);
 		*cp = copy;
 	}
 
@@ -976,7 +1021,7 @@ __bam_c_search(dbp, cp, key, flags, isrecno, exactp)
 	DB *dbp;
 	CURSOR *cp;
 	const DBT *key;
-	u_int flags;
+	u_int32_t flags;
 	int isrecno, *exactp;
 {
 	BTREE *t;
@@ -1032,6 +1077,18 @@ __bam_c_search(dbp, cp, key, flags, isrecno, exactp)
 		} else
 			if ((ret = __bam_c_next(dbp, cp, 0)) != 0)
 				return (ret);
+	/*
+	 * If we don't specify an exact match (the DB_KEYFIRST/DB_KEYLAST or
+	 * DB_SET_RANGE flags were set) __bam_search() may return a deleted
+	 * item.  For DB_KEYFIRST/DB_KEYLAST, we don't care since we're only
+	 * using it for a tree position.  For DB_SET_RANGE, we're returning
+	 * the key, so we have to adjust it.
+	 */
+	if (LF_ISSET(S_DELNO) && cp->dpgno == PGNO_INVALID &&
+	    B_DISSET(GET_BKEYDATA(cp->page, cp->indx + O_INDX)->type))
+		if ((ret = __bam_c_next(dbp, cp, 0)) != 0)
+			return (ret);
+
 	return (0);
 }
 
@@ -1101,7 +1158,7 @@ __bam_cprint(dbp)
 	CURSOR *cp;
 	DBC *dbc;
 
-	DB_THREAD_LOCK(dbp);
+	CURSOR_SETUP(dbp);
 	for (dbc = TAILQ_FIRST(&dbp->curs_queue);
 	    dbc != NULL; dbc = TAILQ_NEXT(dbc, links)) {
 		cp = (CURSOR *)dbc->internal;
@@ -1113,7 +1170,8 @@ __bam_cprint(dbp)
 			fprintf(stderr, "(deleted)");
 		fprintf(stderr, "\n");
 	}
-	DB_THREAD_UNLOCK(dbp);
+	CURSOR_TEARDOWN(dbp);
+
 	return (0);
 }
 #endif /* DEBUG */
@@ -1135,7 +1193,7 @@ __bam_ca_delete(dbp, pgno, indx, curs, key_delete)
 {
 	DBC *dbc;
 	CURSOR *cp;
-	int count;
+	int count;		/* !!!: Has to contain max number of cursors. */
 
 	/*
 	 * Adjust the cursors.  We don't have to review the cursors for any
@@ -1148,8 +1206,7 @@ __bam_ca_delete(dbp, pgno, indx, curs, key_delete)
 	 * locks on the same page, but, cursors within a thread must be single
 	 * threaded, so all we're locking here is the cursor linked list.
 	 */
-	DB_THREAD_LOCK(dbp);
-
+	CURSOR_SETUP(dbp);
 	for (count = 0, dbc = TAILQ_FIRST(&dbp->curs_queue);
 	    dbc != NULL; dbc = TAILQ_NEXT(dbc, links)) {
 		cp = (CURSOR *)dbc->internal;
@@ -1180,8 +1237,8 @@ __bam_ca_delete(dbp, pgno, indx, curs, key_delete)
 				F_SET(cp, C_DELETED);
 			}
 	}
+	CURSOR_TEARDOWN(dbp);
 
-	DB_THREAD_UNLOCK(dbp);
 	return (count);
 }
 
@@ -1192,11 +1249,11 @@ __bam_ca_delete(dbp, pgno, indx, curs, key_delete)
  * PUBLIC: void __bam_ca_di __P((DB *, db_pgno_t, u_int32_t, int));
  */
 void
-__bam_ca_di(dbp, pgno, indx, value)
+__bam_ca_di(dbp, pgno, indx, adjust)
 	DB *dbp;
 	db_pgno_t pgno;
 	u_int32_t indx;
-	int value;
+	int adjust;
 {
 	CURSOR *cp;
 	DBC *dbc;
@@ -1208,16 +1265,16 @@ __bam_ca_di(dbp, pgno, indx, value)
 	/*
 	 * Adjust the cursors.  See the comment in __bam_ca_delete().
 	 */
-	DB_THREAD_LOCK(dbp);
+	CURSOR_SETUP(dbp);
 	for (dbc = TAILQ_FIRST(&dbp->curs_queue);
 	    dbc != NULL; dbc = TAILQ_NEXT(dbc, links)) {
 		cp = (CURSOR *)dbc->internal;
 		if (cp->pgno == pgno && cp->indx >= indx)
-			cp->indx += value;
+			cp->indx += adjust;
 		if (cp->dpgno == pgno && cp->dindx >= indx)
-			cp->dindx += value;
+			cp->dindx += adjust;
 	}
-	DB_THREAD_UNLOCK(dbp);
+	CURSOR_TEARDOWN(dbp);
 }
 
 /*
@@ -1242,7 +1299,7 @@ __bam_ca_dup(dbp, fpgno, first, fi, tpgno, ti)
 	 * No need to test duplicates, this only gets called when moving
 	 * leaf page data items onto a duplicates page.
 	 */
-	DB_THREAD_LOCK(dbp);
+	CURSOR_SETUP(dbp);
 	for (dbc = TAILQ_FIRST(&dbp->curs_queue);
 	    dbc != NULL; dbc = TAILQ_NEXT(dbc, links)) {
 		cp = (CURSOR *)dbc->internal;
@@ -1258,7 +1315,7 @@ __bam_ca_dup(dbp, fpgno, first, fi, tpgno, ti)
 			cp->dindx = ti;
 		}
 	}
-	DB_THREAD_UNLOCK(dbp);
+	CURSOR_TEARDOWN(dbp);
 }
 
 /*
@@ -1285,14 +1342,14 @@ __bam_ca_move(dbp, fpgno, tpgno)
 	 * No need to test duplicates, this only gets called when copying
 	 * over the root page with a leaf or internal page.
 	 */
-	DB_THREAD_LOCK(dbp);
+	CURSOR_SETUP(dbp);
 	for (dbc = TAILQ_FIRST(&dbp->curs_queue);
 	    dbc != NULL; dbc = TAILQ_NEXT(dbc, links)) {
 		cp = (CURSOR *)dbc->internal;
 		if (cp->pgno == fpgno)
 			cp->pgno = tpgno;
 	}
-	DB_THREAD_UNLOCK(dbp);
+	CURSOR_TEARDOWN(dbp);
 }
 
 /*
@@ -1333,7 +1390,7 @@ __bam_ca_replace(dbp, pgno, indx, pass)
 	 * for the cursor as it may have been changed by other cursor update
 	 * routines as the item was deleted/inserted.
 	 */
-	DB_THREAD_LOCK(dbp);
+	CURSOR_SETUP(dbp);
 	switch (pass) {
 	case REPLACE_SETUP:			/* Setup. */
 		for (dbc = TAILQ_FIRST(&dbp->curs_queue);
@@ -1372,7 +1429,7 @@ __bam_ca_replace(dbp, pgno, indx, pass)
 		}
 		break;
 	}
-	DB_THREAD_UNLOCK(dbp);
+	CURSOR_TEARDOWN(dbp);
 }
 
 /*
@@ -1406,7 +1463,7 @@ __bam_ca_split(dbp, ppgno, lpgno, rpgno, split_indx, cleft)
 	 * the cursor is on the right page, it is decremented by the number of
 	 * records split to the left page.
 	 */
-	DB_THREAD_LOCK(dbp);
+	CURSOR_SETUP(dbp);
 	for (dbc = TAILQ_FIRST(&dbp->curs_queue);
 	    dbc != NULL; dbc = TAILQ_NEXT(dbc, links)) {
 		cp = (CURSOR *)dbc->internal;
@@ -1427,7 +1484,7 @@ __bam_ca_split(dbp, ppgno, lpgno, rpgno, split_indx, cleft)
 				cp->dindx -= split_indx;
 			}
 	}
-	DB_THREAD_UNLOCK(dbp);
+	CURSOR_TEARDOWN(dbp);
 }
 
 /*
@@ -1440,16 +1497,17 @@ __bam_c_physdel(dbp, cp, h)
 	CURSOR *cp;
 	PAGE *h;
 {
+	enum { DELETE_ITEM, DELETE_PAGE, NOTHING_FURTHER } cmd;
 	BOVERFLOW bo;
 	BTREE *t;
 	DBT dbt;
 	DB_LOCK lock;
 	db_indx_t indx;
 	db_pgno_t pgno, next_pgno, prev_pgno;
-	int local, normal, ret;
+	int delete_page, local_page, ret;
 
 	t = dbp->internal;
-	ret = 0;
+	delete_page = ret = 0;
 
 	/* Figure out what we're deleting. */
 	if (cp->dpgno == PGNO_INVALID) {
@@ -1476,9 +1534,9 @@ __bam_c_physdel(dbp, cp, h)
 			return (ret);
 		if ((ret = __bam_pget(dbp, &h, &pgno, 0)) != 0)
 			return (ret);
-		local = 1;
+		local_page = 1;
 	} else
-		local = 0;
+		local_page = 0;
 
 	/*
 	 * If we're deleting a duplicate entry and there are other duplicate
@@ -1515,9 +1573,9 @@ __bam_c_physdel(dbp, cp, h)
 
 		if (NUM_ENT(h) == 1 &&
 		    prev_pgno == PGNO_INVALID && next_pgno == PGNO_INVALID)
-			normal = 1;
+			cmd = DELETE_PAGE;
 		else {
-			normal = 0;
+			cmd = DELETE_ITEM;
 
 			/* Delete the duplicate. */
 			if ((ret = __db_drem(dbp, &h, indx, __bam_free)) != 0)
@@ -1536,18 +1594,27 @@ __bam_c_physdel(dbp, cp, h)
 			 */
 			if ((h != NULL && pgno == h->pgno) ||
 			    prev_pgno != PGNO_INVALID)
-				goto done;
+				cmd = NOTHING_FURTHER;
 		}
 
-		/* Release any page we're holding and its lock. */
-		if (local) {
+		/*
+		 * Release any page we're holding and its lock.
+		 *
+		 * !!!
+		 * If there is no subsequent page in the duplicate chain, then
+		 * __db_drem will have put page "h" and set it to NULL.
+		*/
+		if (local_page) {
 			if (h != NULL)
 				(void)memp_fput(dbp->mpf, h, 0);
 			(void)__BT_TLPUT(dbp, lock);
-			local = 0;
+			local_page = 0;
 		}
 
-		/* Acquire the parent page. */
+		if (cmd == NOTHING_FURTHER)
+			goto done;
+
+		/* Acquire the parent page and switch the index to its entry. */
 		if ((ret =
 		    __bam_lget(dbp, 0, cp->pgno, DB_LOCK_WRITE, &lock)) != 0)
 			goto err;
@@ -1555,11 +1622,10 @@ __bam_c_physdel(dbp, cp, h)
 			(void)__BT_TLPUT(dbp, lock);
 			goto err;
 		}
-		local = 1;
-
-		/* Switch to the parent page's entry. */
+		local_page = 1;
 		indx = cp->indx;
-		if (normal)
+
+		if (cmd == DELETE_PAGE)
 			goto btd;
 
 		/*
@@ -1582,47 +1648,60 @@ __bam_c_physdel(dbp, cp, h)
 		goto done;
 	}
 
-	/* Otherwise, do a normal btree delete. */
-btd:	if ((ret = __bam_ditem(dbp, h, indx)) != 0)
-		goto err;
-	if ((ret = __bam_ditem(dbp, h, indx)) != 0)
-		goto err;
-
-	/*
-	 * If the page is empty, delete it.  To delete a leaf page we need a
-	 * copy of a key from the page.  We use the first one that was there,
-	 * since it's the last key that the page held.  We malloc the page
-	 * information instead of using the return key/data memory because
-	 * we've already set them -- the reason that we've already set them
-	 * is because we're (potentially) about to do a reverse split, which
-	 * would make our saved page information useless.
+btd:	/*
+	 * If the page is going to be emptied, delete it.  To delete a leaf
+	 * page we need a copy of a key from the page.  We use the 0th page
+	 * index since it's the last key that the page held.
+	 *
+	 * We malloc the page information instead of using the return key/data
+	 * memory because we've already set them -- the reason we've already
+	 * set them is because we're (potentially) about to do a reverse split,
+	 * which would make our saved page information useless.
 	 *
 	 * XXX
 	 * The following operations to delete a page might deadlock.  I think
 	 * that's OK.  The problem is if we're deleting an item because we're
 	 * closing cursors because we've already deadlocked and want to call
-	 * txn_abort().  If we fail due to deadlock, we'll leave an locked
-	 * empty page in the tree, which won't be empty long because we're
-	 * going to undo the delete.
+	 * txn_abort().  If we fail due to deadlock, we leave a locked empty
+	 * page in the tree, which won't be empty long because we're going to
+	 * undo the delete.
 	 */
-	if (NUM_ENT(h) == 0 && h->pgno != PGNO_ROOT) {
+	if (NUM_ENT(h) == 2 && h->pgno != PGNO_ROOT) {
 		memset(&dbt, 0, sizeof(DBT));
 		dbt.flags = DB_DBT_MALLOC | DB_DBT_INTERNAL;
 		if ((ret = __db_ret(dbp, h, 0, &dbt, NULL, NULL)) != 0)
 			goto err;
+		delete_page = 1;
+	}
 
-		if (local) {
-			(void)memp_fput(dbp->mpf, h, 0);
-			(void)__BT_TLPUT(dbp, lock);
-			local = 0;
-		}
+	/*
+	 * Do a normal btree delete.
+	 *
+	 * XXX
+	 * Delete the key item first, otherwise the duplicate checks in
+	 * __bam_ditem() won't work!
+	 */
+	if ((ret = __bam_ditem(dbp, h, indx)) != 0)
+		goto err;
+	if ((ret = __bam_ditem(dbp, h, indx)) != 0)
+		goto err;
 
-		ret = __bam_dpage(dbp, &dbt);
-		__db_free(dbt.data);
+	/* Discard any remaining locks/pages. */
+	if (local_page) {
+		(void)memp_fput(dbp->mpf, h, 0);
+		(void)__BT_TLPUT(dbp, lock);
+		local_page = 0;
 	}
 
+	/* Delete the page if it was emptied. */
+	if (delete_page)
+		ret = __bam_dpage(dbp, &dbt);
+
 err:
-done:	if (local) {
+done:	if (delete_page)
+		__db_free(dbt.data);
+
+	if (local_page) {
 		(void)memp_fput(dbp->mpf, h, 0);
 		(void)__BT_TLPUT(dbp, lock);
 	}
@@ -1631,3 +1710,43 @@ done:	if (local) {
 		++t->lstat.bt_deleted;
 	return (ret);
 }
+
+/*
+ * __bam_c_getstack --
+ *	Acquire a full stack for a cursor.
+ */
+static int
+__bam_c_getstack(dbp, cp)
+	DB *dbp;
+	CURSOR *cp;
+{
+	DBT dbt;
+	PAGE *h;
+	db_pgno_t pgno;
+	int exact, ret;
+
+	ret = 0;
+	h = NULL;
+	memset(&dbt, 0, sizeof(DBT));
+
+	/* Get the page with the current item on it. */
+	pgno = cp->pgno;
+	if ((ret = __bam_pget(dbp, &h, &pgno, 0)) != 0)
+		return (ret);
+
+	/* Get a copy of a key from the page. */
+	dbt.flags = DB_DBT_MALLOC | DB_DBT_INTERNAL;
+	if ((ret = __db_ret(dbp, h, 0, &dbt, NULL, NULL)) != 0)
+		goto err;
+
+	/* Get a write-locked stack for that page. */
+	exact = 0;
+	ret = __bam_search(dbp, &dbt, S_KEYFIRST, 1, NULL, &exact);
+
+	/* We no longer need the key or the page. */
+err:	if (h != NULL)
+		(void)memp_fput(dbp->mpf, h, 0);
+	if (dbt.data != NULL)
+		__db_free(dbt.data);
+	return (ret);
+}
diff --git a/db2/btree/bt_delete.c b/db2/btree/bt_delete.c
index baa8a25401..7e71037e46 100644
--- a/db2/btree/bt_delete.c
+++ b/db2/btree/bt_delete.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 /*
@@ -47,13 +47,12 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)bt_delete.c	10.25 (Sleepycat) 1/8/98";
+static const char sccsid[] = "@(#)bt_delete.c	10.31 (Sleepycat) 5/6/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
 #include <sys/types.h>
 
-#include <stdio.h>
 #include <string.h>
 #endif
 
@@ -67,14 +66,14 @@ static int __bam_dpages __P((DB *, BTREE *));
  * __bam_delete --
  *	Delete the items referenced by a key.
  *
- * PUBLIC: int __bam_delete __P((DB *, DB_TXN *, DBT *, int));
+ * PUBLIC: int __bam_delete __P((DB *, DB_TXN *, DBT *, u_int32_t));
  */
 int
 __bam_delete(argdbp, txn, key, flags)
 	DB *argdbp;
 	DB_TXN *txn;
 	DBT *key;
-	int flags;
+	u_int32_t flags;
 {
 	BTREE *t;
 	DB *dbp;
@@ -87,8 +86,8 @@ __bam_delete(argdbp, txn, key, flags)
 	stack = 0;
 
 	/* Check for invalid flags. */
-	if ((ret =
-	    __db_delchk(argdbp, flags, F_ISSET(argdbp, DB_AM_RDONLY))) != 0)
+	if ((ret = __db_delchk(argdbp,
+	    key, flags, F_ISSET(argdbp, DB_AM_RDONLY))) != 0)
 		return (ret);
 
 	GETHANDLE(argdbp, txn, &dbp, ret);
@@ -107,6 +106,11 @@ __bam_delete(argdbp, txn, key, flags)
 			break;
 	for (; cnt > 0; --cnt, ++t->lstat.bt_deleted)
 		if (__bam_ca_delete(dbp, h->pgno, indx, NULL, 1) == 0) {
+			/*
+			 * XXX
+			 * Delete the key item first, otherwise the duplicate
+			 * checks in __bam_ditem() won't work!
+			 */
 			if ((ret = __bam_ditem(dbp, h, indx)) != 0)
 				goto err;
 			if ((ret = __bam_ditem(dbp, h, indx)) != 0)
@@ -138,14 +142,14 @@ err:	if (stack)
  * __ram_delete --
  *	Delete the items referenced by a key.
  *
- * PUBLIC: int __ram_delete __P((DB *, DB_TXN *, DBT *, int));
+ * PUBLIC: int __ram_delete __P((DB *, DB_TXN *, DBT *, u_int32_t));
  */
 int
 __ram_delete(argdbp, txn, key, flags)
 	DB *argdbp;
 	DB_TXN *txn;
 	DBT *key;
-	int flags;
+	u_int32_t flags;
 {
 	BKEYDATA bk;
 	BTREE *t;
@@ -159,8 +163,8 @@ __ram_delete(argdbp, txn, key, flags)
 	stack = 0;
 
 	/* Check for invalid flags. */
-	if ((ret =
-	    __db_delchk(argdbp, flags, F_ISSET(argdbp, DB_AM_RDONLY))) != 0)
+	if ((ret = __db_delchk(argdbp,
+	    key, flags, F_ISSET(argdbp, DB_AM_RDONLY))) != 0)
 		return (ret);
 
 	GETHANDLE(argdbp, txn, &dbp, ret);
@@ -284,19 +288,32 @@ __bam_ditem(dbp, h, indx)
 	case P_LBTREE:
 		/*
 		 * If it's a duplicate key, discard the index and don't touch
-		 * the actual page item.  This works because no data item can
-		 * have an index that matches any other index so even if the
-		 * data item is in an index "slot", it won't match any other
-		 * index.
+		 * the actual page item.
+		 *
+		 * XXX
+		 * This works because no data item can have an index matching
+		 * any other index so even if the data item is in a key "slot",
+		 * it won't match any other index.
 		 */
-		if (!(indx % 2)) {
-			if (indx > 0 && h->inp[indx] == h->inp[indx - P_INDX])
-				return (__bam_adjindx(dbp,
-				    h, indx, indx - P_INDX, 0));
+		if ((indx % 2) == 0) {
+			/*
+			 * Check for a duplicate after us on the page.  NOTE:
+			 * we have to delete the key item before deleting the
+			 * data item, otherwise the "indx + P_INDX" calculation
+			 * won't work!
+			 */
 			if (indx + P_INDX < (u_int32_t)NUM_ENT(h) &&
 			    h->inp[indx] == h->inp[indx + P_INDX])
 				return (__bam_adjindx(dbp,
 				    h, indx, indx + O_INDX, 0));
+			/*
+			 * Check for a duplicate before us on the page.  It
+			 * doesn't matter if we delete the key item before or
+			 * after the data item for the purposes of this one.
+			 */
+			if (indx > 0 && h->inp[indx] == h->inp[indx - P_INDX])
+				return (__bam_adjindx(dbp,
+				    h, indx, indx - P_INDX, 0));
 		}
 		/* FALLTHROUGH */
 	case P_LRECNO:
@@ -396,7 +413,8 @@ __bam_dpage(dbp, key)
 	DB_LOCK lock;
 	PAGE *h;
 	db_pgno_t pgno;
-	int exact, level, ret;
+	int level;		/* !!!: has to hold number of tree levels. */
+	int exact, ret;
 
 	ret = 0;
 	t = dbp->internal;
@@ -527,13 +545,14 @@ __bam_dpages(dbp, t)
 		goto release;
 
 	/*
-	 * If we deleted the next-to-last item from the root page, the tree
-	 * can collapse a level.  Try and write lock the remaining root + 1
-	 * page and copy it onto the root page.  If we can't get the lock,
-	 * that's okay, the tree just stays a level deeper than we'd like.
+	 * If we just deleted the last or next-to-last item from the root page,
+	 * the tree can collapse a level.  Write lock the last page referenced
+	 * by the root page and copy it over the root page.  If we can't get a
+	 * write lock, that's okay, the tree just remains a level deeper than
+	 * we'd like.
 	 */
 	h = epg->page;
-	if (h->pgno == PGNO_ROOT && NUM_ENT(h) == 1) {
+	if (h->pgno == PGNO_ROOT && NUM_ENT(h) <= 1) {
 		pgno = TYPE(epg->page) == P_IBTREE ?
 		    GET_BINTERNAL(epg->page, 0)->pgno :
 		    GET_RINTERNAL(epg->page, 0)->pgno;
@@ -573,13 +592,21 @@ __bam_dpages(dbp, t)
 		(void)memp_fset(dbp->mpf, epg->page, DB_MPOOL_DIRTY);
 
 		/*
-		 * Free the last page in that level of the btree and discard
-		 * the lock.  (The call to __bam_free discards our reference
+		 * Free the page copied onto the root page and discard its
+		 * lock.  (The call to __bam_free() discards our reference
 		 * to the page.)
+		 *
+		 * It's possible that the reverse split we're doing involves
+		 * pages from the stack of pages we're deleting.  Don't free
+		 * the page twice.
 		 */
-		(void)__bam_free(dbp, h);
+		 if (h->pgno == (epg + 1)->page->pgno)
+			(void)memp_fput(dbp->mpf, h, 0);
+		else {
+			(void)__bam_free(dbp, h);
+			++t->lstat.bt_freed;
+		}
 		(void)__BT_TLPUT(dbp, lock);
-		++t->lstat.bt_freed;
 
 		/* Adjust the cursors. */
 		__bam_ca_move(dbp, h->pgno, PGNO_ROOT);
@@ -596,12 +623,17 @@ __bam_dpages(dbp, t)
 	 * Don't bother checking for errors.  We've unlinked the subtree from
 	 * the tree, and there's no possibility of recovery.
 	 */
-	for (; ++epg <= t->bt_csp; ++t->lstat.bt_freed) {
+	while (++epg <= t->bt_csp) {
+		/*
+		 * XXX
+		 * Why do we need to do this?  Isn't the page already empty?
+		 */
 		if (NUM_ENT(epg->page) != 0)
 			(void)__bam_ditem(dbp, epg->page, epg->indx);
 
 		(void)__bam_free(dbp, epg->page);
 		(void)__BT_TLPUT(dbp, epg->lock);
+		++t->lstat.bt_freed;
 	}
 	return (0);
 
diff --git a/db2/btree/bt_open.c b/db2/btree/bt_open.c
index dd9f10927a..f5974ec61e 100644
--- a/db2/btree/bt_open.c
+++ b/db2/btree/bt_open.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 /*
@@ -47,7 +47,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)bt_open.c	10.22 (Sleepycat) 1/6/98";
+static const char sccsid[] = "@(#)bt_open.c	10.27 (Sleepycat) 5/6/98";
 #endif /* not lint */
 
 /*
@@ -60,21 +60,15 @@ static const char sccsid[] = "@(#)bt_open.c	10.22 (Sleepycat) 1/6/98";
 
 #ifndef NO_SYSTEM_INCLUDES
 #include <sys/types.h>
-#include <sys/stat.h>
 
 #include <errno.h>
-#include <fcntl.h>
 #include <limits.h>
-#include <stdio.h>
-#include <stdlib.h>
 #include <string.h>
-#include <unistd.h>
 #endif
 
 #include "db_int.h"
 #include "db_page.h"
 #include "btree.h"
-#include "common_ext.h"
 
 static int __bam_keyalloc __P((BTREE *));
 static int __bam_setmeta __P((DB *, BTREE *));
@@ -295,6 +289,7 @@ __bam_setmeta(dbp, t)
 	}
 
 	/* Initialize the tree structure metadata information. */
+	memset(meta, 0, sizeof(BTMETA));
 	ZERO_LSN(meta->lsn);
 	meta->pgno = PGNO_METADATA;
 	meta->magic = DB_BTREEMAGIC;
@@ -303,7 +298,6 @@ __bam_setmeta(dbp, t)
 	meta->maxkey = t->bt_maxkey;
 	meta->minkey = t->bt_minkey;
 	meta->free = PGNO_INVALID;
-	meta->flags = 0;
 	if (dbp->type == DB_RECNO)
 		F_SET(meta, BTM_RECNO);
 	if (F_ISSET(dbp, DB_AM_DUP))
@@ -314,8 +308,6 @@ __bam_setmeta(dbp, t)
 		F_SET(meta, BTM_RECNUM);
 	if (F_ISSET(dbp, DB_RE_RENUMBER))
 		F_SET(meta, BTM_RENUMBER);
-	meta->re_len = 0;
-	meta->re_pad = 0;
 	memcpy(meta->uid, dbp->lock.fileid, DB_FILE_ID_LEN);
 
 	/* Create and initialize a root page. */
diff --git a/db2/btree/bt_page.c b/db2/btree/bt_page.c
index 853317e835..87f2811398 100644
--- a/db2/btree/bt_page.c
+++ b/db2/btree/bt_page.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 /*
@@ -47,14 +47,13 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)bt_page.c	10.7 (Sleepycat) 1/7/98";
+static const char sccsid[] = "@(#)bt_page.c	10.12 (Sleepycat) 5/6/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
 #include <sys/types.h>
 
 #include <errno.h>
-#include <stdio.h>
 #include <string.h>
 #endif
 
@@ -142,7 +141,8 @@ __bam_free(dbp, h)
 	DBT ldbt;
 	DB_LOCK metalock;
 	db_pgno_t pgno;
-	int is_dirty, ret, t_ret;
+	u_int32_t dirty_flag;
+	int ret, t_ret;
 
 	/*
 	 * Retrieve the metadata page and insert the page at the head of
@@ -150,7 +150,7 @@ __bam_free(dbp, h)
 	 * fail, then we need to put the page with which we were called
 	 * back because our caller assumes we take care of it.
 	 */
-	is_dirty = 0;
+	dirty_flag = 0;
 	pgno = PGNO_METADATA;
 	if ((ret = __bam_lget(dbp, 0, pgno, DB_LOCK_WRITE, &metalock)) != 0)
 		goto err;
@@ -178,7 +178,7 @@ __bam_free(dbp, h)
 	 * The page should have nothing interesting on it, re-initialize it,
 	 * leaving only the page number and the LSN.
 	 */
-#ifdef DEBUG
+#ifdef DIAGNOSTIC
 	{ db_pgno_t __pgno; DB_LSN __lsn;
 		__pgno = h->pgno;
 		__lsn = h->lsn;
@@ -198,8 +198,8 @@ __bam_free(dbp, h)
 		ret = t_ret;
 
 	/* Discard the caller's page reference. */
-	is_dirty = DB_MPOOL_DIRTY;
-err:	if ((t_ret = memp_fput(dbp->mpf, h, is_dirty)) != 0 && ret == 0)
+	dirty_flag = DB_MPOOL_DIRTY;
+err:	if ((t_ret = memp_fput(dbp->mpf, h, dirty_flag)) != 0 && ret == 0)
 		ret = t_ret;
 
 	/*
@@ -248,8 +248,10 @@ __bam_lget(dbp, do_couple, pgno, mode, lockp)
 	u_int32_t locker;
 	int ret;
 
-	if (!F_ISSET(dbp, DB_AM_LOCKING))
+	if (!F_ISSET(dbp, DB_AM_LOCKING)) {
+		*lockp = LOCK_INVALID;
 		return (0);
+	}
 
 	locker = dbp->txn == NULL ? dbp->locker : dbp->txn->txnid;
 	dbp->lock.pgno = pgno;
@@ -300,15 +302,15 @@ __bam_lput(dbp, lock)
  * __bam_pget --
  *	The standard page get call.
  *
- * PUBLIC: int __bam_pget __P((DB *, PAGE **, db_pgno_t *, int));
+ * PUBLIC: int __bam_pget __P((DB *, PAGE **, db_pgno_t *, u_int32_t));
  */
 int
-__bam_pget(dbp, hp, pgnop, mflags)
+__bam_pget(dbp, hp, pgnop, mpool_flags)
 	DB *dbp;
 	PAGE **hp;
 	db_pgno_t *pgnop;
-	int mflags;
+	u_int32_t mpool_flags;
 {
 	return (memp_fget((dbp)->mpf,
-	    pgnop, mflags, hp) == 0 ? 0 : __db_pgerr(dbp, *pgnop));
+	    pgnop, mpool_flags, hp) == 0 ? 0 : __db_pgerr(dbp, *pgnop));
 }
diff --git a/db2/btree/bt_put.c b/db2/btree/bt_put.c
index 87f3fd9aff..a93faac98c 100644
--- a/db2/btree/bt_put.c
+++ b/db2/btree/bt_put.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 /*
@@ -47,15 +47,13 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)bt_put.c	10.38 (Sleepycat) 1/8/98";
+static const char sccsid[] = "@(#)bt_put.c	10.45 (Sleepycat) 5/25/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
 #include <sys/types.h>
 
 #include <errno.h>
-#include <stdio.h>
-#include <stdlib.h>
 #include <string.h>
 #endif
 
@@ -75,21 +73,22 @@ static u_int32_t __bam_partsize __P((DBT *, PAGE *, u_int32_t));
  * __bam_put --
  *	Add a new key/data pair or replace an existing pair (btree).
  *
- * PUBLIC: int __bam_put __P((DB *, DB_TXN *, DBT *, DBT *, int));
+ * PUBLIC: int __bam_put __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t));
  */
 int
 __bam_put(argdbp, txn, key, data, flags)
 	DB *argdbp;
 	DB_TXN *txn;
 	DBT *key, *data;
-	int flags;
+	u_int32_t flags;
 {
 	BTREE *t;
 	CURSOR c;
 	DB *dbp;
 	PAGE *h;
 	db_indx_t indx;
-	int exact, iflags, isdeleted, newkey, replace, ret, stack;
+	u_int32_t iitem_flags, insert_flags;
+	int exact, isdeleted, newkey, ret, stack;
 
 	DEBUG_LWRITE(argdbp, txn, "bam_put", key, data, flags);
 
@@ -121,14 +120,13 @@ retry:	/*
 	 * been marked for deletion, we do a replace, otherwise, it has to be
 	 * a set of duplicates, and we simply append a new one to the set.
 	 */
-	isdeleted = replace = 0;
+	isdeleted = 0;
 	if (exact) {
 		if ((ret = __bam_isdeleted(dbp, h, indx, &isdeleted)) != 0)
 			goto err;
-		if (isdeleted) {
-			replace = 1;
+		if (isdeleted)
 			__bam_ca_replace(dbp, h->pgno, indx, REPLACE_SETUP);
-		} else
+		else
 			if (flags == DB_NOOVERWRITE) {
 				ret = DB_KEYEXIST;
 				goto err;
@@ -179,42 +177,38 @@ retry:	/*
 				t->bt_csp->page = h = c.page;
 				indx = c.dindx;
 			}
-			iflags = DB_AFTER;
+			insert_flags = DB_AFTER;
 		} else
-			iflags = DB_CURRENT;
+			insert_flags = DB_CURRENT;
 	} else
-		iflags = DB_BEFORE;
+		insert_flags = DB_BEFORE;
 
 	/*
 	 * The pages we're using may be modified by __bam_iitem(), so make
 	 * sure we reset the stack.
 	 */
-	ret = __bam_iitem(dbp,
-	    &h, &indx, key, data, iflags, newkey ? BI_NEWKEY : 0);
+	iitem_flags = 0;
+	if (newkey)
+		iitem_flags |= BI_NEWKEY;
+	if (isdeleted)
+		iitem_flags |= BI_DOINCR;
+	ret = __bam_iitem(dbp, &h, &indx, key, data, insert_flags, iitem_flags);
 	t->bt_csp->page = h;
 	t->bt_csp->indx = indx;
 
 	switch (ret) {
 	case 0:
-		/*
-		 * Done.  Clean up the cursor, and, if we're doing record
-		 * numbers, adjust the internal page counts.
-		 */
-		if (replace)
+		/* Done.  Clean up the cursor. */
+		if (isdeleted)
 			__bam_ca_replace(dbp, h->pgno, indx, REPLACE_SUCCESS);
-
-		if (!replace && F_ISSET(dbp, DB_BT_RECNUM))
-			ret = __bam_adjust(dbp, t, 1);
 		break;
 	case DB_NEEDSPLIT:
 		/*
 		 * We have to split the page.  Back out the cursor setup,
 		 * discard the stack of pages, and do the split.
 		 */
-		if (replace) {
-			replace = 0;
+		if (isdeleted)
 			__bam_ca_replace(dbp, h->pgno, indx, REPLACE_FAILED);
-		}
 
 		(void)__bam_stkrel(dbp);
 		stack = 0;
@@ -225,7 +219,7 @@ retry:	/*
 		goto retry;
 		/* NOTREACHED */
 	default:
-		if (replace)
+		if (isdeleted)
 			__bam_ca_replace(dbp, h->pgno, indx, REPLACE_FAILED);
 		break;
 	}
@@ -393,7 +387,8 @@ __bam_lookup(dbp, key, exactp)
 				for (indx = 0;
 				    indx < (db_indx_t)(NUM_ENT(h) - P_INDX) &&
 				    h->inp[indx] == h->inp[indx + P_INDX];
-				    indx += P_INDX);
+				    indx += P_INDX)
+					;
 				e.indx = indx;
 			}
 			goto fast;
@@ -427,7 +422,7 @@ slow:	return (__bam_search(dbp, key, S_INSERT, 1, NULL, exactp));
  *	Insert an item into the tree.
  *
  * PUBLIC: int __bam_iitem __P((DB *,
- * PUBLIC:    PAGE **, db_indx_t *, DBT *, DBT *, int, int));
+ * PUBLIC:    PAGE **, db_indx_t *, DBT *, DBT *, u_int32_t, u_int32_t));
  */
 int
 __bam_iitem(dbp, hp, indxp, key, data, op, flags)
@@ -435,13 +430,13 @@ __bam_iitem(dbp, hp, indxp, key, data, op, flags)
 	PAGE **hp;
 	db_indx_t *indxp;
 	DBT *key, *data;
-	int op, flags;
+	u_int32_t op, flags;
 {
 	BTREE *t;
 	BKEYDATA *bk;
 	DBT tdbt;
 	PAGE *h;
-	db_indx_t indx;
+	db_indx_t indx, nbytes;
 	u_int32_t data_size, have_bytes, need_bytes, needed;
 	int bigkey, bigdata, dupadjust, replace, ret;
 
@@ -466,12 +461,27 @@ __bam_iitem(dbp, hp, indxp, key, data, op, flags)
 			++*indxp;
 
 		/* Remove the current item if it's a DB_CURRENT op. */
-		if (op == DB_CURRENT && (ret = __db_ditem(dbp, *hp, *indxp,
-		    BKEYDATA_SIZE(GET_BKEYDATA(*hp, *indxp)->len))) != 0)
-			return (ret);
+		if (op == DB_CURRENT) {
+			bk = GET_BKEYDATA(*hp, *indxp);
+			switch (B_TYPE(bk->type)) {
+			case B_KEYDATA:
+				nbytes = BKEYDATA_SIZE(bk->len);
+				break;
+			case B_OVERFLOW:
+				nbytes = BOVERFLOW_SIZE;
+				break;
+			default:
+				return (__db_pgfmt(dbp, h->pgno));
+			}
+			if ((ret = __db_ditem(dbp, *hp, *indxp, nbytes)) != 0)
+				return (ret);
+		}
 
 		/* Put the new/replacement item onto the page. */
-		return (__db_dput(dbp, data, hp, indxp, __bam_new));
+		if ((ret = __db_dput(dbp, data, hp, indxp, __bam_new)) != 0)
+			return (ret);
+
+		goto done;
 	}
 
 	/* Handle fixed-length records: build the real record. */
@@ -568,7 +578,7 @@ __bam_iitem(dbp, hp, indxp, key, data, op, flags)
 		case DB_BEFORE:		/* 2. Insert a new key/data pair. */
 			break;
 		default:
-			abort();
+			return (EINVAL);
 		}
 
 		/* Add the key. */
@@ -638,7 +648,7 @@ __bam_iitem(dbp, hp, indxp, key, data, op, flags)
 			replace = 1;
 			break;
 		default:
-			abort();
+			return (EINVAL);
 		}
 	}
 
@@ -666,9 +676,8 @@ __bam_iitem(dbp, hp, indxp, key, data, op, flags)
 			return (ret);
 	}
 
-	++t->lstat.bt_added;
-
-	ret = memp_fset(dbp->mpf, h, DB_MPOOL_DIRTY);
+	if ((ret = memp_fset(dbp->mpf, h, DB_MPOOL_DIRTY)) != 0)
+		return (ret);
 
 	/*
 	 * If the page is at least 50% full, and we added a duplicate, see if
@@ -681,9 +690,25 @@ __bam_iitem(dbp, hp, indxp, key, data, op, flags)
 			return (ret);
 	}
 
+	/*
+	 * If we've changed the record count, update the tree.  Record counts
+	 * need to be updated in recno databases and in btree databases where
+	 * we are supporting records.  In both cases, adjust the count if the
+	 * operation wasn't performed on the current record or when the caller
+	 * overrides and wants the adjustment made regardless.
+	 */
+done:	if (LF_ISSET(BI_DOINCR) ||
+	    (op != DB_CURRENT &&
+	    (F_ISSET(dbp, DB_BT_RECNUM) || dbp->type == DB_RECNO)))
+		if ((ret = __bam_adjust(dbp, t, 1)) != 0)
+			return (ret);
+
+	/* If we've modified a recno file, set the flag. */
 	if (t->bt_recno != NULL)
 		F_SET(t->bt_recno, RECNO_MODIFIED);
 
+	++t->lstat.bt_added;
+
 	return (ret);
 }
 
@@ -1036,8 +1061,8 @@ __bam_partial(dbp, dbt, h, indx, nbytes)
 	BOVERFLOW *bo;
 	DBT copy;
 	u_int32_t len, tlen;
-	int ret;
 	u_int8_t *p;
+	int ret;
 
 	COMPQUIET(bo, NULL);
 
@@ -1065,59 +1090,62 @@ __bam_partial(dbp, dbt, h, indx, nbytes)
 		bk->len = 0;
 	}
 
-	/* We use nul bytes for extending the record, get it over with. */
+	/*
+	 * We use nul bytes for any part of the record that isn't specified;
+	 * zero the whole buffer up front and get it over with.
+	 */
 	memset(t->bt_rdata.data, 0, nbytes);
 
-	tlen = 0;
 	if (B_TYPE(bk->type) == B_OVERFLOW) {
-		/* Take up to doff bytes from the record. */
+		/*
+		 * In the case of an overflow record, we shift things around
+		 * in the current record rather than allocate a separate copy.
+		 */
 		memset(&copy, 0, sizeof(copy));
 		if ((ret = __db_goff(dbp, &copy, bo->tlen,
 		    bo->pgno, &t->bt_rdata.data, &t->bt_rdata.ulen)) != 0)
 			return (ret);
-		tlen += dbt->doff;
+
+		/* Skip any leading data from the original record. */
+		tlen = dbt->doff;
+		p = (u_int8_t *)t->bt_rdata.data + dbt->doff;
 
 		/*
-		 * If the original record was larger than the offset:
-		 *	If dlen > size, shift the remaining data down.
-		 *	If dlen < size, shift the remaining data up.
+		 * Copy in any trailing data from the original record.
+		 *
+		 * If the original record was larger than the original offset
+		 * plus the bytes being deleted, there is trailing data in the
+		 * original record we need to preserve.  If we aren't deleting
+		 * the same number of bytes as we're inserting, copy it up or
+		 * down, into place.
+		 *
 		 * Use memmove(), the regions may overlap.
 		 */
-		p = t->bt_rdata.data;
-		if (bo->tlen > dbt->doff)
-			if (dbt->dlen > dbt->size) {
-				tlen += len = bo->tlen -
-				    dbt->doff - (dbt->dlen - dbt->size);
-				memmove(p + dbt->doff + dbt->size,
-				    p + dbt->doff + dbt->dlen, len);
-			} else if (dbt->dlen < dbt->size) {
-				tlen += len = bo->tlen -
-				    dbt->doff - (dbt->size - dbt->dlen);
-				memmove(p + dbt->doff + dbt->dlen,
-				    p + dbt->doff + dbt->size, len);
-			} else
-				tlen += bo->tlen - dbt->doff;
+		if (bo->tlen > dbt->doff + dbt->dlen) {
+			len = bo->tlen - (dbt->doff + dbt->dlen);
+			if (dbt->dlen != dbt->size)
+				memmove(p + dbt->size, p + dbt->dlen, len);
+			tlen += len;
+		}
 
-		/* Copy in the user's data. */
-		memcpy((u_int8_t *)t->bt_rdata.data + dbt->doff,
-		    dbt->data, dbt->size);
+		/* Copy in the application provided data. */
+		memcpy(p, dbt->data, dbt->size);
 		tlen += dbt->size;
 	} else {
-		/* Take up to doff bytes from the record. */
+		/* Copy in any leading data from the original record. */
 		memcpy(t->bt_rdata.data,
 		    bk->data, dbt->doff > bk->len ? bk->len : dbt->doff);
-		tlen += dbt->doff;
+		tlen = dbt->doff;
+		p = (u_int8_t *)t->bt_rdata.data + dbt->doff;
 
-		/* Copy in the user's data. */
-		memcpy((u_int8_t *)t->bt_rdata.data +
-		    dbt->doff, dbt->data, dbt->size);
+		/* Copy in the application provided data. */
+		memcpy(p, dbt->data, dbt->size);
 		tlen += dbt->size;
 
-		/* Copy in any remaining data. */
+		/* Copy in any trailing data from the original record. */
 		len = dbt->doff + dbt->dlen;
 		if (bk->len > len) {
-			memcpy((u_int8_t *)t->bt_rdata.data + dbt->doff +
-			    dbt->size, bk->data + len, bk->len - len);
+			memcpy(p + dbt->size, bk->data + len, bk->len - len);
 			tlen += bk->len - len;
 		}
 	}
diff --git a/db2/btree/bt_rec.c b/db2/btree/bt_rec.c
index 90ee13764e..fe33825ec4 100644
--- a/db2/btree/bt_rec.c
+++ b/db2/btree/bt_rec.c
@@ -1,23 +1,20 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)bt_rec.c	10.18 (Sleepycat) 12/15/97";
+static const char sccsid[] = "@(#)bt_rec.c	10.21 (Sleepycat) 4/28/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
 #include <sys/types.h>
 
-#include <ctype.h>
 #include <errno.h>
-#include <stddef.h>
-#include <stdlib.h>
 #include <string.h>
 #endif
 
@@ -27,7 +24,6 @@ static const char sccsid[] = "@(#)bt_rec.c	10.18 (Sleepycat) 12/15/97";
 #include "hash.h"
 #include "btree.h"
 #include "log.h"
-#include "db_dispatch.h"
 #include "common_ext.h"
 
 /*
@@ -51,7 +47,7 @@ __bam_pg_alloc_recover(logp, dbtp, lsnp, redo, info)
 	PAGE *pagep;
 	DB *file_dbp, *mdbp;
 	db_pgno_t pgno;
-	int cmp_n, cmp_p, created, modified, ret;
+	int cmp_n, cmp_p, modified, ret;
 
 	REC_PRINT(__bam_pg_alloc_print);
 	REC_INTRO(__bam_pg_alloc_read);
@@ -86,18 +82,17 @@ __bam_pg_alloc_recover(logp, dbtp, lsnp, redo, info)
 	}
 
 	/* Fix up the allocated page. */
-	created = IS_ZERO_LSN(LSN(pagep));
 	modified = 0;
 	cmp_n = log_compare(lsnp, &LSN(pagep));
 	cmp_p = log_compare(&LSN(pagep), &argp->page_lsn);
-	if ((created || cmp_p == 0) && redo) {
+	if (cmp_p == 0 && redo) {
 		/* Need to redo update described. */
 		P_INIT(pagep, file_dbp->pgsize,
 		    argp->pgno, PGNO_INVALID, PGNO_INVALID, 0, argp->ptype);
 
 		pagep->lsn = *lsnp;
 		modified = 1;
-	} else if ((created || cmp_n == 0) && !redo) {
+	} else if (cmp_n == 0 && !redo) {
 		/* Need to undo update described. */
 		P_INIT(pagep, file_dbp->pgsize,
 		    argp->pgno, PGNO_INVALID, meta->free, 0, P_INVALID);
diff --git a/db2/btree/bt_recno.c b/db2/btree/bt_recno.c
index 70ab63b8d4..38dbbd1c55 100644
--- a/db2/btree/bt_recno.c
+++ b/db2/btree/bt_recno.c
@@ -1,14 +1,14 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1997
+ * Copyright (c) 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)bt_recno.c	10.26 (Sleepycat) 1/8/98";
+static const char sccsid[] = "@(#)bt_recno.c	10.37 (Sleepycat) 5/23/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -16,8 +16,6 @@ static const char sccsid[] = "@(#)bt_recno.c	10.26 (Sleepycat) 1/8/98";
 
 #include <errno.h>
 #include <limits.h>
-#include <stdio.h>
-#include <stdlib.h>
 #include <string.h>
 #endif
 
@@ -25,16 +23,17 @@ static const char sccsid[] = "@(#)bt_recno.c	10.26 (Sleepycat) 1/8/98";
 #include "db_page.h"
 #include "btree.h"
 
-static int __ram_add __P((DB *, db_recno_t *, DBT *, int, int));
+static int __ram_add __P((DB *, db_recno_t *, DBT *, u_int32_t, u_int32_t));
 static int __ram_c_close __P((DBC *));
-static int __ram_c_del __P((DBC *, int));
-static int __ram_c_get __P((DBC *, DBT *, DBT *, int));
-static int __ram_c_put __P((DBC *, DBT *, DBT *, int));
+static int __ram_c_del __P((DBC *, u_int32_t));
+static int __ram_c_get __P((DBC *, DBT *, DBT *, u_int32_t));
+static int __ram_c_put __P((DBC *, DBT *, DBT *, u_int32_t));
 static int __ram_fmap __P((DB *, db_recno_t));
-static int __ram_get __P((DB *, DB_TXN *, DBT *, DBT *, int));
-static int __ram_put __P((DB *, DB_TXN *, DBT *, DBT *, int));
+static int __ram_get __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t));
+static int __ram_iget __P((DB *, DBT *, DBT *));
+static int __ram_put __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t));
 static int __ram_source __P((DB *, RECNO *, const char *));
-static int __ram_sync __P((DB *, int));
+static int __ram_sync __P((DB *, u_int32_t));
 static int __ram_update __P((DB *, db_recno_t, int));
 static int __ram_vmap __P((DB *, db_recno_t));
 static int __ram_writeback __P((DB *));
@@ -142,7 +141,7 @@ __ram_open(dbp, type, dbinfo)
 
 err:	/* If we mmap'd a source file, discard it. */
 	if (rp->re_smap != NULL)
-		(void)__db_unmap(rp->re_smap, rp->re_msize);
+		(void)__db_unmapfile(rp->re_smap, rp->re_msize);
 
 	/* If we opened a source file, discard it. */
 	if (rp->re_fd != -1)
@@ -199,9 +198,9 @@ __ram_cursor(dbp, txn, dbcp)
 	 * All cursors are queued from the master DB structure.  Add the
 	 * cursor to that queue.
 	 */
-	DB_THREAD_LOCK(dbp);
+	CURSOR_SETUP(dbp);
 	TAILQ_INSERT_HEAD(&dbp->curs_queue, dbc, links);
-	DB_THREAD_UNLOCK(dbp);
+	CURSOR_TEARDOWN(dbp);
 
 	*dbcp = dbc;
 	return (0);
@@ -216,16 +215,10 @@ __ram_get(argdbp, txn, key, data, flags)
 	DB *argdbp;
 	DB_TXN *txn;
 	DBT *key, *data;
-	int flags;
+	u_int32_t flags;
 {
-	BTREE *t;
 	DB *dbp;
-	PAGE *h;
-	db_indx_t indx;
-	db_recno_t recno;
-	int exact, ret, stack;
-
-	stack = 0;
+	int ret;
 
 	DEBUG_LWRITE(argdbp, txn, "ram_get", key, NULL, flags);
 
@@ -234,6 +227,30 @@ __ram_get(argdbp, txn, key, data, flags)
 		return (ret);
 
 	GETHANDLE(argdbp, txn, &dbp, ret);
+
+	ret = __ram_iget(dbp, key, data);
+
+	PUTHANDLE(dbp);
+	return (ret);
+}
+
+/*
+ * __ram_iget --
+ *	Internal ram get function, called for both standard and cursor
+ *	get after the flags have been checked.
+ */
+static int
+__ram_iget(dbp, key, data)
+	DB *dbp;
+	DBT *key, *data;
+{
+	BTREE *t;
+	PAGE *h;
+	db_indx_t indx;
+	db_recno_t recno;
+	int exact, ret, stack;
+
+	stack = 0;
 	t = dbp->internal;
 
 	/* Check the user's record number and fill in as necessary. */
@@ -265,7 +282,6 @@ done:	/* Discard the stack. */
 	if (stack)
 		__bam_stkrel(dbp);
 
-	PUTHANDLE(dbp);
 	return (ret);
 }
 
@@ -278,7 +294,7 @@ __ram_put(argdbp, txn, key, data, flags)
 	DB *argdbp;
 	DB_TXN *txn;
 	DBT *key, *data;
-	int flags;
+	u_int32_t flags;
 {
 	BTREE *t;
 	DB *dbp;
@@ -324,7 +340,7 @@ __ram_put(argdbp, txn, key, data, flags)
 static int
 __ram_sync(argdbp, flags)
 	DB *argdbp;
-	int flags;
+	u_int32_t flags;
 {
 	DB *dbp;
 	int ret;
@@ -361,7 +377,7 @@ __ram_close(argdbp)
 
 	/* Close any underlying mmap region. */
 	if (rp->re_smap != NULL)
-		(void)__db_unmap(rp->re_smap, rp->re_msize);
+		(void)__db_unmapfile(rp->re_smap, rp->re_msize);
 
 	/* Close any backing source file descriptor. */
 	if (rp->re_fd != -1)
@@ -403,17 +419,10 @@ __ram_c_iclose(dbp, dbc)
 	DB *dbp;
 	DBC *dbc;
 {
-	/*
-	 * All cursors are queued from the master DB structure.  For
-	 * now, discard the DB handle which triggered this call, and
-	 * replace it with the cursor's reference.
-	 */
-	dbp = dbc->dbp;
-
 	/* Remove the cursor from the queue. */
-	DB_THREAD_LOCK(dbp);
+	CURSOR_SETUP(dbp);
 	TAILQ_REMOVE(&dbp->curs_queue, dbc, links);
-	DB_THREAD_UNLOCK(dbp);
+	CURSOR_TEARDOWN(dbp);
 
 	/* Discard the structures. */
 	FREE(dbc->internal, sizeof(RCURSOR));
@@ -429,7 +438,7 @@ __ram_c_iclose(dbp, dbc)
 static int
 __ram_c_del(dbc, flags)
 	DBC *dbc;
-	int flags;
+	u_int32_t flags;
 {
 	DBT key;
 	RCURSOR *cp;
@@ -466,7 +475,7 @@ static int
 __ram_c_get(dbc, key, data, flags)
 	DBC *dbc;
 	DBT *key, *data;
-	int flags;
+	u_int32_t flags;
 {
 	BTREE *t;
 	DB *dbp;
@@ -537,7 +546,7 @@ retry:	/* Update the record number. */
 
 	/*
 	 * Return the key if the user didn't give us one, and then pass it
-	 * into __ram_get().
+	 * into __ram_iget().
 	 */
 	if (flags != DB_SET && flags != DB_SET_RANGE &&
 	    (ret = __db_retcopy(key, &cp->recno, sizeof(cp->recno),
@@ -555,7 +564,7 @@ retry:	/* Update the record number. */
 	 *
 	 * Skip any keys that don't really exist.
 	 */
-	if ((ret = __ram_get(dbp, dbc->txn, key, data, 0)) != 0)
+	if ((ret = __ram_iget(dbp, key, data)) != 0)
 		if (ret == DB_KEYEMPTY &&
 		    (flags == DB_NEXT || flags == DB_PREV))
 			goto retry;
@@ -575,7 +584,7 @@ static int
 __ram_c_put(dbc, key, data, flags)
 	DBC *dbc;
 	DBT *key, *data;
-	int flags;
+	u_int32_t flags;
 {
 	BTREE *t;
 	RCURSOR *cp, copy;
@@ -624,28 +633,21 @@ split:		arg = &cp->recno;
 	if ((ret = __bam_stkrel(dbp)) != 0)
 		goto err;
 
-	if (flags != DB_CURRENT) {
-		/* Adjust the counts. */
-		if ((ret = __bam_adjust(dbp, t, 1)) != 0)
-			goto err;
-
-		switch (flags) {
-		case DB_AFTER:
-			/* Adjust the cursors. */
-			__ram_ca(dbp, cp->recno, CA_IAFTER);
-
-			/* Set this cursor to reference the new record. */
-			cp->recno = copy.recno + 1;
-			break;
-		case DB_BEFORE:
-			/* Adjust the cursors. */
-			__ram_ca(dbp, cp->recno, CA_IBEFORE);
+	switch (flags) {
+	case DB_AFTER:
+		/* Adjust the cursors. */
+		__ram_ca(dbp, cp->recno, CA_IAFTER);
 
-			/* Set this cursor to reference the new record. */
-			cp->recno = copy.recno;
-			break;
-		}
+		/* Set this cursor to reference the new record. */
+		cp->recno = copy.recno + 1;
+		break;
+	case DB_BEFORE:
+		/* Adjust the cursors. */
+		__ram_ca(dbp, cp->recno, CA_IBEFORE);
 
+		/* Set this cursor to reference the new record. */
+		cp->recno = copy.recno;
+		break;
 	}
 
 	/*
@@ -679,7 +681,7 @@ __ram_ca(dbp, recno, op)
 	/*
 	 * Adjust the cursors.  See the comment in __bam_ca_delete().
 	 */
-	DB_THREAD_LOCK(dbp);
+	CURSOR_SETUP(dbp);
 	for (dbc = TAILQ_FIRST(&dbp->curs_queue);
 	    dbc != NULL; dbc = TAILQ_NEXT(dbc, links)) {
 		cp = (RCURSOR *)dbc->internal;
@@ -698,7 +700,7 @@ __ram_ca(dbp, recno, op)
 			break;
 		}
 	}
-	DB_THREAD_UNLOCK(dbp);
+	CURSOR_TEARDOWN(dbp);
 }
 
 #ifdef DEBUG
@@ -715,14 +717,15 @@ __ram_cprint(dbp)
 	DBC *dbc;
 	RCURSOR *cp;
 
-	DB_THREAD_LOCK(dbp);
+	CURSOR_SETUP(dbp);
 	for (dbc = TAILQ_FIRST(&dbp->curs_queue);
 	    dbc != NULL; dbc = TAILQ_NEXT(dbc, links)) {
 		cp = (RCURSOR *)dbc->internal;
 		fprintf(stderr,
 		    "%#0x: recno: %lu\n", (u_int)cp, (u_long)cp->recno);
 	}
-	DB_THREAD_UNLOCK(dbp);
+	CURSOR_TEARDOWN(dbp);
+
 	return (0);
 }
 #endif /* DEBUG */
@@ -853,11 +856,11 @@ __ram_source(dbp, rp, fname)
 	const char *fname;
 {
 	size_t size;
-	u_int32_t mbytes, bytes;
-	int oflags, ret;
+	u_int32_t bytes, mbytes, oflags;
+	int ret;
 
 	if ((ret = __db_appname(dbp->dbenv,
-	    DB_APP_DATA, NULL, fname, NULL, &rp->re_source)) != 0)
+	    DB_APP_DATA, NULL, fname, 0, NULL, &rp->re_source)) != 0)
 		return (ret);
 
 	oflags = F_ISSET(dbp, DB_AM_RDONLY) ? DB_RDONLY : 0;
@@ -886,7 +889,8 @@ __ram_source(dbp, rp, fname)
 	}
 
 	size = mbytes * MEGABYTE + bytes;
-	if ((ret = __db_map(rp->re_fd, (size_t)size, 1, 1, &rp->re_smap)) != 0)
+	if ((ret = __db_mapfile(rp->re_source,
+	    rp->re_fd, (size_t)size, 1, &rp->re_smap)) != 0)
 		goto err;
 	rp->re_cmap = rp->re_smap;
 	rp->re_emap = (u_int8_t *)rp->re_smap + (rp->re_msize = size);
@@ -952,7 +956,7 @@ __ram_writeback(dbp)
 	 * open will fail.
 	 */
 	if (rp->re_smap != NULL) {
-		(void)__db_unmap(rp->re_smap, rp->re_msize);
+		(void)__db_unmapfile(rp->re_smap, rp->re_msize);
 		rp->re_smap = NULL;
 	}
 
@@ -1078,19 +1082,22 @@ __ram_fmap(dbp, top)
 
 	sp = (u_int8_t *)rp->re_cmap;
 	ep = (u_int8_t *)rp->re_emap;
-	while (recno <= top) {
+	while (recno < top) {
 		if (sp >= ep) {
 			F_SET(rp, RECNO_EOF);
 			return (DB_NOTFOUND);
 		}
 		len = rp->re_len;
 		for (p = t->bt_rdata.data;
-		    sp < ep && len > 0; *p++ = *sp++, --len);
+		    sp < ep && len > 0; *p++ = *sp++, --len)
+			;
 
 		/*
-		 * Another process may have read some portion of the input
-		 * file already, in which case we just want to discard the
-		 * new record.
+		 * Another process may have read this record from the input
+		 * file and stored it into the database already, in which
+		 * case we don't need to repeat that operation.  We detect
+		 * this by checking if the last record we've read is greater
+		 * than or equal to the number of records in the database.
 		 *
 		 * XXX
 		 * We should just do a seek, since the records are fixed
@@ -1138,17 +1145,20 @@ __ram_vmap(dbp, top)
 
 	sp = (u_int8_t *)rp->re_cmap;
 	ep = (u_int8_t *)rp->re_emap;
-	while (recno <= top) {
+	while (recno < top) {
 		if (sp >= ep) {
 			F_SET(rp, RECNO_EOF);
 			return (DB_NOTFOUND);
 		}
-		for (data.data = sp; sp < ep && *sp != delim; ++sp);
+		for (data.data = sp; sp < ep && *sp != delim; ++sp)
+			;
 
 		/*
-		 * Another process may have read some portion of the input
-		 * file already, in which case we just want to discard the
-		 * new record.
+		 * Another process may have read this record from the input
+		 * file and stored it into the database already, in which
+		 * case we don't need to repeat that operation.  We detect
+		 * this by checking if the last record we've read is greater
+		 * or equal to the number of records in the database.
 		 */
 		if (rp->re_last >= recno) {
 			data.size = sp - (u_int8_t *)data.data;
@@ -1172,12 +1182,13 @@ __ram_add(dbp, recnop, data, flags, bi_flags)
 	DB *dbp;
 	db_recno_t *recnop;
 	DBT *data;
-	int flags, bi_flags;
+	u_int32_t flags, bi_flags;
 {
+	BKEYDATA *bk;
 	BTREE *t;
 	PAGE *h;
 	db_indx_t indx;
-	int exact, ret, stack;
+	int exact, isdeleted, ret, stack;
 
 	t = dbp->internal;
 
@@ -1190,34 +1201,63 @@ retry:	/* Find the slot for insertion. */
 	stack = 1;
 
 	/*
-	 * The recno access method doesn't currently support duplicates, so
-	 * if an identical key is already in the tree we're either overwriting
-	 * it or an error is returned.
+	 * If DB_NOOVERWRITE is set and the item already exists in the tree,
+	 * return an error unless the item has been marked for deletion.
 	 */
-	if (exact && LF_ISSET(DB_NOOVERWRITE)) {
-		ret = DB_KEYEXIST;
-		goto err;
+	isdeleted = 0;
+	if (exact) {
+		bk = GET_BKEYDATA(h, indx);
+		if (B_DISSET(bk->type)) {
+			isdeleted = 1;
+			__bam_ca_replace(dbp, h->pgno, indx, REPLACE_SETUP);
+		} else
+			if (LF_ISSET(DB_NOOVERWRITE)) {
+				ret = DB_KEYEXIST;
+				goto err;
+			}
 	}
 
 	/*
 	 * Select the arguments for __bam_iitem() and do the insert.  If the
 	 * key is an exact match, or we're replacing the data item with a
-	 * new data item.  If the key isn't an exact match, we're inserting
-	 * a new key/data pair, before the search location.
+	 * new data item, replace the current item.  If the key isn't an exact
+	 * match, we're inserting a new key/data pair, before the search
+	 * location.
 	 */
-	if ((ret = __bam_iitem(dbp, &h, &indx, NULL,
-	    data, exact ? DB_CURRENT : DB_BEFORE, bi_flags)) == DB_NEEDSPLIT) {
+	switch (ret = __bam_iitem(dbp,
+	    &h, &indx, NULL, data, exact ? DB_CURRENT : DB_BEFORE, bi_flags)) {
+	case 0:
+		/*
+		 * Done.  Clean up the cursor and adjust the internal page
+		 * counts.
+		 */
+		if (isdeleted)
+			__bam_ca_replace(dbp, h->pgno, indx, REPLACE_SUCCESS);
+		break;
+	case DB_NEEDSPLIT:
+		/*
+		 * We have to split the page.  Back out the cursor setup,
+		 * discard the stack of pages, and do the split.
+		 */
+		if (isdeleted)
+			__bam_ca_replace(dbp, h->pgno, indx, REPLACE_FAILED);
+
 		(void)__bam_stkrel(dbp);
 		stack = 0;
+
 		if ((ret = __bam_split(dbp, recnop)) != 0)
-			goto err;
+			break;
+
 		goto retry;
+		/* NOTREACHED */
+	default:
+		if (isdeleted)
+			__bam_ca_replace(dbp, h->pgno, indx, REPLACE_FAILED);
+		break;
 	}
 
-	if (!exact && ret == 0)
-		__bam_adjust(dbp, t, 1);
-
 err:	if (stack)
 		__bam_stkrel(dbp);
+
 	return (ret);
 }
diff --git a/db2/btree/bt_rsearch.c b/db2/btree/bt_rsearch.c
index ee26221e25..caa6b3515e 100644
--- a/db2/btree/bt_rsearch.c
+++ b/db2/btree/bt_rsearch.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 /*
@@ -44,14 +44,11 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)bt_rsearch.c	10.8 (Sleepycat) 8/24/97";
+static const char sccsid[] = "@(#)bt_rsearch.c	10.15 (Sleepycat) 5/6/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
 #include <sys/types.h>
-
-#include <stdio.h>
-#include <stdlib.h>
 #endif
 
 #include "db_int.h"
@@ -62,13 +59,13 @@ static const char sccsid[] = "@(#)bt_rsearch.c	10.8 (Sleepycat) 8/24/97";
  * __bam_rsearch --
  *	Search a btree for a record number.
  *
- * PUBLIC: int __bam_rsearch __P((DB *, db_recno_t *, u_int, int, int *));
+ * PUBLIC: int __bam_rsearch __P((DB *, db_recno_t *, u_int32_t, int, int *));
  */
 int
 __bam_rsearch(dbp, recnop, flags, stop, exactp)
 	DB *dbp;
 	db_recno_t *recnop;
-	u_int flags;
+	u_int32_t flags;
 	int stop, *exactp;
 {
 	BINTERNAL *bi;
@@ -78,7 +75,7 @@ __bam_rsearch(dbp, recnop, flags, stop, exactp)
 	RINTERNAL *ri;
 	db_indx_t indx, top;
 	db_pgno_t pg;
-	db_recno_t recno, total;
+	db_recno_t i, recno, total;
 	int isappend, ret, stack;
 
 	t = dbp->internal;
@@ -136,8 +133,7 @@ __bam_rsearch(dbp, recnop, flags, stop, exactp)
 			*exactp = 1;
 		else {
 			*exactp = 0;
-			if (flags == S_DELETE ||
-			    flags == S_FIND || recno > total + 1) {
+			if (!PAST_END_OK(flags) || recno > total + 1) {
 				(void)memp_fput(dbp->mpf, h, 0);
 				(void)__BT_LPUT(dbp, lock);
 				return (DB_NOTFOUND);
@@ -164,30 +160,65 @@ __bam_rsearch(dbp, recnop, flags, stop, exactp)
 		stack = 1;
 	}
 
-	/* Records in the tree are 0-based, and record numbers are 1-based. */
-	--recno;
-
+	/*
+	 * !!!
+	 * Record numbers in the tree are 0-based, but the recno is
+	 * 1-based.  All of the calculations below have to take this
+	 * into account.
+	 */
 	for (total = 0;;) {
 		switch (TYPE(h)) {
 		case P_LBTREE:
-			BT_STK_ENTER(t, h, (recno - total) * P_INDX, lock, ret);
+			recno -= total;
+
+			/*
+			 * There may be logically deleted records on the page,
+			 * walk the page correcting for them.  The record may
+			 * not exist if there are enough deleted records in the
+			 * page.
+			 */
+			if (recno <= NUM_ENT(h))
+				for (i = recno - 1;; --i) {
+					if (B_DISSET(GET_BKEYDATA(h,
+					    i * P_INDX + O_INDX)->type))
+						++recno;
+					if (i == 0)
+						break;
+				}
+			if (recno > NUM_ENT(h)) {
+				*exactp = 0;
+				if (!PAST_END_OK(flags) ||
+				    recno > (db_recno_t)(NUM_ENT(h) + 1)) {
+					ret = DB_NOTFOUND;
+					goto err;
+				}
+
+			}
+
+			/* Correct from 1-based to 0-based for a page offset. */
+			--recno;
+			BT_STK_ENTER(t, h, recno * P_INDX, lock, ret);
 			return (ret);
 		case P_IBTREE:
 			for (indx = 0, top = NUM_ENT(h);;) {
 				bi = GET_BINTERNAL(h, indx);
-				if (++indx == top || total + bi->nrecs > recno)
+				if (++indx == top || total + bi->nrecs >= recno)
 					break;
 				total += bi->nrecs;
 			}
 			pg = bi->pgno;
 			break;
 		case P_LRECNO:
-			BT_STK_ENTER(t, h, recno - total, lock, ret);
+			recno -= total;
+
+			/* Correct from 1-based to 0-based for a page offset. */
+			--recno;
+			BT_STK_ENTER(t, h, recno, lock, ret);
 			return (ret);
 		case P_IRECNO:
 			for (indx = 0, top = NUM_ENT(h);;) {
 				ri = GET_RINTERNAL(h, indx);
-				if (++indx == top || total + ri->nrecs > recno)
+				if (++indx == top || total + ri->nrecs >= recno)
 					break;
 				total += ri->nrecs;
 			}
@@ -244,13 +275,13 @@ err:	BT_STK_POP(t);
  * __bam_adjust --
  *	Adjust the tree after adding or deleting a record.
  *
- * PUBLIC: int __bam_adjust __P((DB *, BTREE *, int));
+ * PUBLIC: int __bam_adjust __P((DB *, BTREE *, int32_t));
  */
 int
 __bam_adjust(dbp, t, adjust)
 	DB *dbp;
 	BTREE *t;
-	int adjust;
+	int32_t adjust;
 {
 	EPG *epg;
 	PAGE *h;
@@ -264,7 +295,7 @@ __bam_adjust(dbp, t, adjust)
 			    (ret = __bam_cadjust_log(dbp->dbenv->lg_info,
 			    dbp->txn, &LSN(h), 0, dbp->log_fileid,
 			    PGNO(h), &LSN(h), (u_int32_t)epg->indx,
-			    (int32_t)adjust, 1)) != 0)
+			    adjust, 1)) != 0)
 				return (ret);
 
 			if (TYPE(h) == P_IBTREE)
@@ -322,26 +353,31 @@ db_recno_t
 __bam_total(h)
 	PAGE *h;
 {
-	db_recno_t recs;
-	db_indx_t nxt, top;
+	db_recno_t nrecs;
+	db_indx_t indx, top;
+
+	nrecs = 0;
+	top = NUM_ENT(h);
 
 	switch (TYPE(h)) {
 	case P_LBTREE:
-		recs = NUM_ENT(h) / 2;
+		/* Check for logically deleted records. */
+		for (indx = 0; indx < top; indx += P_INDX)
+			if (!B_DISSET(GET_BKEYDATA(h, indx + O_INDX)->type))
+				++nrecs;
 		break;
 	case P_IBTREE:
-		for (recs = 0, nxt = 0, top = NUM_ENT(h); nxt < top; ++nxt)
-			recs += GET_BINTERNAL(h, nxt)->nrecs;
+		for (indx = 0; indx < top; indx += O_INDX)
+			nrecs += GET_BINTERNAL(h, indx)->nrecs;
 		break;
 	case P_LRECNO:
-		recs = NUM_ENT(h);
+		nrecs = NUM_ENT(h);
 		break;
 	case P_IRECNO:
-		for (recs = 0, nxt = 0, top = NUM_ENT(h); nxt < top; ++nxt)
-			recs += GET_RINTERNAL(h, nxt)->nrecs;
+		for (indx = 0; indx < top; indx += O_INDX)
+			nrecs += GET_RINTERNAL(h, indx)->nrecs;
 		break;
-	default:
-		abort();
 	}
-	return (recs);
+
+	return (nrecs);
 }
diff --git a/db2/btree/bt_search.c b/db2/btree/bt_search.c
index c39c9af322..09ce46d90a 100644
--- a/db2/btree/bt_search.c
+++ b/db2/btree/bt_search.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 /*
@@ -47,15 +47,13 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)bt_search.c	10.9 (Sleepycat) 11/18/97";
+static const char sccsid[] = "@(#)bt_search.c	10.15 (Sleepycat) 5/6/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
 #include <sys/types.h>
 
 #include <errno.h>
-#include <stdio.h>
-#include <stdlib.h>
 #include <string.h>
 #endif
 
@@ -68,13 +66,13 @@ static const char sccsid[] = "@(#)bt_search.c	10.9 (Sleepycat) 11/18/97";
  *	Search a btree for a key.
  *
  * PUBLIC: int __bam_search __P((DB *,
- * PUBLIC:     const DBT *, u_int, int, db_recno_t *, int *));
+ * PUBLIC:     const DBT *, u_int32_t, int, db_recno_t *, int *));
  */
 int
 __bam_search(dbp, key, flags, stop, recnop, exactp)
 	DB *dbp;
 	const DBT *key;
-	u_int flags;
+	u_int32_t flags;
 	int stop, *exactp;
 	db_recno_t *recnop;
 {
@@ -109,8 +107,7 @@ __bam_search(dbp, key, flags, stop, recnop, exactp)
 	 * Retrieve the root page.
 	 */
 	pg = PGNO_ROOT;
-	stack = F_ISSET(dbp, DB_BT_RECNUM) &&
-	    (flags == S_INSERT || flags == S_DELETE);
+	stack = F_ISSET(dbp, DB_BT_RECNUM) && LF_ISSET(S_STACK);
 	if ((ret = __bam_lget(dbp,
 	    0, pg, stack ? DB_LOCK_WRITE : DB_LOCK_READ, &lock)) != 0)
 		return (ret);
@@ -179,6 +176,14 @@ __bam_search(dbp, key, flags, stop, recnop, exactp)
 			if (LF_ISSET(S_EXACT))
 				goto notfound;
 
+			/*
+			 * !!!
+			 * Possibly returning a deleted record -- DB_SET_RANGE,
+			 * DB_KEYFIRST and DB_KEYLAST don't require an exact
+			 * match, and we don't want to walk multiple pages here
+			 * to find an undeleted record.  This is handled in the
+			 * __bam_c_search() routine.
+			 */
 			BT_STK_ENTER(t, h, base, lock, ret);
 			return (ret);
 		}
@@ -249,7 +254,10 @@ match:	*exactp = 1;
 	/*
 	 * If we got here, we know that we have a btree leaf page.
 	 *
-	 * If there are duplicates, go to the first/last one.
+	 * If there are duplicates, go to the first/last one.  This is
+	 * safe because we know that we're not going to leave the page,
+	 * all duplicate sets that are not on overflow pages exist on a
+	 * single leaf page.
 	 */
 	if (LF_ISSET(S_DUPLAST))
 		while (indx < (db_indx_t)(NUM_ENT(h) - P_INDX) &&
@@ -261,8 +269,8 @@ match:	*exactp = 1;
 			indx -= P_INDX;
 
 	/*
-	 * Now check if we are allowed to return deleted item; if not
-	 * find/last the first non-deleted item.
+	 * Now check if we are allowed to return deleted items; if not
+	 * find the next (or previous) non-deleted item.
 	 */
 	if (LF_ISSET(S_DELNO)) {
 		if (LF_ISSET(S_DUPLAST))
diff --git a/db2/btree/bt_split.c b/db2/btree/bt_split.c
index 219d486dc5..da9417c781 100644
--- a/db2/btree/bt_split.c
+++ b/db2/btree/bt_split.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 /*
@@ -44,7 +44,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)bt_split.c	10.18 (Sleepycat) 11/23/97";
+static const char sccsid[] = "@(#)bt_split.c	10.23 (Sleepycat) 5/23/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -52,8 +52,6 @@ static const char sccsid[] = "@(#)bt_split.c	10.18 (Sleepycat) 11/23/97";
 
 #include <errno.h>
 #include <limits.h>
-#include <stdio.h>
-#include <stdlib.h>
 #include <string.h>
 #endif
 
@@ -168,8 +166,10 @@ __bam_root(dbp, cp)
 	t = dbp->internal;
 
 	/* Yeah, right. */
-	if (cp->page->level >= MAXBTREELEVEL)
-		return (ENOSPC);
+	if (cp->page->level >= MAXBTREELEVEL) {
+		ret = ENOSPC;
+		goto err;
+	}
 
 	/* Create new left and right pages for the split. */
 	lp = rp = NULL;
@@ -237,18 +237,16 @@ __bam_page(dbp, pp, cp)
 	DB *dbp;
 	EPG *pp, *cp;
 {
-	BTREE *t;
 	DB_LOCK tplock;
 	PAGE *lp, *rp, *tp;
 	int ret;
 
-	t = dbp->internal;
 	lp = rp = tp = NULL;
 	ret = -1;
 
 	/* Create new right page for the split. */
 	if ((ret = __bam_new(dbp, TYPE(cp->page), &rp)) != 0)
-		return (ret);
+		goto err;
 	P_INIT(rp, dbp->pgsize, rp->pgno,
 	    ISINTERNAL(cp->page) ? PGNO_INVALID : cp->page->pgno,
 	    ISINTERNAL(cp->page) ? PGNO_INVALID : cp->page->next_pgno,
@@ -259,7 +257,7 @@ __bam_page(dbp, pp, cp)
 		ret = ENOMEM;
 		goto err;
 	}
-#ifdef DEBUG
+#ifdef DIAGNOSTIC
 	memset(lp, 0xff, dbp->pgsize);
 #endif
 	P_INIT(lp, dbp->pgsize, cp->page->pgno,
@@ -906,13 +904,13 @@ __bam_copy(dbp, pp, cp, nxt, stop)
 	PAGE *pp, *cp;
 	u_int32_t nxt, stop;
 {
-	db_indx_t dup, nbytes, off;
+	db_indx_t nbytes, off;
 
 	/*
 	 * Copy the rest of the data to the right page.  Nxt is the next
 	 * offset placed on the target page.
 	 */
-	for (dup = off = 0; nxt < stop; ++nxt, ++NUM_ENT(cp), ++off) {
+	for (off = 0; nxt < stop; ++nxt, ++NUM_ENT(cp), ++off) {
 		switch (TYPE(pp)) {
 		case P_IBTREE:
 			if (B_TYPE(GET_BINTERNAL(pp, nxt)->type) == B_KEYDATA)
diff --git a/db2/btree/bt_stat.c b/db2/btree/bt_stat.c
index e88b5dac2d..2236434b38 100644
--- a/db2/btree/bt_stat.c
+++ b/db2/btree/bt_stat.c
@@ -1,21 +1,20 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)bt_stat.c	10.14 (Sleepycat) 10/25/97";
+static const char sccsid[] = "@(#)bt_stat.c	10.17 (Sleepycat) 4/26/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
 #include <sys/types.h>
 
 #include <errno.h>
-#include <stdlib.h>
 #include <string.h>
 #endif
 
@@ -29,14 +28,14 @@ static void __bam_add_rstat __P((DB_BTREE_LSTAT *, DB_BTREE_STAT *));
  * __bam_stat --
  *	Gather/print the btree statistics
  *
- * PUBLIC: int __bam_stat __P((DB *, void *, void *(*)(size_t), int));
+ * PUBLIC: int __bam_stat __P((DB *, void *, void *(*)(size_t), u_int32_t));
  */
 int
 __bam_stat(argdbp, spp, db_malloc, flags)
 	DB *argdbp;
 	void *spp;
 	void *(*db_malloc) __P((size_t));
-	int flags;
+	u_int32_t flags;
 {
 	BTMETA *meta;
 	BTREE *t;
diff --git a/db2/btree/btree.src b/db2/btree/btree.src
index 6145696d28..928dce2196 100644
--- a/db2/btree/btree.src
+++ b/db2/btree/btree.src
@@ -1,16 +1,12 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
+ *
+ *	@(#)btree.src	10.8 (Sleepycat) 4/10/98
  */
 
-#include "config.h"
-
-#ifndef lint
-static const char sccsid[] = "@(#)btree.src	10.6 (Sleepycat) 11/2/97";
-#endif /* not lint */
-
 PREFIX	bam
 
 /*
diff --git a/db2/btree/btree_auto.c b/db2/btree/btree_auto.c
index 18bbd5db37..75eadb1d62 100644
--- a/db2/btree/btree_auto.c
+++ b/db2/btree/btree_auto.c
@@ -15,8 +15,6 @@
 #include "db_dispatch.h"
 #include "btree.h"
 #include "db_am.h"
-#include "common_ext.h"
-
 /*
  * PUBLIC: int __bam_pg_alloc_log
  * PUBLIC:     __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
@@ -85,7 +83,7 @@ int __bam_pg_alloc_log(logp, txnid, ret_lsnp, flags,
 	bp += sizeof(ptype);
 	memcpy(bp, &next, sizeof(next));
 	bp += sizeof(next);
-#ifdef DEBUG
+#ifdef DIAGNOSTIC
 	if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size)
 		fprintf(stderr, "Error in log record length");
 #endif
@@ -101,22 +99,23 @@ int __bam_pg_alloc_log(logp, txnid, ret_lsnp, flags,
  * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
  */
 int
-__bam_pg_alloc_print(notused1, dbtp, lsnp, notused3, notused4)
+__bam_pg_alloc_print(notused1, dbtp, lsnp, notused2, notused3)
 	DB_LOG *notused1;
 	DBT *dbtp;
 	DB_LSN *lsnp;
-	int notused3;
-	void *notused4;
+	int notused2;
+	void *notused3;
 {
 	__bam_pg_alloc_args *argp;
 	u_int32_t i;
-	int c, ret;
+	u_int ch;
+	int ret;
 
 	i = 0;
-	c = 0;
+	ch = 0;
 	notused1 = NULL;
-	notused3 = 0;
-	notused4 = NULL;
+	notused2 = 0;
+	notused3 = NULL;
 
 	if ((ret = __bam_pg_alloc_read(dbtp->data, &argp)) != 0)
 		return (ret);
@@ -249,7 +248,7 @@ int __bam_pg_free_log(logp, txnid, ret_lsnp, flags,
 	}
 	memcpy(bp, &next, sizeof(next));
 	bp += sizeof(next);
-#ifdef DEBUG
+#ifdef DIAGNOSTIC
 	if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size)
 		fprintf(stderr, "Error in log record length");
 #endif
@@ -265,22 +264,23 @@ int __bam_pg_free_log(logp, txnid, ret_lsnp, flags,
  * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
  */
 int
-__bam_pg_free_print(notused1, dbtp, lsnp, notused3, notused4)
+__bam_pg_free_print(notused1, dbtp, lsnp, notused2, notused3)
 	DB_LOG *notused1;
 	DBT *dbtp;
 	DB_LSN *lsnp;
-	int notused3;
-	void *notused4;
+	int notused2;
+	void *notused3;
 {
 	__bam_pg_free_args *argp;
 	u_int32_t i;
-	int c, ret;
+	u_int ch;
+	int ret;
 
 	i = 0;
-	c = 0;
+	ch = 0;
 	notused1 = NULL;
-	notused3 = 0;
-	notused4 = NULL;
+	notused2 = 0;
+	notused3 = NULL;
 
 	if ((ret = __bam_pg_free_read(dbtp->data, &argp)) != 0)
 		return (ret);
@@ -297,11 +297,11 @@ __bam_pg_free_print(notused1, dbtp, lsnp, notused3, notused4)
 	    (u_long)argp->meta_lsn.file, (u_long)argp->meta_lsn.offset);
 	printf("\theader: ");
 	for (i = 0; i < argp->header.size; i++) {
-		c = ((char *)argp->header.data)[i];
-		if (isprint(c) || c == 0xa)
-			putchar(c);
+		ch = ((u_int8_t *)argp->header.data)[i];
+		if (isprint(ch) || ch == 0xa)
+			putchar(ch);
 		else
-			printf("%#x ", c);
+			printf("%#x ", ch);
 	}
 	printf("\n");
 	printf("\tnext: %lu\n", (u_long)argp->next);
@@ -443,7 +443,7 @@ int __bam_split_log(logp, txnid, ret_lsnp, flags,
 		memcpy(bp, pg->data, pg->size);
 		bp += pg->size;
 	}
-#ifdef DEBUG
+#ifdef DIAGNOSTIC
 	if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size)
 		fprintf(stderr, "Error in log record length");
 #endif
@@ -459,22 +459,23 @@ int __bam_split_log(logp, txnid, ret_lsnp, flags,
  * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
  */
 int
-__bam_split_print(notused1, dbtp, lsnp, notused3, notused4)
+__bam_split_print(notused1, dbtp, lsnp, notused2, notused3)
 	DB_LOG *notused1;
 	DBT *dbtp;
 	DB_LSN *lsnp;
-	int notused3;
-	void *notused4;
+	int notused2;
+	void *notused3;
 {
 	__bam_split_args *argp;
 	u_int32_t i;
-	int c, ret;
+	u_int ch;
+	int ret;
 
 	i = 0;
-	c = 0;
+	ch = 0;
 	notused1 = NULL;
-	notused3 = 0;
-	notused4 = NULL;
+	notused2 = 0;
+	notused3 = NULL;
 
 	if ((ret = __bam_split_read(dbtp->data, &argp)) != 0)
 		return (ret);
@@ -498,11 +499,11 @@ __bam_split_print(notused1, dbtp, lsnp, notused3, notused4)
 	    (u_long)argp->nlsn.file, (u_long)argp->nlsn.offset);
 	printf("\tpg: ");
 	for (i = 0; i < argp->pg.size; i++) {
-		c = ((char *)argp->pg.data)[i];
-		if (isprint(c) || c == 0xa)
-			putchar(c);
+		ch = ((u_int8_t *)argp->pg.data)[i];
+		if (isprint(ch) || ch == 0xa)
+			putchar(ch);
 		else
-			printf("%#x ", c);
+			printf("%#x ", ch);
 	}
 	printf("\n");
 	printf("\n");
@@ -639,7 +640,7 @@ int __bam_rsplit_log(logp, txnid, ret_lsnp, flags,
 	else
 		memset(bp, 0, sizeof(*rootlsn));
 	bp += sizeof(*rootlsn);
-#ifdef DEBUG
+#ifdef DIAGNOSTIC
 	if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size)
 		fprintf(stderr, "Error in log record length");
 #endif
@@ -655,22 +656,23 @@ int __bam_rsplit_log(logp, txnid, ret_lsnp, flags,
  * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
  */
 int
-__bam_rsplit_print(notused1, dbtp, lsnp, notused3, notused4)
+__bam_rsplit_print(notused1, dbtp, lsnp, notused2, notused3)
 	DB_LOG *notused1;
 	DBT *dbtp;
 	DB_LSN *lsnp;
-	int notused3;
-	void *notused4;
+	int notused2;
+	void *notused3;
 {
 	__bam_rsplit_args *argp;
 	u_int32_t i;
-	int c, ret;
+	u_int ch;
+	int ret;
 
 	i = 0;
-	c = 0;
+	ch = 0;
 	notused1 = NULL;
-	notused3 = 0;
-	notused4 = NULL;
+	notused2 = 0;
+	notused3 = NULL;
 
 	if ((ret = __bam_rsplit_read(dbtp->data, &argp)) != 0)
 		return (ret);
@@ -685,21 +687,21 @@ __bam_rsplit_print(notused1, dbtp, lsnp, notused3, notused4)
 	printf("\tpgno: %lu\n", (u_long)argp->pgno);
 	printf("\tpgdbt: ");
 	for (i = 0; i < argp->pgdbt.size; i++) {
-		c = ((char *)argp->pgdbt.data)[i];
-		if (isprint(c) || c == 0xa)
-			putchar(c);
+		ch = ((u_int8_t *)argp->pgdbt.data)[i];
+		if (isprint(ch) || ch == 0xa)
+			putchar(ch);
 		else
-			printf("%#x ", c);
+			printf("%#x ", ch);
 	}
 	printf("\n");
 	printf("\tnrec: %lu\n", (u_long)argp->nrec);
 	printf("\trootent: ");
 	for (i = 0; i < argp->rootent.size; i++) {
-		c = ((char *)argp->rootent.data)[i];
-		if (isprint(c) || c == 0xa)
-			putchar(c);
+		ch = ((u_int8_t *)argp->rootent.data)[i];
+		if (isprint(ch) || ch == 0xa)
+			putchar(ch);
 		else
-			printf("%#x ", c);
+			printf("%#x ", ch);
 	}
 	printf("\n");
 	printf("\trootlsn: [%lu][%lu]\n",
@@ -817,7 +819,7 @@ int __bam_adj_log(logp, txnid, ret_lsnp, flags,
 	bp += sizeof(indx_copy);
 	memcpy(bp, &is_insert, sizeof(is_insert));
 	bp += sizeof(is_insert);
-#ifdef DEBUG
+#ifdef DIAGNOSTIC
 	if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size)
 		fprintf(stderr, "Error in log record length");
 #endif
@@ -833,22 +835,23 @@ int __bam_adj_log(logp, txnid, ret_lsnp, flags,
  * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
  */
 int
-__bam_adj_print(notused1, dbtp, lsnp, notused3, notused4)
+__bam_adj_print(notused1, dbtp, lsnp, notused2, notused3)
 	DB_LOG *notused1;
 	DBT *dbtp;
 	DB_LSN *lsnp;
-	int notused3;
-	void *notused4;
+	int notused2;
+	void *notused3;
 {
 	__bam_adj_args *argp;
 	u_int32_t i;
-	int c, ret;
+	u_int ch;
+	int ret;
 
 	i = 0;
-	c = 0;
+	ch = 0;
 	notused1 = NULL;
-	notused3 = 0;
-	notused4 = NULL;
+	notused2 = 0;
+	notused3 = NULL;
 
 	if ((ret = __bam_adj_read(dbtp->data, &argp)) != 0)
 		return (ret);
@@ -975,7 +978,7 @@ int __bam_cadjust_log(logp, txnid, ret_lsnp, flags,
 	bp += sizeof(adjust);
 	memcpy(bp, &total, sizeof(total));
 	bp += sizeof(total);
-#ifdef DEBUG
+#ifdef DIAGNOSTIC
 	if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size)
 		fprintf(stderr, "Error in log record length");
 #endif
@@ -991,22 +994,23 @@ int __bam_cadjust_log(logp, txnid, ret_lsnp, flags,
  * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
  */
 int
-__bam_cadjust_print(notused1, dbtp, lsnp, notused3, notused4)
+__bam_cadjust_print(notused1, dbtp, lsnp, notused2, notused3)
 	DB_LOG *notused1;
 	DBT *dbtp;
 	DB_LSN *lsnp;
-	int notused3;
-	void *notused4;
+	int notused2;
+	void *notused3;
 {
 	__bam_cadjust_args *argp;
 	u_int32_t i;
-	int c, ret;
+	u_int ch;
+	int ret;
 
 	i = 0;
-	c = 0;
+	ch = 0;
 	notused1 = NULL;
-	notused3 = 0;
-	notused4 = NULL;
+	notused2 = 0;
+	notused3 = NULL;
 
 	if ((ret = __bam_cadjust_read(dbtp->data, &argp)) != 0)
 		return (ret);
@@ -1124,7 +1128,7 @@ int __bam_cdel_log(logp, txnid, ret_lsnp, flags,
 	bp += sizeof(*lsn);
 	memcpy(bp, &indx, sizeof(indx));
 	bp += sizeof(indx);
-#ifdef DEBUG
+#ifdef DIAGNOSTIC
 	if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size)
 		fprintf(stderr, "Error in log record length");
 #endif
@@ -1140,22 +1144,23 @@ int __bam_cdel_log(logp, txnid, ret_lsnp, flags,
  * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
  */
 int
-__bam_cdel_print(notused1, dbtp, lsnp, notused3, notused4)
+__bam_cdel_print(notused1, dbtp, lsnp, notused2, notused3)
 	DB_LOG *notused1;
 	DBT *dbtp;
 	DB_LSN *lsnp;
-	int notused3;
-	void *notused4;
+	int notused2;
+	void *notused3;
 {
 	__bam_cdel_args *argp;
 	u_int32_t i;
-	int c, ret;
+	u_int ch;
+	int ret;
 
 	i = 0;
-	c = 0;
+	ch = 0;
 	notused1 = NULL;
-	notused3 = 0;
-	notused4 = NULL;
+	notused2 = 0;
+	notused3 = NULL;
 
 	if ((ret = __bam_cdel_read(dbtp->data, &argp)) != 0)
 		return (ret);
@@ -1307,7 +1312,7 @@ int __bam_repl_log(logp, txnid, ret_lsnp, flags,
 	bp += sizeof(prefix);
 	memcpy(bp, &suffix, sizeof(suffix));
 	bp += sizeof(suffix);
-#ifdef DEBUG
+#ifdef DIAGNOSTIC
 	if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size)
 		fprintf(stderr, "Error in log record length");
 #endif
@@ -1323,22 +1328,23 @@ int __bam_repl_log(logp, txnid, ret_lsnp, flags,
  * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
  */
 int
-__bam_repl_print(notused1, dbtp, lsnp, notused3, notused4)
+__bam_repl_print(notused1, dbtp, lsnp, notused2, notused3)
 	DB_LOG *notused1;
 	DBT *dbtp;
 	DB_LSN *lsnp;
-	int notused3;
-	void *notused4;
+	int notused2;
+	void *notused3;
 {
 	__bam_repl_args *argp;
 	u_int32_t i;
-	int c, ret;
+	u_int ch;
+	int ret;
 
 	i = 0;
-	c = 0;
+	ch = 0;
 	notused1 = NULL;
-	notused3 = 0;
-	notused4 = NULL;
+	notused2 = 0;
+	notused3 = NULL;
 
 	if ((ret = __bam_repl_read(dbtp->data, &argp)) != 0)
 		return (ret);
@@ -1357,20 +1363,20 @@ __bam_repl_print(notused1, dbtp, lsnp, notused3, notused4)
 	printf("\tisdeleted: %lu\n", (u_long)argp->isdeleted);
 	printf("\torig: ");
 	for (i = 0; i < argp->orig.size; i++) {
-		c = ((char *)argp->orig.data)[i];
-		if (isprint(c) || c == 0xa)
-			putchar(c);
+		ch = ((u_int8_t *)argp->orig.data)[i];
+		if (isprint(ch) || ch == 0xa)
+			putchar(ch);
 		else
-			printf("%#x ", c);
+			printf("%#x ", ch);
 	}
 	printf("\n");
 	printf("\trepl: ");
 	for (i = 0; i < argp->repl.size; i++) {
-		c = ((char *)argp->repl.data)[i];
-		if (isprint(c) || c == 0xa)
-			putchar(c);
+		ch = ((u_int8_t *)argp->repl.data)[i];
+		if (isprint(ch) || ch == 0xa)
+			putchar(ch);
 		else
-			printf("%#x ", c);
+			printf("%#x ", ch);
 	}
 	printf("\n");
 	printf("\tprefix: %lu\n", (u_long)argp->prefix);
diff --git a/db2/clib/getlong.c b/db2/clib/getlong.c
index 85f4e8c9e2..4e144b14dc 100644
--- a/db2/clib/getlong.c
+++ b/db2/clib/getlong.c
@@ -1,14 +1,14 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)getlong.c	10.2 (Sleepycat) 5/1/97";
+static const char sccsid[] = "@(#)getlong.c	10.3 (Sleepycat) 4/10/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
diff --git a/db2/common/db_appinit.c b/db2/common/db_appinit.c
index 4ee9e4f40c..6ec007be0a 100644
--- a/db2/common/db_appinit.c
+++ b/db2/common/db_appinit.c
@@ -1,23 +1,21 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)db_appinit.c	10.38 (Sleepycat) 1/7/98";
+static const char sccsid[] = "@(#)db_appinit.c	10.52 (Sleepycat) 6/2/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
-#include <sys/param.h>
-#include <sys/stat.h>
+#include <sys/types.h>
 
 #include <ctype.h>
 #include <errno.h>
-#include <fcntl.h>
 #include <signal.h>
 #include <stdlib.h>
 #include <string.h>
@@ -34,14 +32,14 @@ static const char sccsid[] = "@(#)db_appinit.c	10.38 (Sleepycat) 1/7/98";
 #include "clib_ext.h"
 #include "common_ext.h"
 
-static int __db_home __P((DB_ENV *, const char *, int));
+static int __db_home __P((DB_ENV *, const char *, u_int32_t));
 static int __db_parse __P((DB_ENV *, char *));
-static int __db_tmp_dir __P((DB_ENV *, int));
-static int __db_tmp_open __P((DB_ENV *, char *, int *));
+static int __db_tmp_dir __P((DB_ENV *, u_int32_t));
+static int __db_tmp_open __P((DB_ENV *, u_int32_t, char *, int *));
 
 /*
  * db_version --
- *	Return verision information.
+ *	Return version information.
  */
 char *
 db_version(majverp, minverp, patchp)
@@ -65,16 +63,18 @@ db_appinit(db_home, db_config, dbenv, flags)
 	const char *db_home;
 	char * const *db_config;
 	DB_ENV *dbenv;
-	int flags;
+	u_int32_t flags;
 {
 	FILE *fp;
-	int ret;
+	int mode, ret;
 	char * const *p;
 	char *lp, buf[MAXPATHLEN * 2];
 
 	/* Validate arguments. */
 	if (dbenv == NULL)
 		return (EINVAL);
+
+
 #ifdef HAVE_SPINLOCKS
 #define	OKFLAGS								\
    (DB_CREATE | DB_NOMMAP | DB_THREAD | DB_INIT_LOCK | DB_INIT_LOG |	\
@@ -89,10 +89,9 @@ db_appinit(db_home, db_config, dbenv, flags)
 	if ((ret = __db_fchk(dbenv, "db_appinit", flags, OKFLAGS)) != 0)
 		return (ret);
 
-#define	RECOVERY_FLAGS (DB_CREATE | DB_INIT_TXN | DB_INIT_LOG)
-	if (LF_ISSET(DB_RECOVER | DB_RECOVER_FATAL) &&
-	    LF_ISSET(RECOVERY_FLAGS) != RECOVERY_FLAGS)
-		return (__db_ferr(dbenv, "db_appinit", 1));
+	/* Transactions imply logging. */
+	if (LF_ISSET(DB_INIT_TXN))
+		LF_SET(DB_INIT_LOG);
 
 	/* Convert the db_appinit(3) flags. */
 	if (LF_ISSET(DB_THREAD))
@@ -147,47 +146,48 @@ db_appinit(db_home, db_config, dbenv, flags)
 	F_SET(dbenv, DB_ENV_APPINIT);
 
 	/*
-	 * If we are doing recovery, remove all the regions.
+	 * If we are doing recovery, remove all the old shared memory
+	 * regions.
 	 */
 	if (LF_ISSET(DB_RECOVER | DB_RECOVER_FATAL)) {
-		/* Remove all the old shared memory regions.  */
-		if ((ret = log_unlink(NULL, 1 /* force */, dbenv)) != 0)
+		if ((ret = log_unlink(NULL, 1, dbenv)) != 0)
 			goto err;
-		if ((ret = memp_unlink(NULL, 1 /* force */, dbenv)) != 0)
+		if ((ret = memp_unlink(NULL, 1, dbenv)) != 0)
 			goto err;
-		if ((ret = lock_unlink(NULL, 1 /* force */, dbenv)) != 0)
+		if ((ret = lock_unlink(NULL, 1, dbenv)) != 0)
 			goto err;
-		if ((ret = txn_unlink(NULL, 1 /* force */, dbenv)) != 0)
+		if ((ret = txn_unlink(NULL, 1, dbenv)) != 0)
 			goto err;
 	}
 
-	/* Transactions imply logging. */
-	if (LF_ISSET(DB_INIT_TXN))
-		LF_SET(DB_INIT_LOG);
-
-	/* Default permissions are 0660. */
-#undef	DB_DEFPERM
-#define	DB_DEFPERM	(S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP)
-
-	/* Initialize the subsystems. */
+	/*
+	 * Create the new shared regions.
+	 *
+	 * Default permissions are read-write for both owner and group.
+	 */
+	mode = __db_omode("rwrw--");
 	if (LF_ISSET(DB_INIT_LOCK) && (ret = lock_open(NULL,
 	    LF_ISSET(DB_CREATE | DB_THREAD),
-	    DB_DEFPERM, dbenv, &dbenv->lk_info)) != 0)
+	    mode, dbenv, &dbenv->lk_info)) != 0)
 		goto err;
 	if (LF_ISSET(DB_INIT_LOG) && (ret = log_open(NULL,
 	    LF_ISSET(DB_CREATE | DB_THREAD),
-	    DB_DEFPERM, dbenv, &dbenv->lg_info)) != 0)
+	    mode, dbenv, &dbenv->lg_info)) != 0)
 		goto err;
 	if (LF_ISSET(DB_INIT_MPOOL) && (ret = memp_open(NULL,
 	    LF_ISSET(DB_CREATE | DB_MPOOL_PRIVATE | DB_NOMMAP | DB_THREAD),
-	    DB_DEFPERM, dbenv, &dbenv->mp_info)) != 0)
+	    mode, dbenv, &dbenv->mp_info)) != 0)
 		goto err;
 	if (LF_ISSET(DB_INIT_TXN) && (ret = txn_open(NULL,
 	    LF_ISSET(DB_CREATE | DB_THREAD | DB_TXN_NOSYNC),
-	    DB_DEFPERM, dbenv, &dbenv->tx_info)) != 0)
+	    mode, dbenv, &dbenv->tx_info)) != 0)
 		goto err;
 
-	/* Initialize recovery. */
+	/*
+	 * If the application is running with transactions, initialize the
+	 * function tables.  Once that's done, do recovery for any previous
+	 * run.
+	 */
 	if (LF_ISSET(DB_INIT_TXN)) {
 		if ((ret = __bam_init_recover(dbenv)) != 0)
 			goto err;
@@ -199,12 +199,12 @@ db_appinit(db_home, db_config, dbenv, flags)
 			goto err;
 		if ((ret = __txn_init_recover(dbenv)) != 0)
 			goto err;
-	}
 
-	/* Run recovery if necessary. */
-	if (LF_ISSET(DB_RECOVER | DB_RECOVER_FATAL) && (ret =
-	    __db_apprec(dbenv, LF_ISSET(DB_RECOVER | DB_RECOVER_FATAL))) != 0)
-		goto err;
+		if (LF_ISSET(DB_RECOVER | DB_RECOVER_FATAL) &&
+		    (ret = __db_apprec(dbenv,
+		    LF_ISSET(DB_RECOVER | DB_RECOVER_FATAL))) != 0)
+			goto err;
+	}
 
 	return (ret);
 
@@ -282,21 +282,21 @@ db_appexit(dbenv)
  *	it in allocated space.
  *
  * PUBLIC: int __db_appname __P((DB_ENV *,
- * PUBLIC:    APPNAME, const char *, const char *, int *, char **));
+ * PUBLIC:    APPNAME, const char *, const char *, u_int32_t, int *, char **));
  */
 int
-__db_appname(dbenv, appname, dir, file, fdp, namep)
+__db_appname(dbenv, appname, dir, file, tmp_oflags, fdp, namep)
 	DB_ENV *dbenv;
 	APPNAME appname;
 	const char *dir, *file;
+	u_int32_t tmp_oflags;
 	int *fdp;
 	char **namep;
 {
 	DB_ENV etmp;
 	size_t len;
-	int ret, slash, tmp_create, tmp_free;
+	int data_entry, ret, slash, tmp_create, tmp_free;
 	const char *a, *b, *c;
-	int data_entry;
 	char *p, *start;
 
 	a = b = c = NULL;
@@ -349,8 +349,8 @@ __db_appname(dbenv, appname, dir, file, fdp, namep)
 	 *
 	 * DB_ENV	   APPNAME	   RESULT
 	 * -------------------------------------------
-	 * null		   DB_APP_TMP	   <tmp>/<create>
-	 * set		   DB_APP_TMP	   DB_HOME/DB_TMP_DIR/<create>
+	 * null		   DB_APP_TMP*	   <tmp>/<create>
+	 * set		   DB_APP_TMP*	   DB_HOME/DB_TMP_DIR/<create>
 	 */
 retry:	switch (appname) {
 	case DB_APP_NONE:
@@ -431,7 +431,14 @@ done:	len =
 	    (c == NULL ? 0 : strlen(c) + 1) +
 	    (file == NULL ? 0 : strlen(file) + 1);
 
-	if ((start = (char *)__db_malloc(len)) == NULL) {
+	/*
+	 * Allocate space to hold the current path information, as well as any
+	 * temporary space that we're going to need to create a temporary file
+	 * name.
+	 */
+#define	DB_TRAIL	"XXXXXX"
+	if ((start =
+	    (char *)__db_malloc(len + sizeof(DB_TRAIL) + 10)) == NULL) {
 		__db_err(dbenv, "%s", strerror(ENOMEM));
 		if (tmp_free)
 			FREES(etmp.db_tmp_dir);
@@ -460,14 +467,15 @@ done:	len =
 		FREES(etmp.db_tmp_dir);
 
 	/* Create the file if so requested. */
-	if (tmp_create) {
-		ret = __db_tmp_open(dbenv, start, fdp);
+	if (tmp_create &&
+	    (ret = __db_tmp_open(dbenv, tmp_oflags, start, fdp)) != 0) {
 		FREES(start);
-	} else {
-		*namep = start;
-		ret = 0;
+		return (ret);
 	}
-	return (ret);
+
+	if (namep != NULL)
+		*namep = start;
+	return (0);
 }
 
 /*
@@ -478,7 +486,7 @@ static int
 __db_home(dbenv, db_home, flags)
 	DB_ENV *dbenv;
 	const char *db_home;
-	int flags;
+	u_int32_t flags;
 {
 	const char *p;
 
@@ -532,10 +540,12 @@ __db_parse(dbenv, s)
 		return (ENOMEM);
 
 	tp = local_s;
-	while ((name = strsep(&tp, " \t")) != NULL && *name == '\0');
+	while ((name = strsep(&tp, " \t")) != NULL && *name == '\0')
+		;
 	if (name == NULL)
 		goto illegal;
-	while ((value = strsep(&tp, " \t")) != NULL && *value == '\0');
+	while ((value = strsep(&tp, " \t")) != NULL && *value == '\0')
+		;
 	if (value == NULL) {
 illegal:	ret = EINVAL;
 		__db_err(dbenv, "illegal name-value pair: %s", s);
@@ -591,7 +601,7 @@ static char *sTempFolder;
 static int
 __db_tmp_dir(dbenv, flags)
 	DB_ENV *dbenv;
-	int flags;
+	u_int32_t flags;
 {
 	static const char * list[] = {	/* Ordered: see db_appinit(3). */
 		"/var/tmp",
@@ -671,49 +681,45 @@ __db_tmp_dir(dbenv, flags)
  *	Create a temporary file.
  */
 static int
-__db_tmp_open(dbenv, dir, fdp)
+__db_tmp_open(dbenv, flags, path, fdp)
 	DB_ENV *dbenv;
-	char *dir;
+	u_int32_t flags;
+	char *path;
 	int *fdp;
 {
 #ifdef HAVE_SIGFILLSET
 	sigset_t set, oset;
 #endif
 	u_long pid;
-	size_t len;
-	int isdir, ret;
-	char *trv, buf[MAXPATHLEN];
+	int mode, isdir, ret;
+	const char *p;
+	char *trv;
 
 	/*
 	 * Check the target directory; if you have six X's and it doesn't
 	 * exist, this runs for a *very* long time.
 	 */
-	if ((ret = __db_exists(dir, &isdir)) != 0) {
-		__db_err(dbenv, "%s: %s", dir, strerror(ret));
+	if ((ret = __db_exists(path, &isdir)) != 0) {
+		__db_err(dbenv, "%s: %s", path, strerror(ret));
 		return (ret);
 	}
 	if (!isdir) {
-		__db_err(dbenv, "%s: %s", dir, strerror(EINVAL));
+		__db_err(dbenv, "%s: %s", path, strerror(EINVAL));
 		return (EINVAL);
 	}
 
 	/* Build the path. */
-#define	DB_TRAIL	"/XXXXXX"
-	if ((len = strlen(dir)) + sizeof(DB_TRAIL) > sizeof(buf)) {
-		__db_err(dbenv,
-		    "tmp_open: %s: %s", buf, strerror(ENAMETOOLONG));
-		return (ENAMETOOLONG);
-	}
-	(void)strcpy(buf, dir);
-	(void)strcpy(buf + len, DB_TRAIL);
-	buf[len] = PATH_SEPARATOR[0];			/* WIN32 */
+	for (trv = path; *trv != '\0'; ++trv)
+		;
+	*trv = PATH_SEPARATOR[0];
+	for (p = DB_TRAIL; (*++trv = *p) != '\0'; ++p)
+		;
 
 	/*
 	 * Replace the X's with the process ID.  Pid should be a pid_t,
 	 * but we use unsigned long for portability.
 	 */
-	for (pid = getpid(),
-	    trv = buf + len + sizeof(DB_TRAIL) - 1; *--trv == 'X'; pid /= 10)
+	for (pid = getpid(); *--trv == 'X'; pid /= 10)
 		switch (pid % 10) {
 		case 0: *trv = '0'; break;
 		case 1: *trv = '1'; break;
@@ -728,30 +734,33 @@ __db_tmp_open(dbenv, dir, fdp)
 		}
 	++trv;
 
+	/* Set up open flags and mode. */
+	LF_SET(DB_CREATE | DB_EXCL);
+	mode = __db_omode("rw----");
+
 	/*
-	 * Try and open a file.  We block every signal we can get our hands
+	 * Try to open a file.  We block every signal we can get our hands
 	 * on so that, if we're interrupted at the wrong time, the temporary
 	 * file isn't left around -- of course, if we drop core in-between
 	 * the calls we'll hang forever, but that's probably okay.  ;-}
 	 */
 #ifdef HAVE_SIGFILLSET
-	(void)sigfillset(&set);
+	if (LF_ISSET(DB_TEMPORARY))
+		(void)sigfillset(&set);
 #endif
 	for (;;) {
 #ifdef HAVE_SIGFILLSET
-		(void)sigprocmask(SIG_BLOCK, &set, &oset);
+		if (LF_ISSET(DB_TEMPORARY))
+			(void)sigprocmask(SIG_BLOCK, &set, &oset);
 #endif
-#define	DB_TEMPOPEN	DB_CREATE | DB_EXCL | DB_TEMPORARY
-		if ((ret = __db_open(buf,
-		    DB_TEMPOPEN, DB_TEMPOPEN, S_IRUSR | S_IWUSR, fdp)) == 0) {
+		ret = __db_open(path, flags, flags, mode, fdp);
 #ifdef HAVE_SIGFILLSET
+		if (LF_ISSET(DB_TEMPORARY))
 			(void)sigprocmask(SIG_SETMASK, &oset, NULL);
 #endif
+		if (ret == 0)
 			return (0);
-		}
-#ifdef HAVE_SIGFILLSET
-		(void)sigprocmask(SIG_SETMASK, &oset, NULL);
-#endif
+
 		/*
 		 * XXX:
 		 * If we don't get an EEXIST error, then there's something
@@ -761,7 +770,7 @@ __db_tmp_open(dbenv, dir, fdp)
 		 */
 		if (ret != EEXIST) {
 			__db_err(dbenv,
-			    "tmp_open: %s: %s", buf, strerror(ret));
+			    "tmp_open: %s: %s", path, strerror(ret));
 			return (ret);
 		}
 
diff --git a/db2/common/db_apprec.c b/db2/common/db_apprec.c
index 7a42e13317..df707eafef 100644
--- a/db2/common/db_apprec.c
+++ b/db2/common/db_apprec.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 
@@ -9,18 +9,17 @@
 
 #ifndef lint
 static const char copyright[] =
-"@(#) Copyright (c) 1997\n\
+"@(#) Copyright (c) 1996, 1997, 1998\n\
 	Sleepycat Software Inc.  All rights reserved.\n";
-static const char sccsid[] = "@(#)db_apprec.c	10.23 (Sleepycat) 1/17/98";
+static const char sccsid[] = "@(#)db_apprec.c	10.30 (Sleepycat) 5/3/98";
 #endif
 
 #ifndef NO_SYSTEM_INCLUDES
 #include <sys/types.h>
 
 #include <errno.h>
-#include <time.h>
 #include <string.h>
-#include <stdlib.h>
+#include <time.h>
 #endif
 
 #include "db_int.h"
@@ -36,18 +35,19 @@ static const char sccsid[] = "@(#)db_apprec.c	10.23 (Sleepycat) 1/17/98";
  * __db_apprec --
  *	Perform recovery.
  *
- * PUBLIC: int __db_apprec __P((DB_ENV *, int));
+ * PUBLIC: int __db_apprec __P((DB_ENV *, u_int32_t));
  */
 int
 __db_apprec(dbenv, flags)
 	DB_ENV *dbenv;
-	int flags;
+	u_int32_t flags;
 {
 	DBT data;
 	DB_LOG *lp;
 	DB_LSN ckp_lsn, first_lsn, lsn;
 	time_t now;
-	int is_thread, ret;
+	u_int32_t is_thread;
+	int ret;
 	void *txninfo;
 
 	lp = dbenv->lg_info;
@@ -91,14 +91,14 @@ __db_apprec(dbenv, flags)
 	if ((ret = log_get(lp, &ckp_lsn, &data, DB_CHECKPOINT)) != 0) {
 		/*
 		 * If we don't find a checkpoint, start from the beginning.
-		 * If that fails, we're done.  Note, we require that there
-		 * be log records if we're performing recovery, and fail if
-		 * there aren't.
+		 * If that fails, we're done.  Note, we do not require that
+		 * there be log records if we're performing recovery.
 		 */
 		if ((ret = log_get(lp, &ckp_lsn, &data, DB_FIRST)) != 0) {
-			__db_err(dbenv, "First log record not found");
 			if (ret == DB_NOTFOUND)
-				ret = EINVAL;
+				ret = 0;
+			else
+				__db_err(dbenv, "First log record not found");
 			goto out;
 		}
 	}
@@ -134,14 +134,17 @@ __db_apprec(dbenv, flags)
 	} else
 		if ((ret = __log_findckp(lp, &first_lsn)) == DB_NOTFOUND) {
 			/*
-			 * If recovery was specified, there must be log files.
-			 * If we don't find one, it's an error.  (This should
-			 * have been caught above, when a log_get() of DB_FIRST
-			 * or DB_CHECKPOINT succeeded, but paranoia is good.)
+			 * We don't require that log files exist if recovery
+			 * was specified.
 			 */
-			ret = EINVAL;
+			ret = 0;
 			goto out;
 		}
+
+	if (dbenv->db_verbose)
+		__db_err(lp->dbenv, "Recovery starting from [%lu][%lu]",
+		    (u_long)first_lsn.file, (u_long)first_lsn.offset);
+
 	for (ret = log_get(lp, &lsn, &data, DB_LAST);
 	    ret == 0 && log_compare(&lsn, &first_lsn) > 0;
 	    ret = log_get(lp, &lsn, &data, DB_PREV)) {
@@ -175,21 +178,21 @@ __db_apprec(dbenv, flags)
 	__log_close_files(lp);
 
 	/*
-	 * Now set the maximum transaction id, set the last checkpoint lsn,
-	 * and the current time.  Then take a checkpoint.
+	 * Now set the last checkpoint lsn and the current time,
+	 * take a checkpoint, and reset the txnid.
 	 */
 	(void)time(&now);
-	dbenv->tx_info->region->last_txnid = ((__db_txnhead *)txninfo)->maxid;
 	dbenv->tx_info->region->last_ckp = ckp_lsn;
 	dbenv->tx_info->region->time_ckp = (u_int32_t)now;
 	if ((ret = txn_checkpoint(dbenv->tx_info, 0, 0)) != 0)
 		goto out;
+	dbenv->tx_info->region->last_txnid = TXN_MINIMUM;
 
 	if (dbenv->db_verbose) {
 		__db_err(lp->dbenv, "Recovery complete at %.24s", ctime(&now));
-		__db_err(lp->dbenv, "%s %lu %s [%lu][%lu]",
+		__db_err(lp->dbenv, "%s %lx %s [%lu][%lu]",
 		    "Maximum transaction id",
-		    (u_long)dbenv->tx_info->region->last_txnid,
+		    (u_long)((DB_TXNHEAD *)txninfo)->maxid,
 		    "Recovery checkpoint",
 		    (u_long)dbenv->tx_info->region->last_ckp.file,
 		    (u_long)dbenv->tx_info->region->last_ckp.offset);
diff --git a/db2/common/db_byteorder.c b/db2/common/db_byteorder.c
index e486132073..cadf742851 100644
--- a/db2/common/db_byteorder.c
+++ b/db2/common/db_byteorder.c
@@ -1,14 +1,14 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)db_byteorder.c	10.4 (Sleepycat) 9/4/97";
+static const char sccsid[] = "@(#)db_byteorder.c	10.5 (Sleepycat) 4/10/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
diff --git a/db2/common/db_err.c b/db2/common/db_err.c
index fc59aadbaf..98a414279e 100644
--- a/db2/common/db_err.c
+++ b/db2/common/db_err.c
@@ -1,14 +1,14 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)db_err.c	10.21 (Sleepycat) 1/13/98";
+static const char sccsid[] = "@(#)db_err.c	10.25 (Sleepycat) 5/2/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -26,6 +26,7 @@ static const char sccsid[] = "@(#)db_err.c	10.21 (Sleepycat) 1/13/98";
 #include "db_int.h"
 #include "common_ext.h"
 
+static int __db_keyempty __P((const DB_ENV *));
 static int __db_rdonly __P((const DB_ENV *, const char *));
 
 /*
@@ -81,11 +82,11 @@ __db_err(dbenv, fmt, va_alist)
  * appears before the assignment in the __db__panic() call.
  */
 static int __db_ecursor __P((DB *, DB_TXN *, DBC **));
-static int __db_edel __P((DB *, DB_TXN *, DBT *, int));
+static int __db_edel __P((DB *, DB_TXN *, DBT *, u_int32_t));
 static int __db_efd __P((DB *, int *));
-static int __db_egp __P((DB *, DB_TXN *, DBT *, DBT *, int));
-static int __db_estat __P((DB *, void *, void *(*)(size_t), int));
-static int __db_esync __P((DB *, int));
+static int __db_egp __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t));
+static int __db_estat __P((DB *, void *, void *(*)(size_t), u_int32_t));
+static int __db_esync __P((DB *, u_int32_t));
 
 /*
  * __db_ecursor --
@@ -113,7 +114,7 @@ __db_edel(a, b, c, d)
 	DB *a;
 	DB_TXN *b;
 	DBT *c;
-	int d;
+	u_int32_t d;
 {
 	COMPQUIET(a, NULL);
 	COMPQUIET(b, NULL);
@@ -147,7 +148,7 @@ __db_egp(a, b, c, d, e)
 	DB *a;
 	DB_TXN *b;
 	DBT *c, *d;
-	int e;
+	u_int32_t e;
 {
 	COMPQUIET(a, NULL);
 	COMPQUIET(b, NULL);
@@ -167,7 +168,7 @@ __db_estat(a, b, c, d)
 	DB *a;
 	void *b;
 	void *(*c) __P((size_t));
-	int d;
+	u_int32_t d;
 {
 	COMPQUIET(a, NULL);
 	COMPQUIET(b, NULL);
@@ -184,7 +185,7 @@ __db_estat(a, b, c, d)
 static int
 __db_esync(a, b)
 	DB *a;
-	int b;
+	u_int32_t b;
 {
 	COMPQUIET(a, NULL);
 	COMPQUIET(b, 0);
@@ -208,6 +209,10 @@ __db_panic(dbp)
 	 *
 	 * We should call mpool and have it shut down the file, so we get
 	 * other processes sharing this file as well.
+	 *
+	 *	Chaos reigns within.
+	 *	Reflect, repent, and reboot.
+	 *	Order shall return.
 	 */
 	dbp->cursor = __db_ecursor;
 	dbp->del = __db_edel;
@@ -235,13 +240,13 @@ __db_panic(dbp)
  * __db_fchk --
  *	General flags checking routine.
  *
- * PUBLIC: int __db_fchk __P((DB_ENV *, const char *, int, int));
+ * PUBLIC: int __db_fchk __P((DB_ENV *, const char *, u_int32_t, u_int32_t));
  */
 int
 __db_fchk(dbenv, name, flags, ok_flags)
 	DB_ENV *dbenv;
 	const char *name;
-	int flags, ok_flags;
+	u_int32_t flags, ok_flags;
 {
 	DB_CHECK_FLAGS(dbenv, name, flags, ok_flags);
 	return (0);
@@ -251,13 +256,14 @@ __db_fchk(dbenv, name, flags, ok_flags)
  * __db_fcchk --
  *	General combination flags checking routine.
  *
- * PUBLIC: int __db_fcchk __P((DB_ENV *, const char *, int, int, int));
+ * PUBLIC: int __db_fcchk
+ * PUBLIC:    __P((DB_ENV *, const char *, u_int32_t, u_int32_t, u_int32_t));
  */
 int
 __db_fcchk(dbenv, name, flags, flag1, flag2)
 	DB_ENV *dbenv;
 	const char *name;
-	int flags, flag1, flag2;
+	u_int32_t flags, flag1, flag2;
 {
 	DB_CHECK_FCOMBO(dbenv, name, flags, flag1, flag2);
 	return (0);
@@ -267,12 +273,13 @@ __db_fcchk(dbenv, name, flags, flag1, flag2)
  * __db_cdelchk --
  *	Common cursor delete argument checking routine.
  *
- * PUBLIC: int __db_cdelchk __P((const DB *, int, int, int));
+ * PUBLIC: int __db_cdelchk __P((const DB *, u_int32_t, int, int));
  */
 int
 __db_cdelchk(dbp, flags, isrdonly, isvalid)
 	const DB *dbp;
-	int flags, isrdonly, isvalid;
+	u_int32_t flags;
+	int isrdonly, isvalid;
 {
 	/* Check for changes to a read-only tree. */
 	if (isrdonly)
@@ -292,17 +299,18 @@ __db_cdelchk(dbp, flags, isrdonly, isvalid)
  * __db_cgetchk --
  *	Common cursor get argument checking routine.
  *
- * PUBLIC: int __db_cgetchk __P((const DB *, DBT *, DBT *, int, int));
+ * PUBLIC: int __db_cgetchk __P((const DB *, DBT *, DBT *, u_int32_t, int));
  */
 int
 __db_cgetchk(dbp, key, data, flags, isvalid)
 	const DB *dbp;
 	DBT *key, *data;
-	int flags, isvalid;
+	u_int32_t flags;
+	int isvalid;
 {
-	int check_key;
+	int key_einval, key_flags;
 
-	check_key = 0;
+	key_flags = key_einval = 0;
 
 	/* Check for invalid dbc->c_get() function flags. */
 	switch (flags) {
@@ -311,10 +319,13 @@ __db_cgetchk(dbp, key, data, flags, isvalid)
 	case DB_LAST:
 	case DB_NEXT:
 	case DB_PREV:
+		key_flags = 1;
+		break;
 	case DB_SET_RANGE:
-		check_key = 1;
+		key_einval = key_flags = 1;
 		break;
 	case DB_SET:
+		key_einval = 1;
 		break;
 	case DB_GET_RECNO:
 		if (!F_ISSET(dbp, DB_BT_RECNUM))
@@ -323,14 +334,14 @@ __db_cgetchk(dbp, key, data, flags, isvalid)
 	case DB_SET_RECNO:
 		if (!F_ISSET(dbp, DB_BT_RECNUM))
 			goto err;
-		check_key = 1;
+		key_einval = key_flags = 1;
 		break;
 	default:
 err:		return (__db_ferr(dbp->dbenv, "c_get", 0));
 	}
 
 	/* Check for invalid key/data flags. */
-	if (check_key)
+	if (key_flags)
 		DB_CHECK_FLAGS(dbp->dbenv, "key", key->flags,
 		    DB_DBT_MALLOC | DB_DBT_USERMEM | DB_DBT_PARTIAL);
 	DB_CHECK_FLAGS(dbp->dbenv, "data", data->flags,
@@ -340,11 +351,15 @@ err:		return (__db_ferr(dbp->dbenv, "c_get", 0));
 	if (F_ISSET(dbp, DB_AM_THREAD)) {
 		if (!F_ISSET(data, DB_DBT_USERMEM | DB_DBT_MALLOC))
 			return (__db_ferr(dbp->dbenv, "threaded data", 1));
-		if (check_key &&
+		if (key_flags &&
 		    !F_ISSET(key, DB_DBT_USERMEM | DB_DBT_MALLOC))
 			return (__db_ferr(dbp->dbenv, "threaded key", 1));
 	}
 
+	/* Check for missing keys. */
+	if (key_einval && (key->data == NULL || key->size == 0))
+		return (__db_keyempty(dbp->dbenv));
+
 	/*
 	 * The cursor must be initialized for DB_CURRENT, return -1 for an
 	 * invalid cursor, otherwise 0.
@@ -357,23 +372,24 @@ err:		return (__db_ferr(dbp->dbenv, "c_get", 0));
  *	Common cursor put argument checking routine.
  *
  * PUBLIC: int __db_cputchk __P((const DB *,
- * PUBLIC:    const DBT *, DBT *, int, int, int));
+ * PUBLIC:    const DBT *, DBT *, u_int32_t, int, int));
  */
 int
 __db_cputchk(dbp, key, data, flags, isrdonly, isvalid)
 	const DB *dbp;
 	const DBT *key;
 	DBT *data;
-	int flags, isrdonly, isvalid;
+	u_int32_t flags;
+	int isrdonly, isvalid;
 {
-	int check_key;
+	int key_einval, key_flags;
 
 	/* Check for changes to a read-only tree. */
 	if (isrdonly)
 		return (__db_rdonly(dbp->dbenv, "c_put"));
 
 	/* Check for invalid dbc->c_put() function flags. */
-	check_key = 0;
+	key_einval = key_flags = 0;
 	switch (flags) {
 	case DB_AFTER:
 	case DB_BEFORE:
@@ -388,19 +404,23 @@ __db_cputchk(dbp, key, data, flags, isrdonly, isvalid)
 	case DB_KEYLAST:
 		if (dbp->type == DB_RECNO)
 			goto err;
-		check_key = 1;
+		key_einval = key_flags = 1;
 		break;
 	default:
 err:		return (__db_ferr(dbp->dbenv, "c_put", 0));
 	}
 
 	/* Check for invalid key/data flags. */
-	if (check_key)
+	if (key_flags)
 		DB_CHECK_FLAGS(dbp->dbenv, "key", key->flags,
 		    DB_DBT_MALLOC | DB_DBT_USERMEM | DB_DBT_PARTIAL);
 	DB_CHECK_FLAGS(dbp->dbenv, "data", data->flags,
 	    DB_DBT_MALLOC | DB_DBT_USERMEM | DB_DBT_PARTIAL);
 
+	/* Check for missing keys. */
+	if (key_einval && (key->data == NULL || key->size == 0))
+		return (__db_keyempty(dbp->dbenv));
+
 	/*
 	 * The cursor must be initialized for anything other than DB_KEYFIRST
 	 * and DB_KEYLAST, return -1 for an invalid cursor, otherwise 0.
@@ -413,12 +433,14 @@ err:		return (__db_ferr(dbp->dbenv, "c_put", 0));
  * __db_delchk --
  *	Common delete argument checking routine.
  *
- * PUBLIC: int __db_delchk __P((const DB *, int, int));
+ * PUBLIC: int __db_delchk __P((const DB *, DBT *, u_int32_t, int));
  */
 int
-__db_delchk(dbp, flags, isrdonly)
+__db_delchk(dbp, key, flags, isrdonly)
 	const DB *dbp;
-	int flags, isrdonly;
+	DBT *key;
+	u_int32_t flags;
+	int isrdonly;
 {
 	/* Check for changes to a read-only tree. */
 	if (isrdonly)
@@ -427,6 +449,10 @@ __db_delchk(dbp, flags, isrdonly)
 	/* Check for invalid db->del() function flags. */
 	DB_CHECK_FLAGS(dbp->dbenv, "delete", flags, 0);
 
+	/* Check for missing keys. */
+	if (key->data == NULL || key->size == 0)
+		return (__db_keyempty(dbp->dbenv));
+
 	return (0);
 }
 
@@ -434,14 +460,14 @@ __db_delchk(dbp, flags, isrdonly)
  * __db_getchk --
  *	Common get argument checking routine.
  *
- * PUBLIC: int __db_getchk __P((const DB *, const DBT *, DBT *, int));
+ * PUBLIC: int __db_getchk __P((const DB *, const DBT *, DBT *, u_int32_t));
  */
 int
 __db_getchk(dbp, key, data, flags)
 	const DB *dbp;
 	const DBT *key;
 	DBT *data;
-	int flags;
+	u_int32_t flags;
 {
 	/* Check for invalid db->get() function flags. */
 	DB_CHECK_FLAGS(dbp->dbenv,
@@ -457,6 +483,10 @@ __db_getchk(dbp, key, data, flags)
 	    !F_ISSET(data, DB_DBT_MALLOC | DB_DBT_USERMEM))
 		return (__db_ferr(dbp->dbenv, "threaded data", 1));
 
+	/* Check for missing keys. */
+	if (key->data == NULL || key->size == 0)
+		return (__db_keyempty(dbp->dbenv));
+
 	return (0);
 }
 
@@ -464,14 +494,16 @@ __db_getchk(dbp, key, data, flags)
  * __db_putchk --
  *	Common put argument checking routine.
  *
- * PUBLIC: int __db_putchk __P((const DB *, DBT *, const DBT *, int, int, int));
+ * PUBLIC: int __db_putchk
+ * PUBLIC:    __P((const DB *, DBT *, const DBT *, u_int32_t, int, int));
  */
 int
 __db_putchk(dbp, key, data, flags, isrdonly, isdup)
 	const DB *dbp;
 	DBT *key;
 	const DBT *data;
-	int flags, isrdonly, isdup;
+	u_int32_t flags;
+	int isrdonly, isdup;
 {
 	/* Check for changes to a read-only tree. */
 	if (isrdonly)
@@ -488,12 +520,17 @@ __db_putchk(dbp, key, data, flags, isrdonly, isdup)
 	DB_CHECK_FCOMBO(dbp->dbenv,
 	    "data", data->flags, DB_DBT_MALLOC, DB_DBT_USERMEM);
 
+	/* Check for missing keys. */
+	if (key->data == NULL || key->size == 0)
+		return (__db_keyempty(dbp->dbenv));
+
 	/* Check for partial puts in the presence of duplicates. */
 	if (isdup && F_ISSET(data, DB_DBT_PARTIAL)) {
 		__db_err(dbp->dbenv,
 "a partial put in the presence of duplicates requires a cursor operation");
 		return (EINVAL);
 	}
+
 	return (0);
 }
 
@@ -501,12 +538,12 @@ __db_putchk(dbp, key, data, flags, isrdonly, isdup)
  * __db_statchk --
  *	Common stat argument checking routine.
  *
- * PUBLIC: int __db_statchk __P((const DB *, int));
+ * PUBLIC: int __db_statchk __P((const DB *, u_int32_t));
  */
 int
 __db_statchk(dbp, flags)
 	const DB *dbp;
-	int flags;
+	u_int32_t flags;
 {
 	/* Check for invalid db->stat() function flags. */
 	DB_CHECK_FLAGS(dbp->dbenv, "stat", flags, DB_RECORDCOUNT);
@@ -522,12 +559,12 @@ __db_statchk(dbp, flags)
  * __db_syncchk --
  *	Common sync argument checking routine.
  *
- * PUBLIC: int __db_syncchk __P((const DB *, int));
+ * PUBLIC: int __db_syncchk __P((const DB *, u_int32_t));
  */
 int
 __db_syncchk(dbp, flags)
 	const DB *dbp;
-	int flags;
+	u_int32_t flags;
 {
 	/* Check for invalid db->sync() function flags. */
 	DB_CHECK_FLAGS(dbp->dbenv, "sync", flags, 0);
@@ -542,13 +579,13 @@ __db_syncchk(dbp, flags)
  * PUBLIC: int __db_ferr __P((const DB_ENV *, const char *, int));
  */
 int
-__db_ferr(dbenv, name, combo)
+__db_ferr(dbenv, name, iscombo)
 	const DB_ENV *dbenv;
 	const char *name;
-	int combo;
+	int iscombo;
 {
 	__db_err(dbenv, "illegal flag %sspecified to %s",
-	    combo ? "combination " : "", name);
+	    iscombo ? "combination " : "", name);
 	return (EINVAL);
 }
 
@@ -564,3 +601,15 @@ __db_rdonly(dbenv, name)
 	__db_err(dbenv, "%s: attempt to modify a read-only tree", name);
 	return (EACCES);
 }
+
+/*
+ * __db_keyempty --
+ *	Common missing or empty key value message.
+ */
+static int
+__db_keyempty(dbenv)
+	const DB_ENV *dbenv;
+{
+	__db_err(dbenv, "missing or empty key value specified");
+	return (EINVAL);
+}
diff --git a/db2/common/db_log2.c b/db2/common/db_log2.c
index 9af01116f6..d6b14f540b 100644
--- a/db2/common/db_log2.c
+++ b/db2/common/db_log2.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 /*
@@ -43,7 +43,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)db_log2.c	10.3 (Sleepycat) 6/21/97";
+static const char sccsid[] = "@(#)db_log2.c	10.5 (Sleepycat) 4/26/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -63,6 +63,7 @@ __db_log2(num)
 	u_int32_t i, limit;
 
 	limit = 1;
-	for (i = 0; limit < num; limit = limit << 1, i++);
+	for (i = 0; limit < num; limit = limit << 1, i++)
+		;
 	return (i);
 }
diff --git a/db2/common/db_region.c b/db2/common/db_region.c
index 02d939e3e6..6d15f7f092 100644
--- a/db2/common/db_region.c
+++ b/db2/common/db_region.c
@@ -1,59 +1,20 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
-/*
- * Copyright (c) 1995, 1996
- *	The President and Fellows of Harvard University.  All rights reserved.
- *
- * This code is derived from software contributed to Harvard by
- * Margo Seltzer.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- *    must display the following acknowledgement:
- *	This product includes software developed by the University of
- *	California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
 
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)db_region.c	10.21 (Sleepycat) 1/16/98";
+static const char sccsid[] = "@(#)db_region.c	10.46 (Sleepycat) 5/26/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
 #include <sys/types.h>
-#include <sys/stat.h>
 
 #include <errno.h>
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
 #endif
@@ -61,548 +22,840 @@ static const char sccsid[] = "@(#)db_region.c	10.21 (Sleepycat) 1/16/98";
 #include "db_int.h"
 #include "common_ext.h"
 
-static int __db_rmap __P((DB_ENV *, int, size_t, void *));
+static int __db_growregion __P((REGINFO *, size_t));
 
 /*
- * __db_rcreate --
- *
- * Common interface for creating a shared region.  Handles synchronization
- * across multiple processes.
- *
- * The dbenv contains the environment for this process, including naming
- * information.  The path argument represents the parameters passed to
- * the open routines and may be either a file or a directory.  If it is
- * a directory, it must exist.  If it is a file, then the file parameter
- * must be NULL, otherwise, file is the name to be created inside the
- * directory path.
- *
- * The function returns a pointer to the shared region that has been mapped
- * into memory, NULL on error.
+ * __db_rattach --
+ *	Optionally create and attach to a shared memory region.
  *
- * PUBLIC: int __db_rcreate __P((DB_ENV *, APPNAME,
- * PUBLIC:    const char *, const char *, int, size_t, int, int *, void *));
+ * PUBLIC: int __db_rattach __P((REGINFO *));
  */
 int
-__db_rcreate(dbenv, appname, path, file, mode, size, oflags, fdp, retp)
-	DB_ENV *dbenv;
-	APPNAME appname;
-	const char *path, *file;
-	int mode, oflags, *fdp;
-	size_t size;
-	void *retp;
+__db_rattach(infop)
+	REGINFO *infop;
 {
-	RLAYOUT *rp;
-	int fd, ret;
-	char *name;
+	RLAYOUT *rlp, rl;
+	size_t grow_region, size;
+	ssize_t nr, nw;
+	u_int32_t flags, mbytes, bytes;
+	u_int8_t *p;
+	int malloc_possible, ret, retry_cnt;
+
+	grow_region = 0;
+	malloc_possible = 1;
+	ret = retry_cnt = 0;
+
+	/* Round off the requested size to the next page boundary. */
+	DB_ROUNDOFF(infop->size);
+
+	/* Some architectures have hard limits on the maximum region size. */
+#ifdef DB_REGIONSIZE_MAX
+	if (infop->size > DB_REGIONSIZE_MAX) {
+		__db_err(infop->dbenv, "__db_rattach: cache size too large");
+		return (EINVAL);
+	}
+#endif
 
-	fd = -1;
-	rp = NULL;
+	/* Initialize the return information in the REGINFO structure. */
+loop:	infop->addr = NULL;
+	infop->fd = -1;
+	infop->segid = INVALID_SEGID;
+	if (infop->name != NULL) {
+		FREES(infop->name);
+		infop->name = NULL;
+	}
+	F_CLR(infop, REGION_CANGROW | REGION_CREATED);
 
+#ifndef HAVE_SPINLOCKS
 	/*
-	 * Get the filename -- note, if it's a temporary file, it will
-	 * be created by the underlying temporary file creation code,
-	 * so we have to check the file descriptor to be sure it's an
-	 * error.
+	 * XXX
+	 * Lacking spinlocks, we must have a file descriptor for fcntl(2)
+	 * locking, which implies using mmap(2) to map in a regular file.
+	 * (Theoretically, we could probably get a file descriptor to lock
+	 * other types of shared regions, but I don't see any reason to
+	 * bother.)
 	 */
-	if ((ret = __db_appname(dbenv, appname, path, file, &fd, &name)) != 0)
-		return (ret);
+	malloc_possible = 0;
+#endif
 
+#ifdef __hppa
 	/*
-	 * Now open the file. We need to make sure that multiple processes
-	 * that attempt to create the region at the same time are properly
-	 * ordered, so we open it DB_EXCL and DB_CREATE so two simultaneous
-	 * attempts to create the region will return failure in one of the
-	 * attempts.
+	 * XXX
+	 * HP-UX won't permit mutexes to live in anything but shared memory.
+	 * Instantiate a shared region file on that architecture, regardless.
 	 */
-	oflags |= DB_CREATE | DB_EXCL;
-	if (fd == -1 &&
-	    (ret = __db_open(name, oflags, oflags, mode, &fd)) != 0) {
-		if (ret != EEXIST)
-			__db_err(dbenv,
-			    "region create: %s: %s", name, strerror(ret));
-		goto err;
+	malloc_possible = 0;
+#endif
+	/*
+	 * If a region is truly private, malloc the memory.  That's faster
+	 * than either anonymous memory or a shared file.
+	 */
+	if (malloc_possible && F_ISSET(infop, REGION_PRIVATE)) {
+		if ((infop->addr = __db_malloc(infop->size)) == NULL)
+			return (ENOMEM);
+
+		/*
+		 * It's sometimes significantly faster to page-fault in all
+		 * of the region's pages before we run the application, as
+		 * we can see fairly nasty side-effects when we page-fault
+		 * while holding various locks, i.e., the lock takes a long
+		 * time, and other threads convoy behind the lock holder.
+		 */
+		if (DB_GLOBAL(db_region_init))
+			for (p = infop->addr;
+			    p < (u_int8_t *)infop->addr + infop->size;
+			    p += DB_VMPAGESIZE)
+				p[0] = '\0';
+
+		F_SET(infop, REGION_CREATED | REGION_MALLOC);
+		goto region_init;
 	}
-	*fdp = fd;
 
-	/* Grow the region to the correct size. */
-	if ((ret = __db_rgrow(dbenv, fd, size)) != 0)
-		goto err;
+	/*
+	 * Get the name of the region (creating the file if a temporary file
+	 * is being used).  The dbenv contains the current DB environment,
+	 * including naming information.  The path argument may be a file or
+	 * a directory.  If path is a directory, it must exist and file is the
+	 * file name to be created inside the directory.  If path is a file,
+	 * then file must be NULL.
+	 */
+	if ((ret = __db_appname(infop->dbenv, infop->appname, infop->path,
+	    infop->file, infop->dbflags, &infop->fd, &infop->name)) != 0)
+		return (ret);
+	if (infop->fd != -1)
+		F_SET(infop, REGION_CREATED);
 
-	/* Map the region in. */
-	if ((ret = __db_rmap(dbenv, fd, size, &rp)) != 0)
-		goto err;
+	/*
+	 * Try to create the file, if we have authority.  We have to make sure
+	 * that multiple threads/processes attempting to simultaneously create
+	 * the region are properly ordered, so we open it using DB_CREATE and
+	 * DB_EXCL, so two attempts to create the region will return failure in
+	 * one.
+	 */
+	if (infop->fd == -1 && infop->dbflags & DB_CREATE) {
+		flags = infop->dbflags;
+		LF_SET(DB_EXCL);
+		if ((ret = __db_open(infop->name,
+		    flags, flags, infop->mode, &infop->fd)) == 0)
+			F_SET(infop, REGION_CREATED);
+		else
+			if (ret != EEXIST)
+				goto errmsg;
+	}
 
-	/* Initialize the region. */
-	if ((ret = __db_rinit(dbenv, rp, fd, size, 1)) != 0)
-		goto err;
+	/* If we couldn't create the file, try and open it. */
+	if (infop->fd == -1) {
+		flags = infop->dbflags;
+		LF_CLR(DB_CREATE | DB_EXCL);
+		if ((ret = __db_open(infop->name,
+		    flags, flags, infop->mode, &infop->fd)) != 0)
+			goto errmsg;
+	}
 
-	if (name != NULL)
-		FREES(name);
+	/*
+	 * There are three cases we support:
+	 *    1. Named anonymous memory (shmget(2)).
+	 *    2. Unnamed anonymous memory (mmap(2): MAP_ANON/MAP_ANONYMOUS).
+	 *    3. Memory backed by a regular file (mmap(2)).
+	 *
+	 * We instantiate a backing file in all cases, which contains at least
+	 * the RLAYOUT structure, and in case #3, contains the actual region.
+	 * This is necessary for a couple of reasons:
+	 *
+	 * First, the mpool region uses temporary files to name regions, and
+	 * since you may have multiple regions in the same directory, we need
+	 * a filesystem name to ensure that they don't collide.
+	 *
+	 * Second, applications are allowed to forcibly remove regions, even
+	 * if they don't know anything about them other than the name.  If a
+	 * region is backed by anonymous memory, there has to be some way for
+	 * the application to find out that information, and, in some cases,
+	 * determine ID information for the anonymous memory.
+	 */
+	if (F_ISSET(infop, REGION_CREATED)) {
+		/*
+		 * If we're using anonymous memory to back this region, set
+		 * the flag.
+		 */
+		if (DB_GLOBAL(db_region_anon))
+			F_SET(infop, REGION_ANONYMOUS);
 
-	*(void **)retp = rp;
-	return (0);
+		/*
+		 * If we're using a regular file to back a region we created,
+		 * grow it to the specified size.
+		 */
+		if (!DB_GLOBAL(db_region_anon) &&
+		    (ret = __db_growregion(infop, infop->size)) != 0)
+			goto err;
+	} else {
+		/*
+		 * If we're joining a region, figure out what it looks like.
+		 *
+		 * XXX
+		 * We have to figure out if the file is a regular file backing
+		 * a region that we want to map into our address space, or a
+		 * file with the information we need to find a shared anonymous
+		 * region that we want to map into our address space.
+		 *
+		 * All this noise is because some systems don't have a coherent
+		 * VM and buffer cache, and worse, if you mix operations on the
+		 * VM and buffer cache, half the time you hang the system.
+		 *
+		 * There are two possibilities.  If the file is the size of an
+		 * RLAYOUT structure, then we know that the real region is in
+		 * shared memory, because otherwise it would be bigger.  (As
+		 * the RLAYOUT structure size is smaller than a disk sector,
+		 * the only way it can be this size is if deliberately written
+		 * that way.)  In which case, retrieve the information we need
+		 * from the RLAYOUT structure and use it to acquire the shared
+		 * memory.
+		 *
+		 * If the file is larger than an RLAYOUT structure, then
+		 * the file is backing the shared memory region, and we use
+		 * the current size of the file without reading any information
+		 * from the file itself so that we don't confuse the VM.
+		 *
+		 * And yes, this makes me want to take somebody and kill them,
+		 * but I can't think of any other solution.
+		 */
+		if ((ret = __db_ioinfo(infop->name,
+		    infop->fd, &mbytes, &bytes, NULL)) != 0)
+			goto errmsg;
+		size = mbytes * MEGABYTE + bytes;
+
+		if (size <= sizeof(RLAYOUT)) {
+			/*
+			 * If the size is too small, the read fails or the
+			 * valid flag is incorrect, assume it's because the
+			 * RLAYOUT information hasn't been written out yet,
+			 * and retry.
+			 */
+			if (size < sizeof(RLAYOUT))
+				goto retry;
+			if ((ret =
+			    __db_read(infop->fd, &rl, sizeof(rl), &nr)) != 0)
+				goto retry;
+			if (rl.valid != DB_REGIONMAGIC)
+				goto retry;
+
+			/* Copy the size, memory id and characteristics. */
+			size = rl.size;
+			infop->segid = rl.segid;
+			if (F_ISSET(&rl, REGION_ANONYMOUS))
+				F_SET(infop, REGION_ANONYMOUS);
+		}
 
-err:	if (fd != -1) {
-		if (rp != NULL)
-			(void)__db_unmap(rp, rp->size);
-		(void)__db_unlink(name);
-		(void)__db_close(fd);
+		/*
+		 * If the region is larger than we think, that's okay, use the
+		 * current size.  If it's smaller than we think, and we were
+		 * just using the default size, that's okay, use the current
+		 * size.  If it's smaller than we think and we really care,
+		 * save the size and we'll catch that further down -- we can't
+		 * correct it here because we have to have a lock to grow the
+		 * region.
+		 */
+		if (infop->size > size && !F_ISSET(infop, REGION_SIZEDEF))
+			grow_region = infop->size;
+		infop->size = size;
 	}
-	if (name != NULL)
-		FREES(name);
-	return (ret);
-}
-
-/*
- * __db_rinit --
- *	Initialize the region.
- *
- * PUBLIC: int __db_rinit __P((DB_ENV *, RLAYOUT *, int, size_t, int));
- */
-int
-__db_rinit(dbenv, rp, fd, size, lock_region)
-	DB_ENV *dbenv;
-	RLAYOUT *rp;
-	size_t size;
-	int fd, lock_region;
-{
-	int ret;
 
-	COMPQUIET(dbenv, NULL);
+	/*
+	 * Map the region into our address space.  If we're creating it, the
+	 * underlying routines will make it the right size.
+	 *
+	 * There are at least two cases where we can "reasonably" fail when
+	 * we attempt to map in the region.  On Windows/95, closing the last
+	 * reference to a region causes it to be zeroed out.  On UNIX, when
+	 * using the shmget(2) interfaces, the region will no longer exist
+	 * if the system was rebooted.  In these cases, the underlying map call
+	 * returns EAGAIN, and we *remove* our file and try again.  There are
+	 * obvious races in doing this, but it should eventually settle down
+	 * to a winner and then things should proceed normally.
+	 */
+	if ((ret = __db_mapregion(infop->name, infop)) != 0)
+		if (ret == EAGAIN) {
+			/*
+			 * Pretend we created the region even if we didn't so
+			 * that our error processing unlinks it.
+			 */
+			F_SET(infop, REGION_CREATED);
+			ret = 0;
+			goto retry;
+		} else
+			goto err;
 
+region_init:
 	/*
-	 * Initialize the common information.
+	 * Initialize the common region information.
 	 *
 	 * !!!
 	 * We have to order the region creates so that two processes don't try
-	 * to simultaneously create the region and so that processes that are
-	 * joining the region never see inconsistent data.  We'd like to play
-	 * file permissions games, but we can't because WNT filesystems won't
-	 * open a file mode 0.
-	 *
-	 * If the lock_region flag is set, the process creating the region
-	 * acquires the lock before the setting the version number.  Any
-	 * process joining the region checks the version number before
-	 * attempting to acquire the lock.  (The lock_region flag may not be
-	 * set -- the mpool code sometimes malloc's private regions but still
-	 * needs to initialize them, specifically, the mutex for threads.)
+	 * to simultaneously create the region.  This is handled by using the
+	 * DB_CREATE and DB_EXCL flags when we create the "backing" region file.
 	 *
-	 * We have to check the version number first, because if the version
-	 * number has not been written, it's possible that the mutex has not
-	 * been initialized in which case an attempt to get it could lead to
-	 * random behavior.  If the version number isn't there (the file size
-	 * is too small) or it's 0, we know that the region is being created.
-	 *
-	 * We also make sure to check the return of __db_mutex_lock() here,
-	 * even though we don't usually check elsewhere.  This is the first
-	 * lock we attempt to acquire, and if it fails we have to know.  (It
-	 * can fail -- SunOS, using fcntl(2) for locking, with an in-memory
-	 * filesystem specified as the database home.)
+	 * We also have to order region joins so that processes joining regions
+	 * never see inconsistent data.  We'd like to play permissions games
+	 * with the backing file, but we can't because WNT filesystems won't
+	 * open a file mode 0.
 	 */
-	__db_mutex_init(&rp->lock, MUTEX_LOCK_OFFSET(rp, &rp->lock));
-	if (lock_region && (ret = __db_mutex_lock(&rp->lock, fd)) != 0)
-		return (ret);
-
-	rp->refcnt = 1;
-	rp->size = size;
-	rp->flags = 0;
-	db_version(&rp->majver, &rp->minver, &rp->patch);
+	rlp = (RLAYOUT *)infop->addr;
+	if (F_ISSET(infop, REGION_CREATED)) {
+		/*
+		 * The process creating the region acquires a lock before it
+		 * sets the valid flag.  Any processes joining the region will
+		 * check the valid flag before acquiring the lock.
+		 *
+		 * Check the return of __db_mutex_init() and __db_mutex_lock(),
+		 * even though we don't usually check elsewhere.  This is the
+		 * first lock we initialize and acquire, and we have to know if
+		 * it fails.  (It CAN fail, e.g., SunOS, when using fcntl(2)
+		 * for locking, with an in-memory filesystem specified as the
+		 * database home.)
+		 */
+		if ((ret = __db_mutex_init(&rlp->lock,
+		    MUTEX_LOCK_OFFSET(rlp, &rlp->lock))) != 0 ||
+		    (ret = __db_mutex_lock(&rlp->lock, infop->fd)) != 0)
+			goto err;
 
-	return (0);
-}
+		/* Initialize the remaining region information. */
+		rlp->refcnt = 1;
+		rlp->size = infop->size;
+		db_version(&rlp->majver, &rlp->minver, &rlp->patch);
+		rlp->segid = infop->segid;
+		rlp->flags = 0;
+		if (F_ISSET(infop, REGION_ANONYMOUS))
+			F_SET(rlp, REGION_ANONYMOUS);
 
-/*
- * __db_ropen --
- *	Construct the name of a file, open it and map it in.
- *
- * PUBLIC: int __db_ropen __P((DB_ENV *,
- * PUBLIC:    APPNAME, const char *, const char *, int, int *, void *));
- */
-int
-__db_ropen(dbenv, appname, path, file, flags, fdp, retp)
-	DB_ENV *dbenv;
-	APPNAME appname;
-	const char *path, *file;
-	int flags, *fdp;
-	void *retp;
-{
-	RLAYOUT *rp;
-	size_t size;
-	u_int32_t mbytes, bytes;
-	int fd, ret;
-	char *name;
+		/*
+		 * Fill in the valid field last -- use a magic number, memory
+		 * may not be zero-filled, and we want to minimize the chance
+		 * for collision.
+		 */
+		rlp->valid = DB_REGIONMAGIC;
 
-	fd = -1;
-	rp = NULL;
+		/*
+		 * If the region is anonymous, write the RLAYOUT information
+		 * into the backing file so that future region join and unlink
+		 * calls can find it.
+		 *
+		 * XXX
+		 * We MUST do the seek before we do the write.  On Win95, while
+		 * closing the last reference to an anonymous shared region
+		 * doesn't discard the region, it does zero it out.  So, the
+		 * REGION_CREATED may be set, but the file may have already
+		 * been written and the file descriptor may be at the end of
+		 * the file.
+		 */
+		if (F_ISSET(infop, REGION_ANONYMOUS)) {
+			if ((ret = __db_seek(infop->fd, 0, 0, 0, 0, 0)) != 0)
+				goto err;
+			if ((ret =
+			    __db_write(infop->fd, rlp, sizeof(*rlp), &nw)) != 0)
+				goto err;
+		}
+	} else {
+		/*
+		 * Check the valid flag to ensure the region is initialized.
+		 * If the valid flag has not been set, the mutex may not have
+		 * been initialized, and an attempt to get it could lead to
+		 * random behavior.
+		 */
+		if (rlp->valid != DB_REGIONMAGIC)
+			goto retry;
 
-	/* Get the filename. */
-	if ((ret = __db_appname(dbenv, appname, path, file, NULL, &name)) != 0)
-		return (ret);
+		/* Get the region lock. */
+		(void)__db_mutex_lock(&rlp->lock, infop->fd);
 
-	/* Open the file. */
-	if ((ret = __db_open(name, flags, DB_MUTEXDEBUG, 0, &fd)) != 0) {
-		__db_err(dbenv, "region open: %s: %s", name, strerror(ret));
-		goto err2;
-	}
+		/*
+		 * We now own the region.  There are a couple of things that
+		 * may have gone wrong, however.
+		 *
+		 * Problem #1: while we were waiting for the lock, the region
+		 * was deleted.  Detected by re-checking the valid flag, since
+		 * it's cleared by the delete region routines.
+		 */
+		if (rlp->valid != DB_REGIONMAGIC) {
+			(void)__db_mutex_unlock(&rlp->lock, infop->fd);
+			goto retry;
+		}
 
-	*fdp = fd;
+		/*
+		 * Problem #2: We want a bigger region than has previously been
+		 * created.  Detected by checking if the region is smaller than
+		 * our caller requested.  If it is, we grow the region, (which
+		 * does the detach and re-attach for us).
+		 */
+		if (grow_region != 0 &&
+		    (ret = __db_rgrow(infop, grow_region)) != 0) {
+			(void)__db_mutex_unlock(&rlp->lock, infop->fd);
+			goto err;
+		}
 
-	/*
-	 * Map the file in.  We have to do things in a strange order so that
-	 * we don't get into a situation where the file was just created and
-	 * isn't yet initialized.  See the comment in __db_rcreate() above.
-	 *
-	 * XXX
-	 * We'd like to test to see if the file is too big to mmap.  Since we
-	 * don't know what size or type off_t's or size_t's are, or the largest
-	 * unsigned integral type is, or what random insanity the local C
-	 * compiler will perpetrate, doing the comparison in a portable way is
-	 * flatly impossible.  Hope that mmap fails if the file is too large.
-	 *
-	 */
-	if ((ret = __db_ioinfo(name, fd, &mbytes, &bytes, NULL)) != 0) {
-		__db_err(dbenv, "%s: %s", name, strerror(ret));
-		goto err2;
-	}
-	size = mbytes * MEGABYTE + bytes;
+		/*
+		 * Problem #3: when we checked the size of the file, it was
+		 * still growing as part of creation.  Detected by the fact
+		 * that infop->size isn't the same size as the region.
+		 */
+		if (infop->size != rlp->size) {
+			(void)__db_mutex_unlock(&rlp->lock, infop->fd);
+			goto retry;
+		}
 
-	/* Check to make sure the first block has been written. */
-	if (size < sizeof(RLAYOUT)) {
-		ret = EAGAIN;
-		goto err2;
+		/* Increment the reference count. */
+		++rlp->refcnt;
 	}
 
-	/* Map in whatever is there. */
-	if ((ret = __db_rmap(dbenv, fd, size, &rp)) != 0)
-		goto err2;
+	/* Return the region in a locked condition. */
 
-	/*
-	 * Check to make sure the region has been initialized.  We can't just
-	 * grab the lock because the lock may not have been initialized yet.
-	 */
-	if (rp->majver == 0) {
-		ret = EAGAIN;
-		goto err2;
-	}
-
-	/* Get the region lock. */
-	if (!LF_ISSET(DB_MUTEXDEBUG))
-		(void)__db_mutex_lock(&rp->lock, fd);
+	if (0) {
+errmsg:		__db_err(infop->dbenv, "%s: %s", infop->name, strerror(ret));
 
-	/*
-	 * The file may have been half-written if we were descheduled between
-	 * getting the size of the file and checking the major version.  Check
-	 * to make sure we got the entire file.
-	 */
-	if ((ret = __db_ioinfo(name, fd, &mbytes, &bytes, NULL)) != 0) {
-		__db_err(dbenv, "%s: %s", name, strerror(ret));
-		goto err1;
-	}
-	if (size != mbytes * MEGABYTE + bytes) {
-		ret = EAGAIN;
-		goto err1;
-	}
+err:
+retry:		/* Discard the region. */
+		if (infop->addr != NULL) {
+			(void)__db_unmapregion(infop);
+			infop->addr = NULL;
+		}
 
-	/* The file may have just been deleted. */
-	if (F_ISSET(rp, DB_R_DELETED)) {
-		ret = EAGAIN;
-		goto err1;
-	}
+		/* Discard the backing file. */
+		if (infop->fd != -1) {
+			(void)__db_close(infop->fd);
+			infop->fd = -1;
 
-	/* Increment the reference count. */
-	++rp->refcnt;
+			if (F_ISSET(infop, REGION_CREATED))
+				(void)__db_unlink(infop->name);
+		}
 
-	/* Release the lock. */
-	if (!LF_ISSET(DB_MUTEXDEBUG))
-		(void)__db_mutex_unlock(&rp->lock, fd);
+		/* Discard the name. */
+		if (infop->name != NULL) {
+			FREES(infop->name);
+			infop->name = NULL;
+		}
 
-	FREES(name);
+		/*
+		 * If we had a temporary error, wait a few seconds and
+		 * try again.
+		 */
+		if (ret == 0) {
+			if (++retry_cnt <= 3) {
+				__db_sleep(retry_cnt * 2, 0);
+				goto loop;
+			}
+			ret = EAGAIN;
+		}
+	}
 
-	*(void **)retp = rp;
-	return (0);
+	/*
+	 * XXX
+	 * HP-UX won't permit mutexes to live in anything but shared memory.
+	 * Instantiate a shared region file on that architecture, regardless.
+	 *
+	 * XXX
+	 * There's a problem in cleaning this up on application exit, or on
+	 * application failure.  If an application opens a database without
+	 * an environment, we create a temporary backing mpool region for it.
+	 * That region is marked REGION_PRIVATE, but as HP-UX won't permit
+	 * mutexes to live in anything but shared memory, we instantiate a
+	 * real file plus a memory region of some form.  If the application
+	 * crashes, the necessary information to delete the backing file and
+	 * any system region (e.g., the shmget(2) segment ID) is no longer
+	 * available.  We can't completely fix the problem, but we try.
+	 *
+	 * The underlying UNIX __db_mapregion() code preferentially uses the
+	 * mmap(2) interface with the MAP_ANON/MAP_ANONYMOUS flags for regions
+	 * that are marked REGION_PRIVATE.  This means that we normally aren't
+	 * holding any system resources when we get here, in which case we can
+	 * delete the backing file.  This results in a short race, from the
+	 * __db_open() call above to here.
+	 *
+	 * If, for some reason, we are holding system resources when we get
+	 * here, we don't have any choice -- we can't delete the backing file
+	 * because we may need it to detach from the resources.  Set the
+	 * REGION_LASTDETACH flag, so that we do all necessary cleanup when
+	 * the application closes the region.
+	 */
+	if (ret == 0 &&		/* Failed attach: fd/name already discarded. */
+	    F_ISSET(infop, REGION_PRIVATE) && !F_ISSET(infop, REGION_MALLOC))
+		if (F_ISSET(infop, REGION_HOLDINGSYS))
+			F_SET(infop, REGION_LASTDETACH);
+		else {
+			F_SET(infop, REGION_REMOVED);
+			F_CLR(infop, REGION_CANGROW);
+			(void)__db_close(infop->fd);
+			(void)__db_unlink(infop->name);
+		}
 
-err1:	if (!LF_ISSET(DB_MUTEXDEBUG))
-		(void)__db_mutex_unlock(&rp->lock, fd);
-err2:	if (rp != NULL)
-		(void)__db_unmap(rp, rp->size);
-	if (fd != -1)
-		(void)__db_close(fd);
-	FREES(name);
 	return (ret);
 }
 
 /*
- * __db_rclose --
- *	Close a shared memory region.
+ * __db_rdetach --
+ *	De-attach from a shared memory region.
  *
- * PUBLIC: int __db_rclose __P((DB_ENV *, int, void *));
+ * PUBLIC: int __db_rdetach __P((REGINFO *));
  */
 int
-__db_rclose(dbenv, fd, ptr)
-	DB_ENV *dbenv;
-	int fd;
-	void *ptr;
+__db_rdetach(infop)
+	REGINFO *infop;
 {
-	RLAYOUT *rp;
-	int ret, t_ret;
-	const char *fail;
+	RLAYOUT *rlp;
+	int detach, ret, t_ret;
 
-	rp = ptr;
-	fail = NULL;
+	ret = 0;
 
-	/* Get the lock. */
-	if ((ret = __db_mutex_lock(&rp->lock, fd)) != 0) {
-		fail = "lock get";
-		goto err;
+	/*
+	 * If the region was removed when it was created, no further action
+	 * is required.
+	 */
+	if (F_ISSET(infop, REGION_REMOVED))
+		goto done;
+	/*
+	 * If the region was created in memory returned by malloc, the only
+	 * action required is freeing the memory.
+	 */
+	if (F_ISSET(infop, REGION_MALLOC)) {
+		__db_free(infop->addr);
+		goto done;
 	}
 
+	/* Otherwise, detach from the region and optionally delete it. */
+	rlp = infop->addr;
+
+	/* Get the lock. */
+	(void)__db_mutex_lock(&rlp->lock, infop->fd);
+
 	/* Decrement the reference count. */
-	--rp->refcnt;
+	if (rlp->refcnt == 0)
+		__db_err(infop->dbenv,
+		    "region rdetach: reference count went to zero!");
+	else
+		--rlp->refcnt;
+
+	/*
+	 * If we're going to remove the region, clear the valid flag so
+	 * that any region join that's blocked waiting for us will know
+	 * what happened.
+	 */
+	detach = 0;
+	if (F_ISSET(infop, REGION_LASTDETACH))
+		if (rlp->refcnt == 0) {
+			detach = 1;
+			rlp->valid = 0;
+		} else
+			ret = EBUSY;
 
 	/* Release the lock. */
-	if ((t_ret = __db_mutex_unlock(&rp->lock, fd)) != 0 && fail == NULL) {
-		ret = t_ret;
-		fail = "lock release";
-	}
+	(void)__db_mutex_unlock(&rlp->lock, infop->fd);
 
-	/* Discard the region. */
-	if ((t_ret = __db_unmap(ptr, rp->size)) != 0 && fail == NULL) {
-		ret = t_ret;
-		fail = "munmap";
-	}
+	/* Close the backing file descriptor. */
+	(void)__db_close(infop->fd);
+	infop->fd = -1;
 
-	if ((t_ret = __db_close(fd)) != 0 && fail == NULL) {
+	/* Discard our mapping of the region. */
+	if ((t_ret = __db_unmapregion(infop)) != 0 && ret == 0)
 		ret = t_ret;
-		fail = "close";
+
+	/* Discard the region itself, keeping the first real error code. */
+	if (detach) {
+		if ((t_ret =
+		    __db_unlinkregion(infop->name, infop)) != 0 && ret == 0)
+			ret = t_ret;
+		if ((t_ret = __db_unlink(infop->name)) != 0 && ret == 0)
+			ret = t_ret;
 	}
 
-	if (fail == NULL)
-		return (0);
+done:	/* Discard the name. */
+	if (infop->name != NULL) {
+		FREES(infop->name);
+		infop->name = NULL;
+	}
 
-err:	__db_err(dbenv, "region detach: %s: %s", fail, strerror(ret));
 	return (ret);
 }
 
 /*
  * __db_runlink --
- *	Remove a shared memory region.
+ *	Remove a region.
  *
- * PUBLIC: int __db_runlink __P((DB_ENV *,
- * PUBLIC:    APPNAME, const char *, const char *, int));
+ * PUBLIC: int __db_runlink __P((REGINFO *, int));
  */
 int
-__db_runlink(dbenv, appname, path, file, force)
-	DB_ENV *dbenv;
-	APPNAME appname;
-	const char *path, *file;
+__db_runlink(infop, force)
+	REGINFO *infop;
 	int force;
 {
-	RLAYOUT *rp;
-	int cnt, fd, ret, t_ret;
+	RLAYOUT rl, *rlp;
+	size_t size;
+	ssize_t nr;
+	u_int32_t mbytes, bytes;
+	int fd, ret, t_ret;
 	char *name;
 
-	rp = NULL;
+	/*
+	 * XXX
+	 * We assume that we've created a new REGINFO structure for this
+	 * call, not used one that was already initialized.  Regardless,
+	 * if anyone is planning to use it after we're done, they're going
+	 * to be sorely disappointed.
+	 *
+	 * If force isn't set, we attach to the region, set a flag to delete
+	 * the region on last close, and let the region delete code do the
+	 * work.
+	 */
+	if (!force) {
+		if ((ret = __db_rattach(infop)) != 0)
+			return (ret);
 
-	/* Get the filename. */
-	if ((ret = __db_appname(dbenv, appname, path, file, NULL, &name)) != 0)
-		return (ret);
+		rlp = (RLAYOUT *)infop->addr;
+		(void)__db_mutex_unlock(&rlp->lock, infop->fd);
 
-	/* If the file doesn't exist, we're done. */
-	if (__db_exists(name, NULL))
-		goto done;
+		F_SET(infop, REGION_LASTDETACH);
+
+		return (__db_rdetach(infop));
+	}
 
 	/*
-	 * If we're called with a force flag, try and unlink the file.  This
-	 * may not succeed if the file is currently open, but there's nothing
-	 * we can do about that.  There is a race condition between the check
-	 * for existence above and the actual unlink.  If someone else snuck
-	 * in and removed it before we do the remove, then we might get an
-	 * ENOENT error.  If we get the ENOENT, we treat it as success, just
-	 * as we do above.
+	 * Otherwise, we don't want to attach to the region.  We may have been
+	 * called to clean up if a process died leaving a region locked and/or
+	 * corrupted, which could cause the attach to hang.
 	 */
-	if (force) {
-		if ((ret = __db_unlink(name)) != 0 && ret != ENOENT)
-			goto err1;
-		goto done;
+	if ((ret = __db_appname(infop->dbenv, infop->appname,
+	    infop->path, infop->file, infop->dbflags, NULL, &name)) != 0)
+		return (ret);
+
+	/*
+	 * An underlying file is created for all regions other than private
+	 * (REGION_PRIVATE) ones, regardless of whether or not it's used to
+	 * back the region.  If that file doesn't exist, we're done.
+	 */
+	if (__db_exists(name, NULL) != 0) {
+		FREES(name);
+		return (0);
 	}
 
-	/* Open and lock the region. */
-	if ((ret = __db_ropen(dbenv, appname, path, file, 0, &fd, &rp)) != 0)
-		goto err1;
-	(void)__db_mutex_lock(&rp->lock, fd);
+	/*
+	 * See the comments in __db_rattach -- figure out if this is a regular
+	 * file backing a region or if it's a regular file with information
+	 * about a region.
+	 */
+	if ((ret = __db_open(name, DB_RDONLY, DB_RDONLY, 0, &fd)) != 0)
+		goto errmsg;
+	if ((ret = __db_ioinfo(name, fd, &mbytes, &bytes, NULL)) != 0)
+		goto errmsg;
+	size = mbytes * MEGABYTE + bytes;
 
-	/* If the region is currently being deleted, fail. */
-	if (F_ISSET(rp, DB_R_DELETED)) {
-		ret = ENOENT;		/* XXX: ENOENT? */
-		goto err2;
-	}
+	if (size <= sizeof(RLAYOUT)) {
+		if ((ret = __db_read(fd, &rl, sizeof(rl), &nr)) != 0)
+			goto errmsg;
+		if (rl.valid != DB_REGIONMAGIC) {
+			__db_err(infop->dbenv,
+			    "%s: illegal region magic number", name);
+			ret = EINVAL;
+			goto err;
+		}
 
-	/* If the region is currently in use by someone else, fail. */
-	if (rp->refcnt > 1) {
-		ret = EBUSY;
-		goto err2;
+		/* Set the size, memory id and characteristics. */
+		infop->size = rl.size;
+		infop->segid = rl.segid;
+		if (F_ISSET(&rl, REGION_ANONYMOUS))
+			F_SET(infop, REGION_ANONYMOUS);
+	} else {
+		infop->size = size;
+		infop->segid = INVALID_SEGID;
 	}
 
-	/* Set the delete flag. */
-	F_SET(rp, DB_R_DELETED);
-
-	/* Release the lock and close the region. */
-	(void)__db_mutex_unlock(&rp->lock, fd);
-	if ((t_ret = __db_rclose(dbenv, fd, rp)) != 0 && ret == 0)
-		goto err1;
+	/* Remove the underlying region. */
+	ret = __db_unlinkregion(name, infop);
 
 	/*
-	 * Unlink the region.  There's a race here -- other threads or
-	 * processes might be opening the region while we're trying to
-	 * remove it.  They'll fail, because we've set the DELETED flag,
-	 * but they could still stop us from succeeding in the unlink.
+	 * Unlink the backing file.  Close the open file descriptor first,
+	 * because some architectures (e.g., Win32) won't unlink a file if
+	 * open file descriptors remain.
 	 */
-	for (cnt = 5; cnt > 0; --cnt) {
-		if ((ret = __db_unlink(name)) == 0)
-			break;
-		(void)__db_sleep(0, 250000);
-	}
-	if (ret == 0) {
-done:		FREES(name);
-		return (0);
-	}
-
-	/* Not a clue.  Try to clear the DB_R_DELETED flag. */
-	if ((ret = __db_ropen(dbenv, appname, path, file, 0, &fd, &rp)) != 0)
-		goto err1;
-	(void)__db_mutex_lock(&rp->lock, fd);
-	F_CLR(rp, DB_R_DELETED);
-	/* FALLTHROUGH */
+	(void)__db_close(fd);
+	if ((t_ret = __db_unlink(name)) != 0 && ret == 0)
+		ret = t_ret;
 
-err2:	(void)__db_mutex_unlock(&rp->lock, fd);
-	(void)__db_rclose(dbenv, fd, rp);
-err1:	__db_err(dbenv, "region unlink: %s: %s", name, strerror(ret));
+	if (0) {
+errmsg:		__db_err(infop->dbenv, "%s: %s", name, strerror(ret));
+err:		(void)__db_close(fd);
+	}
 
 	FREES(name);
 	return (ret);
 }
 
 /*
- * DB creates all regions on 4K boundaries so that we don't make the
- * underlying VM unhappy.
- */
-#define	__DB_VMPAGESIZE	(4 * 1024)
-
-/*
  * __db_rgrow --
- *	Extend a region by a specified amount.
+ *	Extend a region.
  *
- * PUBLIC: int __db_rgrow __P((DB_ENV *, int, size_t));
+ * PUBLIC: int __db_rgrow __P((REGINFO *, size_t));
  */
 int
-__db_rgrow(dbenv, fd, incr)
-	DB_ENV *dbenv;
-	int fd;
-	size_t incr;
+__db_rgrow(infop, new_size)
+	REGINFO *infop;
+	size_t new_size;
+{
+	RLAYOUT *rlp;
+	size_t increment;
+	int ret;
+
+	/*
+	 * !!!
+	 * This routine MUST be called with the region already locked.
+	 */
+
+	/* The underlying routines have flagged if this region can grow. */
+	if (!F_ISSET(infop, REGION_CANGROW))
+		return (EINVAL);
+
+	/*
+	 * Round off the requested size to the next page boundary, and
+	 * determine the additional space required.
+	 */
+	rlp = (RLAYOUT *)infop->addr;
+	DB_ROUNDOFF(new_size);
+	increment = new_size - rlp->size;
+
+	if ((ret = __db_growregion(infop, increment)) != 0)
+		return (ret);
+
+	/* Update the on-disk region size. */
+	rlp->size = new_size;
+
+	/* Detach from and reattach to the region. */
+	return (__db_rreattach(infop, new_size));
+}
+
+/*
+ * __db_growregion --
+ *	Grow a shared memory region.
+ */
+static int
+__db_growregion(infop, increment)
+	REGINFO *infop;
+	size_t increment;
 {
+	db_pgno_t pages;
 	size_t i;
-	ssize_t nw;
-	int mmap_init_needed, ret;
-	char buf[__DB_VMPAGESIZE];
+	ssize_t nr, nw;
+	u_int32_t relative;
+	int ret;
+	char buf[DB_VMPAGESIZE];
 
 	/* Seek to the end of the region. */
-	if ((ret = __db_seek(fd, 0, 0, 0, SEEK_END)) != 0)
+	if ((ret = __db_seek(infop->fd, 0, 0, 0, 0, SEEK_END)) != 0)
 		goto err;
 
 	/* Write nuls to the new bytes. */
 	memset(buf, 0, sizeof(buf));
 
 	/*
-	 * Historically, some systems required that all of the bytes of the
-	 * region be written before it could be mmapped and accessed randomly.
-	 *
-	 * Windows/95 doesn't have that problem, but it leaves file contents
-	 * uninitialized.  Win/NT apparently initializes them.
+	 * Some systems require that all of the bytes of the region be
+	 * written before it can be mapped and accessed randomly, and
+	 * other systems don't zero out the pages.
 	 */
-#ifdef MMAP_INIT_NEEDED
-	mmap_init_needed = 1;
-#else
-	mmap_init_needed = __os_oldwin();
-#endif
-	if (mmap_init_needed)
+	if (__db_mapinit())
 		/* Extend the region by writing each new page. */
-		for (i = 0; i < incr; i += __DB_VMPAGESIZE) {
-			if ((ret = __db_write(fd, buf, sizeof(buf), &nw)) != 0)
+		for (i = 0; i < increment; i += DB_VMPAGESIZE) {
+			if ((ret =
+			    __db_write(infop->fd, buf, sizeof(buf), &nw)) != 0)
 				goto err;
 			if (nw != sizeof(buf))
 				goto eio;
 		}
 	else {
 		/*
-		 * Extend the region by writing the last page.
-		 *
-		 * Round off the increment to the next page boundary.
+		 * Extend the region by writing the last page.  If the region
+		 * is >4GB, increment may be larger than the maximum possible
+		 * seek "relative" argument, as it's an unsigned 32-bit value.
+		 * Break the offset into pages of 1MB each so that we don't
+		 * overflow (2^20 * 2^32 is bigger than any memory I expect
+		 * to see for a while).
 		 */
-		incr += __DB_VMPAGESIZE - 1;
-		incr -= incr % __DB_VMPAGESIZE;
-
-		/* Write the last page, not the page after the last. */
-		if ((ret =
-		    __db_seek(fd, 0, 0, incr - __DB_VMPAGESIZE, SEEK_CUR)) != 0)
+		pages = (increment - DB_VMPAGESIZE) / MEGABYTE;
+		relative = (increment - DB_VMPAGESIZE) % MEGABYTE;
+		if ((ret = __db_seek(infop->fd,
+		    MEGABYTE, pages, relative, 0, SEEK_CUR)) != 0)
 			goto err;
-		if ((ret = __db_write(fd, buf, sizeof(buf), &nw)) != 0)
+		if ((ret = __db_write(infop->fd, buf, sizeof(buf), &nw)) != 0)
 			goto err;
 		if (nw != sizeof(buf))
 			goto eio;
+
+		/*
+		 * It's sometimes significantly faster to page-fault in all
+		 * of the region's pages before we run the application, as
+		 * we can see fairly nasty side-effects when we page-fault
+		 * while holding various locks, i.e., the lock takes a long
+		 * time, and other threads convoy behind the lock holder.
+		 */
+		if (DB_GLOBAL(db_region_init)) {
+			pages = increment / MEGABYTE;
+			relative = increment % MEGABYTE;
+			if ((ret = __db_seek(infop->fd,
+			    MEGABYTE, pages, relative, 1, SEEK_END)) != 0)
+				goto err;
+
+			/* Read a byte from each page. */
+			for (i = 0; i < increment; i += DB_VMPAGESIZE) {
+				if ((ret =
+				    __db_read(infop->fd, buf, 1, &nr)) != 0)
+					goto err;
+				if (nr != 1)
+					goto eio;
+				if ((ret = __db_seek(infop->fd,
+				    0, 0, DB_VMPAGESIZE - 1, 0, SEEK_CUR)) != 0)
+					goto err;
+			}
+		}
 	}
 	return (0);
 
 eio:	ret = EIO;
-err:	__db_err(dbenv, "region grow: %s", strerror(ret));
+err:	__db_err(infop->dbenv, "region grow: %s", strerror(ret));
 	return (ret);
 }
 
 /*
- * __db_rremap --
- *	Unmap the old region and map in a new region of a new size.  If
- *	either call fails, returns NULL, else returns the address of the
- *	new region.
+ * __db_rreattach --
+ *	Detach from and reattach to a region.
  *
- * PUBLIC: int __db_rremap __P((DB_ENV *, void *, size_t, size_t, int, void *));
+ * PUBLIC: int __db_rreattach __P((REGINFO *, size_t));
  */
 int
-__db_rremap(dbenv, ptr, oldsize, newsize, fd, retp)
-	DB_ENV *dbenv;
-	void *ptr, *retp;
-	size_t oldsize, newsize;
-	int fd;
+__db_rreattach(infop, new_size)
+	REGINFO *infop;
+	size_t new_size;
 {
 	int ret;
 
-	if ((ret = __db_unmap(ptr, oldsize)) != 0) {
-		__db_err(dbenv, "region remap: munmap: %s", strerror(ret));
-		return (ret);
+#ifdef DIAGNOSTIC
+	if (infop->name == NULL) {
+		__db_err(infop->dbenv, "__db_rreattach: name was NULL");
+		return (EINVAL);
 	}
+#endif
+	/*
+	 * If we're growing an already mapped region, we have to unmap it
+	 * and get it back.  We have it locked, so nobody else can get in,
+	 * which makes it fairly straight-forward to do, as everybody else
+	 * is going to block while we do the unmap/remap.  NB: if we fail
+	 * to get it back, the pooch is genuinely screwed, because we can
+	 * never release the lock we're holding.
+	 *
+	 * Detach from the region.  We have to do this first so architectures
+	 * that don't permit a file to be mapped into different places in the
+	 * address space simultaneously, e.g., HP's PaRisc, will work.
+	 */
+	if ((ret = __db_unmapregion(infop)) != 0)
+		return (ret);
 
-	return (__db_rmap(dbenv, fd, newsize, retp));
-}
-
-/*
- * __db_rmap --
- *	Attach to a shared memory region.
- */
-static int
-__db_rmap(dbenv, fd, size, retp)
-	DB_ENV *dbenv;
-	int fd;
-	size_t size;
-	void *retp;
-{
-	RLAYOUT *rp;
-	int ret;
+	/* Update the caller's REGINFO size to the new map size. */
+	infop->size = new_size;
 
-	if ((ret = __db_map(fd, size, 0, 0, (void **)&rp)) != 0) {
-		__db_err(dbenv, "region map: mmap %s", strerror(ret));
-		return (ret);
-	}
-	if (rp->size < size)
-		rp->size = size;
+	/* Attach to the region. */
+	ret = __db_mapregion(infop->name, infop);
 
-	*(void **)retp = rp;
-	return (0);
+	return (ret);
 }
diff --git a/db2/common/db_salloc.c b/db2/common/db_salloc.c
index f0202ddb90..0fa696bf7e 100644
--- a/db2/common/db_salloc.c
+++ b/db2/common/db_salloc.c
@@ -1,21 +1,21 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)db_salloc.c	10.6 (Sleepycat) 7/5/97";
+static const char sccsid[] = "@(#)db_salloc.c	10.13 (Sleepycat) 5/10/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
 #include <sys/types.h>
 
 #include <errno.h>
-#include <stdio.h>
+#include <string.h>
 #endif
 
 #include "db_int.h"
@@ -109,11 +109,13 @@ __db_shalloc(p, len, align, retp)
 
 		*(void **)retp = rp;
 
+#define	SHALLOC_FRAGMENT	32
 		/*
-		 * If there are at least 32 bytes of additional memory, divide
-		 * the chunk into two chunks.
+		 * If there are at least SHALLOC_FRAGMENT additional bytes of
+		 * memory, divide the chunk into two chunks.
 		 */
-		if ((u_int8_t *)rp >= (u_int8_t *)&elp->links + 32) {
+		if ((u_int8_t *)rp >=
+		    (u_int8_t *)&elp->links + SHALLOC_FRAGMENT) {
 			sp = rp;
 			*--sp = elp->len -
 			    ((u_int8_t *)rp - (u_int8_t *)&elp->links);
@@ -136,7 +138,7 @@ __db_shalloc(p, len, align, retp)
 		return (0);
 	}
 
-	/* Nothing found large enough; need to figure out how to grow region. */
+	/* Nothing found large enough; need to grow the region. */
 	return (ENOMEM);
 }
 
@@ -159,12 +161,18 @@ __db_shalloc_free(regionp, ptr)
 	 * Step back over flagged length fields to find the beginning of
 	 * the object and its real size.
 	 */
-	for (sp = (size_t *)ptr; sp[-1] == ILLEGAL_SIZE; --sp);
+	for (sp = (size_t *)ptr; sp[-1] == ILLEGAL_SIZE; --sp)
+		;
 	ptr = sp;
 
 	newp = (struct __data *)((u_int8_t *)ptr - sizeof(size_t));
 	free_size = newp->len;
 
+	/* Trash the returned memory (DIAGNOSTIC builds only). */
+#ifdef DIAGNOSTIC
+	memset(ptr, 0xff, free_size);
+#endif
+
 	/*
 	 * Walk the list, looking for where this entry goes.
 	 *
@@ -177,7 +185,8 @@ __db_shalloc_free(regionp, ptr)
 	hp = (struct __head *)regionp;
 	for (elp = SH_LIST_FIRST(hp, __data), lastp = NULL;
 	    elp != NULL && (void *)elp < (void *)ptr;
-	    lastp = elp, elp = SH_LIST_NEXT(elp, links, __data));
+	    lastp = elp, elp = SH_LIST_NEXT(elp, links, __data))
+		;
 
 	/*
 	 * Elp is either NULL (we reached the end of the list), or the slot
@@ -259,32 +268,34 @@ __db_shsizeof(ptr)
 	 * Step back over flagged length fields to find the beginning of
 	 * the object and its real size.
 	 */
-	for (sp = (size_t *)ptr; sp[-1] == ILLEGAL_SIZE; --sp);
+	for (sp = (size_t *)ptr; sp[-1] == ILLEGAL_SIZE; --sp)
+		;
 
 	elp = (struct __data *)((u_int8_t *)sp - sizeof(size_t));
 	return (elp->len);
 }
 
-#ifdef DEBUG
 /*
  * __db_shalloc_dump --
  *
- * PUBLIC: void __db_shalloc_dump __P((FILE *, void *));
+ * PUBLIC: void __db_shalloc_dump __P((void *, FILE *));
  */
 void
-__db_shalloc_dump(fp, addr)
-	FILE *fp;
+__db_shalloc_dump(addr, fp)
 	void *addr;
+	FILE *fp;
 {
 	struct __data *elp;
 
+	/* Make it easy to call from the debugger. */
 	if (fp == NULL)
 		fp = stderr;
 
+	fprintf(fp, "%s\nMemory free list\n", DB_LINE);
+
 	for (elp = SH_LIST_FIRST((struct __head *)addr, __data);
 	    elp != NULL;
 	    elp = SH_LIST_NEXT(elp, links, __data))
 		fprintf(fp, "%#lx: %lu\t", (u_long)elp, (u_long)elp->len);
 	fprintf(fp, "\n");
 }
-#endif
diff --git a/db2/common/db_shash.c b/db2/common/db_shash.c
index ab188f564f..3f48a55907 100644
--- a/db2/common/db_shash.c
+++ b/db2/common/db_shash.c
@@ -1,14 +1,14 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)db_shash.c	10.4 (Sleepycat) 1/8/98";
+static const char sccsid[] = "@(#)db_shash.c	10.9 (Sleepycat) 4/10/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -19,39 +19,75 @@ static const char sccsid[] = "@(#)db_shash.c	10.4 (Sleepycat) 1/8/98";
 #include "shqueue.h"
 #include "common_ext.h"
 
-/* Powers-of-2 and close-by prime number pairs. */
+/*
+ * Table of good hash values.  Up to ~250,000 buckets, we use powers of 2.
+ * After that, we slow the rate of increase by half.  For each choice, we
+ * then use a nearby prime number as the hash value.
+ *
+ * If a terabyte is the maximum cache we'll see, and we assume there are
+ * 10 1K buckets on each hash chain, then 107374182 is the maximum number
+ * of buckets we'll ever need.
+ */
 static const struct {
-	u_int	power;
-	u_int	prime;
+	u_int32_t power;
+	u_int32_t prime;
 } list[] = {
-	{  64,	  67},
-	{ 128,	 131},
-	{ 256,	 257},
-	{ 512,	 521},
-	{1024,	1031},
-	{2048,	2053},
-	{4096,	4099},
-	{8192,	8191},
-	{0,	   0}
+	{	 64,		67},		/* 2^6 */
+	{	128,	       131},		/* 2^7 */
+	{	256,	       257},		/* 2^8 */
+	{	512,	       521},		/* 2^9 */
+	{      1024,	      1031},		/* 2^10 */
+	{      2048,	      2053},		/* 2^11 */
+	{      4096,	      4099},		/* 2^12 */
+	{      8192,	      8191},		/* 2^13 */
+	{     16384,	     16381},		/* 2^14 */
+	{     32768,	     32771},		/* 2^15 */
+	{     65536,	     65537},		/* 2^16 */
+	{    131072,	    131071},		/* 2^17 */
+	{    262144,	    262147},		/* 2^18 */
+	{    393216,	    393209},		/* 2^18 + 2^18/2 */
+	{    524288,	    524287},		/* 2^19 */
+	{    786432,	    786431},		/* 2^19 + 2^19/2 */
+	{   1048576,	   1048573},		/* 2^20 */
+	{   1572864,	   1572869},		/* 2^20 + 2^20/2 */
+	{   2097152,	   2097169},		/* 2^21 */
+	{   3145728,	   3145721},		/* 2^21 + 2^21/2 */
+	{   4194304,	   4194301},		/* 2^22 */
+	{   6291456,	   6291449},		/* 2^22 + 2^22/2 */
+	{   8388608,	   8388617},		/* 2^23 */
+	{  12582912,	  12582917},		/* 2^23 + 2^23/2 */
+	{  16777216,	  16777213},		/* 2^24 */
+	{  25165824,	  25165813},		/* 2^24 + 2^24/2 */
+	{  33554432,	  33554393},		/* 2^25 */
+	{  50331648,	  50331653},		/* 2^25 + 2^25/2 */
+	{  67108864,	  67108859},		/* 2^26 */
+	{ 100663296,	 100663291},		/* 2^26 + 2^26/2 */
+	{ 134217728,	 134217757},		/* 2^27 */
+	{ 201326592,	 201326611},		/* 2^27 + 2^27/2 */
+	{ 268435456,	 268435459},		/* 2^28 */
+	{ 402653184,	 402653189},		/* 2^28 + 2^28/2 */
+	{ 536870912,	 536870909},		/* 2^29 */
+	{ 805306368,	 805306357},		/* 2^29 + 2^29/2 */
+	{1073741824,	1073741827},		/* 2^30 */
+	{0,		0}
 };
 
 /*
  * __db_tablesize --
  *	Choose a size for the hash table.
  *
- * PUBLIC: int __db_tablesize __P((u_int));
+ * PUBLIC: int __db_tablesize __P((u_int32_t));
  */
 int
 __db_tablesize(n_buckets)
-	u_int n_buckets;
+	u_int32_t n_buckets;
 {
 	int i;
 
 	/*
-	 * We try to be clever about how big we make the hash tables.  Pick
-	 * a prime number close to the "suggested" number of elements that
-	 * will be in the hash table.  We shoot for minimum collisions (i.e.
-	 * one element in each bucket).  We use 64 as the minimum table size.
+	 * We try to be clever about how big we make the hash tables.  Use a
+	 * prime number close to the "suggested" number of elements that will
+	 * be in the hash table.  Use 64 as the minimum hash table size.
 	 *
 	 * Ref: Sedgewick, Algorithms in C, "Hash Functions"
 	 */
@@ -73,14 +109,14 @@ __db_tablesize(n_buckets)
  * __db_hashinit --
  *	Initialize a hash table that resides in shared memory.
  *
- * PUBLIC: void __db_hashinit __P((void *, int));
+ * PUBLIC: void __db_hashinit __P((void *, u_int32_t));
  */
 void
 __db_hashinit(begin, nelements)
 	void *begin;
-	int nelements;
+	u_int32_t nelements;
 {
-	int i;
+	u_int32_t i;
 	SH_TAILQ_HEAD(hash_head) *headp;
 
 	headp = (struct hash_head *)begin;
diff --git a/db2/config.h b/db2/config.h
index 7f784a0d9b..e5e105830e 100644
--- a/db2/config.h
+++ b/db2/config.h
@@ -24,6 +24,9 @@
 /* Define to `unsigned' if <sys/types.h> doesn't define.  */
 /* #undef size_t */
 
+/* Define if the `S_IS*' macros in <sys/stat.h> do not work properly.  */
+/* #undef STAT_MACROS_BROKEN */
+
 /* Define if you have the ANSI C header files.  */
 #define STDC_HEADERS 1
 
@@ -36,14 +39,17 @@
 /* Define if you want a debugging version. */
 /* #undef DEBUG */
 
+/* Define if you want a version with run-time diagnostic checking. */
+/* #undef DIAGNOSTIC */
+
 /* Define if you have sigfillset (and sigprocmask). */
 #define HAVE_SIGFILLSET 1
 
-/* Define if seeking to 64-bit file offsets requires the _llseek() call. */
-/* #undef HAVE_LLSEEK */
-
-/* Define if seeking to 64-bit file offsets requires the _lseeki64() call. */
-/* #undef HAVE_LSEEKI */
+/* Define if building on AIX, HP, Solaris to get big-file environment. */
+/* #undef HAVE_FILE_OFFSET_BITS */
+#ifdef HAVE_FILE_OFFSET_BITS
+#define _FILE_OFFSET_BITS 64
+#endif
 
 /* Define if you have spinlocks. */
 /* #undef HAVE_SPINLOCKS */
@@ -51,6 +57,12 @@
 /* Define if you want to use mc68020/gcc assembly spinlocks. */
 /* #undef HAVE_ASSEM_MC68020_GCC */
 
+/* Define if you want to use parisc/gcc assembly spinlocks. */
+/* #undef HAVE_ASSEM_PARISC_GCC */
+
+/* Define if you want to use sco/cc assembly spinlocks. */
+/* #undef HAVE_ASSEM_SCO_CC */
+
 /* Define if you want to use sparc/gcc assembly spinlocks. */
 /* #undef HAVE_ASSEM_SPARC_GCC */
 
@@ -69,6 +81,9 @@
 /* Define if you have the SGI abilock_t spinlocks. */
 /* #undef HAVE_FUNC_SGI */
 
+/* Define if you have the ReliantUNIX spinlock_t spinlocks. */
+/* #undef HAVE_FUNC_RELIANT */
+
 /* Define if you have the Solaris mutex_t spinlocks. */
 /* #undef HAVE_FUNC_SOLARIS */
 
@@ -102,12 +117,12 @@
 /* Define if you have the select function.  */
 #define HAVE_SELECT 1
 
+/* Define if you have the shmget function.  */
+#define HAVE_SHMGET 1
+
 /* Define if you have the snprintf function.  */
 #define HAVE_SNPRINTF 1
 
-/* Define if you have the strdup function.  */
-#define HAVE_STRDUP 1
-
 /* Define if you have the strerror function.  */
 #define HAVE_STRERROR 1
 
diff --git a/db2/db.h b/db2/db.h
index 6a75bcd33d..e1f5c72044 100644
--- a/db2/db.h
+++ b/db2/db.h
@@ -1,10 +1,10 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  *
- *	@(#)db.h.src	10.102 (Sleepycat) 1/18/98
+ *	@(#)db.h.src	10.131 (Sleepycat) 6/2/98
  */
 
 #ifndef _DB_H_
@@ -54,8 +54,7 @@
  *
  * !!!
  * We also provide the standard u_int, u_long etc., if they're not provided
- * by the system.  This isn't completely necessary, but the example programs
- * need them.
+ * by the system.
  */
 #ifndef	__BIT_TYPES_DEFINED__
 #define	__BIT_TYPES_DEFINED__
@@ -72,9 +71,9 @@
 
 
 #define	DB_VERSION_MAJOR	2
-#define	DB_VERSION_MINOR	3
-#define	DB_VERSION_PATCH	16
-#define	DB_VERSION_STRING	"Sleepycat Software: DB 2.3.16: (1/19/98)"
+#define	DB_VERSION_MINOR	4
+#define	DB_VERSION_PATCH	14
+#define	DB_VERSION_STRING	"Sleepycat Software: DB 2.4.14: (6/2/98)"
 
 typedef	u_int32_t	db_pgno_t;	/* Page number type. */
 typedef	u_int16_t	db_indx_t;	/* Page offset type. */
@@ -95,6 +94,7 @@ struct __db_bt_stat;	typedef struct __db_bt_stat DB_BTREE_STAT;
 struct __db_dbt;	typedef struct __db_dbt DBT;
 struct __db_env;	typedef struct __db_env DB_ENV;
 struct __db_info;	typedef struct __db_info DB_INFO;
+struct __db_lock_stat;	typedef struct __db_lock_stat DB_LOCK_STAT;
 struct __db_lockregion;	typedef struct __db_lockregion DB_LOCKREGION;
 struct __db_lockreq;	typedef struct __db_lockreq DB_LOCKREQ;
 struct __db_locktab;	typedef struct __db_locktab DB_LOCKTAB;
@@ -102,6 +102,7 @@ struct __db_log;	typedef struct __db_log DB_LOG;
 struct __db_log_stat;	typedef struct __db_log_stat DB_LOG_STAT;
 struct __db_lsn;	typedef struct __db_lsn DB_LSN;
 struct __db_mpool;	typedef struct __db_mpool DB_MPOOL;
+struct __db_mpool_finfo;typedef struct __db_mpool_finfo DB_MPOOL_FINFO;
 struct __db_mpool_fstat;typedef struct __db_mpool_fstat DB_MPOOL_FSTAT;
 struct __db_mpool_stat;	typedef struct __db_mpool_stat DB_MPOOL_STAT;
 struct __db_mpoolfile;	typedef struct __db_mpoolfile DB_MPOOLFILE;
@@ -134,7 +135,7 @@ struct __db_dbt {
  * There are a set of functions that the application can replace with its
  * own versions, and some other knobs which can be turned at run-time.
  */
-#define	DB_FUNC_CALLOC	 1		/* ANSI C calloc. */
+#define	DB_FUNC_CALLOC	 1	/* DELETED: ANSI C calloc. */
 #define	DB_FUNC_CLOSE	 2		/* POSIX 1003.1 close. */
 #define	DB_FUNC_DIRFREE	 3		/* DB: free directory list. */
 #define	DB_FUNC_DIRLIST	 4		/* DB: create directory list. */
@@ -149,12 +150,18 @@ struct __db_dbt {
 #define	DB_FUNC_REALLOC	13		/* ANSI C realloc. */
 #define	DB_FUNC_SEEK	14		/* POSIX 1003.1 lseek. */
 #define	DB_FUNC_SLEEP	15		/* DB: sleep secs/usecs. */
-#define	DB_FUNC_STRDUP	16		/* DB: strdup(3). */
+#define	DB_FUNC_STRDUP	16	/* DELETED: DB: strdup(3). */
 #define	DB_FUNC_UNLINK	17		/* POSIX 1003.1 unlink. */
 #define	DB_FUNC_UNMAP	18		/* DB: unmap shared memory file. */
 #define	DB_FUNC_WRITE	19		/* POSIX 1003.1 write. */
 #define	DB_FUNC_YIELD	20		/* DB: yield thread to scheduler. */
 #define	DB_TSL_SPINS	21		/* DB: initialize spin count. */
+#define	DB_FUNC_RUNLINK	22		/* DB: remove a shared region. */
+#define	DB_REGION_ANON	23		/* DB: anonymous, unnamed regions. */
+#define	DB_REGION_INIT	24		/* DB: page-fault regions in create. */
+#define	DB_REGION_NAME	25		/* DB: anonymous, named regions. */
+#define	DB_MUTEXLOCKS	26		/* DB: turn off all mutex locks. */
+#define	DB_PAGEYIELD	27		/* DB: yield the CPU on pool get. */
 
 /*
  * Database configuration and initialization.
@@ -162,52 +169,51 @@ struct __db_dbt {
  /*
   * Flags understood by both db_open(3) and db_appinit(3).
   */
-#define	DB_CREATE		0x00001	/* O_CREAT: create file as necessary. */
-#define	DB_NOMMAP		0x00002	/* Don't mmap underlying file. */
-#define	DB_THREAD		0x00004	/* Free-thread DB package handles. */
+#define	DB_CREATE	      0x000001	/* O_CREAT: create file as necessary. */
+#define	DB_NOMMAP	      0x000002	/* Don't mmap underlying file. */
+#define	DB_THREAD	      0x000004	/* Free-thread DB package handles. */
 
 /*
  * Flags understood by db_appinit(3).
- *
- * DB_MUTEXDEBUG is internal only, and not documented.
  */
-/*				0x00007	   COMMON MASK. */
-#define	DB_INIT_LOCK		0x00008	/* Initialize locking. */
-#define	DB_INIT_LOG		0x00010	/* Initialize logging. */
-#define	DB_INIT_MPOOL		0x00020	/* Initialize mpool. */
-#define	DB_INIT_TXN		0x00040	/* Initialize transactions. */
-#define	DB_MPOOL_PRIVATE	0x00080	/* Mpool: private memory pool. */
-#define	DB_MUTEXDEBUG		0x00100	/* Do not get/set mutexes in regions. */
-#define	DB_RECOVER		0x00200	/* Run normal recovery. */
-#define	DB_RECOVER_FATAL	0x00400 /* Run catastrophic recovery. */
-#define	DB_TXN_NOSYNC		0x00800	/* Do not sync log on commit. */
-#define	DB_USE_ENVIRON		0x01000	/* Use the environment. */
-#define	DB_USE_ENVIRON_ROOT	0x02000	/* Use the environment if root. */
+/*			      0x000007	   COMMON MASK. */
+#define	DB_INIT_LOCK	      0x000008	/* Initialize locking. */
+#define	DB_INIT_LOG	      0x000010	/* Initialize logging. */
+#define	DB_INIT_MPOOL	      0x000020	/* Initialize mpool. */
+#define	DB_INIT_TXN	      0x000040	/* Initialize transactions. */
+#define	DB_MPOOL_PRIVATE      0x000080	/* Mpool: private memory pool. */
+#define	__UNUSED_100	      0x000100
+#define	DB_RECOVER	      0x000200	/* Run normal recovery. */
+#define	DB_RECOVER_FATAL      0x000400	/* Run catastrophic recovery. */
+#define	DB_TXN_NOSYNC	      0x000800	/* Do not sync log on commit. */
+#define	DB_USE_ENVIRON	      0x001000	/* Use the environment. */
+#define	DB_USE_ENVIRON_ROOT   0x002000	/* Use the environment if root. */
 
 /* CURRENTLY UNUSED LOCK FLAGS. */
-#define	DB_TXN_LOCK_2PL		0x00000	/* Two-phase locking. */
-#define	DB_TXN_LOCK_OPTIMISTIC	0x00000	/* Optimistic locking. */
-#define	DB_TXN_LOCK_MASK	0x00000	/* Lock flags mask. */
+#define	DB_TXN_LOCK_2PL	      0x000000	/* Two-phase locking. */
+#define	DB_TXN_LOCK_OPTIMIST  0x000000	/* Optimistic locking. */
+#define	DB_TXN_LOCK_MASK      0x000000	/* Lock flags mask. */
 
 /* CURRENTLY UNUSED LOG FLAGS. */
-#define	DB_TXN_LOG_REDO		0x00000	/* Redo-only logging. */
-#define	DB_TXN_LOG_UNDO		0x00000	/* Undo-only logging. */
-#define	DB_TXN_LOG_UNDOREDO	0x00000	/* Undo/redo write-ahead logging. */
-#define	DB_TXN_LOG_MASK		0x00000	/* Log flags mask. */
+#define	DB_TXN_LOG_REDO	      0x000000	/* Redo-only logging. */
+#define	DB_TXN_LOG_UNDO	      0x000000	/* Undo-only logging. */
+#define	DB_TXN_LOG_UNDOREDO   0x000000	/* Undo/redo write-ahead logging. */
+#define	DB_TXN_LOG_MASK	      0x000000	/* Log flags mask. */
 
 /*
  * Flags understood by db_open(3).
  *
- * DB_EXCL and DB_TEMPORARY are internal only, and not documented.
- * DB_SEQUENTIAL is currently internal, but likely to be exported some day.
+ * DB_EXCL and DB_TEMPORARY are internal only, and are not documented.
+ * DB_SEQUENTIAL is currently internal, but may be exported some day.
  */
-/*				0x00007	   COMMON MASK. */
-/*				0x07fff	   ALREADY USED. */
-#define	DB_EXCL			0x08000	/* O_EXCL: exclusive open. */
-#define	DB_RDONLY		0x10000	/* O_RDONLY: read-only. */
-#define	DB_SEQUENTIAL		0x20000	/* Indicate sequential access. */
-#define	DB_TEMPORARY		0x40000	/* Remove on last close. */
-#define	DB_TRUNCATE		0x80000	/* O_TRUNCATE: replace existing DB. */
+/*			      0x000007	   COMMON MASK. */
+/*			      0x003fff	   ALREADY USED. */
+#define	__UNUSED_4000	      0x004000
+#define	DB_EXCL		      0x008000	/* O_EXCL: exclusive open. */
+#define	DB_RDONLY	      0x010000	/* O_RDONLY: read-only. */
+#define	DB_SEQUENTIAL	      0x020000	/* Indicate sequential access. */
+#define	DB_TEMPORARY	      0x040000	/* Remove on last close. */
+#define	DB_TRUNCATE	      0x080000	/* O_TRUNCATE: replace existing DB. */
 
 /*
  * Deadlock detector modes; used in the DBENV structure to configure the
@@ -240,9 +246,9 @@ struct __db_env {
 	/* Locking. */
 	DB_LOCKTAB	*lk_info;	/* Return from lock_open(). */
 	u_int8_t	*lk_conflicts;	/* Two dimensional conflict matrix. */
-	int		 lk_modes;	/* Number of lock modes in table. */
-	u_int		 lk_max;	/* Maximum number of locks. */
-	u_int32_t	 lk_detect;	/* Deadlock detect on every conflict. */
+	u_int32_t	 lk_modes;	/* Number of lock modes in table. */
+	u_int32_t	 lk_max;	/* Maximum number of locks. */
+	u_int32_t	 lk_detect;	/* Deadlock detect on all conflicts. */
 
 	/* Logging. */
 	DB_LOG		*lg_info;	/* Return from log_open(). */
@@ -255,7 +261,7 @@ struct __db_env {
 
 	/* Transactions. */
 	DB_TXNMGR	*tx_info;	/* Return from txn_open(). */
-	unsigned int	 tx_max;	/* Maximum number of transactions. */
+	u_int32_t	 tx_max;	/* Maximum number of transactions. */
 	int (*tx_recover)		/* Dispatch function for recovery. */
 	    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
 
@@ -300,17 +306,17 @@ struct __db_info {
 	void *(*db_malloc) __P((size_t));
 
 	/* Btree access method. */
-	int		 bt_maxkey;	/* Maximum keys per page. */
-	int		 bt_minkey;	/* Minimum keys per page. */
+	u_int32_t	 bt_maxkey;	/* Maximum keys per page. */
+	u_int32_t	 bt_minkey;	/* Minimum keys per page. */
 	int (*bt_compare)		/* Comparison function. */
 	    __P((const DBT *, const DBT *));
 	size_t (*bt_prefix)		/* Prefix function. */
 	    __P((const DBT *, const DBT *));
 
 	/* Hash access method. */
-	unsigned int	 h_ffactor;	/* Fill factor. */
-	unsigned int	 h_nelem;	/* Number of elements. */
-	u_int32_t	(*h_hash)	/* Hash function. */
+	u_int32_t 	 h_ffactor;	/* Fill factor. */
+	u_int32_t	 h_nelem;	/* Number of elements. */
+	u_int32_t      (*h_hash)	/* Hash function. */
 	    __P((const void *, u_int32_t));
 
 	/* Recno access method. */
@@ -353,6 +359,7 @@ struct __db_info {
 #define	DB_SET		0x010000	/* c_get(), log_get() */
 #define	DB_SET_RANGE	0x020000	/* c_get() */
 #define	DB_SET_RECNO	0x040000	/* c_get() */
+#define	DB_CURLSN	0x080000	/* log_put() */
 
 /*
  * DB (user visible) error return codes.
@@ -435,14 +442,14 @@ struct __db {
 	void *(*db_malloc) __P((size_t));
 
 					/* Functions. */
-	int (*close)	__P((DB *, int));
+	int (*close)	__P((DB *, u_int32_t));
 	int (*cursor)	__P((DB *, DB_TXN *, DBC **));
-	int (*del)	__P((DB *, DB_TXN *, DBT *, int));
+	int (*del)	__P((DB *, DB_TXN *, DBT *, u_int32_t));
 	int (*fd)	__P((DB *, int *));
-	int (*get)	__P((DB *, DB_TXN *, DBT *, DBT *, int));
-	int (*put)	__P((DB *, DB_TXN *, DBT *, DBT *, int));
-	int (*stat)	__P((DB *, void *, void *(*)(size_t), int));
-	int (*sync)	__P((DB *, int));
+	int (*get)	__P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t));
+	int (*put)	__P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t));
+	int (*stat)	__P((DB *, void *, void *(*)(size_t), u_int32_t));
+	int (*sync)	__P((DB *, u_int32_t));
 
 #define	DB_AM_DUP	0x000001	/* DB_DUP (internal). */
 #define	DB_AM_INMEM	0x000002	/* In-memory; no sync on close. */
@@ -483,9 +490,9 @@ struct __dbc {
 	void	 *internal;		/* Access method private. */
 
 	int (*c_close)	__P((DBC *));
-	int (*c_del)	__P((DBC *, int));
-	int (*c_get)	__P((DBC *, DBT *, DBT *, int));
-	int (*c_put)	__P((DBC *, DBT *, DBT *, int));
+	int (*c_del)	__P((DBC *, u_int32_t));
+	int (*c_get)	__P((DBC *, DBT *, DBT *, u_int32_t));
+	int (*c_put)	__P((DBC *, DBT *, DBT *, u_int32_t));
 };
 
 /* Btree/recno statistics structure. */
@@ -524,10 +531,11 @@ struct __db_bt_stat {
 #if defined(__cplusplus)
 extern "C" {
 #endif
-int   db_appinit __P((const char *, char * const *, DB_ENV *, int));
+int   db_appinit __P((const char *, char * const *, DB_ENV *, u_int32_t));
 int   db_appexit __P((DB_ENV *));
 int   db_jump_set __P((void *, int));
-int   db_open __P((const char *, DBTYPE, int, int, DB_ENV *, DB_INFO *, DB **));
+int   db_open __P((const char *,
+	  DBTYPE, u_int32_t, int, DB_ENV *, DB_INFO *, DB **));
 int   db_value_set __P((int, int));
 char *db_version __P((int *, int *, int *));
 #if defined(__cplusplus)
@@ -575,6 +583,21 @@ typedef enum {
 	DB_LOCK_IWR			/* Intent to read and write. */
 } db_lockmode_t;
 
+/*
+ * Status of a lock.
+ */
+typedef enum {
+	DB_LSTAT_ABORTED,		/* Lock belongs to an aborted txn. */
+	DB_LSTAT_ERR,			/* Lock is bad. */
+	DB_LSTAT_FREE,			/* Lock is unallocated. */
+	DB_LSTAT_HELD,			/* Lock is currently held. */
+	DB_LSTAT_NOGRANT,		/* Lock was not granted. */
+	DB_LSTAT_PENDING,		/* Lock was waiting and has been
+					 * promoted; waiting for the owner
+					 * to run and upgrade it to held. */
+	DB_LSTAT_WAITING		/* Lock is on the wait queue. */
+} db_status_t;
+
 /* Lock request structure. */
 struct __db_lockreq {
 	db_lockop_t	 op;		/* Operation. */
@@ -596,19 +619,38 @@ extern const u_int8_t db_rw_conflicts[];
 #define	DB_LOCK_RIW_N	6
 extern const u_int8_t db_riw_conflicts[];
 
+struct __db_lock_stat {
+	u_int32_t st_magic;		/* Lock file magic number. */
+	u_int32_t st_version;		/* Lock file version number. */
+	u_int32_t st_maxlocks;		/* Maximum number of locks in table. */
+	u_int32_t st_nmodes;		/* Number of lock modes. */
+	u_int32_t st_numobjs;		/* Number of objects. */
+	u_int32_t st_nlockers;		/* Number of lockers. */
+	u_int32_t st_nconflicts;	/* Number of lock conflicts. */
+	u_int32_t st_nrequests;		/* Number of lock gets. */
+	u_int32_t st_nreleases;		/* Number of lock puts. */
+	u_int32_t st_ndeadlocks;	/* Number of lock deadlocks. */
+	u_int32_t st_region_wait;	/* Region lock granted after wait. */
+	u_int32_t st_region_nowait;	/* Region lock granted without wait. */
+	u_int32_t st_refcnt;		/* Region reference count. */
+	u_int32_t st_regsize;		/* Region size. */
+};
+
 #if defined(__cplusplus)
 extern "C" {
 #endif
 int	  lock_close __P((DB_LOCKTAB *));
-int	  lock_detect __P((DB_LOCKTAB *, int, int));
+int	  lock_detect __P((DB_LOCKTAB *, u_int32_t, u_int32_t));
 int	  lock_get __P((DB_LOCKTAB *,
-	    u_int32_t, int, const DBT *, db_lockmode_t, DB_LOCK *));
+	    u_int32_t, u_int32_t, const DBT *, db_lockmode_t, DB_LOCK *));
 int	  lock_id __P((DB_LOCKTAB *, u_int32_t *));
-int	  lock_open __P((const char *, int, int, DB_ENV *, DB_LOCKTAB **));
+int	  lock_open __P((const char *,
+	    u_int32_t, int, DB_ENV *, DB_LOCKTAB **));
 int	  lock_put __P((DB_LOCKTAB *, DB_LOCK));
+int	  lock_stat __P((DB_LOCKTAB *, DB_LOCK_STAT **, void *(*)(size_t)));
 int	  lock_unlink __P((const char *, int, DB_ENV *));
 int	  lock_vec __P((DB_LOCKTAB *,
-	    u_int32_t, int, DB_LOCKREQ *, int, DB_LOCKREQ **));
+	    u_int32_t, u_int32_t, DB_LOCKREQ *, int, DB_LOCKREQ **));
 #if defined(__cplusplus)
 }
 #endif
@@ -651,19 +693,21 @@ struct __db_log_stat {
 	u_int32_t st_region_nowait;	/* Region lock granted without wait. */
 	u_int32_t st_cur_file;		/* Current log file number. */
 	u_int32_t st_cur_offset;	/* Current log file offset. */
+	u_int32_t st_refcnt;		/* Region reference count. */
+	u_int32_t st_regsize;		/* Region size. */
 };
 
 #if defined(__cplusplus)
 extern "C" {
 #endif
-int	 log_archive __P((DB_LOG *, char **[], int, void *(*)(size_t)));
+int	 log_archive __P((DB_LOG *, char **[], u_int32_t, void *(*)(size_t)));
 int	 log_close __P((DB_LOG *));
 int	 log_compare __P((const DB_LSN *, const DB_LSN *));
 int	 log_file __P((DB_LOG *, const DB_LSN *, char *, size_t));
 int	 log_flush __P((DB_LOG *, const DB_LSN *));
-int	 log_get __P((DB_LOG *, DB_LSN *, DBT *, int));
-int	 log_open __P((const char *, int, int, DB_ENV *, DB_LOG **));
-int	 log_put __P((DB_LOG *, DB_LSN *, const DBT *, int));
+int	 log_get __P((DB_LOG *, DB_LSN *, DBT *, u_int32_t));
+int	 log_open __P((const char *, u_int32_t, int, DB_ENV *, DB_LOG **));
+int	 log_put __P((DB_LOG *, DB_LSN *, const DBT *, u_int32_t));
 int	 log_register __P((DB_LOG *, DB *, const char *, DBTYPE, u_int32_t *));
 int	 log_stat __P((DB_LOG *, DB_LOG_STAT **, void *(*)(size_t)));
 int	 log_unlink __P((const char *, int, DB_ENV *));
@@ -705,6 +749,17 @@ struct __db_mpool_stat {
 	u_int32_t st_page_trickle;	/* Pages written by memp_trickle. */
 	u_int32_t st_region_wait;	/* Region lock granted after wait. */
 	u_int32_t st_region_nowait;	/* Region lock granted without wait. */
+	u_int32_t st_refcnt;		/* Region reference count. */
+	u_int32_t st_regsize;		/* Region size. */
+};
+
+/* Mpool file open information structure. */
+struct __db_mpool_finfo {
+	int	   ftype;		/* File type. */
+	DBT	  *pgcookie;		/* Byte-string passed to pgin/pgout. */
+	u_int8_t  *fileid;		/* Unique file ID. */
+	int32_t	   lsn_offset;		/* LSN offset in page. */
+	u_int32_t  clear_len;		/* Cleared length on created pages. */
 };
 
 /* Mpool file statistics structure. */
@@ -724,13 +779,13 @@ extern "C" {
 #endif
 int	memp_close __P((DB_MPOOL *));
 int	memp_fclose __P((DB_MPOOLFILE *));
-int	memp_fget __P((DB_MPOOLFILE *, db_pgno_t *, int, void *));
+int	memp_fget __P((DB_MPOOLFILE *, db_pgno_t *, u_int32_t, void *));
 int	memp_fopen __P((DB_MPOOL *, const char *,
-	    int, int, int, size_t, int, DBT *, u_int8_t *, DB_MPOOLFILE **));
-int	memp_fput __P((DB_MPOOLFILE *, void *, int));
-int	memp_fset __P((DB_MPOOLFILE *, void *, int));
+	    u_int32_t, int, size_t, DB_MPOOL_FINFO *, DB_MPOOLFILE **));
+int	memp_fput __P((DB_MPOOLFILE *, void *, u_int32_t));
+int	memp_fset __P((DB_MPOOLFILE *, void *, u_int32_t));
 int	memp_fsync __P((DB_MPOOLFILE *));
-int	memp_open __P((const char *, int, int, DB_ENV *, DB_MPOOL **));
+int	memp_open __P((const char *, u_int32_t, int, DB_ENV *, DB_MPOOL **));
 int	memp_register __P((DB_MPOOL *, int,
 	    int (*)(db_pgno_t, void *, DBT *),
 	    int (*)(db_pgno_t, void *, DBT *)));
@@ -765,16 +820,21 @@ struct __db_txn_active {
 };
 
 struct __db_txn_stat {
-	DB_LSN		st_last_ckp;	/* lsn of the last checkpoint */
-	DB_LSN		st_pending_ckp;	/* last checkpoint did not finish */
-	time_t		st_time_ckp;	/* time of last checkpoint */
-	u_int32_t	st_last_txnid;	/* last transaction id given out */
-	u_int32_t	st_maxtxns;	/* maximum number of active txns */
-	u_int32_t	st_naborts;	/* number of aborted transactions */
-	u_int32_t	st_nbegins;	/* number of begun transactions */
-	u_int32_t	st_ncommits;	/* number of committed transactions */
-	u_int32_t	st_nactive;	/* number of active transactions */
-	DB_TXN_ACTIVE	*st_txnarray;	/* array of active transactions */
+	DB_LSN	  st_last_ckp;		/* lsn of the last checkpoint */
+	DB_LSN	  st_pending_ckp;	/* last checkpoint did not finish */
+	time_t	  st_time_ckp;		/* time of last checkpoint */
+	u_int32_t st_last_txnid;	/* last transaction id given out */
+	u_int32_t st_maxtxns;	/* maximum number of active txns */
+	u_int32_t st_naborts;	/* number of aborted transactions */
+	u_int32_t st_nbegins;	/* number of begun transactions */
+	u_int32_t st_ncommits;	/* number of committed transactions */
+	u_int32_t st_nactive;	/* number of active transactions */
+	DB_TXN_ACTIVE
+		 *st_txnarray;	/* array of active transactions */
+	u_int32_t st_region_wait;	/* Region lock granted after wait. */
+	u_int32_t st_region_nowait;	/* Region lock granted without wait. */
+	u_int32_t st_refcnt;		/* Region reference count. */
+	u_int32_t st_regsize;		/* Region size. */
 };
 
 #if defined(__cplusplus)
@@ -782,11 +842,11 @@ extern "C" {
 #endif
 int	  txn_abort __P((DB_TXN *));
 int	  txn_begin __P((DB_TXNMGR *, DB_TXN *, DB_TXN **));
-int	  txn_checkpoint __P((const DB_TXNMGR *, int, int));
+int	  txn_checkpoint __P((const DB_TXNMGR *, u_int32_t, u_int32_t));
 int	  txn_commit __P((DB_TXN *));
 int	  txn_close __P((DB_TXNMGR *));
 u_int32_t txn_id __P((DB_TXN *));
-int	  txn_open __P((const char *, int, int, DB_ENV *, DB_TXNMGR **));
+int	  txn_open __P((const char *, u_int32_t, int, DB_ENV *, DB_TXNMGR **));
 int	  txn_prepare __P((DB_TXN *));
 int	  txn_stat __P((DB_TXNMGR *, DB_TXN_STAT **, void *(*)(size_t)));
 int	  txn_unlink __P((const char *, int, DB_ENV *));
@@ -810,10 +870,17 @@ int	  txn_unlink __P((const char *, int, DB_ENV *));
  */
 #define	DBM_SUFFIX	".db"
 
+#if defined(_XPG4_2)
+typedef struct {
+	char *dptr;
+	size_t dsize;
+} datum;
+#else
 typedef struct {
 	char *dptr;
 	int dsize;
 } datum;
+#endif
 
 /*
  * Translate DBM calls into DB calls so that DB doesn't step on the
@@ -894,7 +961,7 @@ typedef enum {
 
 typedef struct entry {
 	char *key;
-	void *data;
+	char *data;
 } ENTRY;
 
 /*
@@ -909,7 +976,7 @@ typedef struct entry {
 #if defined(__cplusplus)
 extern "C" {
 #endif
-int	 __db_hcreate __P((unsigned int));
+int	 __db_hcreate __P((size_t));
 void	 __db_hdestroy __P((void));
 ENTRY	*__db_hsearch __P((ENTRY, ACTION));
 #if defined(__cplusplus)
diff --git a/db2/db/db.c b/db2/db/db.c
index 8df76349d1..9951ebd944 100644
--- a/db2/db/db.c
+++ b/db2/db/db.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 /*
@@ -44,20 +44,16 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)db.c	10.45 (Sleepycat) 12/4/97";
+static const char sccsid[] = "@(#)db.c	10.57 (Sleepycat) 5/7/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
 #include <sys/types.h>
-#include <sys/stat.h>
 
 #include <errno.h>
-#include <fcntl.h>
 #include <stddef.h>
-#include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-#include <unistd.h>
 #endif
 
 #include "db_int.h"
@@ -71,7 +67,7 @@ static const char sccsid[] = "@(#)db.c	10.45 (Sleepycat) 12/4/97";
 #include "db_am.h"
 #include "common_ext.h"
 
-static int db_close __P((DB *, int));
+static int db_close __P((DB *, u_int32_t));
 static int db_fd __P((DB *, int *));
 
 /*
@@ -99,7 +95,8 @@ int
 db_open(fname, type, flags, mode, dbenv, dbinfo, dbpp)
 	const char *fname;
 	DBTYPE type;
-	int flags, mode;
+	u_int32_t flags;
+	int mode;
 	DB_ENV *dbenv;
 	DB_INFO *dbinfo;
 	DB **dbpp;
@@ -108,6 +105,7 @@ db_open(fname, type, flags, mode, dbenv, dbinfo, dbpp)
 	DB *dbp;
 	DBT pgcookie;
 	DB_ENV *envp, t_dbenv;
+	DB_MPOOL_FINFO finfo;
 	DB_PGINFO pginfo;
 	HASHHDR *hashm;
 	size_t cachesize;
@@ -125,10 +123,26 @@ db_open(fname, type, flags, mode, dbenv, dbinfo, dbpp)
 	if ((ret = __db_fchk(dbenv, "db_open", flags, OKFLAGS)) != 0)
 		return (ret);
 
-	if (dbenv != NULL &&
-	    LF_ISSET(DB_THREAD) && !F_ISSET(dbenv, DB_ENV_THREAD)) {
-		__db_err(dbenv, "environment not created using DB_THREAD");
-		return (EINVAL);
+	if (dbenv != NULL) {
+		/*
+		 * You can't specify threads during the db_open() if the
+		 * environment wasn't configured with them.
+		 */
+		if (LF_ISSET(DB_THREAD) && !F_ISSET(dbenv, DB_ENV_THREAD)) {
+			__db_err(dbenv,
+			    "environment not created using DB_THREAD");
+			return (EINVAL);
+		}
+
+		/*
+		 * Specifying a cachesize to db_open(3), after creating an
+		 * environment, is a common mistake.
+		 */
+		if (dbinfo != NULL && dbinfo->db_cachesize != 0) {
+			__db_err(dbenv,
+			    "cachesize will be ignored if environment exists");
+			return (EINVAL);
+		}
 	}
 
 	/* Initialize for error return. */
@@ -203,7 +217,7 @@ db_open(fname, type, flags, mode, dbenv, dbinfo, dbpp)
 
 	/* Fill in the default file mode. */
 	if (mode == 0)
-		mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP;
+		mode = __db_omode("rwrw--");
 
 	/* Check if the user wants us to swap byte order. */
 	if (dbinfo != NULL)
@@ -230,7 +244,7 @@ db_open(fname, type, flags, mode, dbenv, dbinfo, dbpp)
 	if (fname != NULL && fname[0] != '\0') {
 		/* Get the real file name. */
 		if ((ret = __db_appname(dbenv,
-		     DB_APP_DATA, NULL, fname, NULL, &real_name)) != 0)
+		     DB_APP_DATA, NULL, fname, 0, NULL, &real_name)) != 0)
 			goto err;
 
 		/*
@@ -455,22 +469,6 @@ empty:	/*
 	}
 
 	/*
-	 * Set and/or correct the cache size; must be a multiple of the
-	 * page size.
-	 */
-	if (dbinfo == NULL || dbinfo->db_cachesize == 0)
-		cachesize = dbp->pgsize * DB_MINCACHE;
-	else {
-		cachesize = dbinfo->db_cachesize;
-		if (cachesize & (dbp->pgsize - 1))
-			cachesize += (~cachesize & (dbp->pgsize - 1)) + 1;
-		if (cachesize < dbp->pgsize * DB_MINCACHE)
-			cachesize = dbp->pgsize * DB_MINCACHE;
-		if (cachesize < 20 * 1024)
-			cachesize = 20 * 1024;
-	}
-
-	/*
 	 * If no mpool supplied by the application, attach to a local,
 	 * created buffer pool.
 	 *
@@ -499,10 +497,28 @@ empty:	/*
 			envp = dbenv;
 			restore = 1;
 		}
+
+		/*
+		 * Set and/or correct the cache size; must be a multiple of
+		 * the page size.
+		 */
+		if (dbinfo == NULL || dbinfo->db_cachesize == 0)
+			cachesize = dbp->pgsize * DB_MINCACHE;
+		else {
+			cachesize = dbinfo->db_cachesize;
+			if (cachesize & (dbp->pgsize - 1))
+				cachesize +=
+				    (~cachesize & (dbp->pgsize - 1)) + 1;
+			if (cachesize < dbp->pgsize * DB_MINCACHE)
+				cachesize = dbp->pgsize * DB_MINCACHE;
+			if (cachesize < 20 * 1024)
+				cachesize = 20 * 1024;
+		}
 		envp->mp_size = cachesize;
+
 		if ((ret = memp_open(NULL, DB_CREATE | DB_MPOOL_PRIVATE |
 		    (F_ISSET(dbp, DB_AM_THREAD) ? DB_THREAD : 0),
-		    S_IRUSR | S_IWUSR, envp, &dbp->mp)) != 0)
+		    __db_omode("rw----"), envp, &dbp->mp)) != 0)
 			goto err;
 		if (restore)
 			*dbenv = t_dbenv;
@@ -566,9 +582,18 @@ empty:	/*
 	pgcookie.data = &pginfo;
 	pgcookie.size = sizeof(DB_PGINFO);
 
-	if ((ret = memp_fopen(dbp->mp, fname, ftype,
-	    F_ISSET(dbp, DB_AM_RDONLY) ? DB_RDONLY : 0, 0, dbp->pgsize,
-	    0, &pgcookie, dbp->lock.fileid, &dbp->mpf)) != 0)
+	/*
+	 * Set up additional memp_fopen information.
+	 */
+	memset(&finfo, 0, sizeof(finfo));
+	finfo.ftype = ftype;
+	finfo.pgcookie = &pgcookie;
+	finfo.fileid = dbp->lock.fileid;
+	finfo.lsn_offset = 0;
+	finfo.clear_len = DB_PAGE_CLEAR_LEN;
+	if ((ret = memp_fopen(dbp->mp, fname,
+	    F_ISSET(dbp, DB_AM_RDONLY) ? DB_RDONLY : 0,
+	    0, dbp->pgsize, &finfo, &dbp->mpf)) != 0)
 		goto err;
 
 	/*
@@ -673,7 +698,7 @@ err:	/* Close the file descriptor. */
 static int
 db_close(dbp, flags)
 	DB *dbp;
-	int flags;
+	u_int32_t flags;
 {
 	DBC *dbc;
 	DB *tdbp;
@@ -734,7 +759,7 @@ db_close(dbp, flags)
 	}
 
 	/* Sync the memory pool. */
-	if ((t_ret = memp_fsync(dbp->mpf)) != 0 &&
+	if (!LF_ISSET(DB_NOSYNC) && (t_ret = memp_fsync(dbp->mpf)) != 0 &&
 	    t_ret != DB_INCOMPLETE && ret == 0)
 		ret = t_ret;
 
@@ -796,18 +821,11 @@ db_fd(dbp, fdp)
         DB *dbp;
 	int *fdp;
 {
-	/* In-memory database can't have a file descriptor. */
-	if (F_ISSET(dbp, DB_AM_INMEM))
-		return (ENOENT);
-
 	/*
 	 * XXX
-	 * Truly spectacular layering violation.  As we don't open the
-	 * underlying file until we need it, it may not be initialized.
+	 * Truly spectacular layering violation.
 	 */
-	if ((*fdp = dbp->mpf->fd) == -1)
-		return (ENOENT);
-	return (0);
+	return (__mp_xxx_fd(dbp->mpf, fdp));
 }
 
 /*
@@ -821,6 +839,11 @@ __db_pgerr(dbp, pgno)
 	DB *dbp;
 	db_pgno_t pgno;
 {
+	/*
+	 * Three things are certain:
+	 * Death, taxes, and lost data.
+	 * Guess which has occurred.
+	 */
 	__db_err(dbp->dbenv,
 	    "unable to create/retrieve page %lu", (u_long)pgno);
 	return (__db_panic(dbp));
diff --git a/db2/db/db.src b/db2/db/db.src
index 07d98123ac..91d8b390a1 100644
--- a/db2/db/db.src
+++ b/db2/db/db.src
@@ -1,11 +1,11 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
- *	@(#)db.src	10.4 (Sleepycat) 11/2/97
+ *
+ *	@(#)db.src	10.6 (Sleepycat) 4/28/98
  */
-#include "config.h"
 
 PREFIX	db
 
@@ -153,4 +153,7 @@ END
  * noop -- do nothing, but get an LSN.
  */
 BEGIN noop
+ARG	fileid		u_int32_t	lu
+ARG	pgno		db_pgno_t	lu
+POINTER	prevlsn		DB_LSN *	lu
 END
diff --git a/db2/db/db_auto.c b/db2/db/db_auto.c
index 5d35264103..5203e0a94c 100644
--- a/db2/db/db_auto.c
+++ b/db2/db/db_auto.c
@@ -14,8 +14,6 @@
 #include "db_page.h"
 #include "db_dispatch.h"
 #include "db_am.h"
-#include "common_ext.h"
-
 /*
  * PUBLIC: int __db_addrem_log
  * PUBLIC:     __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
@@ -107,7 +105,7 @@ int __db_addrem_log(logp, txnid, ret_lsnp, flags,
 	else
 		memset(bp, 0, sizeof(*pagelsn));
 	bp += sizeof(*pagelsn);
-#ifdef DEBUG
+#ifdef DIAGNOSTIC
 	if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size)
 		fprintf(stderr, "Error in log record length");
 #endif
@@ -123,22 +121,23 @@ int __db_addrem_log(logp, txnid, ret_lsnp, flags,
  * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
  */
 int
-__db_addrem_print(notused1, dbtp, lsnp, notused3, notused4)
+__db_addrem_print(notused1, dbtp, lsnp, notused2, notused3)
 	DB_LOG *notused1;
 	DBT *dbtp;
 	DB_LSN *lsnp;
-	int notused3;
-	void *notused4;
+	int notused2;
+	void *notused3;
 {
 	__db_addrem_args *argp;
 	u_int32_t i;
-	int c, ret;
+	u_int ch;
+	int ret;
 
 	i = 0;
-	c = 0;
+	ch = 0;
 	notused1 = NULL;
-	notused3 = 0;
-	notused4 = NULL;
+	notused2 = 0;
+	notused3 = NULL;
 
 	if ((ret = __db_addrem_read(dbtp->data, &argp)) != 0)
 		return (ret);
@@ -156,20 +155,20 @@ __db_addrem_print(notused1, dbtp, lsnp, notused3, notused4)
 	printf("\tnbytes: %lu\n", (u_long)argp->nbytes);
 	printf("\thdr: ");
 	for (i = 0; i < argp->hdr.size; i++) {
-		c = ((char *)argp->hdr.data)[i];
-		if (isprint(c) || c == 0xa)
-			putchar(c);
+		ch = ((u_int8_t *)argp->hdr.data)[i];
+		if (isprint(ch) || ch == 0xa)
+			putchar(ch);
 		else
-			printf("%#x ", c);
+			printf("%#x ", ch);
 	}
 	printf("\n");
 	printf("\tdbt: ");
 	for (i = 0; i < argp->dbt.size; i++) {
-		c = ((char *)argp->dbt.data)[i];
-		if (isprint(c) || c == 0xa)
-			putchar(c);
+		ch = ((u_int8_t *)argp->dbt.data)[i];
+		if (isprint(ch) || ch == 0xa)
+			putchar(ch);
 		else
-			printf("%#x ", c);
+			printf("%#x ", ch);
 	}
 	printf("\n");
 	printf("\tpagelsn: [%lu][%lu]\n",
@@ -296,7 +295,7 @@ int __db_split_log(logp, txnid, ret_lsnp, flags,
 	else
 		memset(bp, 0, sizeof(*pagelsn));
 	bp += sizeof(*pagelsn);
-#ifdef DEBUG
+#ifdef DIAGNOSTIC
 	if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size)
 		fprintf(stderr, "Error in log record length");
 #endif
@@ -312,22 +311,23 @@ int __db_split_log(logp, txnid, ret_lsnp, flags,
  * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
  */
 int
-__db_split_print(notused1, dbtp, lsnp, notused3, notused4)
+__db_split_print(notused1, dbtp, lsnp, notused2, notused3)
 	DB_LOG *notused1;
 	DBT *dbtp;
 	DB_LSN *lsnp;
-	int notused3;
-	void *notused4;
+	int notused2;
+	void *notused3;
 {
 	__db_split_args *argp;
 	u_int32_t i;
-	int c, ret;
+	u_int ch;
+	int ret;
 
 	i = 0;
-	c = 0;
+	ch = 0;
 	notused1 = NULL;
-	notused3 = 0;
-	notused4 = NULL;
+	notused2 = 0;
+	notused3 = NULL;
 
 	if ((ret = __db_split_read(dbtp->data, &argp)) != 0)
 		return (ret);
@@ -343,11 +343,11 @@ __db_split_print(notused1, dbtp, lsnp, notused3, notused4)
 	printf("\tpgno: %lu\n", (u_long)argp->pgno);
 	printf("\tpageimage: ");
 	for (i = 0; i < argp->pageimage.size; i++) {
-		c = ((char *)argp->pageimage.data)[i];
-		if (isprint(c) || c == 0xa)
-			putchar(c);
+		ch = ((u_int8_t *)argp->pageimage.data)[i];
+		if (isprint(ch) || ch == 0xa)
+			putchar(ch);
 		else
-			printf("%#x ", c);
+			printf("%#x ", ch);
 	}
 	printf("\n");
 	printf("\tpagelsn: [%lu][%lu]\n",
@@ -490,7 +490,7 @@ int __db_big_log(logp, txnid, ret_lsnp, flags,
 	else
 		memset(bp, 0, sizeof(*nextlsn));
 	bp += sizeof(*nextlsn);
-#ifdef DEBUG
+#ifdef DIAGNOSTIC
 	if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size)
 		fprintf(stderr, "Error in log record length");
 #endif
@@ -506,22 +506,23 @@ int __db_big_log(logp, txnid, ret_lsnp, flags,
  * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
  */
 int
-__db_big_print(notused1, dbtp, lsnp, notused3, notused4)
+__db_big_print(notused1, dbtp, lsnp, notused2, notused3)
 	DB_LOG *notused1;
 	DBT *dbtp;
 	DB_LSN *lsnp;
-	int notused3;
-	void *notused4;
+	int notused2;
+	void *notused3;
 {
 	__db_big_args *argp;
 	u_int32_t i;
-	int c, ret;
+	u_int ch;
+	int ret;
 
 	i = 0;
-	c = 0;
+	ch = 0;
 	notused1 = NULL;
-	notused3 = 0;
-	notused4 = NULL;
+	notused2 = 0;
+	notused3 = NULL;
 
 	if ((ret = __db_big_read(dbtp->data, &argp)) != 0)
 		return (ret);
@@ -539,11 +540,11 @@ __db_big_print(notused1, dbtp, lsnp, notused3, notused4)
 	printf("\tnext_pgno: %lu\n", (u_long)argp->next_pgno);
 	printf("\tdbt: ");
 	for (i = 0; i < argp->dbt.size; i++) {
-		c = ((char *)argp->dbt.data)[i];
-		if (isprint(c) || c == 0xa)
-			putchar(c);
+		ch = ((u_int8_t *)argp->dbt.data)[i];
+		if (isprint(ch) || ch == 0xa)
+			putchar(ch);
 		else
-			printf("%#x ", c);
+			printf("%#x ", ch);
 	}
 	printf("\n");
 	printf("\tpagelsn: [%lu][%lu]\n",
@@ -660,7 +661,7 @@ int __db_ovref_log(logp, txnid, ret_lsnp, flags,
 	else
 		memset(bp, 0, sizeof(*lsn));
 	bp += sizeof(*lsn);
-#ifdef DEBUG
+#ifdef DIAGNOSTIC
 	if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size)
 		fprintf(stderr, "Error in log record length");
 #endif
@@ -676,22 +677,23 @@ int __db_ovref_log(logp, txnid, ret_lsnp, flags,
  * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
  */
 int
-__db_ovref_print(notused1, dbtp, lsnp, notused3, notused4)
+__db_ovref_print(notused1, dbtp, lsnp, notused2, notused3)
 	DB_LOG *notused1;
 	DBT *dbtp;
 	DB_LSN *lsnp;
-	int notused3;
-	void *notused4;
+	int notused2;
+	void *notused3;
 {
 	__db_ovref_args *argp;
 	u_int32_t i;
-	int c, ret;
+	u_int ch;
+	int ret;
 
 	i = 0;
-	c = 0;
+	ch = 0;
 	notused1 = NULL;
-	notused3 = 0;
-	notused4 = NULL;
+	notused2 = 0;
+	notused3 = NULL;
 
 	if ((ret = __db_ovref_read(dbtp->data, &argp)) != 0)
 		return (ret);
@@ -823,7 +825,7 @@ int __db_relink_log(logp, txnid, ret_lsnp, flags,
 	else
 		memset(bp, 0, sizeof(*lsn_next));
 	bp += sizeof(*lsn_next);
-#ifdef DEBUG
+#ifdef DIAGNOSTIC
 	if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size)
 		fprintf(stderr, "Error in log record length");
 #endif
@@ -839,22 +841,23 @@ int __db_relink_log(logp, txnid, ret_lsnp, flags,
  * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
  */
 int
-__db_relink_print(notused1, dbtp, lsnp, notused3, notused4)
+__db_relink_print(notused1, dbtp, lsnp, notused2, notused3)
 	DB_LOG *notused1;
 	DBT *dbtp;
 	DB_LSN *lsnp;
-	int notused3;
-	void *notused4;
+	int notused2;
+	void *notused3;
 {
 	__db_relink_args *argp;
 	u_int32_t i;
-	int c, ret;
+	u_int ch;
+	int ret;
 
 	i = 0;
-	c = 0;
+	ch = 0;
 	notused1 = NULL;
-	notused3 = 0;
-	notused4 = NULL;
+	notused2 = 0;
+	notused3 = NULL;
 
 	if ((ret = __db_relink_read(dbtp->data, &argp)) != 0)
 		return (ret);
@@ -985,7 +988,7 @@ int __db_addpage_log(logp, txnid, ret_lsnp, flags,
 	else
 		memset(bp, 0, sizeof(*nextlsn));
 	bp += sizeof(*nextlsn);
-#ifdef DEBUG
+#ifdef DIAGNOSTIC
 	if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size)
 		fprintf(stderr, "Error in log record length");
 #endif
@@ -1001,22 +1004,23 @@ int __db_addpage_log(logp, txnid, ret_lsnp, flags,
  * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
  */
 int
-__db_addpage_print(notused1, dbtp, lsnp, notused3, notused4)
+__db_addpage_print(notused1, dbtp, lsnp, notused2, notused3)
 	DB_LOG *notused1;
 	DBT *dbtp;
 	DB_LSN *lsnp;
-	int notused3;
-	void *notused4;
+	int notused2;
+	void *notused3;
 {
 	__db_addpage_args *argp;
 	u_int32_t i;
-	int c, ret;
+	u_int ch;
+	int ret;
 
 	i = 0;
-	c = 0;
+	ch = 0;
 	notused1 = NULL;
-	notused3 = 0;
-	notused4 = NULL;
+	notused2 = 0;
+	notused3 = NULL;
 
 	if ((ret = __db_addpage_read(dbtp->data, &argp)) != 0)
 		return (ret);
@@ -1159,7 +1163,7 @@ int __db_debug_log(logp, txnid, ret_lsnp, flags,
 	}
 	memcpy(bp, &arg_flags, sizeof(arg_flags));
 	bp += sizeof(arg_flags);
-#ifdef DEBUG
+#ifdef DIAGNOSTIC
 	if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size)
 		fprintf(stderr, "Error in log record length");
 #endif
@@ -1175,22 +1179,23 @@ int __db_debug_log(logp, txnid, ret_lsnp, flags,
  * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
  */
 int
-__db_debug_print(notused1, dbtp, lsnp, notused3, notused4)
+__db_debug_print(notused1, dbtp, lsnp, notused2, notused3)
 	DB_LOG *notused1;
 	DBT *dbtp;
 	DB_LSN *lsnp;
-	int notused3;
-	void *notused4;
+	int notused2;
+	void *notused3;
 {
 	__db_debug_args *argp;
 	u_int32_t i;
-	int c, ret;
+	u_int ch;
+	int ret;
 
 	i = 0;
-	c = 0;
+	ch = 0;
 	notused1 = NULL;
-	notused3 = 0;
-	notused4 = NULL;
+	notused2 = 0;
+	notused3 = NULL;
 
 	if ((ret = __db_debug_read(dbtp->data, &argp)) != 0)
 		return (ret);
@@ -1203,30 +1208,30 @@ __db_debug_print(notused1, dbtp, lsnp, notused3, notused4)
 	    (u_long)argp->prev_lsn.offset);
 	printf("\top: ");
 	for (i = 0; i < argp->op.size; i++) {
-		c = ((char *)argp->op.data)[i];
-		if (isprint(c) || c == 0xa)
-			putchar(c);
+		ch = ((u_int8_t *)argp->op.data)[i];
+		if (isprint(ch) || ch == 0xa)
+			putchar(ch);
 		else
-			printf("%#x ", c);
+			printf("%#x ", ch);
 	}
 	printf("\n");
 	printf("\tfileid: %lu\n", (u_long)argp->fileid);
 	printf("\tkey: ");
 	for (i = 0; i < argp->key.size; i++) {
-		c = ((char *)argp->key.data)[i];
-		if (isprint(c) || c == 0xa)
-			putchar(c);
+		ch = ((u_int8_t *)argp->key.data)[i];
+		if (isprint(ch) || ch == 0xa)
+			putchar(ch);
 		else
-			printf("%#x ", c);
+			printf("%#x ", ch);
 	}
 	printf("\n");
 	printf("\tdata: ");
 	for (i = 0; i < argp->data.size; i++) {
-		c = ((char *)argp->data.data)[i];
-		if (isprint(c) || c == 0xa)
-			putchar(c);
+		ch = ((u_int8_t *)argp->data.data)[i];
+		if (isprint(ch) || ch == 0xa)
+			putchar(ch);
 		else
-			printf("%#x ", c);
+			printf("%#x ", ch);
 	}
 	printf("\n");
 	printf("\targ_flags: %lu\n", (u_long)argp->arg_flags);
@@ -1280,13 +1285,18 @@ __db_debug_read(recbuf, argpp)
 
 /*
  * PUBLIC: int __db_noop_log
- * PUBLIC:     __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t));
+ * PUBLIC:     __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
+ * PUBLIC:     u_int32_t, db_pgno_t, DB_LSN *));
  */
-int __db_noop_log(logp, txnid, ret_lsnp, flags)
+int __db_noop_log(logp, txnid, ret_lsnp, flags,
+	fileid, pgno, prevlsn)
 	DB_LOG *logp;
 	DB_TXN *txnid;
 	DB_LSN *ret_lsnp;
 	u_int32_t flags;
+	u_int32_t fileid;
+	db_pgno_t pgno;
+	DB_LSN * prevlsn;
 {
 	DBT logrec;
 	DB_LSN *lsnp, null_lsn;
@@ -1302,7 +1312,10 @@ int __db_noop_log(logp, txnid, ret_lsnp, flags)
 		lsnp = &null_lsn;
 	} else
 		lsnp = &txnid->last_lsn;
-	logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN);
+	logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN)
+	    + sizeof(fileid)
+	    + sizeof(pgno)
+	    + sizeof(*prevlsn);
 	if ((logrec.data = (void *)__db_malloc(logrec.size)) == NULL)
 		return (ENOMEM);
 
@@ -1313,7 +1326,16 @@ int __db_noop_log(logp, txnid, ret_lsnp, flags)
 	bp += sizeof(txn_num);
 	memcpy(bp, lsnp, sizeof(DB_LSN));
 	bp += sizeof(DB_LSN);
-#ifdef DEBUG
+	memcpy(bp, &fileid, sizeof(fileid));
+	bp += sizeof(fileid);
+	memcpy(bp, &pgno, sizeof(pgno));
+	bp += sizeof(pgno);
+	if (prevlsn != NULL)
+		memcpy(bp, prevlsn, sizeof(*prevlsn));
+	else
+		memset(bp, 0, sizeof(*prevlsn));
+	bp += sizeof(*prevlsn);
+#ifdef DIAGNOSTIC
 	if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size)
 		fprintf(stderr, "Error in log record length");
 #endif
@@ -1329,22 +1351,23 @@ int __db_noop_log(logp, txnid, ret_lsnp, flags)
  * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
  */
 int
-__db_noop_print(notused1, dbtp, lsnp, notused3, notused4)
+__db_noop_print(notused1, dbtp, lsnp, notused2, notused3)
 	DB_LOG *notused1;
 	DBT *dbtp;
 	DB_LSN *lsnp;
-	int notused3;
-	void *notused4;
+	int notused2;
+	void *notused3;
 {
 	__db_noop_args *argp;
 	u_int32_t i;
-	int c, ret;
+	u_int ch;
+	int ret;
 
 	i = 0;
-	c = 0;
+	ch = 0;
 	notused1 = NULL;
-	notused3 = 0;
-	notused4 = NULL;
+	notused2 = 0;
+	notused3 = NULL;
 
 	if ((ret = __db_noop_read(dbtp->data, &argp)) != 0)
 		return (ret);
@@ -1355,6 +1378,10 @@ __db_noop_print(notused1, dbtp, lsnp, notused3, notused4)
 	    (u_long)argp->txnid->txnid,
 	    (u_long)argp->prev_lsn.file,
 	    (u_long)argp->prev_lsn.offset);
+	printf("\tfileid: %lu\n", (u_long)argp->fileid);
+	printf("\tpgno: %lu\n", (u_long)argp->pgno);
+	printf("\tprevlsn: [%lu][%lu]\n",
+	    (u_long)argp->prevlsn.file, (u_long)argp->prevlsn.offset);
 	printf("\n");
 	__db_free(argp);
 	return (0);
@@ -1383,6 +1410,12 @@ __db_noop_read(recbuf, argpp)
 	bp += sizeof(argp->txnid->txnid);
 	memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN));
 	bp += sizeof(DB_LSN);
+	memcpy(&argp->fileid, bp, sizeof(argp->fileid));
+	bp += sizeof(argp->fileid);
+	memcpy(&argp->pgno, bp, sizeof(argp->pgno));
+	bp += sizeof(argp->pgno);
+	memcpy(&argp->prevlsn, bp,  sizeof(argp->prevlsn));
+	bp += sizeof(argp->prevlsn);
 	*argpp = argp;
 	return (0);
 }
diff --git a/db2/db/db_conv.c b/db2/db/db_conv.c
index e9c4bf90bd..8b5cf5f4a7 100644
--- a/db2/db/db_conv.c
+++ b/db2/db/db_conv.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 /*
@@ -44,7 +44,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)db_conv.c	10.8 (Sleepycat) 1/8/98";
+static const char sccsid[] = "@(#)db_conv.c	10.13 (Sleepycat) 4/26/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -160,6 +160,13 @@ __db_convert(pg, pp, pagesize, pgin)
 			}
 
 		}
+
+		/*
+		 * The offsets in the inp array are used to determine
+		 * the size of entries on a page; therefore they
+		 * cannot be converted until we've done all the
+		 * entries.
+		 */
 		if (!pgin)
 			for (i = 0; i < NUM_ENT(h); i++)
 				M_16_SWAP(h->inp[i]);
@@ -179,8 +186,8 @@ __db_convert(pg, pp, pagesize, pgin)
 			case B_DUPLICATE:
 			case B_OVERFLOW:
 				bo = (BOVERFLOW *)bk;
-				M_32_SWAP(bo->tlen);
 				M_32_SWAP(bo->pgno);
+				M_32_SWAP(bo->tlen);
 				break;
 			}
 
@@ -194,17 +201,18 @@ __db_convert(pg, pp, pagesize, pgin)
 				M_16_SWAP(h->inp[i]);
 
 			bi = GET_BINTERNAL(h, i);
+			M_16_SWAP(bi->len);
+			M_32_SWAP(bi->pgno);
+			M_32_SWAP(bi->nrecs);
+
 			switch (B_TYPE(bi->type)) {
 			case B_KEYDATA:
-				M_16_SWAP(bi->len);
-				M_32_SWAP(bi->pgno);
-				M_32_SWAP(bi->nrecs);
 				break;
 			case B_DUPLICATE:
 			case B_OVERFLOW:
-				bo = (BOVERFLOW *)bi;
-				M_32_SWAP(bo->tlen);
+				bo = (BOVERFLOW *)bi->data;
 				M_32_SWAP(bo->pgno);
+				M_32_SWAP(bo->tlen);
 				break;
 			}
 
@@ -224,6 +232,7 @@ __db_convert(pg, pp, pagesize, pgin)
 			if (!pgin)
 				M_16_SWAP(h->inp[i]);
 		}
+		break;
 	case P_OVERFLOW:
 	case P_INVALID:
 		/* Nothing to do. */
diff --git a/db2/db/db_dispatch.c b/db2/db/db_dispatch.c
index 736575adfc..8645948614 100644
--- a/db2/db/db_dispatch.c
+++ b/db2/db/db_dispatch.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 /*
@@ -43,14 +43,13 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)db_dispatch.c	10.9 (Sleepycat) 1/17/98";
+static const char sccsid[] = "@(#)db_dispatch.c	10.14 (Sleepycat) 5/3/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
 #include <sys/types.h>
 
 #include <errno.h>
-#include <fcntl.h>
 #include <stddef.h>
 #include <stdlib.h>
 #include <string.h>
@@ -62,6 +61,7 @@ static const char sccsid[] = "@(#)db_dispatch.c	10.9 (Sleepycat) 1/17/98";
 #include "db_am.h"
 #include "common_ext.h"
 #include "log_auto.h"
+#include "txn_auto.h"
 
 /*
  * Data structures to manage the DB dispatch table.  The dispatch table
@@ -114,7 +114,7 @@ __db_dispatch(logp, db, lsnp, redo, info)
 		 * seen it, then we call the appropriate recovery routine
 		 * in "abort mode".
 		 */
-		if (rectype == DB_log_register ||
+		if (rectype == DB_log_register || rectype == DB_txn_ckp ||
 		    __db_txnlist_find(info, txnid) == DB_NOTFOUND)
 			return ((dispatch_table[rectype])(logp,
 			    db, lsnp, TXN_UNDO, info));
@@ -124,7 +124,7 @@ __db_dispatch(logp, db, lsnp, redo, info)
 		 * In the forward pass, if we haven't seen the transaction,
 		 * do nothing, else recovery it.
 		 */
-		if (rectype == DB_log_register ||
+		if (rectype == DB_log_register || rectype == DB_txn_ckp ||
 		    __db_txnlist_find(info, txnid) != DB_NOTFOUND)
 			return ((dispatch_table[rectype])(logp,
 			    db, lsnp, TXN_REDO, info));
@@ -188,14 +188,14 @@ int
 __db_txnlist_init(retp)
 	void *retp;
 {
-	__db_txnhead *headp;
+	DB_TXNHEAD *headp;
 
-	if ((headp = (struct __db_txnhead *)
-	    __db_malloc(sizeof(struct __db_txnhead))) == NULL)
+	if ((headp = (DB_TXNHEAD *)__db_malloc(sizeof(DB_TXNHEAD))) == NULL)
 		return (ENOMEM);
 
 	LIST_INIT(&headp->head);
 	headp->maxid = 0;
+	headp->generation = 1;
 
 	*(void **)retp = headp;
 	return (0);
@@ -212,25 +212,26 @@ __db_txnlist_add(listp, txnid)
 	void *listp;
 	u_int32_t txnid;
 {
-	__db_txnhead *hp;
-	__db_txnlist *elp;
+	DB_TXNHEAD *hp;
+	DB_TXNLIST *elp;
 
-	if ((elp = (__db_txnlist *)__db_malloc(sizeof(__db_txnlist))) == NULL)
+	if ((elp = (DB_TXNLIST *)__db_malloc(sizeof(DB_TXNLIST))) == NULL)
 		return (ENOMEM);
 
 	elp->txnid = txnid;
-	hp = (struct __db_txnhead *)listp;
+	hp = (DB_TXNHEAD *)listp;
 	LIST_INSERT_HEAD(&hp->head, elp, links);
 	if (txnid > hp->maxid)
 		hp->maxid = txnid;
+	elp->generation = hp->generation;
 
 	return (0);
 }
 
 /*
  * __db_txnlist_find --
- *	Checks to see if txnid is in the txnid list, returns 1 if found,
- *	0 if not found.
+ *	Checks to see if a txnid with the current generation is in the
+ *	txnid list.  Returns 0 if found, DB_NOTFOUND otherwise.
  *
  * PUBLIC: int __db_txnlist_find __P((void *, u_int32_t));
  */
@@ -239,45 +240,19 @@ __db_txnlist_find(listp, txnid)
 	void *listp;
 	u_int32_t txnid;
 {
-	__db_txnhead *hp;
-	__db_txnlist *p;
+	DB_TXNHEAD *hp;
+	DB_TXNLIST *p;
 
-	if ((hp = (struct __db_txnhead *)listp) == NULL)
+	if ((hp = (DB_TXNHEAD *)listp) == NULL)
 		return (DB_NOTFOUND);
 
-	if (hp->maxid < txnid) {
-		hp->maxid = txnid;
-		return (DB_NOTFOUND);
-	}
-
 	for (p = hp->head.lh_first; p != NULL; p = p->links.le_next)
-		if (p->txnid == txnid)
+		if (p->txnid == txnid && hp->generation == p->generation)
 			return (0);
 
 	return (DB_NOTFOUND);
 }
 
-#ifdef DEBUG
-/*
- * __db_txnlist_print --
- *	Print out the transaction list.
- *
- * PUBLIC: void __db_txnlist_print __P((void *));
- */
-void
-__db_txnlist_print(listp)
-	void *listp;
-{
-	__db_txnhead *hp;
-	__db_txnlist *p;
-
-	hp = (struct __db_txnhead *)listp;
-	printf("Maxid: %lu\n", (u_long)hp->maxid);
-	for (p = hp->head.lh_first; p != NULL; p = p->links.le_next)
-		printf("TXNID: %lu\n", (u_long)p->txnid);
-}
-#endif
-
 /*
  * __db_txnlist_end --
  *	Discard transaction linked list.
@@ -288,13 +263,61 @@ void
 __db_txnlist_end(listp)
 	void *listp;
 {
-	__db_txnhead *hp;
-	__db_txnlist *p;
+	DB_TXNHEAD *hp;
+	DB_TXNLIST *p;
 
-	hp = (struct __db_txnhead *)listp;
+	hp = (DB_TXNHEAD *)listp;
 	while ((p = LIST_FIRST(&hp->head)) != LIST_END(&hp->head)) {
 		LIST_REMOVE(p, links);
 		__db_free(p);
 	}
 	__db_free(listp);
 }
+
+/*
+ * __db_txnlist_gen --
+ *	Change the current generation number.
+ *
+ * PUBLIC: void __db_txnlist_gen __P((void *, int));
+ */
+void
+__db_txnlist_gen(listp, incr)
+	void *listp;
+	int incr;
+{
+	DB_TXNHEAD *hp;
+
+	/*
+	 * During recovery generation numbers keep track of how many "restart"
+	 * checkpoints we've seen.  Restart checkpoints occur whenever we take
+	 * a checkpoint and there are no outstanding transactions.  When that
+	 * happens, we can reset transaction IDs back to 1.  It always happens
+	 * at recovery, and it prevents us from exhausting the transaction ID
+	 * name space.
+	 */
+	hp = (DB_TXNHEAD *)listp;
+	hp->generation += incr;
+}
+
+#ifdef DEBUG
+/*
+ * __db_txnlist_print --
+ *	Print out the transaction list.
+ *
+ * PUBLIC: void __db_txnlist_print __P((void *));
+ */
+void
+__db_txnlist_print(listp)
+	void *listp;
+{
+	DB_TXNHEAD *hp;
+	DB_TXNLIST *p;
+
+	hp = (DB_TXNHEAD *)listp;
+	printf("Maxid: %lu Generation: %lu\n", (u_long)hp->maxid,
+	    (u_long)hp->generation);
+	for (p = hp->head.lh_first; p != NULL; p = p->links.le_next)
+		printf("TXNID: %lu(%lu)\n", (u_long)p->txnid,
+		(u_long)p->generation);
+}
+#endif
diff --git a/db2/db/db_dup.c b/db2/db/db_dup.c
index 59dfb85b92..6379fc1729 100644
--- a/db2/db/db_dup.c
+++ b/db2/db/db_dup.c
@@ -1,35 +1,27 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)db_dup.c	10.11 (Sleepycat) 1/8/98";
+static const char sccsid[] = "@(#)db_dup.c	10.18 (Sleepycat) 5/31/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
 #include <sys/types.h>
-#include <sys/stat.h>
 
 #include <errno.h>
-#include <fcntl.h>
-#include <stddef.h>
-#include <stdio.h>
-#include <stdlib.h>
 #include <string.h>
-#include <unistd.h>
 #endif
 
 #include "db_int.h"
 #include "db_page.h"
-#include "db_swap.h"
 #include "btree.h"
 #include "db_am.h"
-#include "common_ext.h"
 
 static int __db_addpage __P((DB *,
     PAGE **, db_indx_t *, int (*)(DB *, u_int32_t, PAGE **)));
@@ -209,9 +201,8 @@ __db_dsplit(dbp, hp, indxp, size, newfunc)
 	PAGE *h, *np, *tp;
 	BKEYDATA *bk;
 	DBT page_dbt;
-	db_indx_t indx, nindex, oindex, sum;
-	db_indx_t halfbytes, i, lastsum;
-	int did_indx, ret, s;
+	db_indx_t halfbytes, i, indx, lastsum, nindex, oindex, s, sum;
+	int did_indx, ret;
 
 	h = *hp;
 	indx = *indxp;
@@ -219,7 +210,7 @@ __db_dsplit(dbp, hp, indxp, size, newfunc)
 	/* Create a temporary page to do compaction onto. */
 	if ((tp = (PAGE *)__db_malloc(dbp->pgsize)) == NULL)
 		return (ENOMEM);
-#ifdef DEBUG
+#ifdef DIAGNOSTIC
 	memset(tp, 0xff, dbp->pgsize);
 #endif
 	/* Create new page for the split. */
@@ -239,6 +230,7 @@ __db_dsplit(dbp, hp, indxp, size, newfunc)
 	for (sum = 0, lastsum = 0, i = 0; i < NUM_ENT(h); i++) {
 		if (i == indx) {
 			sum += size;
+			did_indx = 1;
 			if (lastsum < halfbytes && sum >= halfbytes) {
 				/* We've crossed the halfway point. */
 				if ((db_indx_t)(halfbytes - lastsum) <
@@ -252,7 +244,6 @@ __db_dsplit(dbp, hp, indxp, size, newfunc)
 			}
 			*indxp = i;
 			lastsum = sum;
-			did_indx = 1;
 		}
 		if (B_TYPE(GET_BKEYDATA(h, i)->type) == B_KEYDATA)
 			sum += BKEYDATA_SIZE(GET_BKEYDATA(h, i)->len);
diff --git a/db2/db/db_overflow.c b/db2/db/db_overflow.c
index 8c6619f228..d28740dcbe 100644
--- a/db2/db/db_overflow.c
+++ b/db2/db/db_overflow.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 /*
@@ -47,22 +47,19 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)db_overflow.c	10.7 (Sleepycat) 11/2/97";
+static const char sccsid[] = "@(#)db_overflow.c	10.11 (Sleepycat) 5/7/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
 #include <sys/types.h>
 
 #include <errno.h>
-#include <stdio.h>
-#include <stdlib.h>
 #include <string.h>
 #endif
 
 #include "db_int.h"
 #include "db_page.h"
 #include "db_am.h"
-#include "common_ext.h"
 
 /*
  * Big key/data code.
@@ -91,9 +88,9 @@ __db_goff(dbp, dbt, tlen, pgno, bpp, bpsz)
 {
 	PAGE *h;
 	db_indx_t bytes;
-	int ret;
 	u_int32_t curoff, needed, start;
 	u_int8_t *p, *src;
+	int ret;
 
 	/*
 	 * Check if the buffer is big enough; if it is not and we are
@@ -259,13 +256,13 @@ __db_poff(dbp, dbt, pgnop, newfunc)
  * __db_ovref --
  *	Increment/decrement the reference count on an overflow page.
  *
- * PUBLIC: int __db_ovref __P((DB *, db_pgno_t, int));
+ * PUBLIC: int __db_ovref __P((DB *, db_pgno_t, int32_t));
  */
 int
 __db_ovref(dbp, pgno, adjust)
 	DB *dbp;
 	db_pgno_t pgno;
-	int adjust;
+	int32_t adjust;
 {
 	PAGE *h;
 	int ret;
@@ -277,7 +274,7 @@ __db_ovref(dbp, pgno, adjust)
 
 	if (DB_LOGGING(dbp))
 		if ((ret = __db_ovref_log(dbp->dbenv->lg_info, dbp->txn,
-		    &LSN(h), 0, dbp->log_fileid, h->pgno, (int32_t)adjust,
+		    &LSN(h), 0, dbp->log_fileid, h->pgno, adjust,
 		    &LSN(h))) != 0)
 			return (ret);
 	OV_REF(h) += adjust;
@@ -353,8 +350,8 @@ __db_moff(dbp, dbt, pgno)
 {
 	PAGE *pagep;
 	u_int32_t cmp_bytes, key_left;
-	int ret;
 	u_int8_t *p1, *p2;
+	int ret;
 
 	/* While there are both keys to compare. */
 	for (ret = 0, p1 = dbt->data,
diff --git a/db2/db/db_pr.c b/db2/db/db_pr.c
index 1135a9e738..a294cdd135 100644
--- a/db2/db/db_pr.c
+++ b/db2/db/db_pr.c
@@ -1,14 +1,14 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)db_pr.c	10.20 (Sleepycat) 1/8/98";
+static const char sccsid[] = "@(#)db_pr.c	10.29 (Sleepycat) 5/23/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -16,7 +16,6 @@ static const char sccsid[] = "@(#)db_pr.c	10.20 (Sleepycat) 1/8/98";
 
 #include <ctype.h>
 #include <errno.h>
-#include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
@@ -160,7 +159,7 @@ __db_prdb(dbp)
 	}
 
 	fprintf(fp, "%s ", t);
-	__db_prflags(dbp->flags, fn);
+	__db_prflags(dbp->flags, fn, fp);
 	fprintf(fp, "\n");
 
 	return (0);
@@ -179,12 +178,16 @@ __db_prbtree(dbp)
 	static const FN mfn[] = {
 		{ BTM_DUP,	"duplicates" },
 		{ BTM_RECNO,	"recno" },
+		{ BTM_RECNUM,	"btree:records" },
+		{ BTM_FIXEDLEN,	"recno:fixed-length" },
+		{ BTM_RENUMBER,	"recno:renumber" },
 		{ 0 },
 	};
 	BTMETA *mp;
 	BTREE *t;
 	EPG *epg;
 	FILE *fp;
+	PAGE *h;
 	RECNO *rp;
 	db_pgno_t i;
 	int ret;
@@ -193,19 +196,29 @@ __db_prbtree(dbp)
 	fp = __db_prinit(NULL);
 
 	(void)fprintf(fp, "%s\nOn-page metadata:\n", DB_LINE);
-	i = PGNO_METADATA;
 
+	i = PGNO_METADATA;
 	if ((ret = __bam_pget(dbp, (PAGE **)&mp, &i, 0)) != 0)
 		return (ret);
 
 	(void)fprintf(fp, "magic %#lx\n", (u_long)mp->magic);
-	(void)fprintf(fp, "version %lu\n", (u_long)mp->version);
+	(void)fprintf(fp, "version %#lx\n", (u_long)mp->version);
 	(void)fprintf(fp, "pagesize %lu\n", (u_long)mp->pagesize);
 	(void)fprintf(fp, "maxkey: %lu minkey: %lu\n",
 	    (u_long)mp->maxkey, (u_long)mp->minkey);
-	(void)fprintf(fp, "free %lu\n", (u_long)mp->free);
-	(void)fprintf(fp, "flags %lu", (u_long)mp->flags);
-	__db_prflags(mp->flags, mfn);
+
+	(void)fprintf(fp, "free %lu", (u_long)mp->free);
+	for (i = mp->free; i != PGNO_INVALID;) {
+		if ((ret = __bam_pget(dbp, &h, &i, 0)) != 0)
+			return (ret);
+		i = h->next_pgno;
+		(void)memp_fput(dbp->mpf, h, 0);
+		(void)fprintf(fp, ", %lu", (u_long)i);
+	}
+	(void)fprintf(fp, "\n");
+
+	(void)fprintf(fp, "flags %#lx", (u_long)mp->flags);
+	__db_prflags(mp->flags, mfn, fp);
 	(void)fprintf(fp, "\n");
 	(void)memp_fput(dbp->mpf, mp, 0);
 
@@ -576,7 +589,7 @@ __db_isbad(h, die)
 	BKEYDATA *bk;
 	FILE *fp;
 	db_indx_t i;
-	int type;
+	u_int type;
 
 	fp = __db_prinit(NULL);
 
@@ -668,7 +681,8 @@ __db_pr(p, len)
 	u_int32_t len;
 {
 	FILE *fp;
-	int i, lastch;
+	u_int lastch;
+	int i;
 
 	fp = __db_prinit(NULL);
 
@@ -681,7 +695,7 @@ __db_pr(p, len)
 			if (isprint(*p) || *p == '\n')
 				fprintf(fp, "%c", *p);
 			else
-				fprintf(fp, "%#x", (u_int)*p);
+				fprintf(fp, "0x%.2x", (u_int)*p);
 		}
 		if (len > 20) {
 			fprintf(fp, "...");
@@ -693,6 +707,50 @@ __db_pr(p, len)
 }
 
 /*
+ * __db_prdbt --
+ *	Print out a DBT data element.
+ *
+ * PUBLIC: int __db_prdbt __P((DBT *, int, FILE *));
+ */
+int
+__db_prdbt(dbtp, checkprint, fp)
+	DBT *dbtp;
+	int checkprint;
+	FILE *fp;
+{
+	static const char hex[] = "0123456789abcdef";
+	u_int8_t *p;
+	u_int32_t len;
+	char hi, lo;
+
+	/*
+	 * !!!
+	 * This is the item format shared by db_dump(1) and db_load(1), so
+	 * it cannot change: with checkprint set, printable bytes are written
+	 * literally (a backslash doubled) and other bytes as a backslash plus
+	 * two hex digits; otherwise every byte is written as two hex digits.
+	 */
+	for (len = dbtp->size, p = dbtp->data; len--; ++p) {
+		hi = hex[(*p & 0xf0) >> 4];
+		lo = hex[*p & 0x0f];
+		if (!checkprint) {
+			if (fprintf(fp, "%c%c", hi, lo) != 2)
+				return (EIO);
+		} else if (!isprint(*p)) {
+			if (fprintf(fp, "\\%c%c", hi, lo) != 3)
+				return (EIO);
+		} else {
+			if (*p == '\\' && fprintf(fp, "\\") != 1)
+				return (EIO);
+			if (fprintf(fp, "%c", *p) != 1)
+				return (EIO);
+		}
+	}
+
+	return (fprintf(fp, "\n") != 1 ? EIO : 0);
+}
+
+/*
  * __db_proff --
  *	Print out an off-page element.
  */
@@ -721,23 +779,21 @@ __db_proff(vp)
  * __db_prflags --
  *	Print out flags values.
  *
- * PUBLIC: void __db_prflags __P((u_int32_t, const FN *));
+ * PUBLIC: void __db_prflags __P((u_int32_t, const FN *, FILE *));
  */
 void
-__db_prflags(flags, fn)
+__db_prflags(flags, fn, fp)
 	u_int32_t flags;
 	FN const *fn;
-{
 	FILE *fp;
+{
 	const FN *fnp;
 	int found;
 	const char *sep;
 
-	fp = __db_prinit(NULL);
-
 	sep = " (";
 	for (found = 0, fnp = fn; fnp->mask != 0; ++fnp)
-		if (fnp->mask & flags) {
+		if (LF_ISSET(fnp->mask)) {
 			fprintf(fp, "%s%s", sep, fnp->name);
 			sep = ", ";
 			found = 1;
diff --git a/db2/db/db_rec.c b/db2/db/db_rec.c
index 48e09e6f23..fe7c807384 100644
--- a/db2/db/db_rec.c
+++ b/db2/db/db_rec.c
@@ -1,30 +1,25 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)db_rec.c	10.12 (Sleepycat) 1/8/98";
+static const char sccsid[] = "@(#)db_rec.c	10.16 (Sleepycat) 4/28/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
 #include <sys/types.h>
 
-#endif
-#include <ctype.h>
-#include <errno.h>
-#include <stddef.h>
-#include <stdlib.h>
 #include <string.h>
+#endif
 
 #include "db_int.h"
 #include "shqueue.h"
 #include "db_page.h"
-#include "db_dispatch.h"
 #include "log.h"
 #include "hash.h"
 #include "btree.h"
@@ -48,7 +43,8 @@ __db_addrem_recover(logp, dbtp, lsnp, redo, info)
 	DB *file_dbp, *mdbp;
 	DB_MPOOLFILE *mpf;
 	PAGE *pagep;
-	int change, cmp_n, cmp_p, ret;
+	u_int32_t change;
+	int cmp_n, cmp_p, ret;
 
 	REC_PRINT(__db_addrem_print);
 	REC_INTRO(__db_addrem_read);
@@ -193,7 +189,8 @@ __db_big_recover(logp, dbtp, lsnp, redo, info)
 	DB *file_dbp, *mdbp;
 	DB_MPOOLFILE *mpf;
 	PAGE *pagep;
-	int change, cmp_n, cmp_p, ret;
+	u_int32_t change;
+	int cmp_n, cmp_p, ret;
 
 	REC_PRINT(__db_big_print);
 	REC_INTRO(__db_big_read);
@@ -503,7 +500,8 @@ __db_addpage_recover(logp, dbtp, lsnp, redo, info)
 	DB *file_dbp, *mdbp;
 	DB_MPOOLFILE *mpf;
 	PAGE *pagep;
-	int change, cmp_n, cmp_p, ret;
+	u_int32_t change;
+	int cmp_n, cmp_p, ret;
 
 	REC_PRINT(__db_addpage_print);
 	REC_INTRO(__db_addpage_read);
@@ -601,8 +599,7 @@ __db_debug_recover(logp, dbtp, lsnp, redo, info)
  * __db_noop_recover --
  *	Recovery function for noop.
  *
- * PUBLIC: int __db_noop_recover
- * PUBLIC:   __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+ * PUBLIC: int __db_noop_recover __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
  */
 int
 __db_noop_recover(logp, dbtp, lsnp, redo, info)
@@ -613,16 +610,30 @@ __db_noop_recover(logp, dbtp, lsnp, redo, info)
 	void *info;
 {
 	__db_noop_args *argp;
-	int ret;
-
-	COMPQUIET(redo, 0);
-	COMPQUIET(logp, NULL);
+	DB *file_dbp, *mdbp;
+	DB_MPOOLFILE *mpf;
+	PAGE *pagep;
+	u_int32_t change;
+	int cmp_n, cmp_p, ret;
 
 	REC_PRINT(__db_noop_print);
-	REC_NOOP_INTRO(__db_noop_read);
+	REC_INTRO(__db_noop_read);
+
+	if ((ret = memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0)
+		goto out;
 
+	cmp_n = log_compare(lsnp, &LSN(pagep));
+	cmp_p = log_compare(&LSN(pagep), &argp->prevlsn);
+	change = 0;
+	if (cmp_p == 0 && redo) {
+		LSN(pagep) = *lsnp;
+		change = DB_MPOOL_DIRTY;
+	} else if (cmp_n == 0 && !redo) {
+		LSN(pagep) = argp->prevlsn;
+		change = DB_MPOOL_DIRTY;
+	}
 	*lsnp = argp->prev_lsn;
-	ret = 0;
+	ret = memp_fput(mpf, pagep, change);
 
-	REC_NOOP_CLOSE;
+out:	REC_CLOSE;
 }
diff --git a/db2/db/db_ret.c b/db2/db/db_ret.c
index 65441aa45a..9d9b599ad6 100644
--- a/db2/db/db_ret.c
+++ b/db2/db/db_ret.c
@@ -1,29 +1,26 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)db_ret.c	10.10 (Sleepycat) 11/28/97";
+static const char sccsid[] = "@(#)db_ret.c	10.13 (Sleepycat) 5/7/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
 #include <sys/types.h>
 
 #include <errno.h>
-#include <stdio.h>
-#include <stdlib.h>
 #include <string.h>
 #endif
 
 #include "db_int.h"
 #include "db_page.h"
 #include "btree.h"
-#include "hash.h"
 #include "db_am.h"
 
 /*
diff --git a/db2/db/db_thread.c b/db2/db/db_thread.c
index d9086918dd..73e2a51286 100644
--- a/db2/db/db_thread.c
+++ b/db2/db/db_thread.c
@@ -1,27 +1,25 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)db_thread.c	8.13 (Sleepycat) 10/25/97";
+static const char sccsid[] = "@(#)db_thread.c	8.15 (Sleepycat) 4/26/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
 #include <sys/types.h>
 
 #include <errno.h>
-#include <stdlib.h>
 #include <string.h>
 #endif
 
 #include "db_int.h"
 #include "db_page.h"
-#include "shqueue.h"
 #include "db_am.h"
 
 static int __db_getlockid __P((DB *, DB *));
diff --git a/db2/db185/db185.c b/db2/db185/db185.c
index 7f6a16de49..893dfa3c7f 100644
--- a/db2/db185/db185.c
+++ b/db2/db185/db185.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 
@@ -9,9 +9,9 @@
 
 #ifndef lint
 static const char copyright[] =
-"@(#) Copyright (c) 1997\n\
+"@(#) Copyright (c) 1996, 1997, 1998\n\
 	Sleepycat Software Inc.  All rights reserved.\n";
-static const char sccsid[] = "@(#)db185.c	8.14 (Sleepycat) 10/25/97";
+static const char sccsid[] = "@(#)db185.c	8.17 (Sleepycat) 5/7/98";
 #endif
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -20,7 +20,6 @@ static const char sccsid[] = "@(#)db185.c	8.14 (Sleepycat) 10/25/97";
 #include <errno.h>
 #include <fcntl.h>
 #include <stdlib.h>
-#include <stdio.h>
 #include <string.h>
 #include <unistd.h>
 #endif
@@ -114,6 +113,16 @@ __dbopen(file, oflags, mode, type, openinfo)
 		 * and DB 2.0 doesn't.
 		 *
 		 * !!!
+		 * Setting the file name to NULL specifies that we're creating
+		 * a temporary backing file, in DB 2.X.  If we're opening the
+		 * DB file read-only, change the flags to read-write, because
+		 * temporary backing files cannot be opened read-only, and DB
+		 * 2.X will return an error.  We are cheating here -- if the
+		 * application does a put on the database, it will succeed --
+		 * although that would be a stupid thing for the application
+		 * to do.
+		 *
+		 * !!!
 		 * Note, the file name in DB 1.85 was a const -- we don't do
 		 * that in DB 2.0, so do that cast.
 		 */
@@ -122,6 +131,10 @@ __dbopen(file, oflags, mode, type, openinfo)
 				(void)__os_close(__os_open(file, oflags, mode));
 			dbinfop->re_source = (char *)file;
 			file = NULL;
+
+			if (O_RDONLY)
+				oflags &= ~O_RDONLY;
+			oflags |= O_RDWR;
 		}
 
 		if ((ri = openinfo) != NULL) {
@@ -181,15 +194,14 @@ __dbopen(file, oflags, mode, type, openinfo)
 	 * Store the returned pointer to the real DB 2.0 structure in the
 	 * internal pointer.  Ugly, but we're not going for pretty, here.
 	 */
-	if ((__set_errno(db_open(file,
-	    type, __db_oflags(oflags), mode, NULL, dbinfop, &dbp))) != 0) {
+	if ((errno = db_open(file,
+	    type, __db_oflags(oflags), mode, NULL, dbinfop, &dbp)) != 0) {
 		__db_free(db185p);
 		return (NULL);
 	}
 
 	/* Create the cursor used for sequential ops. */
-	if ((__set_errno(dbp->cursor(dbp, NULL, &((DB185 *)db185p)->dbc)))
-	    != 0) {
+	if ((errno = dbp->cursor(dbp, NULL, &((DB185 *)db185p)->dbc)) != 0) {
 		s_errno = errno;
 		(void)dbp->close(dbp, 0);
 		__db_free(db185p);
diff --git a/db2/db185/db185_int.h b/db2/db185/db185_int.h
index f3e24b9026..f7d7af5347 100644
--- a/db2/db185/db185_int.h
+++ b/db2/db185/db185_int.h
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1997
+ * Copyright (c) 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 /*
@@ -40,7 +40,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- *	@(#)db185_int.h	8.4 (Sleepycat) 7/27/97
+ *	@(#)db185_int.h	8.7 (Sleepycat) 4/10/98
  */
 
 #ifndef _DB185_H_
@@ -90,11 +90,11 @@ typedef struct __db185 {
 /* Structure used to pass parameters to the btree routines. */
 typedef struct {
 #define	R_DUP		0x01	/* duplicate keys */
-	u_long	flags;
-	u_int	cachesize;	/* bytes to cache */
-	int	maxkeypage;	/* maximum keys per page */
-	int	minkeypage;	/* minimum keys per page */
-	u_int	psize;		/* page size */
+	u_int32_t flags;
+	u_int32_t cachesize;	/* bytes to cache */
+	u_int32_t maxkeypage;	/* maximum keys per page */
+	u_int32_t minkeypage;	/* minimum keys per page */
+	u_int32_t psize;	/* page size */
 	int	(*compare)	/* comparison function */
 	    __P((const DBT *, const DBT *));
 	size_t	(*prefix)	/* prefix function */
@@ -104,10 +104,10 @@ typedef struct {
 
 /* Structure used to pass parameters to the hashing routines. */
 typedef struct {
-	u_int	bsize;		/* bucket size */
-	u_int	ffactor;	/* fill factor */
-	u_int	nelem;		/* number of elements */
-	u_int	cachesize;	/* bytes to cache */
+	u_int32_t bsize;	/* bucket size */
+	u_int32_t ffactor;	/* fill factor */
+	u_int32_t nelem;	/* number of elements */
+	u_int32_t cachesize;	/* bytes to cache */
 	u_int32_t		/* hash function */
 		(*hash) __P((const void *, size_t));
 	int	lorder;		/* byte order */
@@ -118,9 +118,9 @@ typedef struct {
 #define	R_FIXEDLEN	0x01	/* fixed-length records */
 #define	R_NOKEY		0x02	/* key not required */
 #define	R_SNAPSHOT	0x04	/* snapshot the input */
-	u_long	flags;
-	u_int	cachesize;	/* bytes to cache */
-	u_int	psize;		/* page size */
+	u_int32_t flags;
+	u_int32_t cachesize;	/* bytes to cache */
+	u_int32_t psize;	/* page size */
 	int	lorder;		/* byte order */
 	size_t	reclen;		/* record length (fixed-length records) */
 	u_char	bval;		/* delimiting byte (variable-length records */
diff --git a/db2/db_185.h b/db2/db_185.h
index f3b02c71e9..0e1b87879b 100644
--- a/db2/db_185.h
+++ b/db2/db_185.h
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 /*
@@ -36,7 +36,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- *	@(#)db_185.h.src	8.5 (Sleepycat) 1/15/98
+ *	@(#)db_185.h.src	8.7 (Sleepycat) 4/10/98
  */
 
 #ifndef _DB_185_H_
@@ -127,11 +127,11 @@ typedef struct __db {
 /* Structure used to pass parameters to the btree routines. */
 typedef struct {
 #define	R_DUP		0x01	/* duplicate keys */
-	u_long	flags;
-	u_int	cachesize;	/* bytes to cache */
-	int	maxkeypage;	/* maximum keys per page */
-	int	minkeypage;	/* minimum keys per page */
-	u_int	psize;		/* page size */
+	u_int32_t flags;
+	u_int32_t cachesize;	/* bytes to cache */
+	u_int32_t maxkeypage;	/* maximum keys per page */
+	u_int32_t minkeypage;	/* minimum keys per page */
+	u_int32_t psize;	/* page size */
 	int	(*compare)	/* comparison function */
 	    __P((const DBT *, const DBT *));
 	size_t	(*prefix)	/* prefix function */
@@ -144,10 +144,10 @@ typedef struct {
 
 /* Structure used to pass parameters to the hashing routines. */
 typedef struct {
-	u_int	bsize;		/* bucket size */
-	u_int	ffactor;	/* fill factor */
-	u_int	nelem;		/* number of elements */
-	u_int	cachesize;	/* bytes to cache */
+	u_int32_t bsize;	/* bucket size */
+	u_int32_t ffactor;	/* fill factor */
+	u_int32_t nelem;	/* number of elements */
+	u_int32_t cachesize;	/* bytes to cache */
 	u_int32_t		/* hash function */
 		(*hash) __P((const void *, size_t));
 	int	lorder;		/* byte order */
@@ -158,9 +158,9 @@ typedef struct {
 #define	R_FIXEDLEN	0x01	/* fixed-length records */
 #define	R_NOKEY		0x02	/* key not required */
 #define	R_SNAPSHOT	0x04	/* snapshot the input */
-	u_long	flags;
-	u_int	cachesize;	/* bytes to cache */
-	u_int	psize;		/* page size */
+	u_int32_t flags;
+	u_int32_t cachesize;	/* bytes to cache */
+	u_int32_t psize;	/* page size */
 	int	lorder;		/* byte order */
 	size_t	reclen;		/* record length (fixed-length records) */
 	u_char	bval;		/* delimiting byte (variable-length records */
diff --git a/db2/db_int.h b/db2/db_int.h
index eacd3f9806..b48b104a91 100644
--- a/db2/db_int.h
+++ b/db2/db_int.h
@@ -1,10 +1,10 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  *
- *	@(#)db_int.h.src	10.41 (Sleepycat) 1/8/98
+ *	@(#)db_int.h.src	10.62 (Sleepycat) 5/23/98
  */
 
 #ifndef _DB_INTERNAL_H_
@@ -12,8 +12,6 @@
 
 #include "db.h"				/* Standard DB include file. */
 #include "queue.h"
-#include "os_func.h"
-#include "os_ext.h"
 
 /*******************************************************
  * General purpose constants and macros.
@@ -77,8 +75,8 @@
 #define	R_ADDR(base, offset)	((void *)((u_int8_t *)((base)->addr) + offset))
 #define	R_OFFSET(base, p)	((u_int8_t *)(p) - (u_int8_t *)(base)->addr)
 
-/* Free and free-string macros that overwrite memory during debugging. */
-#ifdef DEBUG
+/* Free and free-string macros that overwrite memory. */
+#ifdef DIAGNOSTIC
 #undef	FREE
 #define	FREE(p, len) {							\
 	memset(p, 0xff, len);						\
@@ -117,36 +115,41 @@ typedef struct __fn {
 #undef	DB_LINE
 #define	DB_LINE "=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-="
 
+/* Global variables. */
+typedef struct __db_globals {
+	int db_mutexlocks;		/* DB_MUTEXLOCKS */
+	int db_region_anon;		/* DB_REGION_ANON, DB_REGION_NAME */
+	int db_region_init;		/* DB_REGION_INIT */
+	int db_tsl_spins;		/* DB_TSL_SPINS */
+	int db_pageyield;		/* DB_PAGEYIELD */
+} DB_GLOBALS;
+extern	DB_GLOBALS	__db_global_values;
+#define	DB_GLOBAL(v)	__db_global_values.v
+
 /* Unused, or not-used-yet variable.  "Shut that bloody compiler up!" */
 #define	COMPQUIET(n, v)	(n) = (v)
 
+/*
+ * Win16 needs specific syntax on callback functions.  Nobody else cares.
+ */
+#ifndef	DB_CALLBACK
+#define	DB_CALLBACK	/* Nothing. */
+#endif
+
 /*******************************************************
  * Files.
  *******************************************************/
-#ifndef MAXPATHLEN		/* Maximum path length. */
-#ifdef PATH_MAX
-#define	MAXPATHLEN	PATH_MAX
-#else
+ /*
+  * We use 1024 as the maximum path length.  It's too hard to figure out what
+  * the real path length is, as it was traditionally stored in <sys/param.h>,
+  * and that file isn't always available.
+  */
+#undef	MAXPATHLEN
 #define	MAXPATHLEN	1024
-#endif
-#endif
 
 #define	PATH_DOT	"."	/* Current working directory. */
 #define	PATH_SEPARATOR	"/"	/* Path separator character. */
 
-#ifndef S_IRUSR			/* UNIX specific file permissions. */
-#define	S_IRUSR	0000400		/* R for owner */
-#define	S_IWUSR	0000200		/* W for owner */
-#define	S_IRGRP	0000040		/* R for group */
-#define	S_IWGRP	0000020		/* W for group */
-#define	S_IROTH	0000004		/* R for other */
-#define	S_IWOTH	0000002		/* W for other */
-#endif
-
-#ifndef S_ISDIR			/* UNIX specific: directory test. */
-#define	S_ISDIR(m)	((m & 0170000) == 0040000)
-#endif
-
 /*******************************************************
  * Mutex support.
  *******************************************************/
@@ -176,12 +179,12 @@ typedef unsigned char tsl_t;
 typedef struct _db_mutex_t {
 #ifdef HAVE_SPINLOCKS
 	tsl_t	  tsl_resource;		/* Resource test and set. */
-#ifdef DEBUG
-	u_long	  pid;			/* Lock holder: 0 or process pid. */
+#ifdef DIAGNOSTIC
+	u_int32_t pid;			/* Lock holder: 0 or process pid. */
 #endif
 #else
 	u_int32_t off;			/* Backing file offset. */
-	u_long	  pid;			/* Lock holder: 0 or process pid. */
+	u_int32_t pid;			/* Lock holder: 0 or process pid. */
 #endif
 	u_int32_t spins;		/* Spins before block. */
 	u_int32_t mutex_set_wait;	/* Granted after wait. */
@@ -195,11 +198,11 @@ typedef struct _db_mutex_t {
  *******************************************************/
 /* Lock/unlock a DB thread. */
 #define	DB_THREAD_LOCK(dbp)						\
-	(F_ISSET(dbp, DB_AM_THREAD) ?					\
-	    __db_mutex_lock((db_mutex_t *)(dbp)->mutexp, -1) : 0)
+	if (F_ISSET(dbp, DB_AM_THREAD))					\
+	    (void)__db_mutex_lock((db_mutex_t *)(dbp)->mutexp, -1);
 #define	DB_THREAD_UNLOCK(dbp)						\
-	(F_ISSET(dbp, DB_AM_THREAD) ?					\
-	    __db_mutex_unlock((db_mutex_t *)(dbp)->mutexp, -1) : 0)
+	if (F_ISSET(dbp, DB_AM_THREAD))					\
+	    (void)__db_mutex_unlock((db_mutex_t *)(dbp)->mutexp, -1);
 
 /* Btree/recno local statistics structure. */
 struct __db_bt_lstat;	typedef struct __db_bt_lstat DB_BTREE_LSTAT;
@@ -228,7 +231,7 @@ typedef enum {
 } APPNAME;
 
 /*******************************************************
- * Regions.
+ * Shared memory regions.
  *******************************************************/
 /*
  * The shared memory regions share an initial structure so that the general
@@ -240,16 +243,69 @@ typedef enum {
  */
 typedef struct _rlayout {
 	db_mutex_t lock;		/* Region mutex. */
+#define	DB_REGIONMAGIC	0x120897
+	u_int32_t  valid;		/* Valid magic number. */
 	u_int32_t  refcnt;		/* Region reference count. */
 	size_t	   size;		/* Region length. */
 	int	   majver;		/* Major version number. */
 	int	   minver;		/* Minor version number. */
 	int	   patch;		/* Patch version number. */
+#define	INVALID_SEGID	-1
+	int	   segid;		/* shmget(2) ID, or Win16 segment ID. */
 
-#define	DB_R_DELETED	0x01		/* Region was deleted. */
+#define	REGION_ANONYMOUS	0x01	/* Region is/should be in anon mem. */
 	u_int32_t  flags;
 } RLAYOUT;
 
+/*
+ * DB creates all regions on 4K boundaries out of sheer paranoia, so that
+ * we don't make the underlying VM unhappy.
+ */
+#define	DB_VMPAGESIZE	(4 * 1024)
+#define	DB_ROUNDOFF(i) {						\
+	(i) += DB_VMPAGESIZE - 1;					\
+	(i) -= (i) % DB_VMPAGESIZE;					\
+}
+
+/*
+ * The interface to region attach is nasty: there is a lot of complex state
+ * involved, which has to be retained between create/attach and detach.  The
+ * REGINFO structure keeps track of it.
+ */
+struct __db_reginfo;	typedef struct __db_reginfo REGINFO;
+struct __db_reginfo {
+					/* Arguments. */
+	DB_ENV	   *dbenv;		/* Region naming info. */
+	APPNAME	    appname;		/* Region naming info. */
+	char	   *path;		/* Region naming info. */
+	const char *file;		/* Region naming info. */
+	int	    mode;		/* Region mode, if a file. */
+	size_t	    size;		/* Region size. */
+	u_int32_t   dbflags;		/* Region file open flags, if a file. */
+
+					/* Results. */
+	char	   *name;		/* Region name. */
+	void	   *addr;		/* Region address. */
+	int	    fd;			/* Fcntl(2) locking file descriptor.
+					   NB: this is only valid if a regular
+					   file is backing the shared region,
+					   and mmap(2) is being used to map it
+					   into our address space. */
+	int	    segid;		/* shmget(2) ID, or Win16 segment ID. */
+
+					/* Shared flags. */
+/*				0x0001	COMMON MASK with RLAYOUT structure. */
+#define	REGION_CANGROW		0x0002	/* Can grow. */
+#define	REGION_CREATED		0x0004	/* Created. */
+#define	REGION_HOLDINGSYS	0x0008	/* Holding system resources. */
+#define	REGION_LASTDETACH	0x0010	/* Delete on last detach. */
+#define	REGION_MALLOC		0x0020	/* Created in malloc'd memory. */
+#define	REGION_PRIVATE		0x0040	/* Private to thread/process. */
+#define	REGION_REMOVED		0x0080	/* Already deleted. */
+#define	REGION_SIZEDEF		0x0100	/* Use default region size if exists. */
+	u_int32_t   flags;
+};
+
 /*******************************************************
  * Mpool.
  *******************************************************/
@@ -281,7 +337,7 @@ typedef struct __dbpginfo {
 #define	DB_LOGGING(dbp)							\
 	(F_ISSET(dbp, DB_AM_LOGGING) && !F_ISSET(dbp, DB_AM_RECOVER))
 
-#ifdef DEBUG
+#ifdef DIAGNOSTIC
 /*
  * Debugging macro to log operations.
  *	If DEBUG_WOP is defined, log operations that modify the database.
@@ -318,7 +374,7 @@ typedef struct __dbpginfo {
 #else
 #define	DEBUG_LREAD(D, T, O, K, A, F)
 #define	DEBUG_LWRITE(D, T, O, K, A, F)
-#endif /* DEBUG */
+#endif /* DIAGNOSTIC */
 
 /*******************************************************
  * Transactions and recovery.
@@ -339,4 +395,8 @@ struct __db_txn {
 	size_t		off;		/* Detail structure within region. */
 	TAILQ_ENTRY(__db_txn) links;
 };
+
+#include "os_func.h"
+#include "os_ext.h"
+
 #endif /* !_DB_INTERNAL_H_ */
diff --git a/db2/dbm/dbm.c b/db2/dbm/dbm.c
index 81f4bba69c..261fe81ff2 100644
--- a/db2/dbm/dbm.c
+++ b/db2/dbm/dbm.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 /*
@@ -47,15 +47,14 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)dbm.c	10.10 (Sleepycat) 1/16/98";
+static const char sccsid[] = "@(#)dbm.c	10.16 (Sleepycat) 5/7/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
-#include <sys/param.h>
+#include <sys/types.h>
 
 #include <errno.h>
 #include <fcntl.h>
-#include <stdio.h>
 #include <string.h>
 #endif
 
@@ -82,7 +81,7 @@ __db_dbm_init(file)
 	if (__cur_db != NULL)
 		(void)dbm_close(__cur_db);
 	if ((__cur_db =
-	    dbm_open(file, O_CREAT | O_RDWR, S_IRUSR | S_IWUSR)) != NULL)
+	    dbm_open(file, O_CREAT | O_RDWR, __db_omode("rw----"))) != NULL)
 		return (0);
 	if ((__cur_db = dbm_open(file, O_RDONLY, 0)) != NULL)
 		return (0);
@@ -244,19 +243,19 @@ __db_ndbm_fetch(db, key)
 {
 	DBT _key, _data;
 	datum data;
-	int status;
+	int ret;
 
 	memset(&_key, 0, sizeof(DBT));
 	memset(&_data, 0, sizeof(DBT));
 	_key.size = key.dsize;
 	_key.data = key.dptr;
-	status = db->get((DB *)db, NULL, &_key, &_data, 0);
-	if (status) {
-		data.dptr = NULL;
-		data.dsize = 0;
-	} else {
+	if ((ret = db->get((DB *)db, NULL, &_key, &_data, 0)) == 0) {
 		data.dptr = _data.data;
 		data.dsize = _data.size;
+	} else {
+		data.dptr = NULL;
+		data.dsize = 0;
+		__set_errno (ret == DB_NOTFOUND ? ENOENT : ret);
 	}
 	return (data);
 }
@@ -273,7 +272,7 @@ __db_ndbm_firstkey(db)
 {
 	DBT _key, _data;
 	datum key;
-	int status;
+	int ret;
 
 	DBC *cp;
 
@@ -285,13 +284,13 @@ __db_ndbm_firstkey(db)
 
 	memset(&_key, 0, sizeof(DBT));
 	memset(&_data, 0, sizeof(DBT));
-	status = (cp->c_get)(cp, &_key, &_data, DB_FIRST);
-	if (status) {
-		key.dptr = NULL;
-		key.dsize = 0;
-	} else {
+	if ((ret = (cp->c_get)(cp, &_key, &_data, DB_FIRST)) == 0) {
 		key.dptr = _key.data;
 		key.dsize = _key.size;
+	} else {
+		key.dptr = NULL;
+		key.dsize = 0;
+		__set_errno (ret == DB_NOTFOUND ? ENOENT : ret);
 	}
 	return (key);
 }
@@ -309,7 +308,7 @@ __db_ndbm_nextkey(db)
 	DBC *cp;
 	DBT _key, _data;
 	datum key;
-	int status;
+	int ret;
 
 	if ((cp = TAILQ_FIRST(&db->curs_queue)) == NULL)
 		if ((errno = db->cursor(db, NULL, &cp)) != 0) {
@@ -319,13 +318,13 @@ __db_ndbm_nextkey(db)
 
 	memset(&_key, 0, sizeof(DBT));
 	memset(&_data, 0, sizeof(DBT));
-	status = (cp->c_get)(cp, &_key, &_data, DB_NEXT);
-	if (status) {
-		key.dptr = NULL;
-		key.dsize = 0;
-	} else {
+	if ((ret = (cp->c_get)(cp, &_key, &_data, DB_NEXT)) == 0) {
 		key.dptr = _key.data;
 		key.dsize = _key.size;
+	} else {
+		key.dptr = NULL;
+		key.dsize = 0;
+		__set_errno (ret == DB_NOTFOUND ? ENOENT : ret);
 	}
 	return (key);
 }
@@ -347,14 +346,10 @@ __db_ndbm_delete(db, key)
 	memset(&_key, 0, sizeof(DBT));
 	_key.data = key.dptr;
 	_key.size = key.dsize;
-	ret = (((DB *)db)->del)((DB *)db, NULL, &_key, 0);
-	if (ret < 0)
-		errno = ENOENT;
-	else if (ret > 0) {
-		errno = ret;
-		ret = -1;
-	}
-	return (ret);
+	if ((ret = (((DB *)db)->del)((DB *)db, NULL, &_key, 0)) == 0)
+		return (0);
+	errno = ret == DB_NOTFOUND ? ENOENT : ret;
+	return (-1);
 }
 weak_alias (__db_ndbm_delete, dbm_delete)
 
@@ -371,6 +366,7 @@ __db_ndbm_store(db, key, data, flags)
 	int flags;
 {
 	DBT _key, _data;
+	int ret;
 
 	memset(&_key, 0, sizeof(DBT));
 	memset(&_data, 0, sizeof(DBT));
@@ -378,8 +374,13 @@ __db_ndbm_store(db, key, data, flags)
 	_key.size = key.dsize;
 	_data.data = data.dptr;
 	_data.size = data.dsize;
-	return (db->put((DB *)db,
-	    NULL, &_key, &_data, (flags == DBM_INSERT) ? DB_NOOVERWRITE : 0));
+	if ((ret = db->put((DB *)db, NULL,
+	    &_key, &_data, flags == DBM_INSERT ? DB_NOOVERWRITE : 0)) == 0)
+		return (0);
+	if (ret == DB_KEYEXIST)
+		return (1);
+	errno = ret;
+	return (-1);
 }
 weak_alias (__db_ndbm_store, dbm_store)
 
diff --git a/db2/hash/hash.c b/db2/hash/hash.c
index 5193ece561..5e0660b727 100644
--- a/db2/hash/hash.c
+++ b/db2/hash/hash.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 /*
@@ -47,23 +47,19 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)hash.c	10.36 (Sleepycat) 1/8/98";
+static const char sccsid[] = "@(#)hash.c	10.45 (Sleepycat) 5/11/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
 #include <sys/types.h>
-#include <sys/stat.h>
 
 #include <errno.h>
-#include <fcntl.h>
-#include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-#include <unistd.h>
 #endif
 
-#include "shqueue.h"
 #include "db_int.h"
+#include "shqueue.h"
 #include "db_page.h"
 #include "db_am.h"
 #include "db_ext.h"
@@ -71,20 +67,20 @@ static const char sccsid[] = "@(#)hash.c	10.36 (Sleepycat) 1/8/98";
 #include "log.h"
 
 static int  __ham_c_close __P((DBC *));
-static int  __ham_c_del __P((DBC *, int));
-static int  __ham_c_get __P((DBC *, DBT *, DBT *, int));
-static int  __ham_c_put __P((DBC *, DBT *, DBT *, int));
+static int  __ham_c_del __P((DBC *, u_int32_t));
+static int  __ham_c_get __P((DBC *, DBT *, DBT *, u_int32_t));
+static int  __ham_c_put __P((DBC *, DBT *, DBT *, u_int32_t));
 static int  __ham_c_init __P((DB *, DB_TXN *, DBC **));
 static int  __ham_cursor __P((DB *, DB_TXN *, DBC **));
-static int  __ham_delete __P((DB *, DB_TXN *, DBT *, int));
-static int  __ham_dup_return __P((HTAB *, HASH_CURSOR *, DBT *, int));
-static int  __ham_get __P((DB *, DB_TXN *, DBT *, DBT *, int));
-static void __ham_init_htab __P((HTAB *, u_int));
+static int  __ham_delete __P((DB *, DB_TXN *, DBT *, u_int32_t));
+static int  __ham_dup_return __P((HTAB *, HASH_CURSOR *, DBT *, u_int32_t));
+static int  __ham_get __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t));
+static void __ham_init_htab __P((HTAB *, u_int32_t, u_int32_t));
 static int  __ham_lookup __P((HTAB *,
 		HASH_CURSOR *, const DBT *, u_int32_t, db_lockmode_t));
 static int  __ham_overwrite __P((HTAB *, HASH_CURSOR *, DBT *));
-static int  __ham_put __P((DB *, DB_TXN *, DBT *, DBT *, int));
-static int  __ham_sync __P((DB *, int));
+static int  __ham_put __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t));
+static int  __ham_sync __P((DB *, u_int32_t));
 
 /************************** INTERFACE ROUTINES ***************************/
 /* OPEN/CLOSE */
@@ -175,9 +171,9 @@ __ham_open(dbp, dbinfo)
 			goto out;
 		}
 
-		hashp->hdr->ffactor =
-		    dbinfo != NULL && dbinfo->h_ffactor ? dbinfo->h_ffactor : 0;
-		__ham_init_htab(hashp, dbinfo != NULL ? dbinfo->h_nelem : 0);
+		__ham_init_htab(hashp,
+		    dbinfo != NULL ? dbinfo->h_nelem : 0,
+		    dbinfo != NULL ? dbinfo->h_ffactor : 0);
 		if (F_ISSET(dbp, DB_AM_DUP))
 			F_SET(hashp->hdr, DB_HASH_DUP);
 		if ((ret = __ham_dirty_page(hashp, (PAGE *)hashp->hdr)) != 0)
@@ -230,7 +226,7 @@ out:	(void)__ham_close(dbp);
 }
 
 /*
- * PUBLIC: int  __ham_close __P((DB *));
+ * PUBLIC: int __ham_close __P((DB *));
  */
 int
 __ham_close(dbp)
@@ -264,13 +260,14 @@ __ham_close(dbp)
  * Returns 0 on No Error
  */
 static void
-__ham_init_htab(hashp, nelem)
+__ham_init_htab(hashp, nelem, ffactor)
 	HTAB *hashp;
-	u_int nelem;
+	u_int32_t nelem, ffactor;
 {
 	int32_t l2, nbuckets;
 
-	hashp->hdr->nelem = 0;
+	memset(hashp->hdr, 0, sizeof(HASHHDR));
+	hashp->hdr->ffactor = ffactor;
 	hashp->hdr->pagesize = hashp->dbp->pgsize;
 	ZERO_LSN(hashp->hdr->lsn);
 	hashp->hdr->magic = DB_HASHMAGIC;
@@ -287,8 +284,6 @@ __ham_init_htab(hashp, nelem)
 
 	nbuckets = 1 << l2;
 
-	hashp->hdr->spares[l2] = 0;
-	hashp->hdr->spares[l2 + 1] = 0;
 	hashp->hdr->ovfl_point = l2;
 	hashp->hdr->last_freed = PGNO_INVALID;
 
@@ -310,7 +305,7 @@ __ham_init_htab(hashp, nelem)
 static int
 __ham_sync(dbp, flags)
 	DB *dbp;
-	int flags;
+	u_int32_t flags;
 {
 	int ret;
 
@@ -342,10 +337,9 @@ __ham_get(dbp, txn, key, data, flags)
 	DB_TXN *txn;
 	DBT *key;
 	DBT *data;
-	int flags;
+	u_int32_t flags;
 {
 	DB *ldbp;
-	DBC *cp;
 	HTAB *hashp;
 	HASH_CURSOR *hcp;
 	int ret, t_ret;
@@ -362,7 +356,6 @@ __ham_get(dbp, txn, key, data, flags)
 	hashp = (HTAB *)ldbp->internal;
 	SET_LOCKER(ldbp, txn);
 	GET_META(ldbp, hashp);
-	cp = TAILQ_FIRST(&ldbp->curs_queue);
 
 	hashp->hash_accesses++;
 	hcp = (HASH_CURSOR *)TAILQ_FIRST(&ldbp->curs_queue)->internal;
@@ -386,14 +379,14 @@ __ham_put(dbp, txn, key, data, flags)
 	DB_TXN *txn;
 	DBT *key;
 	DBT *data;
-	int flags;
+	u_int32_t flags;
 {
 	DB *ldbp;
-	HTAB *hashp;
-	HASH_CURSOR *hcp;
 	DBT tmp_val, *myval;
-	int ret, t_ret;
+	HASH_CURSOR *hcp;
+	HTAB *hashp;
 	u_int32_t nbytes;
+	int ret, t_ret;
 
 	DEBUG_LWRITE(dbp, txn, "ham_put", key, data, flags);
 	if ((ret = __db_putchk(dbp, key, data,
@@ -531,7 +524,7 @@ __ham_delete(dbp, txn, key, flags)
 	DB *dbp;
 	DB_TXN *txn;
 	DBT *key;
-	int flags;
+	u_int32_t flags;
 {
 	DB *ldbp;
 	HTAB *hashp;
@@ -539,7 +532,8 @@ __ham_delete(dbp, txn, key, flags)
 	int ret, t_ret;
 
 	DEBUG_LWRITE(dbp, txn, "ham_delete", key, NULL, flags);
-	if ((ret = __db_delchk(dbp, flags, F_ISSET(dbp, DB_AM_RDONLY))) != 0)
+	if ((ret =
+	    __db_delchk(dbp, key, flags, F_ISSET(dbp, DB_AM_RDONLY))) != 0)
 		return (ret);
 
 	ldbp = dbp;
@@ -639,12 +633,12 @@ __ham_c_iclose(dbp, dbc)
 static int
 __ham_c_del(cursor, flags)
 	DBC *cursor;
-	int flags;
+	u_int32_t flags;
 {
 	DB *ldbp;
-	HTAB *hashp;
 	HASH_CURSOR *hcp;
 	HASH_CURSOR save_curs;
+	HTAB *hashp;
 	db_pgno_t ppgno, chg_pgno;
 	int ret, t_ret;
 
@@ -756,7 +750,7 @@ __ham_c_del(cursor, flags)
 normal:		ret = __ham_del_pair(hashp, hcp, 1);
 
 out:	if ((t_ret = __ham_item_done(hashp, hcp, ret == 0)) != 0 && ret == 0)
-		t_ret = ret;
+		ret = t_ret;
 	if (ret != 0)
 		*hcp = save_curs;
 	RELEASE_META(hashp->dbp, hashp);
@@ -770,7 +764,7 @@ __ham_c_get(cursor, key, data, flags)
 	DBC *cursor;
 	DBT *key;
 	DBT *data;
-	int flags;
+	u_int32_t flags;
 {
 	DB *ldbp;
 	HTAB *hashp;
@@ -805,7 +799,7 @@ __ham_c_get(cursor, key, data, flags)
 			ret = __ham_item_prev(hashp, hcp, DB_LOCK_READ);
 			break;
 		}
-		/* FALL THROUGH */
+		/* FALLTHROUGH */
 	case DB_LAST:
 		ret = __ham_item_last(hashp, hcp, DB_LOCK_READ);
 		break;
@@ -893,7 +887,7 @@ __ham_c_get(cursor, key, data, flags)
 		}
 	}
 out1:	if ((t_ret = __ham_item_done(hashp, hcp, 0)) != 0 && ret == 0)
-		t_ret = ret;
+		ret = t_ret;
 out:	if (ret)
 		*hcp = save_curs;
 	RELEASE_META(hashp->dbp, hashp);
@@ -907,17 +901,17 @@ __ham_c_put(cursor, key, data, flags)
 	DBC *cursor;
 	DBT *key;
 	DBT *data;
-	int flags;
+	u_int32_t flags;
 {
 	DB *ldbp;
-	HTAB *hashp;
 	HASH_CURSOR *hcp, save_curs;
-	int ret, t_ret;
+	HTAB *hashp;
 	u_int32_t nbytes;
+	int ret, t_ret;
 
 	DEBUG_LWRITE(cursor->dbp, cursor->txn, "ham_c_put",
 	    flags == DB_KEYFIRST || flags == DB_KEYLAST ? key : NULL,
-	    NULL, flags);
+	    data, flags);
 	ldbp = cursor->dbp;
 	if (F_ISSET(cursor->dbp, DB_AM_THREAD) &&
 	    (ret = __db_gethandle(cursor->dbp, __ham_hdup, &ldbp)) != 0)
@@ -1087,14 +1081,14 @@ __ham_dup_return(hashp, hcp, val, flags)
 	HTAB *hashp;
 	HASH_CURSOR *hcp;
 	DBT *val;
-	int flags;
+	u_int32_t flags;
 {
 	PAGE *pp;
 	DBT *myval, tmp_val;
 	db_indx_t ndx;
 	db_pgno_t pgno;
 	u_int8_t *hk, type;
-	int indx, ret;
+	int ret;
 	db_indx_t len;
 
 	/* Check for duplicate and return the first one. */
@@ -1145,7 +1139,6 @@ __ham_dup_return(hashp, hcp, val, flags)
 			memcpy(&pgno, HOFFDUP_PGNO(P_ENTRY(hcp->pagep, ndx)),
 			    sizeof(db_pgno_t));
 			if (flags == DB_LAST || flags == DB_PREV) {
-				indx = (int)hcp->dndx;
 				if ((ret = __db_dend(hashp->dbp,
 				    pgno, &hcp->dpagep)) != 0)
 					return (ret);
@@ -1451,14 +1444,15 @@ __ham_c_update(hcp, chg_pgno, len, add, is_dup)
  * __ham_hdup --
  *	This function gets called when we create a duplicate handle for a
  *	threaded DB.  It should create the private part of the DB structure.
+ *
  * PUBLIC: int  __ham_hdup __P((DB *, DB *));
  */
 int
 __ham_hdup(orig, new)
 	DB *orig, *new;
 {
-	HTAB *hashp;
 	DBC *curs;
+	HTAB *hashp;
 	int ret;
 
 	if ((hashp = (HTAB *)__db_malloc(sizeof(HTAB))) == NULL)
diff --git a/db2/hash/hash.src b/db2/hash/hash.src
index 8cbcee73f7..8a512830b8 100644
--- a/db2/hash/hash.src
+++ b/db2/hash/hash.src
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 /*
@@ -43,11 +43,9 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- *	@(#)hash.src	10.2 (Sleepycat) 11/2/97
+ *	@(#)hash.src	10.3 (Sleepycat) 4/10/98
  */
 
-#include "config.h"
-
 /*
  * This is the source file used to create the logging functions for the
  * hash package.  Each access method (or set of routines wishing to register
diff --git a/db2/hash/hash_auto.c b/db2/hash/hash_auto.c
index 830ea46a4e..41b1ebed01 100644
--- a/db2/hash/hash_auto.c
+++ b/db2/hash/hash_auto.c
@@ -15,8 +15,6 @@
 #include "db_dispatch.h"
 #include "hash.h"
 #include "db_am.h"
-#include "common_ext.h"
-
 /*
  * PUBLIC: int __ham_insdel_log
  * PUBLIC:     __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
@@ -104,7 +102,7 @@ int __ham_insdel_log(logp, txnid, ret_lsnp, flags,
 		memcpy(bp, data->data, data->size);
 		bp += data->size;
 	}
-#ifdef DEBUG
+#ifdef DIAGNOSTIC
 	if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size)
 		fprintf(stderr, "Error in log record length");
 #endif
@@ -120,22 +118,23 @@ int __ham_insdel_log(logp, txnid, ret_lsnp, flags,
  * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
  */
 int
-__ham_insdel_print(notused1, dbtp, lsnp, notused3, notused4)
+__ham_insdel_print(notused1, dbtp, lsnp, notused2, notused3)
 	DB_LOG *notused1;
 	DBT *dbtp;
 	DB_LSN *lsnp;
-	int notused3;
-	void *notused4;
+	int notused2;
+	void *notused3;
 {
 	__ham_insdel_args *argp;
 	u_int32_t i;
-	int c, ret;
+	u_int ch;
+	int ret;
 
 	i = 0;
-	c = 0;
+	ch = 0;
 	notused1 = NULL;
-	notused3 = 0;
-	notused4 = NULL;
+	notused2 = 0;
+	notused3 = NULL;
 
 	if ((ret = __ham_insdel_read(dbtp->data, &argp)) != 0)
 		return (ret);
@@ -154,20 +153,20 @@ __ham_insdel_print(notused1, dbtp, lsnp, notused3, notused4)
 	    (u_long)argp->pagelsn.file, (u_long)argp->pagelsn.offset);
 	printf("\tkey: ");
 	for (i = 0; i < argp->key.size; i++) {
-		c = ((char *)argp->key.data)[i];
-		if (isprint(c) || c == 0xa)
-			putchar(c);
+		ch = ((u_int8_t *)argp->key.data)[i];
+		if (isprint(ch) || ch == 0xa)
+			putchar(ch);
 		else
-			printf("%#x ", c);
+			printf("%#x ", ch);
 	}
 	printf("\n");
 	printf("\tdata: ");
 	for (i = 0; i < argp->data.size; i++) {
-		c = ((char *)argp->data.data)[i];
-		if (isprint(c) || c == 0xa)
-			putchar(c);
+		ch = ((u_int8_t *)argp->data.data)[i];
+		if (isprint(ch) || ch == 0xa)
+			putchar(ch);
 		else
-			printf("%#x ", c);
+			printf("%#x ", ch);
 	}
 	printf("\n");
 	printf("\n");
@@ -300,7 +299,7 @@ int __ham_newpage_log(logp, txnid, ret_lsnp, flags,
 	else
 		memset(bp, 0, sizeof(*nextlsn));
 	bp += sizeof(*nextlsn);
-#ifdef DEBUG
+#ifdef DIAGNOSTIC
 	if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size)
 		fprintf(stderr, "Error in log record length");
 #endif
@@ -316,22 +315,23 @@ int __ham_newpage_log(logp, txnid, ret_lsnp, flags,
  * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
  */
 int
-__ham_newpage_print(notused1, dbtp, lsnp, notused3, notused4)
+__ham_newpage_print(notused1, dbtp, lsnp, notused2, notused3)
 	DB_LOG *notused1;
 	DBT *dbtp;
 	DB_LSN *lsnp;
-	int notused3;
-	void *notused4;
+	int notused2;
+	void *notused3;
 {
 	__ham_newpage_args *argp;
 	u_int32_t i;
-	int c, ret;
+	u_int ch;
+	int ret;
 
 	i = 0;
-	c = 0;
+	ch = 0;
 	notused1 = NULL;
-	notused3 = 0;
-	notused4 = NULL;
+	notused2 = 0;
+	notused3 = NULL;
 
 	if ((ret = __ham_newpage_read(dbtp->data, &argp)) != 0)
 		return (ret);
@@ -462,7 +462,7 @@ int __ham_splitmeta_log(logp, txnid, ret_lsnp, flags,
 	else
 		memset(bp, 0, sizeof(*metalsn));
 	bp += sizeof(*metalsn);
-#ifdef DEBUG
+#ifdef DIAGNOSTIC
 	if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size)
 		fprintf(stderr, "Error in log record length");
 #endif
@@ -478,22 +478,23 @@ int __ham_splitmeta_log(logp, txnid, ret_lsnp, flags,
  * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
  */
 int
-__ham_splitmeta_print(notused1, dbtp, lsnp, notused3, notused4)
+__ham_splitmeta_print(notused1, dbtp, lsnp, notused2, notused3)
 	DB_LOG *notused1;
 	DBT *dbtp;
 	DB_LSN *lsnp;
-	int notused3;
-	void *notused4;
+	int notused2;
+	void *notused3;
 {
 	__ham_splitmeta_args *argp;
 	u_int32_t i;
-	int c, ret;
+	u_int ch;
+	int ret;
 
 	i = 0;
-	c = 0;
+	ch = 0;
 	notused1 = NULL;
-	notused3 = 0;
-	notused4 = NULL;
+	notused2 = 0;
+	notused3 = NULL;
 
 	if ((ret = __ham_splitmeta_read(dbtp->data, &argp)) != 0)
 		return (ret);
@@ -622,7 +623,7 @@ int __ham_splitdata_log(logp, txnid, ret_lsnp, flags,
 	else
 		memset(bp, 0, sizeof(*pagelsn));
 	bp += sizeof(*pagelsn);
-#ifdef DEBUG
+#ifdef DIAGNOSTIC
 	if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size)
 		fprintf(stderr, "Error in log record length");
 #endif
@@ -638,22 +639,23 @@ int __ham_splitdata_log(logp, txnid, ret_lsnp, flags,
  * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
  */
 int
-__ham_splitdata_print(notused1, dbtp, lsnp, notused3, notused4)
+__ham_splitdata_print(notused1, dbtp, lsnp, notused2, notused3)
 	DB_LOG *notused1;
 	DBT *dbtp;
 	DB_LSN *lsnp;
-	int notused3;
-	void *notused4;
+	int notused2;
+	void *notused3;
 {
 	__ham_splitdata_args *argp;
 	u_int32_t i;
-	int c, ret;
+	u_int ch;
+	int ret;
 
 	i = 0;
-	c = 0;
+	ch = 0;
 	notused1 = NULL;
-	notused3 = 0;
-	notused4 = NULL;
+	notused2 = 0;
+	notused3 = NULL;
 
 	if ((ret = __ham_splitdata_read(dbtp->data, &argp)) != 0)
 		return (ret);
@@ -669,11 +671,11 @@ __ham_splitdata_print(notused1, dbtp, lsnp, notused3, notused4)
 	printf("\tpgno: %lu\n", (u_long)argp->pgno);
 	printf("\tpageimage: ");
 	for (i = 0; i < argp->pageimage.size; i++) {
-		c = ((char *)argp->pageimage.data)[i];
-		if (isprint(c) || c == 0xa)
-			putchar(c);
+		ch = ((u_int8_t *)argp->pageimage.data)[i];
+		if (isprint(ch) || ch == 0xa)
+			putchar(ch);
 		else
-			printf("%#x ", c);
+			printf("%#x ", ch);
 	}
 	printf("\n");
 	printf("\tpagelsn: [%lu][%lu]\n",
@@ -813,7 +815,7 @@ int __ham_replace_log(logp, txnid, ret_lsnp, flags,
 	}
 	memcpy(bp, &makedup, sizeof(makedup));
 	bp += sizeof(makedup);
-#ifdef DEBUG
+#ifdef DIAGNOSTIC
 	if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size)
 		fprintf(stderr, "Error in log record length");
 #endif
@@ -829,22 +831,23 @@ int __ham_replace_log(logp, txnid, ret_lsnp, flags,
  * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
  */
 int
-__ham_replace_print(notused1, dbtp, lsnp, notused3, notused4)
+__ham_replace_print(notused1, dbtp, lsnp, notused2, notused3)
 	DB_LOG *notused1;
 	DBT *dbtp;
 	DB_LSN *lsnp;
-	int notused3;
-	void *notused4;
+	int notused2;
+	void *notused3;
 {
 	__ham_replace_args *argp;
 	u_int32_t i;
-	int c, ret;
+	u_int ch;
+	int ret;
 
 	i = 0;
-	c = 0;
+	ch = 0;
 	notused1 = NULL;
-	notused3 = 0;
-	notused4 = NULL;
+	notused2 = 0;
+	notused3 = NULL;
 
 	if ((ret = __ham_replace_read(dbtp->data, &argp)) != 0)
 		return (ret);
@@ -863,20 +866,20 @@ __ham_replace_print(notused1, dbtp, lsnp, notused3, notused4)
 	printf("\toff: %ld\n", (long)argp->off);
 	printf("\tolditem: ");
 	for (i = 0; i < argp->olditem.size; i++) {
-		c = ((char *)argp->olditem.data)[i];
-		if (isprint(c) || c == 0xa)
-			putchar(c);
+		ch = ((u_int8_t *)argp->olditem.data)[i];
+		if (isprint(ch) || ch == 0xa)
+			putchar(ch);
 		else
-			printf("%#x ", c);
+			printf("%#x ", ch);
 	}
 	printf("\n");
 	printf("\tnewitem: ");
 	for (i = 0; i < argp->newitem.size; i++) {
-		c = ((char *)argp->newitem.data)[i];
-		if (isprint(c) || c == 0xa)
-			putchar(c);
+		ch = ((u_int8_t *)argp->newitem.data)[i];
+		if (isprint(ch) || ch == 0xa)
+			putchar(ch);
 		else
-			printf("%#x ", c);
+			printf("%#x ", ch);
 	}
 	printf("\n");
 	printf("\tmakedup: %lu\n", (u_long)argp->makedup);
@@ -1014,7 +1017,7 @@ int __ham_newpgno_log(logp, txnid, ret_lsnp, flags,
 	else
 		memset(bp, 0, sizeof(*metalsn));
 	bp += sizeof(*metalsn);
-#ifdef DEBUG
+#ifdef DIAGNOSTIC
 	if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size)
 		fprintf(stderr, "Error in log record length");
 #endif
@@ -1030,22 +1033,23 @@ int __ham_newpgno_log(logp, txnid, ret_lsnp, flags,
  * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
  */
 int
-__ham_newpgno_print(notused1, dbtp, lsnp, notused3, notused4)
+__ham_newpgno_print(notused1, dbtp, lsnp, notused2, notused3)
 	DB_LOG *notused1;
 	DBT *dbtp;
 	DB_LSN *lsnp;
-	int notused3;
-	void *notused4;
+	int notused2;
+	void *notused3;
 {
 	__ham_newpgno_args *argp;
 	u_int32_t i;
-	int c, ret;
+	u_int ch;
+	int ret;
 
 	i = 0;
-	c = 0;
+	ch = 0;
 	notused1 = NULL;
-	notused3 = 0;
-	notused4 = NULL;
+	notused2 = 0;
+	notused3 = NULL;
 
 	if ((ret = __ham_newpgno_read(dbtp->data, &argp)) != 0)
 		return (ret);
@@ -1182,7 +1186,7 @@ int __ham_ovfl_log(logp, txnid, ret_lsnp, flags,
 	else
 		memset(bp, 0, sizeof(*metalsn));
 	bp += sizeof(*metalsn);
-#ifdef DEBUG
+#ifdef DIAGNOSTIC
 	if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size)
 		fprintf(stderr, "Error in log record length");
 #endif
@@ -1198,22 +1202,23 @@ int __ham_ovfl_log(logp, txnid, ret_lsnp, flags,
  * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
  */
 int
-__ham_ovfl_print(notused1, dbtp, lsnp, notused3, notused4)
+__ham_ovfl_print(notused1, dbtp, lsnp, notused2, notused3)
 	DB_LOG *notused1;
 	DBT *dbtp;
 	DB_LSN *lsnp;
-	int notused3;
-	void *notused4;
+	int notused2;
+	void *notused3;
 {
 	__ham_ovfl_args *argp;
 	u_int32_t i;
-	int c, ret;
+	u_int ch;
+	int ret;
 
 	i = 0;
-	c = 0;
+	ch = 0;
 	notused1 = NULL;
-	notused3 = 0;
-	notused4 = NULL;
+	notused2 = 0;
+	notused3 = NULL;
 
 	if ((ret = __ham_ovfl_read(dbtp->data, &argp)) != 0)
 		return (ret);
@@ -1364,7 +1369,7 @@ int __ham_copypage_log(logp, txnid, ret_lsnp, flags,
 		memcpy(bp, page->data, page->size);
 		bp += page->size;
 	}
-#ifdef DEBUG
+#ifdef DIAGNOSTIC
 	if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size)
 		fprintf(stderr, "Error in log record length");
 #endif
@@ -1380,22 +1385,23 @@ int __ham_copypage_log(logp, txnid, ret_lsnp, flags,
  * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
  */
 int
-__ham_copypage_print(notused1, dbtp, lsnp, notused3, notused4)
+__ham_copypage_print(notused1, dbtp, lsnp, notused2, notused3)
 	DB_LOG *notused1;
 	DBT *dbtp;
 	DB_LSN *lsnp;
-	int notused3;
-	void *notused4;
+	int notused2;
+	void *notused3;
 {
 	__ham_copypage_args *argp;
 	u_int32_t i;
-	int c, ret;
+	u_int ch;
+	int ret;
 
 	i = 0;
-	c = 0;
+	ch = 0;
 	notused1 = NULL;
-	notused3 = 0;
-	notused4 = NULL;
+	notused2 = 0;
+	notused3 = NULL;
 
 	if ((ret = __ham_copypage_read(dbtp->data, &argp)) != 0)
 		return (ret);
@@ -1418,11 +1424,11 @@ __ham_copypage_print(notused1, dbtp, lsnp, notused3, notused4)
 	    (u_long)argp->nnextlsn.file, (u_long)argp->nnextlsn.offset);
 	printf("\tpage: ");
 	for (i = 0; i < argp->page.size; i++) {
-		c = ((char *)argp->page.data)[i];
-		if (isprint(c) || c == 0xa)
-			putchar(c);
+		ch = ((u_int8_t *)argp->page.data)[i];
+		if (isprint(ch) || ch == 0xa)
+			putchar(ch);
 		else
-			printf("%#x ", c);
+			printf("%#x ", ch);
 	}
 	printf("\n");
 	printf("\n");
diff --git a/db2/hash/hash_conv.c b/db2/hash/hash_conv.c
index 9cebe72390..c6d0ba4d4e 100644
--- a/db2/hash/hash_conv.c
+++ b/db2/hash/hash_conv.c
@@ -1,13 +1,13 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)hash_conv.c	10.4 (Sleepycat) 9/15/97";
+static const char sccsid[] = "@(#)hash_conv.c	10.5 (Sleepycat) 4/10/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
diff --git a/db2/hash/hash_debug.c b/db2/hash/hash_debug.c
index 979ddd7b87..232906ae34 100644
--- a/db2/hash/hash_debug.c
+++ b/db2/hash/hash_debug.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 /*
@@ -43,7 +43,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)hash_debug.c	10.2 (Sleepycat) 6/21/97";
+static const char sccsid[] = "@(#)hash_debug.c	10.6 (Sleepycat) 5/7/98";
 #endif /* not lint */
 
 #ifdef DEBUG
@@ -60,9 +60,6 @@ static const char sccsid[] = "@(#)hash_debug.c	10.2 (Sleepycat) 6/21/97";
  */
 #ifndef NO_SYSTEM_INCLUDES
 #include <sys/types.h>
-
-#include <stdio.h>
-#include <string.h>
 #endif
 
 #include "db_int.h"
@@ -83,10 +80,9 @@ __ham_dump_bucket(hashp, bucket)
 {
 	PAGE *p;
 	db_pgno_t pgno;
-	int ret;
 
 	for (pgno = BUCKET_TO_PAGE(hashp, bucket); pgno != PGNO_INVALID;) {
-		if ((ret = memp_fget(hashp->dbp->mpf, &pgno, 0, &p)) != 0)
+		if (memp_fget(hashp->dbp->mpf, &pgno, 0, &p) != 0)
 			break;
 		(void)__db_prpage(p, 1);
 		pgno = p->next_pgno;
diff --git a/db2/hash/hash_dup.c b/db2/hash/hash_dup.c
index f8b0adb933..ba248ddb17 100644
--- a/db2/hash/hash_dup.c
+++ b/db2/hash/hash_dup.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 /*
@@ -42,7 +42,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)hash_dup.c	10.10 (Sleepycat) 1/8/98";
+static const char sccsid[] = "@(#)hash_dup.c	10.14 (Sleepycat) 5/7/98";
 #endif /* not lint */
 
 /*
@@ -61,15 +61,11 @@ static const char sccsid[] = "@(#)hash_dup.c	10.10 (Sleepycat) 1/8/98";
 #ifndef NO_SYSTEM_INCLUDES
 #include <sys/types.h>
 
-#include <stdio.h>
-#include <stdlib.h>
 #include <string.h>
-#include <unistd.h>
 #endif
 
 #include "db_int.h"
 #include "db_page.h"
-#include "db_swap.h"
 #include "hash.h"
 
 static int __ham_check_move __P((HTAB *, HASH_CURSOR *, int32_t));
@@ -89,14 +85,14 @@ static int __ham_make_dup __P((const DBT *, DBT *d, void **, u_int32_t *));
  * Case 4: The element is large enough to push the duplicate set onto a
  *	   separate page.
  *
- * PUBLIC: int __ham_add_dup __P((HTAB *, HASH_CURSOR *, DBT *, int));
+ * PUBLIC: int __ham_add_dup __P((HTAB *, HASH_CURSOR *, DBT *, u_int32_t));
  */
 int
 __ham_add_dup(hashp, hcp, nval, flags)
 	HTAB *hashp;
 	HASH_CURSOR *hcp;
 	DBT *nval;
-	int flags;
+	u_int32_t flags;
 {
 	DBT pval, tmp_val;
 	u_int32_t del_len, new_size;
@@ -367,9 +363,9 @@ __ham_check_move(hashp, hcp, add_len)
 	DB_LSN new_lsn;
 	PAGE *next_pagep;
 	db_pgno_t next_pgno;
-	int rectype, ret;
-	u_int32_t new_datalen, old_len;
+	u_int32_t new_datalen, old_len, rectype;
 	u_int8_t *hk;
+	int ret;
 
 	/*
 	 * Check if we can do whatever we need to on this page.  If not,
@@ -419,7 +415,8 @@ __ham_check_move(hashp, hcp, add_len)
 		    (ret = __ham_put_page(hashp->dbp, next_pagep, 0)) != 0)
 			return (ret);
 
-		if ((ret = __ham_get_page(hashp->dbp, next_pgno, &next_pagep)) != 0)
+		if ((ret =
+		    __ham_get_page(hashp->dbp, next_pgno, &next_pagep)) != 0)
 			return (ret);
 
 		if (P_FREESPACE(next_pagep) >= new_datalen)
diff --git a/db2/hash/hash_func.c b/db2/hash/hash_func.c
index 1bf12c4948..9131098e5e 100644
--- a/db2/hash/hash_func.c
+++ b/db2/hash/hash_func.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 /*
@@ -47,7 +47,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)hash_func.c	10.7 (Sleepycat) 9/16/97";
+static const char sccsid[] = "@(#)hash_func.c	10.8 (Sleepycat) 4/10/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
diff --git a/db2/hash/hash_page.c b/db2/hash/hash_page.c
index 09a4a0c374..ce692f2e41 100644
--- a/db2/hash/hash_page.c
+++ b/db2/hash/hash_page.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 /*
@@ -47,7 +47,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)hash_page.c	10.31 (Sleepycat) 1/8/98";
+static const char sccsid[] = "@(#)hash_page.c	10.40 (Sleepycat) 6/2/98";
 #endif /* not lint */
 
 /*
@@ -70,15 +70,11 @@ static const char sccsid[] = "@(#)hash_page.c	10.31 (Sleepycat) 1/8/98";
 #include <sys/types.h>
 
 #include <errno.h>
-#include <stdio.h>
-#include <stdlib.h>
 #include <string.h>
-#include <unistd.h>
 #endif
 
 #include "db_int.h"
 #include "db_page.h"
-#include "db_swap.h"
 #include "hash.h"
 
 static int __ham_lock_bucket __P((DB *, HASH_CURSOR *, db_lockmode_t));
@@ -266,6 +262,7 @@ __ham_item_last(hashp, cursorp, mode)
 	F_SET(cursorp, H_OK);
 	return (__ham_item_prev(hashp, cursorp, mode));
 }
+
 /*
  * PUBLIC: int __ham_item_first __P((HTAB *, HASH_CURSOR *, db_lockmode_t));
  */
@@ -285,8 +282,10 @@ __ham_item_first(hashp, cursorp, mode)
 }
 
 /*
- * Returns a pointer to key/data pair on a page.  In the case of bigkeys,
- * just returns the page number and index of the bigkey pointer pair.
+ * __ham_item_prev --
+ *	Returns a pointer to key/data pair on a page.  In the case of
+ *	bigkeys, just returns the page number and index of the bigkey
+ *	pointer pair.
  *
  * PUBLIC: int __ham_item_prev __P((HTAB *, HASH_CURSOR *, db_lockmode_t));
  */
@@ -487,12 +486,61 @@ __ham_putitem(p, dbt, type)
 	NUM_ENT(p) += 1;
 }
 
+/*
+ * PUBLIC: void __ham_reputpair
+ * PUBLIC:    __P((PAGE *p, u_int32_t, u_int32_t, const DBT *, const DBT *));
+ *
+ * Recovery-only special case: put a key/data pair back at pair index
+ * ndx on page p (psize is used as the end offset when ndx is 0).  We are
+ * guaranteed that the pair fits on the page and is not the last pair on
+ * the page (because if it's the last pair, the normal insert works).
+ */
+void
+__ham_reputpair(p, psize, ndx, key, data)
+	PAGE *p;
+	u_int32_t psize, ndx;
+	const DBT *key, *data;
+{
+	db_indx_t i, movebytes, newbytes;
+	u_int8_t *from;
+
+	/* Slide the movebytes bytes at HOFFSET(p) down by newbytes. */
+	movebytes =
+	    (ndx == 0 ? psize : p->inp[H_DATAINDEX(ndx - 1)]) - HOFFSET(p);
+	newbytes = key->size + data->size;
+	from = (u_int8_t *)p + HOFFSET(p);
+	memmove(from - newbytes, from, movebytes);
+
+	/*
+	 * Move index entries H_KEYINDEX(ndx) and above up 2 slots, lowering
+	 * each stored offset by newbytes.  Note that we have to check the
+	 * exit condition inside the loop just in case we are dealing with
+	 * index 0 (db_indx_t's are unsigned, so i can never go negative).
+	 */
+	for (i = NUM_ENT(p) - 1; ; i-- ) {
+		p->inp[i + 2] = p->inp[i] - newbytes;
+		if (i == H_KEYINDEX(ndx))
+			break;
+	}
+
+	/* Set the pair's two offsets, then copy the key and data bytes in. */
+	p->inp[H_KEYINDEX(ndx)] =
+	    (ndx == 0 ? psize : p->inp[H_DATAINDEX(ndx - 1)]) - key->size;
+	p->inp[H_DATAINDEX(ndx)] = p->inp[H_KEYINDEX(ndx)] - data->size;
+	memcpy(P_ENTRY(p, H_KEYINDEX(ndx)), key->data, key->size);
+	memcpy(P_ENTRY(p, H_DATAINDEX(ndx)), data->data, data->size);
+
+	/* Account for the added bytes and the two added index entries. */
+	HOFFSET(p) -= newbytes;
+	NUM_ENT(p) += 2;
+}
+
 
 /*
  * PUBLIC: int __ham_del_pair __P((HTAB *, HASH_CURSOR *, int));
- * XXX TODO: if the item is an offdup, delete the other pages and
- * then remove the pair. If the offpage page is 0, then you can
- * just remove the pair.
+ *
+ * XXX
+ * TODO: if the item is an offdup, delete the other pages and then remove
+ * the pair. If the offpage page is 0, then you can just remove the pair.
  */
 int
 __ham_del_pair(hashp, cursorp, reclaim_page)
@@ -648,8 +696,9 @@ __ham_del_pair(hashp, cursorp, reclaim_page)
 		/*
 		 * Cursor is advanced to the beginning of the next page.
 		 */
-		cursorp->bndx = NDX_INVALID;
+		cursorp->bndx = 0;
 		cursorp->pgno = PGNO(p);
+		F_SET(cursorp, H_DELETED);
 		chg_pgno = PGNO(p);
 		if ((ret = __ham_dirty_page(hashp, p)) != 0 ||
 		    (ret = __ham_del_page(hashp->dbp, n_pagep)) != 0)
@@ -748,8 +797,8 @@ __ham_replpair(hashp, hcp, dbt, make_dup)
 {
 	DBT old_dbt, tdata, tmp;
 	DB_LSN	new_lsn;
+	int32_t change;			/* XXX: Possible overflow. */
 	u_int32_t len;
-	int32_t change;
 	int is_big, ret, type;
 	u_int8_t *beg, *dest, *end, *hk, *src;
 
@@ -789,7 +838,7 @@ __ham_replpair(hashp, hcp, dbt, make_dup)
 		change += dbt->doff + dbt->dlen - len;
 
 
-	if (change > (int)P_FREESPACE(hcp->pagep) || is_big) {
+	if (change > (int32_t)P_FREESPACE(hcp->pagep) || is_big) {
 		/*
 		 * Case 3 -- two subcases.
 		 * A. This is not really a partial operation, but an overwrite.
@@ -954,7 +1003,7 @@ __ham_split_page(hashp, obucket, nbucket)
 	HTAB *hashp;
 	u_int32_t obucket, nbucket;
 {
-	DBT key, val, page_dbt;
+	DBT key, page_dbt;
 	DB_ENV *dbenv;
 	DB_LSN new_lsn;
 	PAGE **pp, *old_pagep, *temp_pagep, *new_pagep;
@@ -995,7 +1044,7 @@ __ham_split_page(hashp, obucket, nbucket)
 
 	big_len = 0;
 	big_buf = NULL;
-	val.flags = key.flags = 0;
+	key.flags = 0;
 	while (temp_pagep != NULL) {
 		for (n = 0; n < (db_indx_t)H_NUMPAIRS(temp_pagep); n++) {
 			if ((ret =
@@ -1103,8 +1152,8 @@ __ham_split_page(hashp, obucket, nbucket)
 	    ret == 0)
 		ret = tret;
 
-err:	if (0) {
-		if (old_pagep != NULL)
+	if (0) {
+err:		if (old_pagep != NULL)
 			(void)__ham_put_page(hashp->dbp, old_pagep, 1);
 		if (new_pagep != NULL)
 			(void)__ham_put_page(hashp->dbp, new_pagep, 1);
@@ -1121,8 +1170,8 @@ err:	if (0) {
  * to which we just added something.  This allows us to link overflow
  * pages and return the new page having correctly put the last page.
  *
- * PUBLIC: int __ham_add_el __P((HTAB *, HASH_CURSOR *, const DBT *, const DBT *,
- * PUBLIC:     int));
+ * PUBLIC: int __ham_add_el
+ * PUBLIC:    __P((HTAB *, HASH_CURSOR *, const DBT *, const DBT *, int));
  */
 int
 __ham_add_el(hashp, hcp, key, val, type)
@@ -1136,8 +1185,8 @@ __ham_add_el(hashp, hcp, key, val, type)
 	DB_LSN new_lsn;
 	HOFFPAGE doff, koff;
 	db_pgno_t next_pgno;
-	u_int32_t data_size, key_size, pairsize;
-	int do_expand, is_keybig, is_databig, rectype, ret;
+	u_int32_t data_size, key_size, pairsize, rectype;
+	int do_expand, is_keybig, is_databig, ret;
 	int key_type, data_type;
 
 	do_expand = 0;
@@ -1268,13 +1317,14 @@ __ham_add_el(hashp, hcp, key, val, type)
  * another.  Works for all types of hash entries (H_OFFPAGE, H_KEYDATA,
  * H_DUPLICATE, H_OFFDUP).  Since we log splits at a high level, we
  * do not need to do any logging here.
- * PUBLIC: void __ham_copy_item __P((HTAB *, PAGE *, int, PAGE *));
+ *
+ * PUBLIC: void __ham_copy_item __P((HTAB *, PAGE *, u_int32_t, PAGE *));
  */
 void
 __ham_copy_item(hashp, src_page, src_ndx, dest_page)
 	HTAB *hashp;
 	PAGE *src_page;
-	int src_ndx;
+	u_int32_t src_ndx;
 	PAGE *dest_page;
 {
 	u_int32_t len;
@@ -1409,7 +1459,7 @@ __ham_del_page(dbp, pagep)
 		LSN(pagep) = new_lsn;
 	}
 
-#ifdef DEBUG
+#ifdef DIAGNOSTIC
 	{
 		db_pgno_t __pgno;
 		DB_LSN __lsn;
@@ -1563,13 +1613,13 @@ __ham_overflow_page(dbp, type, pp)
 #ifdef DEBUG
 /*
  * PUBLIC: #ifdef DEBUG
- * PUBLIC: int __bucket_to_page __P((HTAB *, int));
+ * PUBLIC: db_pgno_t __bucket_to_page __P((HTAB *, db_pgno_t));
  * PUBLIC: #endif
  */
-int
+db_pgno_t
 __bucket_to_page(hashp, n)
 	HTAB *hashp;
-	int n;
+	db_pgno_t n;
 {
 	int ret_val;
 
@@ -1580,7 +1630,6 @@ __bucket_to_page(hashp, n)
 }
 #endif
 
-
 /*
  * Create a bunch of overflow pages at the current split point.
  * PUBLIC: void __ham_init_ovflpages __P((HTAB *));
@@ -1660,8 +1709,9 @@ __ham_get_cpage(hashp, hcp, mode)
  * Get a new page at the cursor, putting the last page if necessary.
  * If the flag is set to H_ISDUP, then we are talking about the
  * duplicate page, not the main page.
- * PUBLIC: int __ham_next_cpage __P((HTAB *, HASH_CURSOR *, db_pgno_t,
- * PUBLIC:     int, int));
+ *
+ * PUBLIC: int __ham_next_cpage
+ * PUBLIC:    __P((HTAB *, HASH_CURSOR *, db_pgno_t, int, u_int32_t));
  */
 int
 __ham_next_cpage(hashp, hcp, pgno, dirty, flags)
@@ -1669,22 +1719,22 @@ __ham_next_cpage(hashp, hcp, pgno, dirty, flags)
 	HASH_CURSOR *hcp;
 	db_pgno_t pgno;
 	int dirty;
-	int flags;
+	u_int32_t flags;
 {
 	PAGE *p;
 	int ret;
 
-	if (flags & H_ISDUP && hcp->dpagep != NULL &&
+	if (LF_ISSET(H_ISDUP) && hcp->dpagep != NULL &&
 	    (ret = __ham_put_page(hashp->dbp, hcp->dpagep, dirty)) != 0)
 		return (ret);
-	else if (!(flags & H_ISDUP) && hcp->pagep != NULL &&
+	else if (!LF_ISSET(H_ISDUP) && hcp->pagep != NULL &&
 	    (ret = __ham_put_page(hashp->dbp, hcp->pagep, dirty)) != 0)
 		return (ret);
 
 	if ((ret = __ham_get_page(hashp->dbp, pgno, &p)) != 0)
 		return (ret);
 
-	if (flags & H_ISDUP) {
+	if (LF_ISSET(H_ISDUP)) {
 		hcp->dpagep = p;
 		hcp->dpgno = pgno;
 		hcp->dndx = 0;
diff --git a/db2/hash/hash_rec.c b/db2/hash/hash_rec.c
index 09508251a2..efaf61c638 100644
--- a/db2/hash/hash_rec.c
+++ b/db2/hash/hash_rec.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 /*
@@ -47,14 +47,13 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)hash_rec.c	10.15 (Sleepycat) 12/4/97";
+static const char sccsid[] = "@(#)hash_rec.c	10.19 (Sleepycat) 5/23/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
 #include <sys/types.h>
 
 #include <errno.h>
-#include <stdlib.h>
 #include <string.h>
 #endif
 
@@ -64,7 +63,6 @@ static const char sccsid[] = "@(#)hash_rec.c	10.15 (Sleepycat) 12/4/97";
 #include "hash.h"
 #include "btree.h"
 #include "log.h"
-#include "db_dispatch.h"
 #include "common_ext.h"
 
 /*
@@ -131,13 +129,23 @@ __ham_insdel_recover(logp, dbtp, lsnp, redo, info)
 
 	if ((op == DELPAIR && cmp_n == 0 && !redo) ||
 	    (op == PUTPAIR && cmp_p == 0 && redo)) {
-		/* Need to redo a PUT or undo a delete. */
-		__ham_putitem(pagep, &argp->key,
-		    !redo || PAIR_ISKEYBIG(argp->opcode) ?
-		    H_OFFPAGE : H_KEYDATA);
-		__ham_putitem(pagep, &argp->data,
-		    !redo || PAIR_ISDATABIG(argp->opcode) ?
-		    H_OFFPAGE : H_KEYDATA);
+		/*
+		 * Need to redo a PUT or undo a delete.  If we are undoing a
+		 * delete, we've got to restore the item back to its original
+		 * position.  That's a royal pain in the butt (because we do
+		 * not store item lengths on the page), but there's no choice.
+		 */
+		if (op != DELPAIR ||
+		    argp->ndx == (u_int32_t)H_NUMPAIRS(pagep)) {
+			__ham_putitem(pagep, &argp->key,
+			    !redo || PAIR_ISKEYBIG(argp->opcode) ?
+			    H_OFFPAGE : H_KEYDATA);
+			__ham_putitem(pagep, &argp->data,
+			    !redo || PAIR_ISDATABIG(argp->opcode) ?
+			    H_OFFPAGE : H_KEYDATA);
+		} else
+			(void) __ham_reputpair(pagep, hashp->hdr->pagesize,
+			    argp->ndx, &argp->key, &argp->data);
 
 		LSN(pagep) = redo ? *lsnp : argp->pagelsn;
 		if ((ret = __ham_put_page(file_dbp, pagep, 1)) != 0)
@@ -453,7 +461,7 @@ __ham_newpgno_recover(logp, dbtp, lsnp, redo, info)
 	DBT *dbtp;
 	DB_LSN *lsnp;
 	int redo;
-	 void *info;
+	void *info;
 {
 	__ham_newpgno_args *argp;
 	DB *mdbp, *file_dbp;
@@ -574,7 +582,7 @@ __ham_splitmeta_recover(logp, dbtp, lsnp, redo, info)
 	DBT *dbtp;
 	DB_LSN *lsnp;
 	int redo;
-	 void *info;
+	void *info;
 {
 	__ham_splitmeta_args *argp;
 	DB *mdbp, *file_dbp;
@@ -649,7 +657,7 @@ __ham_splitdata_recover(logp, dbtp, lsnp, redo, info)
 	DBT *dbtp;
 	DB_LSN *lsnp;
 	int redo;
-	 void *info;
+	void *info;
 {
 	__ham_splitdata_args *argp;
 	DB *mdbp, *file_dbp;
diff --git a/db2/hash/hash_stat.c b/db2/hash/hash_stat.c
index 99c6078d86..b57ca0950d 100644
--- a/db2/hash/hash_stat.c
+++ b/db2/hash/hash_stat.c
@@ -1,14 +1,14 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)hash_stat.c	10.6 (Sleepycat) 7/2/97";
+static const char sccsid[] = "@(#)hash_stat.c	10.8 (Sleepycat) 4/26/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -20,7 +20,6 @@ static const char sccsid[] = "@(#)hash_stat.c	10.6 (Sleepycat) 7/2/97";
 #include "db_int.h"
 #include "db_page.h"
 #include "hash.h"
-#include "common_ext.h"
 
 /*
  * __ham_stat --
diff --git a/db2/include/btree.h b/db2/include/btree.h
index 878096b7b2..1660d331e7 100644
--- a/db2/include/btree.h
+++ b/db2/include/btree.h
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 /*
@@ -43,7 +43,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- *	@(#)btree.h	10.17 (Sleepycat) 9/23/97
+ *	@(#)btree.h	10.21 (Sleepycat) 5/23/98
  */
 
 /* Forward structure declarations. */
@@ -103,28 +103,39 @@ struct __recno;		typedef struct __recno RECNO;
  * to return deleted entries.  To simplify both the mnemonic representation
  * and the code that checks for various cases, we construct a set of bitmasks.
  */
-#define	S_READ		0x0001		/* Read locks. */
-#define	S_WRITE		0x0002		/* Write locks. */
-
-#define	S_APPEND	0x0040		/* Append to the tree. */
-#define	S_DELNO		0x0080		/* Don't return deleted items. */
-#define	S_DUPFIRST	0x0100		/* Return first duplicate. */
-#define	S_DUPLAST	0x0200		/* Return last duplicate. */
-#define	S_EXACT		0x0400		/* Exact items only. */
-#define	S_PARENT	0x0800		/* Lock page pair. */
-
-#define	S_DELETE	(S_WRITE | S_DUPFIRST | S_DELNO | S_EXACT)
+#define	S_READ		0x00001		/* Read locks. */
+#define	S_WRITE		0x00002		/* Write locks. */
+
+#define	S_APPEND	0x00040		/* Append to the tree. */
+#define	S_DELNO		0x00080		/* Don't return deleted items. */
+#define	S_DUPFIRST	0x00100		/* Return first duplicate. */
+#define	S_DUPLAST	0x00200		/* Return last duplicate. */
+#define	S_EXACT		0x00400		/* Exact items only. */
+#define	S_PARENT	0x00800		/* Lock page pair. */
+#define	S_STACK		0x01000		/* Need a complete stack. */
+
+#define	S_DELETE	(S_WRITE | S_DUPFIRST | S_DELNO | S_EXACT | S_STACK)
 #define	S_FIND		(S_READ | S_DUPFIRST | S_DELNO)
-#define	S_INSERT	(S_WRITE | S_DUPLAST)
-#define	S_KEYFIRST	(S_WRITE | S_DUPFIRST)
-#define	S_KEYLAST	(S_WRITE | S_DUPLAST)
+#define	S_INSERT	(S_WRITE | S_DUPLAST | S_STACK)
+#define	S_KEYFIRST	(S_WRITE | S_DUPFIRST | S_STACK)
+#define	S_KEYLAST	(S_WRITE | S_DUPLAST | S_STACK)
 #define	S_WRPAIR	(S_WRITE | S_DUPLAST | S_PARENT)
 
 /*
+ * If doing insert search (including keyfirst or keylast operations) or a
+ * split search on behalf of an insert, it's okay to return the entry one
+ * past the end of the page.
+ */
+#define	PAST_END_OK(f)							\
+	((f) == S_INSERT ||						\
+	(f) == S_KEYFIRST || (f) == S_KEYLAST || (f) == S_WRPAIR)
+
+/*
  * Flags to __bam_iitem().
  */
-#define	BI_NEWKEY	0x01		/* New key. */
-#define	BI_DELETED	0x02		/* Key/data pair only placeholder. */
+#define	BI_DELETED	0x01		/* Key/data pair only placeholder. */
+#define	BI_DOINCR	0x02		/* Increment the record count. */
+#define	BI_NEWKEY	0x04		/* New key. */
 
 /*
  * Various routines pass around page references.  A page reference can be a
@@ -138,6 +149,21 @@ struct __epg {
 };
 
 /*
+ * All cursors are queued from the master DB structure.  Convert the user's
+ * DB reference to the master DB reference.  We lock the master DB mutex
+ * so that we can walk the cursor queue.  There's no race in accessing the
+ * cursors, because if we're modifying a page, we have a write lock on it,
+ * and therefore no other thread than the current one can have a cursor that
+ * references the page.
+ */
+#define	CURSOR_SETUP(dbp) {						\
+	(dbp) = (dbp)->master;						\
+	DB_THREAD_LOCK(dbp);						\
+}
+#define	CURSOR_TEARDOWN(dbp)						\
+	DB_THREAD_UNLOCK(dbp);
+
+/*
  * Btree cursor.
  *
  * Arguments passed to __bam_ca_replace().
diff --git a/db2/include/btree_ext.h b/db2/include/btree_ext.h
index 9c34c8c6bf..b8a137364c 100644
--- a/db2/include/btree_ext.h
+++ b/db2/include/btree_ext.h
@@ -2,7 +2,7 @@
 #ifndef _btree_ext_h_
 #define _btree_ext_h_
 int __bam_close __P((DB *));
-int __bam_sync __P((DB *, int));
+int __bam_sync __P((DB *, u_int32_t));
 int __bam_cmp __P((DB *, const DBT *, EPG *));
 int __bam_defcmp __P((const DBT *, const DBT *));
 size_t __bam_defpfx __P((const DBT *, const DBT *));
@@ -11,7 +11,7 @@ int __bam_pgout __P((db_pgno_t, void *, DBT *));
 int __bam_mswap __P((PAGE *));
 int __bam_cursor __P((DB *, DB_TXN *, DBC **));
 int __bam_c_iclose __P((DB *, DBC *));
-int __bam_get __P((DB *, DB_TXN *, DBT *, DBT *, int));
+int __bam_get __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t));
 int __bam_ovfl_chk __P((DB *, CURSOR *, u_int32_t, int));
 int __bam_cprint __P((DB *));
 int __bam_ca_delete __P((DB *, db_pgno_t, u_int32_t, CURSOR *, int));
@@ -23,8 +23,8 @@ void __bam_ca_replace
    __P((DB *, db_pgno_t, u_int32_t, ca_replace_arg));
 void __bam_ca_split __P((DB *,
    db_pgno_t, db_pgno_t, db_pgno_t, u_int32_t, int));
-int __bam_delete __P((DB *, DB_TXN *, DBT *, int));
-int __ram_delete __P((DB *, DB_TXN *, DBT *, int));
+int __bam_delete __P((DB *, DB_TXN *, DBT *, u_int32_t));
+int __ram_delete __P((DB *, DB_TXN *, DBT *, u_int32_t));
 int __bam_ditem __P((DB *, PAGE *, u_int32_t));
 int __bam_adjindx __P((DB *, PAGE *, u_int32_t, u_int32_t, int));
 int __bam_dpage __P((DB *, const DBT *));
@@ -35,10 +35,10 @@ int __bam_free __P((DB *, PAGE *));
 int __bam_lt __P((DB *));
 int __bam_lget __P((DB *, int, db_pgno_t, db_lockmode_t, DB_LOCK *));
 int __bam_lput __P((DB *, DB_LOCK));
-int __bam_pget __P((DB *, PAGE **, db_pgno_t *, int));
-int __bam_put __P((DB *, DB_TXN *, DBT *, DBT *, int));
+int __bam_pget __P((DB *, PAGE **, db_pgno_t *, u_int32_t));
+int __bam_put __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t));
 int __bam_iitem __P((DB *,
-   PAGE **, db_indx_t *, DBT *, DBT *, int, int));
+   PAGE **, db_indx_t *, DBT *, DBT *, u_int32_t, u_int32_t));
 int __bam_ritem __P((DB *, PAGE *, u_int32_t, DBT *));
 int __bam_pg_alloc_recover
   __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
@@ -64,19 +64,19 @@ void __ram_ca __P((DB *, db_recno_t, ca_recno_arg));
 int __ram_cprint __P((DB *));
 int __ram_getno __P((DB *, const DBT *, db_recno_t *, int));
 int __ram_snapshot __P((DB *));
-int __bam_rsearch __P((DB *, db_recno_t *, u_int, int, int *));
-int __bam_adjust __P((DB *, BTREE *, int));
+int __bam_rsearch __P((DB *, db_recno_t *, u_int32_t, int, int *));
+int __bam_adjust __P((DB *, BTREE *, int32_t));
 int __bam_nrecs __P((DB *, db_recno_t *));
 db_recno_t __bam_total __P((PAGE *));
 int __bam_search __P((DB *,
-    const DBT *, u_int, int, db_recno_t *, int *));
+    const DBT *, u_int32_t, int, db_recno_t *, int *));
 int __bam_stkrel __P((DB *));
 int __bam_stkgrow __P((BTREE *));
 int __bam_split __P((DB *, void *));
 int __bam_broot __P((DB *, PAGE *, PAGE *, PAGE *));
 int __ram_root __P((DB *, PAGE *, PAGE *, PAGE *));
 int __bam_copy __P((DB *, PAGE *, PAGE *, u_int32_t, u_int32_t));
-int __bam_stat __P((DB *, void *, void *(*)(size_t), int));
+int __bam_stat __P((DB *, void *, void *(*)(size_t), u_int32_t));
 void __bam_add_mstat __P((DB_BTREE_LSTAT *, DB_BTREE_LSTAT *));
 int __bam_pg_alloc_log
     __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
diff --git a/db2/include/clib_ext.h b/db2/include/clib_ext.h
index eb982bf85f..f5510a1629 100644
--- a/db2/include/clib_ext.h
+++ b/db2/include/clib_ext.h
@@ -53,9 +53,6 @@ int snprintf __P((char *, size_t, const char *, ...));
 int snprintf();
 #endif
 #endif
-#ifndef HAVE_STRDUP
-char *strdup __P((const char *));
-#endif
 #ifndef HAVE_STRERROR
 char *strerror __P((int));
 #endif
diff --git a/db2/include/common_ext.h b/db2/include/common_ext.h
index b362c9c32e..4674f9ce01 100644
--- a/db2/include/common_ext.h
+++ b/db2/include/common_ext.h
@@ -2,8 +2,8 @@
 #ifndef _common_ext_h_
 #define _common_ext_h_
 int __db_appname __P((DB_ENV *,
-   APPNAME, const char *, const char *, int *, char **));
-int __db_apprec __P((DB_ENV *, int));
+   APPNAME, const char *, const char *, u_int32_t, int *, char **));
+int __db_apprec __P((DB_ENV *, u_int32_t));
 int __db_byteorder __P((DB_ENV *, int));
 #ifdef __STDC__
 void __db_err __P((const DB_ENV *dbenv, const char *fmt, ...));
@@ -11,35 +11,32 @@ void __db_err __P((const DB_ENV *dbenv, const char *fmt, ...));
 void __db_err();
 #endif
 int __db_panic __P((DB *));
-int __db_fchk __P((DB_ENV *, const char *, int, int));
-int __db_fcchk __P((DB_ENV *, const char *, int, int, int));
-int __db_cdelchk __P((const DB *, int, int, int));
-int __db_cgetchk __P((const DB *, DBT *, DBT *, int, int));
+int __db_fchk __P((DB_ENV *, const char *, u_int32_t, u_int32_t));
+int __db_fcchk
+   __P((DB_ENV *, const char *, u_int32_t, u_int32_t, u_int32_t));
+int __db_cdelchk __P((const DB *, u_int32_t, int, int));
+int __db_cgetchk __P((const DB *, DBT *, DBT *, u_int32_t, int));
 int __db_cputchk __P((const DB *,
-   const DBT *, DBT *, int, int, int));
-int __db_delchk __P((const DB *, int, int));
-int __db_getchk __P((const DB *, const DBT *, DBT *, int));
-int __db_putchk __P((const DB *, DBT *, const DBT *, int, int, int));
-int __db_statchk __P((const DB *, int));
-int __db_syncchk __P((const DB *, int));
+   const DBT *, DBT *, u_int32_t, int, int));
+int __db_delchk __P((const DB *, DBT *, u_int32_t, int));
+int __db_getchk __P((const DB *, const DBT *, DBT *, u_int32_t));
+int __db_putchk
+   __P((const DB *, DBT *, const DBT *, u_int32_t, int, int));
+int __db_statchk __P((const DB *, u_int32_t));
+int __db_syncchk __P((const DB *, u_int32_t));
 int __db_ferr __P((const DB_ENV *, const char *, int));
 u_int32_t __db_log2 __P((u_int32_t));
-int __db_rcreate __P((DB_ENV *, APPNAME,
-   const char *, const char *, int, size_t, int, int *, void *));
-int __db_rinit __P((DB_ENV *, RLAYOUT *, int, size_t, int));
-int __db_ropen __P((DB_ENV *,
-   APPNAME, const char *, const char *, int, int *, void *));
-int __db_rclose __P((DB_ENV *, int, void *));
-int __db_runlink __P((DB_ENV *,
-   APPNAME, const char *, const char *, int));
-int __db_rgrow __P((DB_ENV *, int, size_t));
-int __db_rremap __P((DB_ENV *, void *, size_t, size_t, int, void *));
+int __db_rattach __P((REGINFO *));
+int __db_rdetach __P((REGINFO *));
+int __db_runlink __P((REGINFO *, int));
+int __db_rgrow __P((REGINFO *, size_t));
+int __db_rreattach __P((REGINFO *, size_t));
 void __db_shalloc_init __P((void *, size_t));
 int __db_shalloc __P((void *, size_t, size_t, void *));
 void __db_shalloc_free __P((void *, void *));
 size_t __db_shalloc_count __P((void *));
 size_t __db_shsizeof __P((void *));
-void __db_shalloc_dump __P((FILE *, void *));
-int __db_tablesize __P((u_int));
-void __db_hashinit __P((void *, int));
+void __db_shalloc_dump __P((void *, FILE *));
+int __db_tablesize __P((u_int32_t));
+void __db_hashinit __P((void *, u_int32_t));
 #endif /* _common_ext_h_ */
diff --git a/db2/include/cxx_int.h b/db2/include/cxx_int.h
index bf7a09602d..0a59de4391 100644
--- a/db2/include/cxx_int.h
+++ b/db2/include/cxx_int.h
@@ -1,10 +1,10 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1997
+ * Copyright (c) 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  *
- *	@(#)cxx_int.h	10.4 (Sleepycat) 8/22/97
+ *	@(#)cxx_int.h	10.5 (Sleepycat) 4/10/98
  */
 
 #ifndef _CXX_INT_H_
diff --git a/db2/include/db.h.src b/db2/include/db.h.src
index ebdaa27470..97ad55693f 100644
--- a/db2/include/db.h.src
+++ b/db2/include/db.h.src
@@ -1,10 +1,10 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  *
- *	@(#)db.h.src	10.102 (Sleepycat) 1/18/98
+ *	@(#)db.h.src	10.131 (Sleepycat) 6/2/98
  */
 
 #ifndef _DB_H_
@@ -54,8 +54,7 @@
  *
  * !!!
  * We also provide the standard u_int, u_long etc., if they're not provided
- * by the system.  This isn't completely necessary, but the example programs
- * need them.
+ * by the system.
  */
 #ifndef	__BIT_TYPES_DEFINED__
 #define	__BIT_TYPES_DEFINED__
@@ -72,9 +71,9 @@
 @u_long_decl@
 
 #define	DB_VERSION_MAJOR	2
-#define	DB_VERSION_MINOR	3
-#define	DB_VERSION_PATCH	16
-#define	DB_VERSION_STRING	"Sleepycat Software: DB 2.3.16: (1/19/98)"
+#define	DB_VERSION_MINOR	4
+#define	DB_VERSION_PATCH	14
+#define	DB_VERSION_STRING	"Sleepycat Software: DB 2.4.14: (6/2/98)"
 
 typedef	u_int32_t	db_pgno_t;	/* Page number type. */
 typedef	u_int16_t	db_indx_t;	/* Page offset type. */
@@ -95,6 +94,7 @@ struct __db_bt_stat;	typedef struct __db_bt_stat DB_BTREE_STAT;
 struct __db_dbt;	typedef struct __db_dbt DBT;
 struct __db_env;	typedef struct __db_env DB_ENV;
 struct __db_info;	typedef struct __db_info DB_INFO;
+struct __db_lock_stat;	typedef struct __db_lock_stat DB_LOCK_STAT;
 struct __db_lockregion;	typedef struct __db_lockregion DB_LOCKREGION;
 struct __db_lockreq;	typedef struct __db_lockreq DB_LOCKREQ;
 struct __db_locktab;	typedef struct __db_locktab DB_LOCKTAB;
@@ -102,6 +102,7 @@ struct __db_log;	typedef struct __db_log DB_LOG;
 struct __db_log_stat;	typedef struct __db_log_stat DB_LOG_STAT;
 struct __db_lsn;	typedef struct __db_lsn DB_LSN;
 struct __db_mpool;	typedef struct __db_mpool DB_MPOOL;
+struct __db_mpool_finfo;typedef struct __db_mpool_finfo DB_MPOOL_FINFO;
 struct __db_mpool_fstat;typedef struct __db_mpool_fstat DB_MPOOL_FSTAT;
 struct __db_mpool_stat;	typedef struct __db_mpool_stat DB_MPOOL_STAT;
 struct __db_mpoolfile;	typedef struct __db_mpoolfile DB_MPOOLFILE;
@@ -134,7 +135,7 @@ struct __db_dbt {
  * There are a set of functions that the application can replace with its
  * own versions, and some other knobs which can be turned at run-time.
  */
-#define	DB_FUNC_CALLOC	 1		/* ANSI C calloc. */
+#define	DB_FUNC_CALLOC	 1	/* DELETED: ANSI C calloc. */
 #define	DB_FUNC_CLOSE	 2		/* POSIX 1003.1 close. */
 #define	DB_FUNC_DIRFREE	 3		/* DB: free directory list. */
 #define	DB_FUNC_DIRLIST	 4		/* DB: create directory list. */
@@ -149,12 +150,18 @@ struct __db_dbt {
 #define	DB_FUNC_REALLOC	13		/* ANSI C realloc. */
 #define	DB_FUNC_SEEK	14		/* POSIX 1003.1 lseek. */
 #define	DB_FUNC_SLEEP	15		/* DB: sleep secs/usecs. */
-#define	DB_FUNC_STRDUP	16		/* DB: strdup(3). */
+#define	DB_FUNC_STRDUP	16	/* DELETED: DB: strdup(3). */
 #define	DB_FUNC_UNLINK	17		/* POSIX 1003.1 unlink. */
 #define	DB_FUNC_UNMAP	18		/* DB: unmap shared memory file. */
 #define	DB_FUNC_WRITE	19		/* POSIX 1003.1 write. */
 #define	DB_FUNC_YIELD	20		/* DB: yield thread to scheduler. */
 #define	DB_TSL_SPINS	21		/* DB: initialize spin count. */
+#define	DB_FUNC_RUNLINK	22		/* DB: remove a shared region. */
+#define	DB_REGION_ANON	23		/* DB: anonymous, unnamed regions. */
+#define	DB_REGION_INIT	24		/* DB: page-fault regions in create. */
+#define	DB_REGION_NAME	25		/* DB: anonymous, named regions. */
+#define	DB_MUTEXLOCKS	26		/* DB: turn off all mutex locks. */
+#define	DB_PAGEYIELD	27		/* DB: yield the CPU on pool get. */
 
 /*
  * Database configuration and initialization.
@@ -162,52 +169,51 @@ struct __db_dbt {
  /*
   * Flags understood by both db_open(3) and db_appinit(3).
   */
-#define	DB_CREATE		0x00001	/* O_CREAT: create file as necessary. */
-#define	DB_NOMMAP		0x00002	/* Don't mmap underlying file. */
-#define	DB_THREAD		0x00004	/* Free-thread DB package handles. */
+#define	DB_CREATE	      0x000001	/* O_CREAT: create file as necessary. */
+#define	DB_NOMMAP	      0x000002	/* Don't mmap underlying file. */
+#define	DB_THREAD	      0x000004	/* Free-thread DB package handles. */
 
 /*
  * Flags understood by db_appinit(3).
- *
- * DB_MUTEXDEBUG is internal only, and not documented.
  */
-/*				0x00007	   COMMON MASK. */
-#define	DB_INIT_LOCK		0x00008	/* Initialize locking. */
-#define	DB_INIT_LOG		0x00010	/* Initialize logging. */
-#define	DB_INIT_MPOOL		0x00020	/* Initialize mpool. */
-#define	DB_INIT_TXN		0x00040	/* Initialize transactions. */
-#define	DB_MPOOL_PRIVATE	0x00080	/* Mpool: private memory pool. */
-#define	DB_MUTEXDEBUG		0x00100	/* Do not get/set mutexes in regions. */
-#define	DB_RECOVER		0x00200	/* Run normal recovery. */
-#define	DB_RECOVER_FATAL	0x00400 /* Run catastrophic recovery. */
-#define	DB_TXN_NOSYNC		0x00800	/* Do not sync log on commit. */
-#define	DB_USE_ENVIRON		0x01000	/* Use the environment. */
-#define	DB_USE_ENVIRON_ROOT	0x02000	/* Use the environment if root. */
+/*			      0x000007	   COMMON MASK. */
+#define	DB_INIT_LOCK	      0x000008	/* Initialize locking. */
+#define	DB_INIT_LOG	      0x000010	/* Initialize logging. */
+#define	DB_INIT_MPOOL	      0x000020	/* Initialize mpool. */
+#define	DB_INIT_TXN	      0x000040	/* Initialize transactions. */
+#define	DB_MPOOL_PRIVATE      0x000080	/* Mpool: private memory pool. */
+#define	__UNUSED_100	      0x000100
+#define	DB_RECOVER	      0x000200	/* Run normal recovery. */
+#define	DB_RECOVER_FATAL      0x000400	/* Run catastrophic recovery. */
+#define	DB_TXN_NOSYNC	      0x000800	/* Do not sync log on commit. */
+#define	DB_USE_ENVIRON	      0x001000	/* Use the environment. */
+#define	DB_USE_ENVIRON_ROOT   0x002000	/* Use the environment if root. */
 
 /* CURRENTLY UNUSED LOCK FLAGS. */
-#define	DB_TXN_LOCK_2PL		0x00000	/* Two-phase locking. */
-#define	DB_TXN_LOCK_OPTIMISTIC	0x00000	/* Optimistic locking. */
-#define	DB_TXN_LOCK_MASK	0x00000	/* Lock flags mask. */
+#define	DB_TXN_LOCK_2PL	      0x000000	/* Two-phase locking. */
+#define	DB_TXN_LOCK_OPTIMIST  0x000000	/* Optimistic locking. */
+#define	DB_TXN_LOCK_MASK      0x000000	/* Lock flags mask. */
 
 /* CURRENTLY UNUSED LOG FLAGS. */
-#define	DB_TXN_LOG_REDO		0x00000	/* Redo-only logging. */
-#define	DB_TXN_LOG_UNDO		0x00000	/* Undo-only logging. */
-#define	DB_TXN_LOG_UNDOREDO	0x00000	/* Undo/redo write-ahead logging. */
-#define	DB_TXN_LOG_MASK		0x00000	/* Log flags mask. */
+#define	DB_TXN_LOG_REDO	      0x000000	/* Redo-only logging. */
+#define	DB_TXN_LOG_UNDO	      0x000000	/* Undo-only logging. */
+#define	DB_TXN_LOG_UNDOREDO   0x000000	/* Undo/redo write-ahead logging. */
+#define	DB_TXN_LOG_MASK	      0x000000	/* Log flags mask. */
 
 /*
  * Flags understood by db_open(3).
  *
- * DB_EXCL and DB_TEMPORARY are internal only, and not documented.
- * DB_SEQUENTIAL is currently internal, but likely to be exported some day.
+ * DB_EXCL and DB_TEMPORARY are internal only, and are not documented.
+ * DB_SEQUENTIAL is currently internal, but may be exported some day.
  */
-/*				0x00007	   COMMON MASK. */
-/*				0x07fff	   ALREADY USED. */
-#define	DB_EXCL			0x08000	/* O_EXCL: exclusive open. */
-#define	DB_RDONLY		0x10000	/* O_RDONLY: read-only. */
-#define	DB_SEQUENTIAL		0x20000	/* Indicate sequential access. */
-#define	DB_TEMPORARY		0x40000	/* Remove on last close. */
-#define	DB_TRUNCATE		0x80000	/* O_TRUNCATE: replace existing DB. */
+/*			      0x000007	   COMMON MASK. */
+/*			      0x003fff	   ALREADY USED. */
+#define	__UNUSED_4000	      0x004000
+#define	DB_EXCL		      0x008000	/* O_EXCL: exclusive open. */
+#define	DB_RDONLY	      0x010000	/* O_RDONLY: read-only. */
+#define	DB_SEQUENTIAL	      0x020000	/* Indicate sequential access. */
+#define	DB_TEMPORARY	      0x040000	/* Remove on last close. */
+#define	DB_TRUNCATE	      0x080000	/* O_TRUNCATE: replace existing DB. */
 
 /*
  * Deadlock detector modes; used in the DBENV structure to configure the
@@ -240,9 +246,9 @@ struct __db_env {
 	/* Locking. */
 	DB_LOCKTAB	*lk_info;	/* Return from lock_open(). */
 	u_int8_t	*lk_conflicts;	/* Two dimensional conflict matrix. */
-	int		 lk_modes;	/* Number of lock modes in table. */
-	u_int		 lk_max;	/* Maximum number of locks. */
-	u_int32_t	 lk_detect;	/* Deadlock detect on every conflict. */
+	u_int32_t	 lk_modes;	/* Number of lock modes in table. */
+	u_int32_t	 lk_max;	/* Maximum number of locks. */
+	u_int32_t	 lk_detect;	/* Deadlock detect on all conflicts. */
 
 	/* Logging. */
 	DB_LOG		*lg_info;	/* Return from log_open(). */
@@ -255,7 +261,7 @@ struct __db_env {
 
 	/* Transactions. */
 	DB_TXNMGR	*tx_info;	/* Return from txn_open(). */
-	unsigned int	 tx_max;	/* Maximum number of transactions. */
+	u_int32_t	 tx_max;	/* Maximum number of transactions. */
 	int (*tx_recover)		/* Dispatch function for recovery. */
 	    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
 
@@ -300,17 +306,17 @@ struct __db_info {
 	void *(*db_malloc) __P((size_t));
 
 	/* Btree access method. */
-	int		 bt_maxkey;	/* Maximum keys per page. */
-	int		 bt_minkey;	/* Minimum keys per page. */
+	u_int32_t	 bt_maxkey;	/* Maximum keys per page. */
+	u_int32_t	 bt_minkey;	/* Minimum keys per page. */
 	int (*bt_compare)		/* Comparison function. */
 	    __P((const DBT *, const DBT *));
 	size_t (*bt_prefix)		/* Prefix function. */
 	    __P((const DBT *, const DBT *));
 
 	/* Hash access method. */
-	unsigned int	 h_ffactor;	/* Fill factor. */
-	unsigned int	 h_nelem;	/* Number of elements. */
-	u_int32_t	(*h_hash)	/* Hash function. */
+	u_int32_t 	 h_ffactor;	/* Fill factor. */
+	u_int32_t	 h_nelem;	/* Number of elements. */
+	u_int32_t      (*h_hash)	/* Hash function. */
 	    __P((const void *, u_int32_t));
 
 	/* Recno access method. */
@@ -353,6 +359,7 @@ struct __db_info {
 #define	DB_SET		0x010000	/* c_get(), log_get() */
 #define	DB_SET_RANGE	0x020000	/* c_get() */
 #define	DB_SET_RECNO	0x040000	/* c_get() */
+#define	DB_CURLSN	0x080000	/* log_put() */
 
 /*
  * DB (user visible) error return codes.
@@ -435,14 +442,14 @@ struct __db {
 	void *(*db_malloc) __P((size_t));
 
 					/* Functions. */
-	int (*close)	__P((DB *, int));
+	int (*close)	__P((DB *, u_int32_t));
 	int (*cursor)	__P((DB *, DB_TXN *, DBC **));
-	int (*del)	__P((DB *, DB_TXN *, DBT *, int));
+	int (*del)	__P((DB *, DB_TXN *, DBT *, u_int32_t));
 	int (*fd)	__P((DB *, int *));
-	int (*get)	__P((DB *, DB_TXN *, DBT *, DBT *, int));
-	int (*put)	__P((DB *, DB_TXN *, DBT *, DBT *, int));
-	int (*stat)	__P((DB *, void *, void *(*)(size_t), int));
-	int (*sync)	__P((DB *, int));
+	int (*get)	__P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t));
+	int (*put)	__P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t));
+	int (*stat)	__P((DB *, void *, void *(*)(size_t), u_int32_t));
+	int (*sync)	__P((DB *, u_int32_t));
 
 #define	DB_AM_DUP	0x000001	/* DB_DUP (internal). */
 #define	DB_AM_INMEM	0x000002	/* In-memory; no sync on close. */
@@ -483,9 +490,9 @@ struct __dbc {
 	void	 *internal;		/* Access method private. */
 
 	int (*c_close)	__P((DBC *));
-	int (*c_del)	__P((DBC *, int));
-	int (*c_get)	__P((DBC *, DBT *, DBT *, int));
-	int (*c_put)	__P((DBC *, DBT *, DBT *, int));
+	int (*c_del)	__P((DBC *, u_int32_t));
+	int (*c_get)	__P((DBC *, DBT *, DBT *, u_int32_t));
+	int (*c_put)	__P((DBC *, DBT *, DBT *, u_int32_t));
 };
 
 /* Btree/recno statistics structure. */
@@ -524,10 +531,11 @@ struct __db_bt_stat {
 #if defined(__cplusplus)
 extern "C" {
 #endif
-int   db_appinit __P((const char *, char * const *, DB_ENV *, int));
+int   db_appinit __P((const char *, char * const *, DB_ENV *, u_int32_t));
 int   db_appexit __P((DB_ENV *));
 int   db_jump_set __P((void *, int));
-int   db_open __P((const char *, DBTYPE, int, int, DB_ENV *, DB_INFO *, DB **));
+int   db_open __P((const char *,
+	  DBTYPE, u_int32_t, int, DB_ENV *, DB_INFO *, DB **));
 int   db_value_set __P((int, int));
 char *db_version __P((int *, int *, int *));
 #if defined(__cplusplus)
@@ -575,6 +583,21 @@ typedef enum {
 	DB_LOCK_IWR			/* Intent to read and write. */
 } db_lockmode_t;
 
+/*
+ * Status of a lock.
+ */
+typedef enum {
+	DB_LSTAT_ABORTED,		/* Lock belongs to an aborted txn. */
+	DB_LSTAT_ERR,			/* Lock is bad. */
+	DB_LSTAT_FREE,			/* Lock is unallocated. */
+	DB_LSTAT_HELD,			/* Lock is currently held. */
+	DB_LSTAT_NOGRANT,		/* Lock was not granted. */
+	DB_LSTAT_PENDING,		/* Lock was waiting and has been
+					 * promoted; waiting for the owner
+					 * to run and upgrade it to held. */
+	DB_LSTAT_WAITING		/* Lock is on the wait queue. */
+} db_status_t;
+
 /* Lock request structure. */
 struct __db_lockreq {
 	db_lockop_t	 op;		/* Operation. */
@@ -596,19 +619,38 @@ extern const u_int8_t db_rw_conflicts[];
 #define	DB_LOCK_RIW_N	6
 extern const u_int8_t db_riw_conflicts[];
 
+struct __db_lock_stat {
+	u_int32_t st_magic;		/* Lock file magic number. */
+	u_int32_t st_version;		/* Lock file version number. */
+	u_int32_t st_maxlocks;		/* Maximum number of locks in table. */
+	u_int32_t st_nmodes;		/* Number of lock modes. */
+	u_int32_t st_numobjs;		/* Number of objects. */
+	u_int32_t st_nlockers;		/* Number of lockers. */
+	u_int32_t st_nconflicts;	/* Number of lock conflicts. */
+	u_int32_t st_nrequests;		/* Number of lock gets. */
+	u_int32_t st_nreleases;		/* Number of lock puts. */
+	u_int32_t st_ndeadlocks;	/* Number of lock deadlocks. */
+	u_int32_t st_region_wait;	/* Region lock granted after wait. */
+	u_int32_t st_region_nowait;	/* Region lock granted without wait. */
+	u_int32_t st_refcnt;		/* Region reference count. */
+	u_int32_t st_regsize;		/* Region size. */
+};
+
 #if defined(__cplusplus)
 extern "C" {
 #endif
 int	  lock_close __P((DB_LOCKTAB *));
-int	  lock_detect __P((DB_LOCKTAB *, int, int));
+int	  lock_detect __P((DB_LOCKTAB *, u_int32_t, u_int32_t));
 int	  lock_get __P((DB_LOCKTAB *,
-	    u_int32_t, int, const DBT *, db_lockmode_t, DB_LOCK *));
+	    u_int32_t, u_int32_t, const DBT *, db_lockmode_t, DB_LOCK *));
 int	  lock_id __P((DB_LOCKTAB *, u_int32_t *));
-int	  lock_open __P((const char *, int, int, DB_ENV *, DB_LOCKTAB **));
+int	  lock_open __P((const char *,
+	    u_int32_t, int, DB_ENV *, DB_LOCKTAB **));
 int	  lock_put __P((DB_LOCKTAB *, DB_LOCK));
+int	  lock_stat __P((DB_LOCKTAB *, DB_LOCK_STAT **, void *(*)(size_t)));
 int	  lock_unlink __P((const char *, int, DB_ENV *));
 int	  lock_vec __P((DB_LOCKTAB *,
-	    u_int32_t, int, DB_LOCKREQ *, int, DB_LOCKREQ **));
+	    u_int32_t, u_int32_t, DB_LOCKREQ *, int, DB_LOCKREQ **));
 #if defined(__cplusplus)
 }
 #endif
@@ -651,19 +693,21 @@ struct __db_log_stat {
 	u_int32_t st_region_nowait;	/* Region lock granted without wait. */
 	u_int32_t st_cur_file;		/* Current log file number. */
 	u_int32_t st_cur_offset;	/* Current log file offset. */
+	u_int32_t st_refcnt;		/* Region reference count. */
+	u_int32_t st_regsize;		/* Region size. */
 };
 
 #if defined(__cplusplus)
 extern "C" {
 #endif
-int	 log_archive __P((DB_LOG *, char **[], int, void *(*)(size_t)));
+int	 log_archive __P((DB_LOG *, char **[], u_int32_t, void *(*)(size_t)));
 int	 log_close __P((DB_LOG *));
 int	 log_compare __P((const DB_LSN *, const DB_LSN *));
 int	 log_file __P((DB_LOG *, const DB_LSN *, char *, size_t));
 int	 log_flush __P((DB_LOG *, const DB_LSN *));
-int	 log_get __P((DB_LOG *, DB_LSN *, DBT *, int));
-int	 log_open __P((const char *, int, int, DB_ENV *, DB_LOG **));
-int	 log_put __P((DB_LOG *, DB_LSN *, const DBT *, int));
+int	 log_get __P((DB_LOG *, DB_LSN *, DBT *, u_int32_t));
+int	 log_open __P((const char *, u_int32_t, int, DB_ENV *, DB_LOG **));
+int	 log_put __P((DB_LOG *, DB_LSN *, const DBT *, u_int32_t));
 int	 log_register __P((DB_LOG *, DB *, const char *, DBTYPE, u_int32_t *));
 int	 log_stat __P((DB_LOG *, DB_LOG_STAT **, void *(*)(size_t)));
 int	 log_unlink __P((const char *, int, DB_ENV *));
@@ -705,6 +749,17 @@ struct __db_mpool_stat {
 	u_int32_t st_page_trickle;	/* Pages written by memp_trickle. */
 	u_int32_t st_region_wait;	/* Region lock granted after wait. */
 	u_int32_t st_region_nowait;	/* Region lock granted without wait. */
+	u_int32_t st_refcnt;		/* Region reference count. */
+	u_int32_t st_regsize;		/* Region size. */
+};
+
+/* Mpool file open information structure. */
+struct __db_mpool_finfo {
+	int	   ftype;		/* File type. */
+	DBT	  *pgcookie;		/* Byte-string passed to pgin/pgout. */
+	u_int8_t  *fileid;		/* Unique file ID. */
+	int32_t	   lsn_offset;		/* LSN offset in page. */
+	u_int32_t  clear_len;		/* Cleared length on created pages. */
 };
 
 /* Mpool file statistics structure. */
@@ -724,13 +779,13 @@ extern "C" {
 #endif
 int	memp_close __P((DB_MPOOL *));
 int	memp_fclose __P((DB_MPOOLFILE *));
-int	memp_fget __P((DB_MPOOLFILE *, db_pgno_t *, int, void *));
+int	memp_fget __P((DB_MPOOLFILE *, db_pgno_t *, u_int32_t, void *));
 int	memp_fopen __P((DB_MPOOL *, const char *,
-	    int, int, int, size_t, int, DBT *, u_int8_t *, DB_MPOOLFILE **));
-int	memp_fput __P((DB_MPOOLFILE *, void *, int));
-int	memp_fset __P((DB_MPOOLFILE *, void *, int));
+	    u_int32_t, int, size_t, DB_MPOOL_FINFO *, DB_MPOOLFILE **));
+int	memp_fput __P((DB_MPOOLFILE *, void *, u_int32_t));
+int	memp_fset __P((DB_MPOOLFILE *, void *, u_int32_t));
 int	memp_fsync __P((DB_MPOOLFILE *));
-int	memp_open __P((const char *, int, int, DB_ENV *, DB_MPOOL **));
+int	memp_open __P((const char *, u_int32_t, int, DB_ENV *, DB_MPOOL **));
 int	memp_register __P((DB_MPOOL *, int,
 	    int (*)(db_pgno_t, void *, DBT *),
 	    int (*)(db_pgno_t, void *, DBT *)));
@@ -765,16 +820,21 @@ struct __db_txn_active {
 };
 
 struct __db_txn_stat {
-	DB_LSN		st_last_ckp;	/* lsn of the last checkpoint */
-	DB_LSN		st_pending_ckp;	/* last checkpoint did not finish */
-	time_t		st_time_ckp;	/* time of last checkpoint */
-	u_int32_t	st_last_txnid;	/* last transaction id given out */
-	u_int32_t	st_maxtxns;	/* maximum number of active txns */
-	u_int32_t	st_naborts;	/* number of aborted transactions */
-	u_int32_t	st_nbegins;	/* number of begun transactions */
-	u_int32_t	st_ncommits;	/* number of committed transactions */
-	u_int32_t	st_nactive;	/* number of active transactions */
-	DB_TXN_ACTIVE	*st_txnarray;	/* array of active transactions */
+	DB_LSN	  st_last_ckp;		/* lsn of the last checkpoint */
+	DB_LSN	  st_pending_ckp;	/* last checkpoint did not finish */
+	time_t	  st_time_ckp;		/* time of last checkpoint */
+	u_int32_t st_last_txnid;	/* last transaction id given out */
+	u_int32_t st_maxtxns;	/* maximum number of active txns */
+	u_int32_t st_naborts;	/* number of aborted transactions */
+	u_int32_t st_nbegins;	/* number of begun transactions */
+	u_int32_t st_ncommits;	/* number of committed transactions */
+	u_int32_t st_nactive;	/* number of active transactions */
+	DB_TXN_ACTIVE
+		 *st_txnarray;	/* array of active transactions */
+	u_int32_t st_region_wait;	/* Region lock granted after wait. */
+	u_int32_t st_region_nowait;	/* Region lock granted without wait. */
+	u_int32_t st_refcnt;		/* Region reference count. */
+	u_int32_t st_regsize;		/* Region size. */
 };
 
 #if defined(__cplusplus)
@@ -782,11 +842,11 @@ extern "C" {
 #endif
 int	  txn_abort __P((DB_TXN *));
 int	  txn_begin __P((DB_TXNMGR *, DB_TXN *, DB_TXN **));
-int	  txn_checkpoint __P((const DB_TXNMGR *, int, int));
+int	  txn_checkpoint __P((const DB_TXNMGR *, u_int32_t, u_int32_t));
 int	  txn_commit __P((DB_TXN *));
 int	  txn_close __P((DB_TXNMGR *));
 u_int32_t txn_id __P((DB_TXN *));
-int	  txn_open __P((const char *, int, int, DB_ENV *, DB_TXNMGR **));
+int	  txn_open __P((const char *, u_int32_t, int, DB_ENV *, DB_TXNMGR **));
 int	  txn_prepare __P((DB_TXN *));
 int	  txn_stat __P((DB_TXNMGR *, DB_TXN_STAT **, void *(*)(size_t)));
 int	  txn_unlink __P((const char *, int, DB_ENV *));
@@ -810,10 +870,17 @@ int	  txn_unlink __P((const char *, int, DB_ENV *));
  */
 #define	DBM_SUFFIX	".db"
 
+#if defined(_XPG4_2)
+typedef struct {
+	char *dptr;
+	size_t dsize;
+} datum;
+#else
 typedef struct {
 	char *dptr;
 	int dsize;
 } datum;
+#endif
 
 /*
  * Translate DBM calls into DB calls so that DB doesn't step on the
@@ -894,7 +961,7 @@ typedef enum {
 
 typedef struct entry {
 	char *key;
-	void *data;
+	char *data;
 } ENTRY;
 
 /*
@@ -909,7 +976,7 @@ typedef struct entry {
 #if defined(__cplusplus)
 extern "C" {
 #endif
-int	 __db_hcreate __P((unsigned int));
+int	 __db_hcreate __P((size_t));
 void	 __db_hdestroy __P((void));
 ENTRY	*__db_hsearch __P((ENTRY, ACTION));
 #if defined(__cplusplus)
diff --git a/db2/include/db_185.h.src b/db2/include/db_185.h.src
index a88eb4e525..a928ca8fd5 100644
--- a/db2/include/db_185.h.src
+++ b/db2/include/db_185.h.src
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 /*
@@ -36,7 +36,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- *	@(#)db_185.h.src	8.5 (Sleepycat) 1/15/98
+ *	@(#)db_185.h.src	8.7 (Sleepycat) 4/10/98
  */
 
 #ifndef _DB_185_H_
@@ -127,11 +127,11 @@ typedef struct __db {
 /* Structure used to pass parameters to the btree routines. */
 typedef struct {
 #define	R_DUP		0x01	/* duplicate keys */
-	u_long	flags;
-	u_int	cachesize;	/* bytes to cache */
-	int	maxkeypage;	/* maximum keys per page */
-	int	minkeypage;	/* minimum keys per page */
-	u_int	psize;		/* page size */
+	u_int32_t flags;
+	u_int32_t cachesize;	/* bytes to cache */
+	u_int32_t maxkeypage;	/* maximum keys per page */
+	u_int32_t minkeypage;	/* minimum keys per page */
+	u_int32_t psize;	/* page size */
 	int	(*compare)	/* comparison function */
 	    __P((const DBT *, const DBT *));
 	size_t	(*prefix)	/* prefix function */
@@ -144,10 +144,10 @@ typedef struct {
 
 /* Structure used to pass parameters to the hashing routines. */
 typedef struct {
-	u_int	bsize;		/* bucket size */
-	u_int	ffactor;	/* fill factor */
-	u_int	nelem;		/* number of elements */
-	u_int	cachesize;	/* bytes to cache */
+	u_int32_t bsize;	/* bucket size */
+	u_int32_t ffactor;	/* fill factor */
+	u_int32_t nelem;	/* number of elements */
+	u_int32_t cachesize;	/* bytes to cache */
 	u_int32_t		/* hash function */
 		(*hash) __P((const void *, size_t));
 	int	lorder;		/* byte order */
@@ -158,9 +158,9 @@ typedef struct {
 #define	R_FIXEDLEN	0x01	/* fixed-length records */
 #define	R_NOKEY		0x02	/* key not required */
 #define	R_SNAPSHOT	0x04	/* snapshot the input */
-	u_long	flags;
-	u_int	cachesize;	/* bytes to cache */
-	u_int	psize;		/* page size */
+	u_int32_t flags;
+	u_int32_t cachesize;	/* bytes to cache */
+	u_int32_t psize;	/* page size */
 	int	lorder;		/* byte order */
 	size_t	reclen;		/* record length (fixed-length records) */
 	u_char	bval;		/* delimiting byte (variable-length records */
diff --git a/db2/include/db_am.h b/db2/include/db_am.h
index 304e3fd959..0c189244a2 100644
--- a/db2/include/db_am.h
+++ b/db2/include/db_am.h
@@ -1,10 +1,10 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  *
- *	@(#)db_am.h	10.8 (Sleepycat) 1/8/98
+ *	@(#)db_am.h	10.9 (Sleepycat) 4/10/98
  */
 #ifndef _DB_AM_H
 #define _DB_AM_H
diff --git a/db2/include/db_auto.h b/db2/include/db_auto.h
index 4c7b4da970..1b07c748e8 100644
--- a/db2/include/db_auto.h
+++ b/db2/include/db_auto.h
@@ -114,6 +114,9 @@ typedef struct _db_noop_args {
 	u_int32_t type;
 	DB_TXN *txnid;
 	DB_LSN prev_lsn;
+	u_int32_t	fileid;
+	db_pgno_t	pgno;
+	DB_LSN 	prevlsn;
 } __db_noop_args;
 
 #endif
diff --git a/db2/include/db_cxx.h b/db2/include/db_cxx.h
index 83523c5559..fc04d5d66b 100644
--- a/db2/include/db_cxx.h
+++ b/db2/include/db_cxx.h
@@ -1,10 +1,10 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1997
+ * Copyright (c) 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  *
- *	@(#)db_cxx.h	10.13 (Sleepycat) 11/25/97
+ *	@(#)db_cxx.h	10.17 (Sleepycat) 5/2/98
  */
 
 #ifndef _DB_CXX_H_
@@ -178,11 +178,11 @@ class _exported DbLock
     friend DbLockTab;
 
 public:
-    DbLock(unsigned int);
+    DbLock(u_int);
     DbLock();
 
-    unsigned int get_lock_id();
-    void set_lock_id(unsigned int);
+    u_int get_lock_id();
+    void set_lock_id(u_int);
 
     int put(DbLockTab *locktab);
 
@@ -202,16 +202,16 @@ class _exported DbLockTab
 friend DbEnv;
 public:
     int close();
-    int detect(int flags, int atype);
-    int get(u_int32_t locker, int flags, const Dbt *obj,
+    int detect(u_int32_t flags, int atype);
+    int get(u_int32_t locker, u_int32_t flags, const Dbt *obj,
             db_lockmode_t lock_mode, DbLock *lock);
     int id(u_int32_t *idp);
-    int vec(u_int32_t locker, int flags, DB_LOCKREQ list[],
+    int vec(u_int32_t locker, u_int32_t flags, DB_LOCKREQ list[],
 	    int nlist, DB_LOCKREQ **elistp);
 
     // Create or remove new locktab files
     //
-    static int open(const char *dir, int flags, int mode,
+    static int open(const char *dir, u_int32_t flags, int mode,
                     DbEnv* dbenv, DbLockTab **regionp);
     static int unlink(const char *dir, int force, DbEnv* dbenv);
 
@@ -252,13 +252,13 @@ class _exported DbLog
 {
 friend DbEnv;
 public:
-    int archive(char **list[], int flags, void *(*db_malloc)(size_t));
+    int archive(char **list[], u_int32_t flags, void *(*db_malloc)(size_t));
     int close();
     static int compare(const DbLsn *lsn0, const DbLsn *lsn1);
     int file(DbLsn *lsn, char *namep, int len);
     int flush(const DbLsn *lsn);
-    int get(DbLsn *lsn, Dbt *data, int flags);
-    int put(DbLsn *lsn, const Dbt *data, int flags);
+    int get(DbLsn *lsn, Dbt *data, u_int32_t flags);
+    int put(DbLsn *lsn, const Dbt *data, u_int32_t flags);
 
     // Normally these would be called register and unregister to
     // parallel the C interface, but "register" is a reserved word.
@@ -268,7 +268,7 @@ public:
 
     // Create or remove new log files
     //
-    static int open(const char *dir, int flags, int mode,
+    static int open(const char *dir, u_int32_t flags, int mode,
                     DbEnv* dbenv, DbLog **regionp);
     static int unlink(const char *dir, int force, DbEnv* dbenv);
 
@@ -300,17 +300,17 @@ private:
 
 class _exported DbMpoolFile
 {
+friend DbEnv;
 public:
     int close();
-    int get(db_pgno_t *pgnoaddr, int flags, void *pagep);
-    int put(void *pgaddr, int flags);
-    int set(void *pgaddr, int flags);
+    int get(db_pgno_t *pgnoaddr, u_int32_t flags, void *pagep);
+    int put(void *pgaddr, u_int32_t flags);
+    int set(void *pgaddr, u_int32_t flags);
     int sync();
 
     static int open(DbMpool *mp, const char *file,
-                    int ftype, int flags, int mode,
-                    size_t pagesize, int lsn_offset,
-                    Dbt *pgcookie, u_int8_t *uid, DbMpoolFile **mpf);
+                    u_int32_t flags, int mode, size_t pagesize,
+                    DB_MPOOL_FINFO *finfop, DbMpoolFile **mpf);
 
 private:
     // We can add data to this class if needed
@@ -356,7 +356,7 @@ public:
 
     // Create or remove new mpool files
     //
-    static int open(const char *dir, int flags, int mode,
+    static int open(const char *dir, u_int32_t flags, int mode,
                     DbEnv* dbenv, DbMpool **regionp);
     static int unlink(const char *dir, int force, DbEnv* dbenv);
 
@@ -391,13 +391,13 @@ class _exported DbTxnMgr
 friend DbEnv;
 public:
     int begin(DbTxn *pid, DbTxn **tid);
-    int checkpoint(int kbyte, int min) const;
+    int checkpoint(u_int32_t kbyte, u_int32_t min) const;
     int close();
     int stat(DB_TXN_STAT **statp, void *(*db_malloc)(size_t));
 
     // Create or remove new txnmgr files
     //
-    static int open(const char *dir, int flags, int mode,
+    static int open(const char *dir, u_int32_t flags, int mode,
                     DbEnv* dbenv, DbTxnMgr **regionp);
     static int unlink(const char *dir, int force, DbEnv* dbenv);
 
@@ -510,12 +510,12 @@ public:
     // Hash access method.
 
     // Fill factor.
-    unsigned int get_h_ffactor() const;
-    void set_h_ffactor(unsigned int);
+    u_int32_t get_h_ffactor() const;
+    void set_h_ffactor(u_int32_t);
 
     // Number of elements.
-    unsigned int get_h_nelem() const;
-    void set_h_nelem(unsigned int);
+    u_int32_t get_h_nelem() const;
+    void set_h_nelem(u_int32_t);
 
     // Hash function.
     typedef u_int32_t (*h_hash_fcn)(const void *, u_int32_t);
@@ -584,7 +584,7 @@ public:
     // application with these arguments.  Do not use it if you
     // need to set other parameters via the access methods.
     //
-    DbEnv(const char *homeDir, char *const *db_config, int flags);
+    DbEnv(const char *homeDir, char *const *db_config, u_int32_t flags);
 
     // Use this constructor if you wish to *delay* the initialization
     // of the db library.  This is useful if you need to set
@@ -596,7 +596,7 @@ public:
     // Used in conjunction with the default constructor to
     // complete the initialization of the db library.
     //
-    int appinit(const char *homeDir, char *const *db_config, int flags);
+    int appinit(const char *homeDir, char *const *db_config, u_int32_t flags);
 
     // Called automatically when DbEnv is destroyed, or can be
     // called at any time to shut down Db.
@@ -673,8 +673,8 @@ public:
     void set_lk_modes(int);
 
     // Maximum number of locks.
-    unsigned int get_lk_max() const;
-    void set_lk_max(unsigned int);
+    u_int32_t get_lk_max() const;
+    void set_lk_max(u_int32_t);
 
     // Deadlock detect on every conflict.
     u_int32_t get_lk_detect() const;
@@ -714,8 +714,8 @@ public:
     DbTxnMgr *get_tx_info() const;
 
     // Maximum number of transactions.
-    unsigned int get_tx_max() const;
-    void set_tx_max(unsigned int);
+    u_int32_t get_tx_max() const;
+    void set_tx_max(u_int32_t);
 
     // Dispatch function for recovery.
     typedef int (*tx_recover_fcn)(DB_LOG *, DBT *, DB_LSN *, int, void *);
@@ -781,18 +781,18 @@ class _exported Db
     friend DbEnv;
 
 public:
-    int close(int flags);
+    int close(u_int32_t flags);
     int cursor(DbTxn *txnid, Dbc **cursorp);
-    int del(DbTxn *txnid, Dbt *key, int flags);
+    int del(DbTxn *txnid, Dbt *key, u_int32_t flags);
     int fd(int *fdp);
-    int get(DbTxn *txnid, Dbt *key, Dbt *data, int flags);
-    int put(DbTxn *txnid, Dbt *key, Dbt *data, int flags);
-    int stat(void *sp, void *(*db_malloc)(size_t), int flags);
-    int sync(int flags);
+    int get(DbTxn *txnid, Dbt *key, Dbt *data, u_int32_t flags);
+    int put(DbTxn *txnid, Dbt *key, Dbt *data, u_int32_t flags);
+    int stat(void *sp, void *(*db_malloc)(size_t), u_int32_t flags);
+    int sync(u_int32_t flags);
 
     DBTYPE get_type() const;
 
-    static int open(const char *fname, DBTYPE type, int flags,
+    static int open(const char *fname, DBTYPE type, u_int32_t flags,
                     int mode, DbEnv *dbenv, DbInfo *info, Db **dbpp);
 
 private:
@@ -867,9 +867,9 @@ class _exported Dbc : protected DBC
 
 public:
     int close();
-    int del(int flags);
-    int get(Dbt* key, Dbt *data, int flags);
-    int put(Dbt* key, Dbt *data, int flags);
+    int del(u_int32_t flags);
+    int get(Dbt* key, Dbt *data, u_int32_t flags);
+    int put(Dbt* key, Dbt *data, u_int32_t flags);
 
 private:
     // No data is permitted in this class (see comment at top)
diff --git a/db2/include/db_dispatch.h b/db2/include/db_dispatch.h
index b93ec39b54..8f5e217402 100644
--- a/db2/include/db_dispatch.h
+++ b/db2/include/db_dispatch.h
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 /*
@@ -36,26 +36,30 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- *	@(#)db_dispatch.h	10.1 (Sleepycat) 4/12/97
+ *	@(#)db_dispatch.h	10.4 (Sleepycat) 5/3/98
  */
 
 #ifndef _DB_DISPATCH_H
 #define _DB_DISPATCH_H
 
+struct __db_txnhead;	typedef struct __db_txnhead DB_TXNHEAD;
+struct __db_txnlist;	typedef struct __db_txnlist DB_TXNLIST;
+
 /*
  * Declarations and typedefs for the list of transaction IDs used during
  * recovery.
  */
-
-typedef struct __db_txnhead {
-	LIST_HEAD(__db_headlink, _db_txnlist) head;
+struct __db_txnhead {
+	LIST_HEAD(__db_headlink, __db_txnlist) head;
 	u_int32_t maxid;
-} __db_txnhead;
+	int32_t generation;
+};
 
-typedef struct _db_txnlist {
-	LIST_ENTRY(_db_txnlist) links;
-	u_int32_t	txnid;
-} __db_txnlist;
+struct __db_txnlist {
+	LIST_ENTRY(__db_txnlist) links;
+	u_int32_t txnid;
+	int32_t	generation;
+};
 
 #define	DB_log_BEGIN		  0
 #define	DB_txn_BEGIN		  5
diff --git a/db2/include/db_ext.h b/db2/include/db_ext.h
index 122d8f13c1..8a03db9f64 100644
--- a/db2/include/db_ext.h
+++ b/db2/include/db_ext.h
@@ -53,7 +53,8 @@ int __db_debug_print
    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
 int __db_debug_read __P((void *, __db_debug_args **));
 int __db_noop_log
-    __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t));
+    __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
+    u_int32_t, db_pgno_t, DB_LSN *));
 int __db_noop_print
    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
 int __db_noop_read __P((void *, __db_noop_args **));
@@ -67,8 +68,9 @@ int __db_add_recovery __P((DB_ENV *,
 int __db_txnlist_init __P((void *));
 int __db_txnlist_add __P((void *, u_int32_t));
 int __db_txnlist_find __P((void *, u_int32_t));
-void __db_txnlist_print __P((void *));
 void __db_txnlist_end __P((void *));
+void __db_txnlist_gen __P((void *, int));
+void __db_txnlist_print __P((void *));
 int __db_dput __P((DB *,
    DBT *, PAGE **, db_indx_t *, int (*)(DB *, u_int32_t, PAGE **)));
 int __db_drem __P((DB *,
@@ -83,7 +85,7 @@ int __db_goff __P((DB *, DBT *,
     u_int32_t, db_pgno_t, void **, u_int32_t *));
 int __db_poff __P((DB *, const DBT *, db_pgno_t *,
     int (*)(DB *, u_int32_t, PAGE **)));
-int __db_ovref __P((DB *, db_pgno_t, int));
+int __db_ovref __P((DB *, db_pgno_t, int32_t));
 int __db_doff __P((DB *, db_pgno_t, int (*)(DB *, PAGE *)));
 int __db_moff __P((DB *, const DBT *, db_pgno_t));
 void __db_loadme __P((void));
@@ -97,7 +99,8 @@ int __db_prnpage __P((DB_MPOOLFILE *, db_pgno_t));
 int __db_prpage __P((PAGE *, int));
 int __db_isbad __P((PAGE *, int));
 void __db_pr __P((u_int8_t *, u_int32_t));
-void __db_prflags __P((u_int32_t, const FN *));
+int __db_prdbt __P((DBT *, int, FILE *));
+void __db_prflags __P((u_int32_t, const FN *, FILE *));
 int __db_addrem_recover
    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
 int __db_split_recover __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
@@ -108,8 +111,7 @@ int __db_relink_recover
 int __db_addpage_recover
    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
 int __db_debug_recover __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
-int __db_noop_recover
-  __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+int __db_noop_recover __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
 int __db_ret __P((DB *,
    PAGE *, u_int32_t, DBT *, void **, u_int32_t *));
 int __db_retcopy __P((DBT *,
diff --git a/db2/include/db_int.h.src b/db2/include/db_int.h.src
index 48790d6c9a..d67e2c428c 100644
--- a/db2/include/db_int.h.src
+++ b/db2/include/db_int.h.src
@@ -1,10 +1,10 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  *
- *	@(#)db_int.h.src	10.41 (Sleepycat) 1/8/98
+ *	@(#)db_int.h.src	10.62 (Sleepycat) 5/23/98
  */
 
 #ifndef _DB_INTERNAL_H_
@@ -12,8 +12,6 @@
 
 #include "db.h"				/* Standard DB include file. */
 #include "queue.h"
-#include "os_func.h"
-#include "os_ext.h"
 
 /*******************************************************
  * General purpose constants and macros.
@@ -77,8 +75,8 @@
 #define	R_ADDR(base, offset)	((void *)((u_int8_t *)((base)->addr) + offset))
 #define	R_OFFSET(base, p)	((u_int8_t *)(p) - (u_int8_t *)(base)->addr)
 
-/* Free and free-string macros that overwrite memory during debugging. */
-#ifdef DEBUG
+/* Free and free-string macros that overwrite memory. */
+#ifdef DIAGNOSTIC
 #undef	FREE
 #define	FREE(p, len) {							\
 	memset(p, 0xff, len);						\
@@ -117,36 +115,41 @@ typedef struct __fn {
 #undef	DB_LINE
 #define	DB_LINE "=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-="
 
+/* Global variables. */
+typedef struct __db_globals {
+	int db_mutexlocks;		/* DB_MUTEXLOCKS */
+	int db_region_anon;		/* DB_REGION_ANON, DB_REGION_NAME */
+	int db_region_init;		/* DB_REGION_INIT */
+	int db_tsl_spins;		/* DB_TSL_SPINS */
+	int db_pageyield;		/* DB_PAGEYIELD */
+} DB_GLOBALS;
+extern	DB_GLOBALS	__db_global_values;
+#define	DB_GLOBAL(v)	__db_global_values.v
+
 /* Unused, or not-used-yet variable.  "Shut that bloody compiler up!" */
 #define	COMPQUIET(n, v)	(n) = (v)
 
+/*
+ * Win16 needs specific syntax on callback functions.  Nobody else cares.
+ */
+#ifndef	DB_CALLBACK
+#define	DB_CALLBACK	/* Nothing. */
+#endif
+
 /*******************************************************
  * Files.
  *******************************************************/
-#ifndef MAXPATHLEN		/* Maximum path length. */
-#ifdef PATH_MAX
-#define	MAXPATHLEN	PATH_MAX
-#else
+ /*
+  * We use 1024 as the maximum path length.  It's too hard to figure out what
+  * the real path length is, as it was traditionally stored in <sys/param.h>,
+  * and that file isn't always available.
+  */
+#undef	MAXPATHLEN
 #define	MAXPATHLEN	1024
-#endif
-#endif
 
 #define	PATH_DOT	"."	/* Current working directory. */
 #define	PATH_SEPARATOR	"/"	/* Path separator character. */
 
-#ifndef S_IRUSR			/* UNIX specific file permissions. */
-#define	S_IRUSR	0000400		/* R for owner */
-#define	S_IWUSR	0000200		/* W for owner */
-#define	S_IRGRP	0000040		/* R for group */
-#define	S_IWGRP	0000020		/* W for group */
-#define	S_IROTH	0000004		/* R for other */
-#define	S_IWOTH	0000002		/* W for other */
-#endif
-
-#ifndef S_ISDIR			/* UNIX specific: directory test. */
-#define	S_ISDIR(m)	((m & 0170000) == 0040000)
-#endif
-
 /*******************************************************
  * Mutex support.
  *******************************************************/
@@ -176,12 +179,12 @@ typedef struct __fn {
 typedef struct _db_mutex_t {
 #ifdef HAVE_SPINLOCKS
 	tsl_t	  tsl_resource;		/* Resource test and set. */
-#ifdef DEBUG
-	u_long	  pid;			/* Lock holder: 0 or process pid. */
+#ifdef DIAGNOSTIC
+	u_int32_t pid;			/* Lock holder: 0 or process pid. */
 #endif
 #else
 	u_int32_t off;			/* Backing file offset. */
-	u_long	  pid;			/* Lock holder: 0 or process pid. */
+	u_int32_t pid;			/* Lock holder: 0 or process pid. */
 #endif
 	u_int32_t spins;		/* Spins before block. */
 	u_int32_t mutex_set_wait;	/* Granted after wait. */
@@ -195,11 +198,11 @@ typedef struct _db_mutex_t {
  *******************************************************/
 /* Lock/unlock a DB thread. */
 #define	DB_THREAD_LOCK(dbp)						\
-	(F_ISSET(dbp, DB_AM_THREAD) ?					\
-	    __db_mutex_lock((db_mutex_t *)(dbp)->mutexp, -1) : 0)
+	((void)(F_ISSET(dbp, DB_AM_THREAD) ?				\
+	    __db_mutex_lock((db_mutex_t *)(dbp)->mutexp, -1) : 0))
 #define	DB_THREAD_UNLOCK(dbp)						\
-	(F_ISSET(dbp, DB_AM_THREAD) ?					\
-	    __db_mutex_unlock((db_mutex_t *)(dbp)->mutexp, -1) : 0)
+	((void)(F_ISSET(dbp, DB_AM_THREAD) ?				\
+	    __db_mutex_unlock((db_mutex_t *)(dbp)->mutexp, -1) : 0))
 
 /* Btree/recno local statistics structure. */
 struct __db_bt_lstat;	typedef struct __db_bt_lstat DB_BTREE_LSTAT;
@@ -228,7 +231,7 @@ typedef enum {
 } APPNAME;
 
 /*******************************************************
- * Regions.
+ * Shared memory regions.
  *******************************************************/
 /*
  * The shared memory regions share an initial structure so that the general
@@ -240,16 +243,69 @@ typedef enum {
  */
 typedef struct _rlayout {
 	db_mutex_t lock;		/* Region mutex. */
+#define	DB_REGIONMAGIC	0x120897
+	u_int32_t  valid;		/* Valid magic number. */
 	u_int32_t  refcnt;		/* Region reference count. */
 	size_t	   size;		/* Region length. */
 	int	   majver;		/* Major version number. */
 	int	   minver;		/* Minor version number. */
 	int	   patch;		/* Patch version number. */
+#define	INVALID_SEGID	-1
+	int	   segid;		/* shmget(2) ID, or Win16 segment ID. */
 
-#define	DB_R_DELETED	0x01		/* Region was deleted. */
+#define	REGION_ANONYMOUS	0x01	/* Region is/should be in anon mem. */
 	u_int32_t  flags;
 } RLAYOUT;
 
+/*
+ * DB creates all regions on 4K boundaries out of sheer paranoia, so that
+ * we don't make the underlying VM unhappy.
+ */
+#define	DB_VMPAGESIZE	(4 * 1024)
+#define	DB_ROUNDOFF(i) {						\
+	(i) += DB_VMPAGESIZE - 1;					\
+	(i) -= (i) % DB_VMPAGESIZE;					\
+}
+
+/*
+ * The interface to region attach is nasty, there is a lot of complex stuff
+ * going on, which has to be retained between create/attach and detach.  The
+ * REGINFO structure keeps track of it.
+ */
+struct __db_reginfo;	typedef struct __db_reginfo REGINFO;
+struct __db_reginfo {
+					/* Arguments. */
+	DB_ENV	   *dbenv;		/* Region naming info. */
+	APPNAME	    appname;		/* Region naming info. */
+	char	   *path;		/* Region naming info. */
+	const char *file;		/* Region naming info. */
+	int	    mode;		/* Region mode, if a file. */
+	size_t	    size;		/* Region size. */
+	u_int32_t   dbflags;		/* Region file open flags, if a file. */
+
+					/* Results. */
+	char	   *name;		/* Region name. */
+	void	   *addr;		/* Region address. */
+	int	    fd;			/* Fcntl(2) locking file descriptor.
+					   NB: this is only valid if a regular
+					   file is backing the shared region,
+					   and mmap(2) is being used to map it
+					   into our address space. */
+	int	    segid;		/* shmget(2) ID, or Win16 segment ID. */
+
+					/* Shared flags. */
+/*				0x0001	COMMON MASK with RLAYOUT structure. */
+#define	REGION_CANGROW		0x0002	/* Can grow. */
+#define	REGION_CREATED		0x0004	/* Created. */
+#define	REGION_HOLDINGSYS	0x0008	/* Holding system resources. */
+#define	REGION_LASTDETACH	0x0010	/* Delete on last detach. */
+#define	REGION_MALLOC		0x0020	/* Created in malloc'd memory. */
+#define	REGION_PRIVATE		0x0040	/* Private to thread/process. */
+#define	REGION_REMOVED		0x0080	/* Already deleted. */
+#define	REGION_SIZEDEF		0x0100	/* Use default region size if exists. */
+	u_int32_t   flags;
+};
+
 /*******************************************************
  * Mpool.
  *******************************************************/
@@ -281,7 +337,7 @@ typedef struct __dbpginfo {
 #define	DB_LOGGING(dbp)							\
 	(F_ISSET(dbp, DB_AM_LOGGING) && !F_ISSET(dbp, DB_AM_RECOVER))
 
-#ifdef DEBUG
+#ifdef DIAGNOSTIC
 /*
  * Debugging macro to log operations.
  *	If DEBUG_WOP is defined, log operations that modify the database.
@@ -318,7 +374,7 @@ typedef struct __dbpginfo {
 #else
 #define	DEBUG_LREAD(D, T, O, K, A, F)
 #define	DEBUG_LWRITE(D, T, O, K, A, F)
-#endif /* DEBUG */
+#endif /* DIAGNOSTIC */
 
 /*******************************************************
  * Transactions and recovery.
@@ -339,4 +395,8 @@ struct __db_txn {
 	size_t		off;		/* Detail structure within region. */
 	TAILQ_ENTRY(__db_txn) links;
 };
+
+#include "os_func.h"
+#include "os_ext.h"
+
 #endif /* !_DB_INTERNAL_H_ */
diff --git a/db2/include/db_page.h b/db2/include/db_page.h
index 30f6072fc3..e1846cbbbd 100644
--- a/db2/include/db_page.h
+++ b/db2/include/db_page.h
@@ -1,10 +1,10 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  *
- *	@(#)db_page.h	10.13 (Sleepycat) 9/24/97
+ *	@(#)db_page.h	10.15 (Sleepycat) 5/1/98
  */
 
 #ifndef _DB_PAGE_H_
@@ -29,6 +29,14 @@
 #define	PGNO_INVALID	0	/* Metadata page number, therefore illegal. */
 #define	PGNO_ROOT	1	/* Root is page #1. */
 
+/*
+ * When we create pages in mpool, we ask mpool to clear some number of bytes
+ * in the header.  This number must be at least as big as the regular page
+ * headers and cover enough of the btree and hash meta-data pages to obliterate
+ * the magic and version numbers.
+ */
+#define	DB_PAGE_CLEAR_LEN	32
+
 /************************************************************************
  BTREE METADATA PAGE LAYOUT
  ************************************************************************/
diff --git a/db2/include/db_shash.h b/db2/include/db_shash.h
index b94e0f1d41..35ade395fc 100644
--- a/db2/include/db_shash.h
+++ b/db2/include/db_shash.h
@@ -1,10 +1,10 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  *
- *	@(#)db_shash.h	10.2 (Sleepycat) 9/16/97
+ *	@(#)db_shash.h	10.3 (Sleepycat) 4/10/98
  */
 
 /* Hash Headers */
diff --git a/db2/include/db_swap.h b/db2/include/db_swap.h
index 278282f5e4..9f94ed721b 100644
--- a/db2/include/db_swap.h
+++ b/db2/include/db_swap.h
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 /*
@@ -36,7 +36,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- *	@(#)db_swap.h	10.3 (Sleepycat) 6/10/97
+ *	@(#)db_swap.h	10.5 (Sleepycat) 4/10/98
  */
 
 #ifndef _DB_SWAP_H_
@@ -74,7 +74,7 @@
 /*
  * Little endian <==> big endian 16-bit swap macros.
  *	M_16_SWAP	swap a memory location
- *	P_16_COPY	copy potentially unaligned  from one location to another
+ *	P_16_COPY	copy potentially unaligned 2 byte quantities
  *	P_16_SWAP	swap a referenced memory location
  */
 #define	M_16_SWAP(a) {							\
diff --git a/db2/include/hash.h b/db2/include/hash.h
index ae6d3843c6..e55c2102cb 100644
--- a/db2/include/hash.h
+++ b/db2/include/hash.h
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 /*
@@ -43,7 +43,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- *	@(#)hash.h	10.7 (Sleepycat) 11/1/97
+ *	@(#)hash.h	10.8 (Sleepycat) 4/10/98
  */
 
 /* Cursor structure definitions. */
diff --git a/db2/include/hash_ext.h b/db2/include/hash_ext.h
index 9b97d35a42..7086adcc44 100644
--- a/db2/include/hash_ext.h
+++ b/db2/include/hash_ext.h
@@ -2,7 +2,7 @@
 #ifndef _hash_ext_h_
 #define _hash_ext_h_
 int __ham_open __P((DB *, DB_INFO *));
-int  __ham_close __P((DB *));
+int __ham_close __P((DB *));
 int __ham_c_iclose __P((DB *, DBC *));
 int __ham_expand_table __P((HTAB *));
 u_int32_t __ham_call_hash __P((HTAB *, u_int8_t *, int32_t));
@@ -75,7 +75,7 @@ int __ham_mswap __P((void *));
 #ifdef DEBUG
 void __ham_dump_bucket __P((HTAB *, u_int32_t));
 #endif
-int __ham_add_dup __P((HTAB *, HASH_CURSOR *, DBT *, int));
+int __ham_add_dup __P((HTAB *, HASH_CURSOR *, DBT *, u_int32_t));
 void __ham_move_offpage __P((HTAB *, PAGE *, u_int32_t, db_pgno_t));
 u_int32_t __ham_func2 __P((const void *, u_int32_t));
 u_int32_t __ham_func3 __P((const void *, u_int32_t));
@@ -90,14 +90,16 @@ int __ham_item_first __P((HTAB *, HASH_CURSOR *, db_lockmode_t));
 int __ham_item_prev __P((HTAB *, HASH_CURSOR *, db_lockmode_t));
 int __ham_item_next __P((HTAB *, HASH_CURSOR *, db_lockmode_t));
 void __ham_putitem __P((PAGE *p, const DBT *, int));
+void __ham_reputpair
+   __P((PAGE *p, u_int32_t, u_int32_t, const DBT *, const DBT *));
 int __ham_del_pair __P((HTAB *, HASH_CURSOR *, int));
 int __ham_replpair __P((HTAB *, HASH_CURSOR *, DBT *, u_int32_t));
 void __ham_onpage_replace __P((PAGE *, size_t, u_int32_t, int32_t,
     int32_t,  DBT *));
 int __ham_split_page __P((HTAB *, u_int32_t, u_int32_t));
-int __ham_add_el __P((HTAB *, HASH_CURSOR *, const DBT *, const DBT *,
-    int));
-void __ham_copy_item __P((HTAB *, PAGE *, int, PAGE *));
+int __ham_add_el
+   __P((HTAB *, HASH_CURSOR *, const DBT *, const DBT *, int));
+void __ham_copy_item __P((HTAB *, PAGE *, u_int32_t, PAGE *));
 int __ham_add_ovflpage __P((HTAB *, PAGE *, int, PAGE **));
 int __ham_new_page __P((HTAB *, u_int32_t, u_int32_t, PAGE **));
 int __ham_del_page __P((DB *, PAGE *));
@@ -106,12 +108,12 @@ int __ham_dirty_page __P((HTAB *, PAGE *));
 int __ham_get_page __P((DB *, db_pgno_t, PAGE **));
 int __ham_overflow_page __P((DB *, u_int32_t, PAGE **));
 #ifdef DEBUG
-int __bucket_to_page __P((HTAB *, int));
+db_pgno_t __bucket_to_page __P((HTAB *, db_pgno_t));
 #endif
 void __ham_init_ovflpages __P((HTAB *));
 int __ham_get_cpage __P((HTAB *, HASH_CURSOR *, db_lockmode_t));
-int __ham_next_cpage __P((HTAB *, HASH_CURSOR *, db_pgno_t,
-    int, int));
+int __ham_next_cpage
+   __P((HTAB *, HASH_CURSOR *, db_pgno_t, int, u_int32_t));
 void __ham_dpair __P((DB *, PAGE *, u_int32_t));
 int __ham_insdel_recover
     __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
diff --git a/db2/include/lock.h b/db2/include/lock.h
index 5031b65d06..47a38b8783 100644
--- a/db2/include/lock.h
+++ b/db2/include/lock.h
@@ -1,16 +1,19 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  *
- *	@(#)lock.h	10.10 (Sleepycat) 11/13/97
+ *	@(#)lock.h	10.15 (Sleepycat) 5/10/98
  */
 
 typedef struct __db_lockobj	DB_LOCKOBJ;
 
 #define DB_DEFAULT_LOCK_FILE	"__db_lock.share"
-#define DB_LOCK_DEFAULT_N	5000
+
+#ifndef DB_LOCK_DEFAULT_N
+#define DB_LOCK_DEFAULT_N	5000	/* Default # of locks in region. */
+#endif
 
 /*
  * The locker id space is divided between the transaction manager and the lock
@@ -54,9 +57,9 @@ struct __db_lockregion {
 
 /* Macros to lock/unlock the region. */
 #define	LOCK_LOCKREGION(lt)						\
-	(void)__db_mutex_lock(&(lt)->region->hdr.lock, (lt)->fd)
+	(void)__db_mutex_lock(&(lt)->region->hdr.lock, (lt)->reginfo.fd)
 #define	UNLOCK_LOCKREGION(lt)						\
-	(void)__db_mutex_unlock(&(lt)->region->hdr.lock, (lt)->fd)
+	(void)__db_mutex_unlock(&(lt)->region->hdr.lock, (lt)->reginfo.fd)
 
 /*
  * Since we will be keeping DBTs in shared memory, we need the equivalent
@@ -69,9 +72,6 @@ typedef struct __sh_dbt {
 
 #define SH_DBT_PTR(p)	((void *)(((u_int8_t *)(p)) + (p)->off))
 
-/*
- * The lock table is the per-process cookie returned from a lock_open call.
- */
 struct __db_lockobj {
 	SH_DBT	lockobj;		/* Identifies object locked. */
 	SH_TAILQ_ENTRY links;		/* Links for free list. */
@@ -98,12 +98,14 @@ struct __db_lockobj {
 #define	holders	dlinks._holders
 #define	heldby	dlinks._heldby
 
+/*
+ * The lock table is the per-process cookie returned from a lock_open call.
+ */
 struct __db_locktab {
 	DB_ENV		*dbenv;		/* Environment. */
-	int		 fd;		/* mapped file descriptor */
-	DB_LOCKREGION	*region;	/* address of shared memory region */
+	REGINFO		 reginfo;	/* Region information. */
+	DB_LOCKREGION	*region;	/* Address of shared memory region. */
 	DB_HASHTAB 	*hashtab; 	/* Beginning of hash table. */
-	size_t		reg_size;	/* last known size of lock region */
 	void		*mem;		/* Beginning of string space. */
 	u_int8_t 	*conflicts;	/* Pointer to conflict matrix. */
 };
@@ -113,21 +115,6 @@ struct __db_locktab {
 	T->conflicts[HELD * T->region->nmodes + WANTED]
 
 /*
- * Status of a lock.
- */
-typedef enum {
-	DB_LSTAT_ABORTED,		/* Lock belongs to an aborted txn. */
-	DB_LSTAT_ERR,			/* Lock is bad. */
-	DB_LSTAT_FREE,			/* Lock is unallocated. */
-	DB_LSTAT_HELD,			/* Lock is currently held. */
-	DB_LSTAT_NOGRANT,		/* Lock was not granted. */
-	DB_LSTAT_PENDING,		/* Lock was waiting and has been
-					 * promoted; waiting for the owner
-					 * to run and upgrade it to held. */
-	DB_LSTAT_WAITING		/* Lock is on the wait queue. */
-} db_status_t;
-
-/*
  * Resources in the lock region.  Used to indicate which resource
  * is running low when we need to grow the region.
  */
@@ -187,17 +174,4 @@ struct __db_lock {
 	ALIGN((N) * sizeof(DB_LOCKOBJ), sizeof(size_t)) +		\
 	ALIGN(STRING_SIZE(N), sizeof(size_t)))
 
-#ifdef DEBUG
-#define	LOCK_DEBUG_LOCKERS	0x0001
-#define	LOCK_DEBUG_LOCK	 	0x0002
-#define	LOCK_DEBUG_OBJ	 	0x0004
-#define	LOCK_DEBUG_CONF	 	0x0008
-#define	LOCK_DEBUG_MEM	 	0x0010
-#define	LOCK_DEBUG_BUCKET	0x0020
-#define LOCK_DEBUG_OBJECTS	0x0040
-#define	LOCK_DEBUG_ALL	 	0xFFFF
-
-#define	LOCK_DEBUG_NOMUTEX	0x0100
-#endif
-
 #include "lock_ext.h"
diff --git a/db2/include/lock_ext.h b/db2/include/lock_ext.h
index d983b29069..1e0522c6b5 100644
--- a/db2/include/lock_ext.h
+++ b/db2/include/lock_ext.h
@@ -1,14 +1,17 @@
 /* DO NOT EDIT: automatically built by dist/distrib. */
 #ifndef _lock_ext_h_
 #define _lock_ext_h_
-void __lock_dump_region __P((DB_LOCKTAB *, u_int));
 int __lock_is_locked
    __P((DB_LOCKTAB *, u_int32_t, DBT *, db_lockmode_t));
+void __lock_printlock __P((DB_LOCKTAB *, struct __db_lock *, int));
 int __lock_getobj  __P((DB_LOCKTAB *,
     u_int32_t, const DBT *, u_int32_t type, DB_LOCKOBJ **));
+int __lock_validate_region __P((DB_LOCKTAB *));
+int __lock_grow_region __P((DB_LOCKTAB *, int, size_t));
+void __lock_dump_region __P((DB_LOCKTAB *, char *, FILE *));
 int __lock_cmp __P((const DBT *, DB_LOCKOBJ *));
 int __lock_locker_cmp __P((u_int32_t, DB_LOCKOBJ *));
-int __lock_ohash __P((const DBT *));
-u_int32_t __lock_locker_hash __P((u_int32_t));
+u_int32_t __lock_ohash __P((const DBT *));
 u_int32_t __lock_lhash __P((DB_LOCKOBJ *));
+u_int32_t __lock_locker_hash __P((u_int32_t));
 #endif /* _lock_ext_h_ */
diff --git a/db2/include/log.h b/db2/include/log.h
index 4e27b038d3..7d5161cc9d 100644
--- a/db2/include/log.h
+++ b/db2/include/log.h
@@ -1,10 +1,10 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  *
- *	@(#)log.h	10.19 (Sleepycat) 1/17/98
+ *	@(#)log.h	10.25 (Sleepycat) 4/10/98
  */
 
 #ifndef _LOG_H_
@@ -15,9 +15,10 @@ struct __hdr;		typedef struct __hdr HDR;
 struct __log;		typedef struct __log LOG;
 struct __log_persist;	typedef struct __log_persist LOGP;
 
+#ifndef MAXLFNAME
 #define	MAXLFNAME	99999		/* Maximum log file name. */
 #define	LFNAME		"log.%05d"	/* Log file name template. */
-
+#endif
 					/* Default log name. */
 #define DB_DEFAULT_LOG_FILE	"__db_log.share"
 
@@ -31,17 +32,19 @@ struct __log_persist;	typedef struct __log_persist LOGP;
 	if (F_ISSET(dblp, DB_AM_THREAD))				\
 		(void)__db_mutex_unlock((dblp)->mutexp, -1);
 #define	LOCK_LOGREGION(dblp)						\
-	(void)__db_mutex_lock(&((RLAYOUT *)(dblp)->lp)->lock, (dblp)->fd)
+	(void)__db_mutex_lock(&((RLAYOUT *)(dblp)->lp)->lock,		\
+	    (dblp)->reginfo.fd)
 #define	UNLOCK_LOGREGION(dblp)						\
-	(void)__db_mutex_unlock(&((RLAYOUT *)(dblp)->lp)->lock, (dblp)->fd)
+	(void)__db_mutex_unlock(&((RLAYOUT *)(dblp)->lp)->lock,		\
+	    (dblp)->reginfo.fd)
 
 /*
  * The per-process table that maps log file-id's to DB structures.
  */
 typedef	struct __db_entry {
-	DB	*dbp;			/* Associated DB structure. */
-	int	refcount;		/* Reference counted. */
-	int	deleted;		/* File was not found during open. */
+	DB	 *dbp;			/* Associated DB structure. */
+	u_int32_t refcount;		/* Reference counted. */
+	int	  deleted;		/* File was not found during open. */
 } DB_ENTRY;
 
 /*
@@ -75,10 +78,9 @@ struct __db_log {
 	LOG	 *lp;			/* Address of the shared LOG. */
 
 	DB_ENV	 *dbenv;		/* Reference to error information. */
+	REGINFO	  reginfo;		/* Region information. */
 
-	void     *maddr;		/* Address of mmap'd region. */
 	void     *addr;			/* Address of shalloc() region. */
-	int	  fd;			/* Region file descriptor. */
 
 	char	 *dir;			/* Directory argument. */
 
@@ -131,7 +133,7 @@ struct __log {
 
 	u_int32_t w_off;		/* Current write offset in the file. */
 
-	DB_LSN	  c_lsn;		/* LSN of the last checkpoint. */
+	DB_LSN	  chkpt_lsn;		/* LSN of the last checkpoint. */
 	time_t	  chkpt;		/* Time of the last checkpoint. */
 
 	DB_LOG_STAT stat;		/* Log statistics. */
@@ -159,9 +161,8 @@ struct __fname {
 	u_int32_t id;			/* Logging file id. */
 	DBTYPE	  s_type;		/* Saved DB type. */
 
-	u_int32_t fileid_off;		/* Unique file id offset. */
-
 	size_t	  name_off;		/* Name offset. */
+	u_int8_t  ufid[DB_FILE_ID_LEN];	/* Unique file id. */
 };
 
 /* File open/close register log record opcodes. */
diff --git a/db2/include/log_ext.h b/db2/include/log_ext.h
index 8640b134cd..bf3bcb02ce 100644
--- a/db2/include/log_ext.h
+++ b/db2/include/log_ext.h
@@ -13,8 +13,8 @@ int __log_register_read __P((void *, __log_register_args **));
 int __log_init_print __P((DB_ENV *));
 int __log_init_recover __P((DB_ENV *));
 int __log_findckp __P((DB_LOG *, DB_LSN *));
-int __log_get __P((DB_LOG *, DB_LSN *, DBT *, int, int));
-int __log_put __P((DB_LOG *, DB_LSN *, const DBT *, int));
+int __log_get __P((DB_LOG *, DB_LSN *, DBT *, u_int32_t, int));
+int __log_put __P((DB_LOG *, DB_LSN *, const DBT *, u_int32_t));
 int __log_name __P((DB_LOG *, int, char **));
 int __log_register_recover
     __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
diff --git a/db2/include/mp.h b/db2/include/mp.h
index 4efbf9b95e..8635efa722 100644
--- a/db2/include/mp.h
+++ b/db2/include/mp.h
@@ -1,10 +1,10 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  *
- *	@(#)mp.h	10.25 (Sleepycat) 1/8/98
+ *	@(#)mp.h	10.33 (Sleepycat) 5/4/98
  */
 
 struct __bh;		typedef struct __bh BH;
@@ -16,10 +16,12 @@ struct __mpoolfile;	typedef struct __mpoolfile MPOOLFILE;
 #define	DB_DEFAULT_MPOOL_FILE	"__db_mpool.share"
 
 /*
- *  We default to 128K (16 8K pages) if the user doesn't specify, and
+ * We default to 128K (16 8K pages) if the user doesn't specify, and
  * require a minimum of 20K.
  */
+#ifndef	DB_CACHESIZE_DEF
 #define	DB_CACHESIZE_DEF	(128 * 1024)
+#endif
 #define	DB_CACHESIZE_MIN	( 20 * 1024)
 
 #define	INVALID		0		/* Invalid shared memory offset. */
@@ -79,30 +81,30 @@ struct __mpoolfile;	typedef struct __mpoolfile MPOOLFILE;
 #define	LOCKINIT(dbmp, mutexp)						\
 	if (F_ISSET(dbmp, MP_LOCKHANDLE | MP_LOCKREGION))		\
 		(void)__db_mutex_init(mutexp,				\
-		    MUTEX_LOCK_OFFSET((dbmp)->maddr, mutexp))
+		    MUTEX_LOCK_OFFSET((dbmp)->reginfo.addr, mutexp))
 
 #define	LOCKHANDLE(dbmp, mutexp)					\
 	if (F_ISSET(dbmp, MP_LOCKHANDLE))				\
-		(void)__db_mutex_lock(mutexp, (dbmp)->fd)
+		(void)__db_mutex_lock(mutexp, (dbmp)->reginfo.fd)
 #define	UNLOCKHANDLE(dbmp, mutexp)					\
 	if (F_ISSET(dbmp, MP_LOCKHANDLE))				\
-		(void)__db_mutex_unlock(mutexp, (dbmp)->fd)
+		(void)__db_mutex_unlock(mutexp, (dbmp)->reginfo.fd)
 
 #define	LOCKREGION(dbmp)						\
 	if (F_ISSET(dbmp, MP_LOCKREGION))				\
 		(void)__db_mutex_lock(&((RLAYOUT *)(dbmp)->mp)->lock,	\
-		    (dbmp)->fd)
+		    (dbmp)->reginfo.fd)
 #define	UNLOCKREGION(dbmp)						\
 	if (F_ISSET(dbmp, MP_LOCKREGION))				\
 		(void)__db_mutex_unlock(&((RLAYOUT *)(dbmp)->mp)->lock,	\
-		(dbmp)->fd)
+		(dbmp)->reginfo.fd)
 
 #define	LOCKBUFFER(dbmp, bhp)						\
 	if (F_ISSET(dbmp, MP_LOCKREGION))				\
-		(void)__db_mutex_lock(&(bhp)->mutex, (dbmp)->fd)
+		(void)__db_mutex_lock(&(bhp)->mutex, (dbmp)->reginfo.fd)
 #define	UNLOCKBUFFER(dbmp, bhp)						\
 	if (F_ISSET(dbmp, MP_LOCKREGION))				\
-		(void)__db_mutex_unlock(&(bhp)->mutex, (dbmp)->fd)
+		(void)__db_mutex_unlock(&(bhp)->mutex, (dbmp)->reginfo.fd)
 
 /*
  * DB_MPOOL --
@@ -120,20 +122,16 @@ struct __db_mpool {
 
 /* These fields are not protected. */
 	DB_ENV     *dbenv;		/* Reference to error information. */
+	REGINFO	    reginfo;		/* Region information. */
 
 	MPOOL	   *mp;			/* Address of the shared MPOOL. */
 
-	void	   *maddr;		/* Address of mmap'd region. */
 	void	   *addr;		/* Address of shalloc() region. */
 
 	DB_HASHTAB *htab;		/* Hash table of bucket headers. */
 
-	int	    fd;			/* Underlying mmap'd fd. */
-
-#define	MP_ISPRIVATE	0x01		/* Private, so local memory. */
-#define	MP_LOCKHANDLE	0x02		/* Threaded, lock handles and region. */
-#define	MP_LOCKREGION	0x04		/* Concurrent access, lock region. */
-#define	MP_MALLOC	0x08		/* If region in allocated memory. */
+#define	MP_LOCKHANDLE	0x01		/* Threaded, lock handles and region. */
+#define	MP_LOCKREGION	0x02		/* Concurrent access, lock region. */
 	u_int32_t  flags;
 };
 
@@ -146,8 +144,8 @@ struct __db_mpreg {
 
 	int ftype;			/* File type. */
 					/* Pgin, pgout routines. */
-	int (*pgin) __P((db_pgno_t, void *, DBT *));
-	int (*pgout) __P((db_pgno_t, void *, DBT *));
+	int (DB_CALLBACK *pgin) __P((db_pgno_t, void *, DBT *));
+	int (DB_CALLBACK *pgout) __P((db_pgno_t, void *, DBT *));
 };
 
 /*
@@ -207,7 +205,7 @@ struct __mpool {
 	size_t	    htab_buckets;	/* Number of hash table entries. */
 
 	DB_LSN	    lsn;		/* Maximum checkpoint LSN. */
-	int	    lsn_cnt;		/* Checkpoint buffers left to write. */
+	u_int32_t   lsn_cnt;		/* Checkpoint buffers left to write. */
 
 	DB_MPOOL_STAT stat;		/* Global mpool statistics. */
 
@@ -225,7 +223,9 @@ struct __mpoolfile {
 	u_int32_t ref;			/* Reference count. */
 
 	int	  ftype;		/* File type. */
-	int	  lsn_off;		/* Page's LSN offset. */
+
+	int32_t	  lsn_off;		/* Page's LSN offset. */
+	u_int32_t clear_len;		/* Bytes to clear on page create. */
 
 	size_t	  path_off;		/* File name location. */
 	size_t	  fileid_off;		/* File identification location. */
@@ -233,9 +233,10 @@ struct __mpoolfile {
 	size_t	  pgcookie_len;		/* Pgin/pgout cookie length. */
 	size_t	  pgcookie_off;		/* Pgin/pgout cookie location. */
 
-	int	  lsn_cnt;		/* Checkpoint buffers left to write. */
+	u_int32_t lsn_cnt;		/* Checkpoint buffers left to write. */
 
 	db_pgno_t last_pgno;		/* Last page in the file. */
+	db_pgno_t orig_last_pgno;	/* Original last page in the file. */
 
 #define	MP_CAN_MMAP	0x01		/* If the file can be mmap'd. */
 #define	MP_TEMP		0x02		/* Backing file is a temporary. */
diff --git a/db2/include/mp_ext.h b/db2/include/mp_ext.h
index 1928820637..3650839475 100644
--- a/db2/include/mp_ext.h
+++ b/db2/include/mp_ext.h
@@ -7,13 +7,13 @@ int __memp_pgread __P((DB_MPOOLFILE *, BH *, int));
 int __memp_pgwrite __P((DB_MPOOLFILE *, BH *, int *, int *));
 int __memp_pg __P((DB_MPOOLFILE *, BH *, int));
 void __memp_bhfree __P((DB_MPOOL *, MPOOLFILE *, BH *, int));
-int __memp_fopen __P((DB_MPOOL *, MPOOLFILE *, const char *, int,
-   int, int, size_t, int, DBT *, u_int8_t *, int, DB_MPOOLFILE **));
+int __memp_fopen __P((DB_MPOOL *, MPOOLFILE *, const char *,
+   u_int32_t, int, size_t, int, DB_MPOOL_FINFO *, DB_MPOOLFILE **));
 char * __memp_fn __P((DB_MPOOLFILE *));
 char * __memp_fns __P((DB_MPOOL *, MPOOLFILE *));
-void __memp_debug __P((DB_MPOOL *, FILE *, int));
+void __memp_dump_region __P((DB_MPOOL *, char *, FILE *));
 int __memp_ralloc __P((DB_MPOOL *, size_t, size_t *, void *));
 int __memp_ropen
-   __P((DB_MPOOL *, const char *, size_t, int, int));
-int __memp_rclose __P((DB_MPOOL *));
+   __P((DB_MPOOL *, const char *, size_t, int, int, u_int32_t));
+int __mp_xxx_fd __P((DB_MPOOLFILE *, int *));
 #endif /* _mp_ext_h_ */
diff --git a/db2/include/mutex_ext.h b/db2/include/mutex_ext.h
index f0e68f3659..b48da5d2f4 100644
--- a/db2/include/mutex_ext.h
+++ b/db2/include/mutex_ext.h
@@ -1,7 +1,7 @@
 /* DO NOT EDIT: automatically built by dist/distrib. */
 #ifndef _mutex_ext_h_
 #define _mutex_ext_h_
-void __db_mutex_init __P((db_mutex_t *, u_int32_t));
+int __db_mutex_init __P((db_mutex_t *, u_int32_t));
 int __db_mutex_lock __P((db_mutex_t *, int));
 int __db_mutex_unlock __P((db_mutex_t *, int));
 #endif /* _mutex_ext_h_ */
diff --git a/db2/include/os_ext.h b/db2/include/os_ext.h
index 9c66a248c8..889a45a44e 100644
--- a/db2/include/os_ext.h
+++ b/db2/include/os_ext.h
@@ -2,23 +2,29 @@
 #ifndef _os_ext_h_
 #define _os_ext_h_
 int __db_abspath __P((const char *));
+char *__db_strdup __P((const char *));
 void *__db_calloc __P((size_t, size_t));
 void *__db_malloc __P((size_t));
 void *__db_realloc __P((void *, size_t));
-int __os_oldwin __P((void));
 int __os_dirlist __P((const char *, char ***, int *));
 void __os_dirfree __P((char **, int));
 int __db_fileid __P((DB_ENV *, const char *, int, u_int8_t *));
 int __db_fsync __P((int));
-int __os_map __P((int, size_t, int, int, void **));
-int __os_unmap __P((void *, size_t));
-int __db_oflags __P((int));
-int __db_open __P((const char *, int, int, int, int *));
+int __db_mapanon_ok __P((int));
+int __db_mapinit __P((void));
+int __db_mapregion __P((char *, REGINFO *));
+int __db_unmapregion __P((REGINFO *));
+int __db_unlinkregion __P((char *, REGINFO *));
+int __db_mapfile __P((char *, int, size_t, int, void **));
+int __db_unmapfile __P((void *, size_t));
+u_int32_t __db_oflags __P((int));
+int __db_omode __P((const char *));
+int __db_open __P((const char *, u_int32_t, u_int32_t, int, int *));
 int __db_close __P((int));
 char *__db_rpath __P((const char *));
 int __db_read __P((int, void *, size_t, ssize_t *));
 int __db_write __P((int, void *, size_t, ssize_t *));
-int __os_seek __P((int, size_t, db_pgno_t, u_long, int));
+int __os_seek __P((int, size_t, db_pgno_t, u_int32_t, int, int));
 int __os_sleep __P((u_long, u_long));
 int __os_spin __P((void));
 int __os_exists __P((const char *, int *));
diff --git a/db2/include/os_func.h b/db2/include/os_func.h
index b825fed5db..12794d550d 100644
--- a/db2/include/os_func.h
+++ b/db2/include/os_func.h
@@ -1,40 +1,40 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1997
+ * Copyright (c) 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  *
- *	@(#)os_func.h	10.5 (Sleepycat) 12/4/97
+ *	@(#)os_func.h	10.8 (Sleepycat) 4/19/98
  */
 
 /* Calls which can be replaced by the application. */
 struct __db_jumptab {
-	int	(*db_close) __P((int));			/* DB_FUNC_CLOSE */
-	void	(*db_dirfree) __P((char **, int));	/* DB_FUNC_DIRFREE */
-	int	(*db_dirlist)				/* DB_FUNC_DIRLIST */
+	int	(*j_close) __P((int));			/* DB_FUNC_CLOSE */
+	void	(*j_dirfree) __P((char **, int));	/* DB_FUNC_DIRFREE */
+	int	(*j_dirlist)				/* DB_FUNC_DIRLIST */
 		    __P((const char *, char ***, int *));
-	int	(*db_exists)				/* DB_FUNC_EXISTS */
+	int	(*j_exists)				/* DB_FUNC_EXISTS */
 		    __P((const char *, int *));
-	void	(*db_free) __P((void *));		/* DB_FUNC_FREE */
-	int	(*db_fsync) __P((int));			/* DB_FUNC_FSYNC */
-	int	(*db_ioinfo) __P((const char *,		/* DB_FUNC_IOINFO */
+	void	(*j_free) __P((void *));		/* DB_FUNC_FREE */
+	int	(*j_fsync) __P((int));			/* DB_FUNC_FSYNC */
+	int	(*j_ioinfo) __P((const char *,		/* DB_FUNC_IOINFO */
 		    int, u_int32_t *, u_int32_t *, u_int32_t *));
-	void   *(*db_malloc) __P((size_t));		/* DB_FUNC_MALLOC */
-	int	(*db_map)				/* DB_FUNC_MAP */
-		    __P((int, size_t, int, int, void **));
-	int	(*db_open)				/* DB_FUNC_OPEN */
+	void   *(*j_malloc) __P((size_t));		/* DB_FUNC_MALLOC */
+	int	(*j_map)				/* DB_FUNC_MAP */
+		    __P((char *, int, size_t, int, int, int, void **));
+	int	(*j_open)				/* DB_FUNC_OPEN */
 		    __P((const char *, int, ...));
-	ssize_t	(*db_read) __P((int, void *, size_t));	/* DB_FUNC_READ */
-	void   *(*db_realloc) __P((void *, size_t));	/* DB_FUNC_REALLOC */
-	int	(*db_seek)				/* DB_FUNC_SEEK */
-		    __P((int, size_t, db_pgno_t, u_long, int));
-	int	(*db_sleep) __P((u_long, u_long));	/* DB_FUNC_SLEEP */
-	char   *(*db_strdup) __P((const char *));	/* DB_FUNC_STRDUP */
-	int	(*db_unlink) __P((const char *));	/* DB_FUNC_UNLINK */
-	int	(*db_unmap) __P((void *, size_t));	/* DB_FUNC_UNMAP */
-	ssize_t	(*db_write)				/* DB_FUNC_WRITE */
+	ssize_t	(*j_read) __P((int, void *, size_t));	/* DB_FUNC_READ */
+	void   *(*j_realloc) __P((void *, size_t));	/* DB_FUNC_REALLOC */
+	int	(*j_runlink) __P((char *));		/* DB_FUNC_RUNLINK */
+	int	(*j_seek)				/* DB_FUNC_SEEK */
+		    __P((int, size_t, db_pgno_t, u_int32_t, int, int));
+	int	(*j_sleep) __P((u_long, u_long));	/* DB_FUNC_SLEEP */
+	int	(*j_unlink) __P((const char *));	/* DB_FUNC_UNLINK */
+	int	(*j_unmap) __P((void *, size_t));	/* DB_FUNC_UNMAP */
+	ssize_t	(*j_write)				/* DB_FUNC_WRITE */
 		    __P((int, const void *, size_t));
-	int	(*db_yield) __P((void));		/* DB_FUNC_YIELD */
+	int	(*j_yield) __P((void));			/* DB_FUNC_YIELD */
 };
 
 extern struct __db_jumptab __db_jump;
@@ -43,7 +43,7 @@ extern struct __db_jumptab __db_jump;
  * Names used by DB to call through the jump table.
  *
  * The naming scheme goes like this: if the functionality the application can
- * replace is the same as the DB functionality, e.g., calloc, or dirlist, then
+ * replace is the same as the DB functionality, e.g., malloc, or dirlist, then
  * we use the name __db_XXX, and the application is expected to replace the
  * complete functionality, which may or may not map directly to an ANSI C or
  * POSIX 1003.1 interface.  If the functionality that the aplication replaces
@@ -53,20 +53,17 @@ extern struct __db_jumptab __db_jump;
  * part of DB is the only code that should use the __os_XXX names, all other
  * parts of DB should be calling __db_XXX functions.
  */
-#define	__os_close	__db_jump.db_close	/* __db_close is a wrapper. */
-#define	__db_dirfree	__db_jump.db_dirfree
-#define	__db_dirlist	__db_jump.db_dirlist
-#define	__db_exists	__db_jump.db_exists
-#define	__db_free	__db_jump.db_free
-#define	__os_fsync	__db_jump.db_fsync	/* __db_fsync is a wrapper. */
-#define	__db_ioinfo	__db_jump.db_ioinfo
-#define	__db_map	__db_jump.db_map
-#define	__os_open	__db_jump.db_open	/* __db_open is a wrapper. */
-#define	__os_read	__db_jump.db_read	/* __db_read is a wrapper. */
-#define	__db_seek	__db_jump.db_seek
-#define	__db_sleep	__db_jump.db_sleep
-#define	__db_strdup	__db_jump.db_strdup
-#define	__os_unlink	__db_jump.db_unlink	/* __db_unlink is a wrapper. */
-#define	__db_unmap	__db_jump.db_unmap
-#define	__os_write	__db_jump.db_write	/* __db_write is a wrapper. */
-#define	__db_yield	__db_jump.db_yield
+#define	__os_close	__db_jump.j_close	/* __db_close is a wrapper. */
+#define	__db_dirfree	__db_jump.j_dirfree
+#define	__db_dirlist	__db_jump.j_dirlist
+#define	__db_exists	__db_jump.j_exists
+#define	__db_free	__db_jump.j_free
+#define	__os_fsync	__db_jump.j_fsync	/* __db_fsync is a wrapper. */
+#define	__db_ioinfo	__db_jump.j_ioinfo
+#define	__os_open	__db_jump.j_open	/* __db_open is a wrapper. */
+#define	__os_read	__db_jump.j_read	/* __db_read is a wrapper. */
+#define	__db_seek	__db_jump.j_seek
+#define	__db_sleep	__db_jump.j_sleep
+#define	__os_unlink	__db_jump.j_unlink	/* __db_unlink is a wrapper. */
+#define	__os_write	__db_jump.j_write	/* __db_write is a wrapper. */
+#define	__db_yield	__db_jump.j_yield
diff --git a/db2/include/queue.h b/db2/include/queue.h
index 0909c86c60..f606eb0497 100644
--- a/db2/include/queue.h
+++ b/db2/include/queue.h
@@ -1,6 +1,6 @@
 /*	BSDI $Id$	*/
 
-/* 
+/*
  * Copyright (c) 1991, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
diff --git a/db2/include/shqueue.h b/db2/include/shqueue.h
index c596d33e92..00e5d76251 100644
--- a/db2/include/shqueue.h
+++ b/db2/include/shqueue.h
@@ -1,10 +1,10 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  *
- *	@(#)shqueue.h	8.12 (Sleepycat) 9/10/97
+ *	@(#)shqueue.h	8.13 (Sleepycat) 4/10/98
  */
 #ifndef	_SYS_SHQUEUE_H_
 #define	_SYS_SHQUEUE_H_
diff --git a/db2/include/txn.h b/db2/include/txn.h
index c64ac3fc52..a2512ed152 100644
--- a/db2/include/txn.h
+++ b/db2/include/txn.h
@@ -1,10 +1,10 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  *
- *	@(#)txn.h	10.11 (Sleepycat) 10/25/97
+ *	@(#)txn.h	10.15 (Sleepycat) 4/21/98
  */
 #ifndef	_TXN_H_
 #define	_TXN_H_
@@ -52,12 +52,11 @@ struct __db_txnmgr {
 	TAILQ_HEAD(_chain, __db_txn)	txn_chain;
 
 /* These fields are not protected. */
+	REGINFO		reginfo;	/* Region information. */
 	DB_ENV		*dbenv;		/* Environment. */
 	int (*recover)			/* Recovery dispatch routine */
 	    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
-	int		 fd;		/* mapped file descriptor */
-	u_int		 flags;		/* DB_TXN_NOSYNC, DB_THREAD */
-	size_t		 reg_size;	/* how large we think the region is */
+	u_int32_t	 flags;		/* DB_TXN_NOSYNC, DB_THREAD */
 	DB_TXNREGION	*region;	/* address of shared memory region */
 	void		*mem;		/* address of the shalloc space */
 };
@@ -102,17 +101,16 @@ struct __db_txnregion {
 		(void)__db_mutex_unlock((tmgrp)->mutexp, -1)
 
 #define	LOCK_TXNREGION(tmgrp)						\
-	(void)__db_mutex_lock(&(tmgrp)->region->hdr.lock, (tmgrp)->fd)
+	(void)__db_mutex_lock(&(tmgrp)->region->hdr.lock, (tmgrp)->reginfo.fd)
 #define	UNLOCK_TXNREGION(tmgrp)						\
-	(void)__db_mutex_unlock(&(tmgrp)->region->hdr.lock, (tmgrp)->fd)
+	(void)__db_mutex_unlock(&(tmgrp)->region->hdr.lock, (tmgrp)->reginfo.fd)
 
 /*
  * Log record types.
  */
-#define	TXN_BEGIN	1
-#define	TXN_COMMIT	2
-#define	TXN_PREPARE	3
-#define	TXN_CHECKPOINT	4
+#define	TXN_COMMIT	1
+#define	TXN_PREPARE	2
+#define	TXN_CHECKPOINT	3
 
 #include "txn_auto.h"
 #include "txn_ext.h"
diff --git a/db2/lock/lock.c b/db2/lock/lock.c
index 0846d3c29f..3d20e0d65b 100644
--- a/db2/lock/lock.c
+++ b/db2/lock/lock.c
@@ -1,28 +1,21 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)lock.c	10.43 (Sleepycat) 1/8/98";
+static const char sccsid[] = "@(#)lock.c	10.52 (Sleepycat) 5/10/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
 #include <sys/types.h>
-#include <sys/mman.h>
-#include <sys/stat.h>
 
 #include <errno.h>
-#include <fcntl.h>
-#include <stddef.h>
-#include <stdio.h>
-#include <stdlib.h>
 #include <string.h>
-#include <unistd.h>
 #endif
 
 #include "db_int.h"
@@ -34,248 +27,15 @@ static const char sccsid[] = "@(#)lock.c	10.43 (Sleepycat) 1/8/98";
 #include "db_am.h"
 
 static void __lock_checklocker __P((DB_LOCKTAB *, struct __db_lock *, int));
-static int  __lock_count_locks __P((DB_LOCKREGION *));
-static int  __lock_count_objs __P((DB_LOCKREGION *));
-static int  __lock_create __P((const char *, int, DB_ENV *));
 static void __lock_freeobj __P((DB_LOCKTAB *, DB_LOCKOBJ *));
-static int  __lock_get_internal __P((DB_LOCKTAB *, u_int32_t, int, const DBT *,
-    db_lockmode_t, struct __db_lock **));
-static int  __lock_grow_region __P((DB_LOCKTAB *, int, size_t));
+static int  __lock_get_internal __P((DB_LOCKTAB *, u_int32_t, u_int32_t,
+    const DBT *, db_lockmode_t, struct __db_lock **));
 static int  __lock_put_internal __P((DB_LOCKTAB *, struct __db_lock *, int));
 static void __lock_remove_waiter
     __P((DB_LOCKTAB *, DB_LOCKOBJ *, struct __db_lock *, db_status_t));
-static void __lock_reset_region __P((DB_LOCKTAB *));
-static int  __lock_validate_region __P((DB_LOCKTAB *));
-#ifdef DEBUG
-static void __lock_dump_locker __P((DB_LOCKTAB *, DB_LOCKOBJ *));
-static void __lock_dump_object __P((DB_LOCKTAB *, DB_LOCKOBJ *));
-static void __lock_printlock __P((DB_LOCKTAB *, struct __db_lock *, int));
-#endif
-
-/*
- * Create and initialize a lock region in shared memory.
- */
-
-/*
- * __lock_create --
- *	Create the lock region.  Returns an errno.  In most cases,
- * the errno should be that returned by __db_ropen, in which case
- * an EAGAIN means that we should retry, and an EEXIST means that
- * the region exists and we didn't need to create it.  Any other
- * sort of errno should be treated as a system error, leading to a
- * failure of the original interface call.
- */
-static int
-__lock_create(path, mode, dbenv)
-	const char *path;
-	int mode;
-	DB_ENV *dbenv;
-{
-	struct __db_lock *lp;
-	struct lock_header *tq_head;
-	struct obj_header *obj_head;
-	DB_LOCKOBJ *op;
-	DB_LOCKREGION *lrp;
-	u_int maxlocks;
-	u_int32_t i;
-	int fd, lock_modes, nelements, ret;
-	const u_int8_t *conflicts;
-	u_int8_t *curaddr;
-
-	maxlocks = dbenv == NULL || dbenv->lk_max == 0 ?
-	    DB_LOCK_DEFAULT_N : dbenv->lk_max;
-	lock_modes = dbenv == NULL || dbenv->lk_modes == 0 ?
-	    DB_LOCK_RW_N : dbenv->lk_modes;
-	conflicts = dbenv == NULL || dbenv->lk_conflicts == NULL ?
-	    db_rw_conflicts : dbenv->lk_conflicts;
-
-	if ((ret =
-	    __db_rcreate(dbenv, DB_APP_NONE, path, DB_DEFAULT_LOCK_FILE, mode,
-	    LOCK_REGION_SIZE(lock_modes, maxlocks, __db_tablesize(maxlocks)),
-	    0, &fd, &lrp)) != 0)
-		return (ret);
-
-	/* Region exists; now initialize it. */
-	lrp->table_size = __db_tablesize(maxlocks);
-	lrp->magic = DB_LOCKMAGIC;
-	lrp->version = DB_LOCKVERSION;
-	lrp->id = 0;
-	lrp->maxlocks = maxlocks;
-	lrp->need_dd = 0;
-	lrp->detect = DB_LOCK_NORUN;
-	lrp->numobjs = maxlocks;
-	lrp->nlockers = 0;
-	lrp->mem_bytes = ALIGN(STRING_SIZE(maxlocks), sizeof(size_t));
-	lrp->increment = lrp->hdr.size / 2;
-	lrp->nmodes = lock_modes;
-	lrp->nconflicts = 0;
-	lrp->nrequests = 0;
-	lrp->nreleases = 0;
-	lrp->ndeadlocks = 0;
-
-	/*
-	 * As we write the region, we've got to maintain the alignment
-	 * for the structures that follow each chunk.  This information
-	 * ends up being encapsulated both in here as well as in the
-	 * lock.h file for the XXX_SIZE macros.
-	 */
-	/* Initialize conflict matrix. */
-	curaddr = (u_int8_t *)lrp + sizeof(DB_LOCKREGION);
-	memcpy(curaddr, conflicts, lock_modes * lock_modes);
-	curaddr += lock_modes * lock_modes;
-
-	/*
-	 * Initialize hash table.
-	 */
-	curaddr = (u_int8_t *)ALIGNP(curaddr, LOCK_HASH_ALIGN);
-	lrp->hash_off = curaddr - (u_int8_t *)lrp;
-	nelements = lrp->table_size;
-	__db_hashinit(curaddr, nelements);
-	curaddr += nelements * sizeof(DB_HASHTAB);
-
-	/*
-	 * Initialize locks onto a free list. Since locks contains mutexes,
-	 * we need to make sure that each lock is aligned on a MUTEX_ALIGNMENT
-	 * boundary.
-	 */
-	curaddr = (u_int8_t *)ALIGNP(curaddr, MUTEX_ALIGNMENT);
-	tq_head = &lrp->free_locks;
-	SH_TAILQ_INIT(tq_head);
-
-	for (i = 0; i++ < maxlocks;
-	    curaddr += ALIGN(sizeof(struct __db_lock), MUTEX_ALIGNMENT)) {
-		lp = (struct __db_lock *)curaddr;
-		lp->status = DB_LSTAT_FREE;
-		SH_TAILQ_INSERT_HEAD(tq_head, lp, links, __db_lock);
-	}
-
-	/* Initialize objects onto a free list.  */
-	obj_head = &lrp->free_objs;
-	SH_TAILQ_INIT(obj_head);
-
-	for (i = 0; i++ < maxlocks; curaddr += sizeof(DB_LOCKOBJ)) {
-		op = (DB_LOCKOBJ *)curaddr;
-		SH_TAILQ_INSERT_HEAD(obj_head, op, links, __db_lockobj);
-	}
-
-	/*
-	 * Initialize the string space; as for all shared memory allocation
-	 * regions, this requires size_t alignment, since we store the
-	 * lengths of malloc'd areas in the area..
-	 */
-	curaddr = (u_int8_t *)ALIGNP(curaddr, sizeof(size_t));
-	lrp->mem_off = curaddr - (u_int8_t *)lrp;
-	__db_shalloc_init(curaddr, lrp->mem_bytes);
-
-	/* Release the lock. */
-	(void)__db_mutex_unlock(&lrp->hdr.lock, fd);
-
-	/* Now unmap the region. */
-	if ((ret = __db_rclose(dbenv, fd, lrp)) != 0) {
-		(void)lock_unlink(path, 1 /* force */, dbenv);
-		return (ret);
-	}
-
-	return (0);
-}
 
 int
-lock_open(path, flags, mode, dbenv, ltp)
-	const char *path;
-	int flags, mode;
-	DB_ENV *dbenv;
-	DB_LOCKTAB **ltp;
-{
-	DB_LOCKTAB *lt;
-	int ret, retry_cnt;
-
-	/* Validate arguments. */
-#ifdef HAVE_SPINLOCKS
-#define	OKFLAGS	(DB_CREATE | DB_THREAD)
-#else
-#define	OKFLAGS	(DB_CREATE)
-#endif
-	if ((ret = __db_fchk(dbenv, "lock_open", flags, OKFLAGS)) != 0)
-		return (ret);
-
-	/*
-	 * Create the lock table structure.
-	 */
-	if ((lt = (DB_LOCKTAB *)__db_calloc(1, sizeof(DB_LOCKTAB))) == NULL) {
-		__db_err(dbenv, "%s", strerror(ENOMEM));
-		return (ENOMEM);
-	}
-	lt->dbenv = dbenv;
-
-	/*
-	 * Now, create the lock region if it doesn't already exist.
-	 */
-	retry_cnt = 0;
-retry:	if (LF_ISSET(DB_CREATE) &&
-	    (ret = __lock_create(path, mode, dbenv)) != 0)
-		if (ret == EAGAIN && ++retry_cnt < 3) {
-			(void)__db_sleep(1, 0);
-			goto retry;
-		} else if (ret == EEXIST) /* We did not create the region */
-			LF_CLR(DB_CREATE);
-		else
-			goto out;
-
-	/*
-	 * Finally, open the region, map it in, and increment the
-	 * reference count.
-	 */
-	retry_cnt = 0;
-retry1:	if ((ret = __db_ropen(dbenv, DB_APP_NONE, path, DB_DEFAULT_LOCK_FILE,
-	    LF_ISSET(~(DB_CREATE | DB_THREAD)), &lt->fd, &lt->region)) != 0) {
-		if (ret == EAGAIN && ++retry_cnt < 3) {
-			(void)__db_sleep(1, 0);
-			goto retry1;
-		}
-		goto out;
-	 }
-
-	if (lt->region->magic != DB_LOCKMAGIC) {
-		__db_err(dbenv, "lock_open: Bad magic number");
-		ret = EINVAL;
-		goto out;
-	}
-
-	/* Check for automatic deadlock detection. */
-	if (dbenv->lk_detect != DB_LOCK_NORUN) {
-		if (lt->region->detect != DB_LOCK_NORUN &&
-		    dbenv->lk_detect != DB_LOCK_DEFAULT &&
-		    lt->region->detect != dbenv->lk_detect) {
-			__db_err(dbenv,
-			    "lock_open: incompatible deadlock detector mode");
-			ret = EINVAL;
-			goto out;
-		}
-		if (lt->region->detect == DB_LOCK_NORUN)
-			lt->region->detect = dbenv->lk_detect;
-	}
-
-	/* Set up remaining pointers into region. */
-	lt->conflicts = (u_int8_t *)lt->region + sizeof(DB_LOCKREGION);
-	lt->hashtab =
-	    (DB_HASHTAB *)((u_int8_t *)lt->region + lt->region->hash_off);
-	lt->mem = (void *)((u_int8_t *)lt->region + lt->region->mem_off);
-	lt->reg_size = lt->region->hdr.size;
-
-	*ltp = lt;
-	return (0);
-
-/* Error handling. */
-out:	if (lt->region != NULL)
-		(void)__db_rclose(lt->dbenv, lt->fd, lt->region);
-	if (LF_ISSET(DB_CREATE))
-		(void)lock_unlink(path, 1, lt->dbenv);
-	__db_free(lt);
-	return (ret);
-}
-
-int
-lock_id (lt, idp)
+lock_id(lt, idp)
 	DB_LOCKTAB *lt;
 	u_int32_t *idp;
 {
@@ -294,8 +54,8 @@ lock_id (lt, idp)
 int
 lock_vec(lt, locker, flags, list, nlist, elistp)
 	DB_LOCKTAB *lt;
-	u_int32_t locker;
-	int flags, nlist;
+	u_int32_t locker, flags;
+	int nlist;
 	DB_LOCKREQ *list, **elistp;
 {
 	struct __db_lock *lp;
@@ -345,7 +105,7 @@ lock_vec(lt, locker, flags, list, nlist, elistp)
 			for (lp = SH_LIST_FIRST(&sh_locker->heldby, __db_lock);
 			    lp != NULL;
 			    lp = SH_LIST_FIRST(&sh_locker->heldby, __db_lock)) {
-				if ((ret = __lock_put_internal(lt, lp, 0)) != 0)
+				if ((ret = __lock_put_internal(lt, lp, 1)) != 0)
 					break;
 			}
 			__lock_freeobj(lt, sh_locker);
@@ -436,8 +196,7 @@ lock_vec(lt, locker, flags, list, nlist, elistp)
 int
 lock_get(lt, locker, flags, obj, lock_mode, lock)
 	DB_LOCKTAB *lt;
-	u_int32_t locker;
-	int flags;
+	u_int32_t locker, flags;
 	const DBT *obj;
 	db_lockmode_t lock_mode;
 	DB_LOCK *lock;
@@ -496,35 +255,6 @@ lock_put(lt, lock)
 	return (ret);
 }
 
-int
-lock_close(lt)
-	DB_LOCKTAB *lt;
-{
-	int ret;
-
-	if ((ret = __db_rclose(lt->dbenv, lt->fd, lt->region)) != 0)
-		return (ret);
-
-	/* Free lock table. */
-	__db_free(lt);
-	return (0);
-}
-
-int
-lock_unlink(path, force, dbenv)
-	const char *path;
-	int force;
-	DB_ENV *dbenv;
-{
-	return (__db_runlink(dbenv,
-	    DB_APP_NONE, path, DB_DEFAULT_LOCK_FILE, force));
-}
-
-/*
- * XXX This looks like it could be void, but I'm leaving it returning
- * an int because I think it will have to when we go through and add
- * the appropriate error checking for the EINTR on mutexes.
- */
 static int
 __lock_put_internal(lt, lockp, do_all)
 	DB_LOCKTAB *lt;
@@ -593,7 +323,7 @@ __lock_put_internal(lt, lockp, do_all)
 		SH_TAILQ_INSERT_TAIL(&sh_obj->holders, lp_w, links);
 
 		/* Wake up waiter. */
-		(void)__db_mutex_unlock(&lp_w->mutex, lt->fd);
+		(void)__db_mutex_unlock(&lp_w->mutex, lt->reginfo.fd);
 		state_changed = 1;
 	}
 
@@ -626,8 +356,7 @@ __lock_put_internal(lt, lockp, do_all)
 static int
 __lock_get_internal(lt, locker, flags, obj, lock_mode, lockp)
 	DB_LOCKTAB *lt;
-	u_int32_t locker;
-	int flags;
+	u_int32_t locker, flags;
 	const DBT *obj;
 	db_lockmode_t lock_mode;
 	struct __db_lock **lockp;
@@ -741,7 +470,7 @@ __lock_get_internal(lt, locker, flags, obj, lock_mode, lockp)
 	 */
 	(void)__db_mutex_init(&newl->mutex,
 	    MUTEX_LOCK_OFFSET(lt->region, &newl->mutex));
-	(void)__db_mutex_lock(&newl->mutex, lt->fd);
+	(void)__db_mutex_lock(&newl->mutex, lt->reginfo.fd);
 
 	/*
 	 * Now, insert the lock onto its locker's list.
@@ -772,7 +501,7 @@ __lock_get_internal(lt, locker, flags, obj, lock_mode, lockp)
 		if (lrp->detect != DB_LOCK_NORUN)
 			ret = lock_detect(lt, 0, lrp->detect);
 
-		(void)__db_mutex_lock(&newl->mutex, lt->fd);
+		(void)__db_mutex_lock(&newl->mutex, lt->reginfo.fd);
 
 		LOCK_LOCKREGION(lt);
 		if (newl->status != DB_LSTAT_PENDING) {
@@ -802,306 +531,6 @@ __lock_get_internal(lt, locker, flags, obj, lock_mode, lockp)
 }
 
 /*
- * This is called at every interface to verify if the region
- * has changed size, and if so, to remap the region in and
- * reset the process pointers.
- */
-static int
-__lock_validate_region(lt)
-	DB_LOCKTAB *lt;
-{
-	int ret;
-
-	if (lt->reg_size == lt->region->hdr.size)
-		return (0);
-
-	/* Grow the region. */
-	if ((ret = __db_rremap(lt->dbenv, lt->region,
-	    lt->reg_size, lt->region->hdr.size, lt->fd, &lt->region)) != 0)
-		return (ret);
-
-	__lock_reset_region(lt);
-
-	return (0);
-}
-
-/*
- * We have run out of space; time to grow the region.
- */
-static int
-__lock_grow_region(lt, which, howmuch)
-	DB_LOCKTAB *lt;
-	int which;
-	size_t howmuch;
-{
-	struct __db_lock *newl;
-	struct lock_header *lock_head;
-	struct obj_header *obj_head;
-	DB_LOCKOBJ *op;
-	DB_LOCKREGION *lrp;
-	float lock_ratio, obj_ratio;
-	size_t incr, oldsize, used;
-	u_int32_t i, newlocks, newmem, newobjs;
-	int ret, usedlocks, usedmem, usedobjs;
-	u_int8_t *curaddr;
-
-	lrp = lt->region;
-	oldsize = lrp->hdr.size;
-	incr = lrp->increment;
-
-	/* Figure out how much of each sort of space we have. */
-	usedmem = lrp->mem_bytes - __db_shalloc_count(lt->mem);
-	usedobjs = lrp->numobjs - __lock_count_objs(lrp);
-	usedlocks = lrp->maxlocks - __lock_count_locks(lrp);
-
-	/*
-	 * Figure out what fraction of the used space belongs to each
-	 * different type of "thing" in the region.  Then partition the
-	 * new space up according to this ratio.
-	 */
-	used = usedmem +
-	    usedlocks * ALIGN(sizeof(struct __db_lock), MUTEX_ALIGNMENT) +
-	    usedobjs * sizeof(DB_LOCKOBJ);
-
-	lock_ratio = usedlocks *
-	    ALIGN(sizeof(struct __db_lock), MUTEX_ALIGNMENT) / (float)used;
-	obj_ratio = usedobjs * sizeof(DB_LOCKOBJ) / (float)used;
-
-	newlocks = (u_int32_t)(lock_ratio *
-	    incr / ALIGN(sizeof(struct __db_lock), MUTEX_ALIGNMENT));
-	newobjs = (u_int32_t)(obj_ratio * incr / sizeof(DB_LOCKOBJ));
-	newmem = incr -
-	    (newobjs * sizeof(DB_LOCKOBJ) +
-	    newlocks * ALIGN(sizeof(struct __db_lock), MUTEX_ALIGNMENT));
-
-	/*
-	 * Make sure we allocate enough memory for the object being
-	 * requested.
-	 */
-	switch (which) {
-		case DB_LOCK_LOCK:
-			if (newlocks == 0) {
-				newlocks = 10;
-				incr += newlocks * sizeof(struct __db_lock);
-			}
-			break;
-		case DB_LOCK_OBJ:
-			if (newobjs == 0) {
-				newobjs = 10;
-				incr += newobjs * sizeof(DB_LOCKOBJ);
-			}
-			break;
-		case DB_LOCK_MEM:
-			if (newmem < howmuch * 2) {
-				incr += howmuch * 2 - newmem;
-				newmem = howmuch * 2;
-			}
-			break;
-	}
-
-	newmem += ALIGN(incr, sizeof(size_t)) - incr;
-	incr = ALIGN(incr, sizeof(size_t));
-
-	/*
-	 * Since we are going to be allocating locks at the beginning of the
-	 * new chunk, we need to make sure that the chunk is MUTEX_ALIGNMENT
-	 * aligned.  We did not guarantee this when we created the region, so
-	 * we may need to pad the old region by extra bytes to ensure this
-	 * alignment.
-	 */
-	incr += ALIGN(oldsize, MUTEX_ALIGNMENT) - oldsize;
-
-	__db_err(lt->dbenv,
-	    "Growing lock region: %lu locks %lu objs %lu bytes",
-	    (u_long)newlocks, (u_long)newobjs, (u_long)newmem);
-
-	if ((ret = __db_rgrow(lt->dbenv, lt->fd, incr)) != 0)
-		return (ret);
-	if ((ret = __db_rremap(lt->dbenv,
-	    lt->region, oldsize, oldsize + incr, lt->fd, &lt->region)) != 0)
-		return (ret);
-	__lock_reset_region(lt);
-
-	/* Update region parameters. */
-	lrp = lt->region;
-	lrp->increment = incr << 1;
-	lrp->maxlocks += newlocks;
-	lrp->numobjs += newobjs;
-	lrp->mem_bytes += newmem;
-
-	curaddr = (u_int8_t *)lrp + oldsize;
-	curaddr = (u_int8_t *)ALIGNP(curaddr, MUTEX_ALIGNMENT);
-
-	/* Put new locks onto the free list. */
-	lock_head = &lrp->free_locks;
-	for (i = 0; i++ < newlocks;
-	    curaddr += ALIGN(sizeof(struct __db_lock), MUTEX_ALIGNMENT)) {
-		newl = (struct __db_lock *)curaddr;
-		SH_TAILQ_INSERT_HEAD(lock_head, newl, links, __db_lock);
-	}
-
-	/* Put new objects onto the free list.  */
-	obj_head = &lrp->free_objs;
-	for (i = 0; i++ < newobjs; curaddr += sizeof(DB_LOCKOBJ)) {
-		op = (DB_LOCKOBJ *)curaddr;
-		SH_TAILQ_INSERT_HEAD(obj_head, op, links, __db_lockobj);
-	}
-
-	*((size_t *)curaddr) = newmem - sizeof(size_t);
-	curaddr += sizeof(size_t);
-	__db_shalloc_free(lt->mem, curaddr);
-
-	return (0);
-}
-
-#ifdef DEBUG
-/*
- * __lock_dump_region --
- *
- * PUBLIC: void __lock_dump_region __P((DB_LOCKTAB *, u_int));
- */
-void
-__lock_dump_region(lt, flags)
-	DB_LOCKTAB *lt;
-	u_int flags;
-{
-	struct __db_lock *lp;
-	DB_LOCKOBJ *op;
-	DB_LOCKREGION *lrp;
-	u_int32_t i, j;
-
-	lrp = lt->region;
-
-	printf("Lock region parameters\n");
-	printf("%s:0x%x\t%s:%lu\t%s:%lu\t%s:%lu\n%s:%lu\t%s:%lu\t%s:%lu\t\n",
-	    "magic      ", lrp->magic,
-	    "version    ", (u_long)lrp->version,
-	    "processes  ", (u_long)lrp->hdr.refcnt,
-	    "maxlocks   ", (u_long)lrp->maxlocks,
-	    "table size ", (u_long)lrp->table_size,
-	    "nmodes     ", (u_long)lrp->nmodes,
-	    "numobjs    ", (u_long)lrp->numobjs);
-	printf("%s:%lu\t%s:%lu\t%s:%lu\n%s:%lu\t%s:%lu\t%s:%lu\n",
-	    "size       ", (u_long)lrp->hdr.size,
-	    "nlockers   ", (u_long)lrp->nlockers,
-	    "hash_off   ", (u_long)lrp->hash_off,
-	    "increment  ", (u_long)lrp->increment,
-	    "mem_off    ", (u_long)lrp->mem_off,
-	    "mem_bytes  ", (u_long)lrp->mem_bytes);
-#ifndef HAVE_SPINLOCKS
-	printf("Mutex: off %lu", (u_long)lrp->hdr.lock.off);
-#endif
-	printf(" waits %lu nowaits %lu",
-	    (u_long)lrp->hdr.lock.mutex_set_wait,
-	    (u_long)lrp->hdr.lock.mutex_set_nowait);
-	printf("\n%s:%lu\t%s:%lu\t%s:%lu\t%s:%lu\n",
-	    "nconflicts ", (u_long)lrp->nconflicts,
-	    "nrequests  ", (u_long)lrp->nrequests,
-	    "nreleases  ", (u_long)lrp->nreleases,
-	    "ndeadlocks ", (u_long)lrp->ndeadlocks);
-	printf("need_dd    %lu\n", (u_long)lrp->need_dd);
-	if (flags & LOCK_DEBUG_CONF) {
-		printf("\nConflict matrix\n");
-
-		for (i = 0; i < lrp->nmodes; i++) {
-			for (j = 0; j < lrp->nmodes; j++)
-				printf("%lu\t",
-				    (u_long)lt->conflicts[i * lrp->nmodes + j]);
-			printf("\n");
-		}
-	}
-
-	for (i = 0; i < lrp->table_size; i++) {
-		op = SH_TAILQ_FIRST(&lt->hashtab[i], __db_lockobj);
-		if (op != NULL && flags & LOCK_DEBUG_BUCKET)
-			printf("Bucket %lu:\n", (unsigned long)i);
-		while (op != NULL) {
-			if (op->type == DB_LOCK_LOCKER &&
-			    flags & LOCK_DEBUG_LOCKERS)
-				__lock_dump_locker(lt, op);
-			else if (flags & LOCK_DEBUG_OBJECTS &&
-			    op->type == DB_LOCK_OBJTYPE)
-				__lock_dump_object(lt, op);
-			op = SH_TAILQ_NEXT(op, links, __db_lockobj);
-		}
-	}
-
-	if (flags & LOCK_DEBUG_LOCK) {
-		printf("\nLock Free List\n");
-		for (lp = SH_TAILQ_FIRST(&lrp->free_locks, __db_lock);
-		    lp != NULL;
-		    lp = SH_TAILQ_NEXT(lp, links, __db_lock)) {
-			printf("0x%x: %lu\t%lu\t%lu\t0x%x\n", (u_int)lp,
-			    (u_long)lp->holder, (u_long)lp->mode,
-			    (u_long)lp->status, (u_int)lp->obj);
-		}
-	}
-
-	if (flags & LOCK_DEBUG_LOCK) {
-		printf("\nObject Free List\n");
-		for (op = SH_TAILQ_FIRST(&lrp->free_objs, __db_lockobj);
-		    op != NULL;
-		    op = SH_TAILQ_NEXT(op, links, __db_lockobj))
-			printf("0x%x\n", (u_int)op);
-	}
-
-	if (flags & LOCK_DEBUG_MEM) {
-		printf("\nMemory Free List\n");
-		__db_shalloc_dump(stdout, lt->mem);
-	}
-}
-
-static void
-__lock_dump_locker(lt, op)
-	DB_LOCKTAB *lt;
-	DB_LOCKOBJ *op;
-{
-	struct __db_lock *lp;
-	u_int32_t locker;
-	void *ptr;
-
-	ptr = SH_DBT_PTR(&op->lockobj);
-	memcpy(&locker, ptr, sizeof(u_int32_t));
-	printf("L %lx", (u_long)locker);
-
-	lp = SH_LIST_FIRST(&op->heldby, __db_lock);
-	if (lp == NULL) {
-		printf("\n");
-		return;
-	}
-	for (; lp != NULL; lp = SH_LIST_NEXT(lp, locker_links, __db_lock))
-		__lock_printlock(lt, lp, 0);
-}
-
-static void
-__lock_dump_object(lt, op)
-	DB_LOCKTAB *lt;
-	DB_LOCKOBJ *op;
-{
-	struct __db_lock *lp;
-	u_int32_t j;
-	char *ptr;
-
-	ptr = SH_DBT_PTR(&op->lockobj);
-	for (j = 0; j < op->lockobj.size; ptr++, j++)
-		printf("%c", (int)*ptr);
-	printf("\n");
-
-	printf("H:");
-	for (lp =
-	    SH_TAILQ_FIRST(&op->holders, __db_lock);
-	    lp != NULL;
-	    lp = SH_TAILQ_NEXT(lp, links, __db_lock))
-		__lock_printlock(lt, lp, 0);
-	lp = SH_TAILQ_FIRST(&op->waiters, __db_lock);
-	if (lp != NULL) {
-		printf("\nW:");
-		for (; lp != NULL; lp = SH_TAILQ_NEXT(lp, links, __db_lock))
-			__lock_printlock(lt, lp, 0);
-	}
-}
-
-/*
  * __lock_is_locked --
  *
  * PUBLIC: int __lock_is_locked
@@ -1136,7 +565,12 @@ __lock_is_locked(lt, locker, dbt, mode)
 	return (0);
 }
 
-static void
+/*
+ * __lock_printlock --
+ *
+ * PUBLIC: void __lock_printlock __P((DB_LOCKTAB *, struct __db_lock *, int));
+ */
+void
 __lock_printlock(lt, lp, ispgno)
 	DB_LOCKTAB *lt;
 	struct __db_lock *lp;
@@ -1213,39 +647,6 @@ __lock_printlock(lt, lp, ispgno)
 		printf("\n");
 	}
 }
-#endif
-
-static int
-__lock_count_locks(lrp)
-	DB_LOCKREGION *lrp;
-{
-	struct __db_lock *newl;
-	int count;
-
-	count = 0;
-	for (newl = SH_TAILQ_FIRST(&lrp->free_locks, __db_lock);
-	    newl != NULL;
-	    newl = SH_TAILQ_NEXT(newl, links, __db_lock))
-		count++;
-
-	return (count);
-}
-
-static int
-__lock_count_objs(lrp)
-	DB_LOCKREGION *lrp;
-{
-	DB_LOCKOBJ *obj;
-	int count;
-
-	count = 0;
-	for (obj = SH_TAILQ_FIRST(&lrp->free_objs, __db_lockobj);
-	    obj != NULL;
-	    obj = SH_TAILQ_NEXT(obj, links, __db_lockobj))
-		count++;
-
-	return (count);
-}
 
 /*
  * PUBLIC: int __lock_getobj  __P((DB_LOCKTAB *,
@@ -1354,19 +755,7 @@ __lock_remove_waiter(lt, sh_obj, lockp, status)
 	lockp->status = status;
 
 	/* Wake whoever is waiting on this lock. */
-	(void)__db_mutex_unlock(&lockp->mutex, lt->fd);
-}
-
-static void
-__lock_freeobj(lt, obj)
-	DB_LOCKTAB *lt;
-	DB_LOCKOBJ *obj;
-{
-	HASHREMOVE_EL(lt->hashtab,
-	    __db_lockobj, links, obj, lt->region->table_size, __lock_lhash);
-	if (obj->lockobj.size > sizeof(obj->objdata))
-		__db_shalloc_free(lt->mem, SH_DBT_PTR(&obj->lockobj));
-	SH_TAILQ_INSERT_HEAD(&lt->region->free_objs, obj, links, __db_lockobj);
+	(void)__db_mutex_unlock(&lockp->mutex, lt->reginfo.fd);
 }
 
 static void
@@ -1384,17 +773,18 @@ __lock_checklocker(lt, lockp, do_remove)
 	if (__lock_getobj(lt, lockp->holder, NULL, DB_LOCK_LOCKER, &sh_locker)
 	    == 0 && SH_LIST_FIRST(&sh_locker->heldby, __db_lock) == NULL) {
 		__lock_freeobj(lt, sh_locker);
-		lt->region->nlockers--;
+		    lt->region->nlockers--;
 	}
 }
 
 static void
-__lock_reset_region(lt)
+__lock_freeobj(lt, obj)
 	DB_LOCKTAB *lt;
+	DB_LOCKOBJ *obj;
 {
-	lt->conflicts = (u_int8_t *)lt->region + sizeof(DB_LOCKREGION);
-	lt->hashtab =
-	    (DB_HASHTAB *)((u_int8_t *)lt->region + lt->region->hash_off);
-	lt->mem = (void *)((u_int8_t *)lt->region + lt->region->mem_off);
-	lt->reg_size = lt->region->hdr.size;
+	HASHREMOVE_EL(lt->hashtab,
+	    __db_lockobj, links, obj, lt->region->table_size, __lock_lhash);
+	if (obj->lockobj.size > sizeof(obj->objdata))
+		__db_shalloc_free(lt->mem, SH_DBT_PTR(&obj->lockobj));
+	SH_TAILQ_INSERT_HEAD(&lt->region->free_objs, obj, links, __db_lockobj);
 }
diff --git a/db2/lock/lock_conflict.c b/db2/lock/lock_conflict.c
index ff0287f07e..870aa0dc17 100644
--- a/db2/lock/lock_conflict.c
+++ b/db2/lock/lock_conflict.c
@@ -1,14 +1,14 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)lock_conflict.c	10.2 (Sleepycat) 6/21/97";
+static const char sccsid[] = "@(#)lock_conflict.c	10.3 (Sleepycat) 4/10/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
diff --git a/db2/lock/lock_deadlock.c b/db2/lock/lock_deadlock.c
index 93c438ca36..4de492944e 100644
--- a/db2/lock/lock_deadlock.c
+++ b/db2/lock/lock_deadlock.c
@@ -1,25 +1,21 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 
 #include "config.h"
 
 #ifndef lint
-static const char copyright[] =
-"@(#) Copyright (c) 1997\n\
-	Sleepycat Software Inc.  All rights reserved.\n";
-static const char sccsid[] = "@(#)lock_deadlock.c	10.26 (Sleepycat) 11/25/97";
-#endif
+static const char sccsid[] = "@(#)lock_deadlock.c	10.32 (Sleepycat) 4/26/98";
+#endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
 #include <sys/types.h>
 
 #include <errno.h>
 #include <string.h>
-#include <stdlib.h>
 #endif
 
 #include "db_int.h"
@@ -59,14 +55,14 @@ static int  __dd_build
 static u_int32_t
 	   *__dd_find __P((u_int32_t *, locker_info *, u_int32_t));
 
-#ifdef DEBUG
+#ifdef DIAGNOSTIC
 static void __dd_debug __P((DB_ENV *, locker_info *, u_int32_t *, u_int32_t));
 #endif
 
 int
 lock_detect(lt, flags, atype)
 	DB_LOCKTAB *lt;
-	int flags, atype;
+	u_int32_t flags, atype;
 {
 	DB_ENV *dbenv;
 	locker_info *idmap;
@@ -96,7 +92,7 @@ lock_detect(lt, flags, atype)
 
 	if (nlockers == 0)
 		return (0);
-#ifdef DEBUG
+#ifdef DIAGNOSTIC
 	if (dbenv->db_verbose != 0)
 		__dd_debug(dbenv, idmap, bitmap, nlockers);
 #endif
@@ -202,7 +198,7 @@ __dd_build(dbenv, bmp, nlockers, idmap)
 	u_int8_t *pptr;
 	locker_info *id_array;
 	u_int32_t *bitmap, count, *entryp, i, id, nentries, *tmpmap;
-	int is_first, ret;
+	int is_first;
 
 	lt = dbenv->lk_info;
 
@@ -322,8 +318,8 @@ retry:	count = lt->region->nlockers;
 			    lp != NULL;
 			    is_first = 0,
 			    lp = SH_TAILQ_NEXT(lp, links, __db_lock)) {
-				if ((ret = __lock_getobj(lt, lp->holder,
-				    NULL, DB_LOCK_LOCKER, &lockerp)) != 0) {
+				if (__lock_getobj(lt, lp->holder,
+				    NULL, DB_LOCK_LOCKER, &lockerp) != 0) {
 					__db_err(dbenv,
 					    "warning unable to find object");
 					continue;
@@ -357,8 +353,8 @@ retry:	count = lt->region->nlockers;
 	for (id = 0; id < count; id++) {
 		if (!id_array[id].valid)
 			continue;
-		if ((ret = __lock_getobj(lt,
-		    id_array[id].id, NULL, DB_LOCK_LOCKER, &lockerp)) != 0) {
+		if (__lock_getobj(lt,
+		    id_array[id].id, NULL, DB_LOCK_LOCKER, &lockerp) != 0) {
 			__db_err(dbenv,
 			    "No locks for locker %lu", (u_long)id_array[id].id);
 			continue;
@@ -448,7 +444,7 @@ __dd_abort(dbenv, info)
 	SH_LIST_REMOVE(lockp, locker_links, __db_lock);
 	sh_obj = (DB_LOCKOBJ *)((u_int8_t *)lockp + lockp->obj);
 	SH_TAILQ_REMOVE(&sh_obj->waiters, lockp, links, __db_lock);
-        (void)__db_mutex_unlock(&lockp->mutex, lt->fd);
+        (void)__db_mutex_unlock(&lockp->mutex, lt->reginfo.fd);
 
 	ret = 0;
 
@@ -456,7 +452,7 @@ out:	UNLOCK_LOCKREGION(lt);
 	return (ret);
 }
 
-#ifdef DEBUG
+#ifdef DIAGNOSTIC
 static void
 __dd_debug(dbenv, idmap, bitmap, nlockers)
 	DB_ENV *dbenv;
diff --git a/db2/lock/lock_region.c b/db2/lock/lock_region.c
new file mode 100644
index 0000000000..b597560744
--- /dev/null
+++ b/db2/lock/lock_region.c
@@ -0,0 +1,726 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997, 1998
+ *	Sleepycat Software.  All rights reserved.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)lock_region.c	10.15 (Sleepycat) 6/2/98";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <ctype.h>
+#include <errno.h>
+#include <string.h>
+#endif
+
+#include "db_int.h"
+#include "shqueue.h"
+#include "db_shash.h"
+#include "lock.h"
+#include "common_ext.h"
+
+static u_int32_t __lock_count_locks __P((DB_LOCKREGION *));
+static u_int32_t __lock_count_objs __P((DB_LOCKREGION *));
+static void	 __lock_dump_locker __P((DB_LOCKTAB *, DB_LOCKOBJ *, FILE *));
+static void	 __lock_dump_object __P((DB_LOCKTAB *, DB_LOCKOBJ *, FILE *));
+static const char *__lock_dump_status __P((db_status_t));
+static void	 __lock_reset_region __P((DB_LOCKTAB *));
+static int	 __lock_tabinit __P((DB_ENV *, DB_LOCKREGION *));
+
+/*
+ * lock_open --
+ *	Attach to the lock region under path and hand back a lock table
+ *	handle in *ltp.
+ *
+ *	path may be NULL.  flags is checked by __db_fchk against DB_CREATE
+ *	(and DB_THREAD when HAVE_SPINLOCKS is defined).  mode is stored in
+ *	the region information handed to __db_rattach.  dbenv may be NULL,
+ *	in which case DB_LOCK_RW_N modes and DB_LOCK_DEFAULT_N locks are
+ *	used to size the region.
+ *
+ *	Returns 0 on success.  On failure a non-zero error value is
+ *	returned, the partially built handle is released and *ltp is left
+ *	untouched.
+ */
+int
+lock_open(path, flags, mode, dbenv, ltp)
+	const char *path;
+	u_int32_t flags;
+	int mode;
+	DB_ENV *dbenv;
+	DB_LOCKTAB **ltp;
+{
+	DB_LOCKTAB *lt;
+	u_int32_t lock_modes, maxlocks, regflags;
+	int ret;
+
+	/* Validate arguments. */
+#ifdef HAVE_SPINLOCKS
+#define	OKFLAGS	(DB_CREATE | DB_THREAD)
+#else
+#define	OKFLAGS	(DB_CREATE)
+#endif
+	if ((ret = __db_fchk(dbenv, "lock_open", flags, OKFLAGS)) != 0)
+		return (ret);
+
+	/* Create the lock table structure. */
+	if ((lt = (DB_LOCKTAB *)__db_calloc(1, sizeof(DB_LOCKTAB))) == NULL) {
+		__db_err(dbenv, "%s", strerror(ENOMEM));
+		return (ENOMEM);
+	}
+	lt->dbenv = dbenv;
+
+	/*
+	 * Grab the values that we need to compute the region size.  The
+	 * REGION_SIZEDEF flag is only kept while both values are defaults.
+	 */
+	lock_modes = DB_LOCK_RW_N;
+	maxlocks = DB_LOCK_DEFAULT_N;
+	regflags = REGION_SIZEDEF;
+	if (dbenv != NULL) {
+		if (dbenv->lk_modes != 0) {
+			lock_modes = dbenv->lk_modes;
+			regflags = 0;
+		}
+		if (dbenv->lk_max != 0) {
+			maxlocks = dbenv->lk_max;
+			regflags = 0;
+		}
+	}
+
+	/* Join/create the lock region. */
+	lt->reginfo.dbenv = dbenv;
+	lt->reginfo.appname = DB_APP_NONE;
+	if (path == NULL)
+		lt->reginfo.path = NULL;
+	else if ((lt->reginfo.path = (char *)__db_strdup(path)) == NULL) {
+		/*
+		 * ret is still 0 from the flag check; set a real error so
+		 * the failure is not reported to the caller as success.
+		 */
+		ret = ENOMEM;
+		goto err;
+	}
+	lt->reginfo.file = DB_DEFAULT_LOCK_FILE;
+	lt->reginfo.mode = mode;
+	lt->reginfo.size =
+	    LOCK_REGION_SIZE(lock_modes, maxlocks, __db_tablesize(maxlocks));
+	lt->reginfo.dbflags = flags;
+	lt->reginfo.addr = NULL;
+	lt->reginfo.fd = -1;
+	lt->reginfo.flags = regflags;
+
+	if ((ret = __db_rattach(&lt->reginfo)) != 0)
+		goto err;
+
+	/* Now set up the pointer to the region. */
+	lt->region = lt->reginfo.addr;
+
+	/* Initialize the region if we created it. */
+	if (F_ISSET(&lt->reginfo, REGION_CREATED)) {
+		lt->region->maxlocks = maxlocks;
+		lt->region->nmodes = lock_modes;
+		if ((ret = __lock_tabinit(dbenv, lt->region)) != 0)
+			goto err;
+	} else {
+		/* Check for an unexpected region. */
+		if (lt->region->magic != DB_LOCKMAGIC) {
+			__db_err(dbenv,
+			    "lock_open: %s: bad magic number", path);
+			ret = EINVAL;
+			goto err;
+		}
+	}
+
+	/* Check for automatic deadlock detection. */
+	if (dbenv != NULL && dbenv->lk_detect != DB_LOCK_NORUN) {
+		if (lt->region->detect != DB_LOCK_NORUN &&
+		    dbenv->lk_detect != DB_LOCK_DEFAULT &&
+		    lt->region->detect != dbenv->lk_detect) {
+			__db_err(dbenv,
+		    "lock_open: incompatible deadlock detector mode");
+			ret = EINVAL;
+			goto err;
+		}
+		if (lt->region->detect == DB_LOCK_NORUN)
+			lt->region->detect = dbenv->lk_detect;
+	}
+
+	/* Set up remaining pointers into region. */
+	lt->conflicts = (u_int8_t *)lt->region + sizeof(DB_LOCKREGION);
+	lt->hashtab =
+	    (DB_HASHTAB *)((u_int8_t *)lt->region + lt->region->hash_off);
+	lt->mem = (void *)((u_int8_t *)lt->region + lt->region->mem_off);
+
+	/*
+	 * NOTE(review): presumably __db_rattach returns with the region
+	 * locked, which is why it is unlocked here -- confirm.
+	 */
+	UNLOCK_LOCKREGION(lt);
+	*ltp = lt;
+	return (0);
+
+	/*
+	 * Error exit: detach only if the region was attached, and remove
+	 * the region only if this call created it.
+	 */
+err:	if (lt->reginfo.addr != NULL) {
+		UNLOCK_LOCKREGION(lt);
+		(void)__db_rdetach(&lt->reginfo);
+		if (F_ISSET(&lt->reginfo, REGION_CREATED))
+			(void)lock_unlink(path, 1, dbenv);
+	}
+
+	if (lt->reginfo.path != NULL)
+		FREES(lt->reginfo.path);
+	FREE(lt, sizeof(*lt));
+	return (ret);
+}
+
+/*
+ * __lock_tabinit --
+ *	Lay out a freshly created lock region: header fields, conflict
+ *	matrix, hash table, free lock list, free object list and the
+ *	shared allocation area, in that order.  The caller must already
+ *	have stored lrp->maxlocks and lrp->nmodes, since those values
+ *	were needed to size the region.  Always returns 0.
+ */
+static int
+__lock_tabinit(dbenv, lrp)
+	DB_ENV *dbenv;
+	DB_LOCKREGION *lrp;
+{
+	struct __db_lock *lockp;
+	struct lock_header *lock_head;
+	struct obj_header *obj_head;
+	DB_LOCKOBJ *objp;
+	u_int32_t i, nbuckets;
+	const u_int8_t *matrix;
+	u_int8_t *curaddr;
+
+	/* Use db_rw_conflicts unless the environment supplies a matrix. */
+	if (dbenv == NULL || dbenv->lk_conflicts == NULL)
+		matrix = db_rw_conflicts;
+	else
+		matrix = dbenv->lk_conflicts;
+
+	/* Identification. */
+	lrp->magic = DB_LOCKMAGIC;
+	lrp->version = DB_LOCKVERSION;
+	lrp->id = 0;
+
+	/* Sizing, all derived from the caller-supplied maxlocks. */
+	lrp->table_size = __db_tablesize(lrp->maxlocks);
+	lrp->numobjs = lrp->maxlocks;
+	lrp->mem_bytes = ALIGN(STRING_SIZE(lrp->maxlocks), sizeof(size_t));
+	lrp->increment = lrp->hdr.size / 2;
+
+	/* Deadlock detection state and counters start out clear. */
+	lrp->need_dd = 0;
+	lrp->detect = DB_LOCK_NORUN;
+	lrp->nlockers = 0;
+	lrp->nconflicts = 0;
+	lrp->nrequests = 0;
+	lrp->nreleases = 0;
+	lrp->ndeadlocks = 0;
+
+	/*
+	 * Each chunk written below must leave the following chunk properly
+	 * aligned.  The same layout knowledge is encoded in the XXX_SIZE
+	 * macros in lock.h.
+	 */
+
+	/* Conflict matrix: nmodes * nmodes bytes right after the header. */
+	curaddr = (u_int8_t *)lrp + sizeof(DB_LOCKREGION);
+	memcpy(curaddr, matrix, lrp->nmodes * lrp->nmodes);
+	curaddr += lrp->nmodes * lrp->nmodes;
+
+	/* Hash table. */
+	curaddr = (u_int8_t *)ALIGNP(curaddr, LOCK_HASH_ALIGN);
+	lrp->hash_off = curaddr - (u_int8_t *)lrp;
+	nbuckets = lrp->table_size;
+	__db_hashinit(curaddr, nbuckets);
+	curaddr += nbuckets * sizeof(DB_HASHTAB);
+
+	/*
+	 * Free lock list.  Locks contain mutexes, so every lock has to sit
+	 * on a MUTEX_ALIGNMENT boundary.
+	 */
+	curaddr = (u_int8_t *)ALIGNP(curaddr, MUTEX_ALIGNMENT);
+	lock_head = &lrp->free_locks;
+	SH_TAILQ_INIT(lock_head);
+
+	for (i = 0; i < lrp->maxlocks; ++i) {
+		lockp = (struct __db_lock *)curaddr;
+		lockp->status = DB_LSTAT_FREE;
+		SH_TAILQ_INSERT_HEAD(lock_head, lockp, links, __db_lock);
+		curaddr += ALIGN(sizeof(struct __db_lock), MUTEX_ALIGNMENT);
+	}
+
+	/* Free object list. */
+	obj_head = &lrp->free_objs;
+	SH_TAILQ_INIT(obj_head);
+
+	for (i = 0; i < lrp->maxlocks; ++i) {
+		objp = (DB_LOCKOBJ *)curaddr;
+		SH_TAILQ_INSERT_HEAD(obj_head, objp, links, __db_lockobj);
+		curaddr += sizeof(DB_LOCKOBJ);
+	}
+
+	/*
+	 * String space.  Like every shared memory allocation area it needs
+	 * size_t alignment, because the lengths of allocated chunks are
+	 * stored inside the area itself.
+	 */
+	curaddr = (u_int8_t *)ALIGNP(curaddr, sizeof(size_t));
+	lrp->mem_off = curaddr - (u_int8_t *)lrp;
+	__db_shalloc_init(curaddr, lrp->mem_bytes);
+	return (0);
+}
+
+/*
+ * lock_close --
+ *	Detach from the lock region and release the lock table handle.
+ *	If the detach fails its error is returned and the handle, including
+ *	its saved path, is left allocated.
+ */
+int
+lock_close(lt)
+	DB_LOCKTAB *lt;
+{
+	int ret;
+
+	ret = __db_rdetach(&lt->reginfo);
+	if (ret == 0) {
+		if (lt->reginfo.path != NULL)
+			FREES(lt->reginfo.path);
+		FREE(lt, sizeof(*lt));
+	}
+
+	return (ret);
+}
+
+/*
+ * lock_unlink --
+ *	Remove the lock region found under path by filling in a temporary
+ *	REGINFO and passing it, together with force, to __db_runlink.
+ *	Returns ENOMEM if the path cannot be copied, otherwise whatever
+ *	__db_runlink returns.
+ */
+int
+lock_unlink(path, force, dbenv)
+	const char *path;
+	int force;
+	DB_ENV *dbenv;
+{
+	REGINFO info;
+	int ret;
+
+	memset(&info, 0, sizeof(info));
+	info.dbenv = dbenv;
+	info.appname = DB_APP_NONE;
+	info.file = DB_DEFAULT_LOCK_FILE;
+
+	/* Work on a private copy of the path; a NULL path stays NULL. */
+	if (path != NULL) {
+		info.path = (char *)__db_strdup(path);
+		if (info.path == NULL)
+			return (ENOMEM);
+	}
+
+	ret = __db_runlink(&info, force);
+
+	if (info.path != NULL)
+		FREES(info.path);
+	return (ret);
+}
+
+/*
+ * __lock_validate_region --
+ *	Interface entry check: if the size recorded in the region header
+ *	no longer matches the size in this handle's region information,
+ *	reattach at the new size and recompute the handle's pointers into
+ *	the region.  Returns 0, or the error from __db_rreattach.
+ *
+ * PUBLIC: int __lock_validate_region __P((DB_LOCKTAB *));
+ */
+int
+__lock_validate_region(lt)
+	DB_LOCKTAB *lt;
+{
+	int ret;
+
+	if (lt->reginfo.size != lt->region->hdr.size) {
+		/* Detach/reattach the region. */
+		ret = __db_rreattach(&lt->reginfo, lt->region->hdr.size);
+		if (ret != 0)
+			return (ret);
+
+		/* The mapping may have moved; refresh everything derived. */
+		lt->region = lt->reginfo.addr;
+		__lock_reset_region(lt);
+	}
+
+	return (0);
+}
+
+/*
+ * __lock_grow_region --
+ *	We have run out of space; time to grow the region.
+ *
+ *	The region is extended by (at least) lrp->increment bytes.  The new
+ *	space is divided between locks, objects and allocation memory in
+ *	proportion to how much of each is currently in use, and then the
+ *	new locks and objects are pushed onto their free lists and the new
+ *	memory is handed to the shared allocator.
+ *
+ *	which:	 the resource that ran out (DB_LOCK_LOCK, DB_LOCK_OBJ or
+ *		 DB_LOCK_MEM); a minimum amount of it is guaranteed.
+ *	howmuch: only consulted for DB_LOCK_MEM; at least twice this many
+ *		 bytes of memory are added.
+ *
+ *	Returns 0 on success or the error from __db_rgrow.
+ *
+ * PUBLIC: int __lock_grow_region __P((DB_LOCKTAB *, int, size_t));
+ */
+int
+__lock_grow_region(lt, which, howmuch)
+	DB_LOCKTAB *lt;
+	int which;
+	size_t howmuch;
+{
+	struct __db_lock *newl;
+	struct lock_header *lock_head;
+	struct obj_header *obj_head;
+	DB_LOCKOBJ *op;
+	DB_LOCKREGION *lrp;
+	float lock_ratio, obj_ratio;
+	size_t incr, oldsize, used, usedmem;
+	u_int32_t i, newlocks, newmem, newobjs, usedlocks, usedobjs;
+	u_int8_t *curaddr;
+	int ret;
+
+	lrp = lt->region;
+	oldsize = lrp->hdr.size;
+	incr = lrp->increment;
+
+	/* Figure out how much of each sort of space we have. */
+	usedmem = lrp->mem_bytes - __db_shalloc_count(lt->mem);
+	usedobjs = lrp->numobjs - __lock_count_objs(lrp);
+	usedlocks = lrp->maxlocks - __lock_count_locks(lrp);
+
+	/*
+	 * Figure out what fraction of the used space belongs to each
+	 * different type of "thing" in the region.  Then partition the
+	 * new space up according to this ratio.
+	 */
+	used = usedmem +
+	    usedlocks * ALIGN(sizeof(struct __db_lock), MUTEX_ALIGNMENT) +
+	    usedobjs * sizeof(DB_LOCKOBJ);
+
+	lock_ratio = usedlocks *
+	    ALIGN(sizeof(struct __db_lock), MUTEX_ALIGNMENT) / (float)used;
+	obj_ratio = usedobjs * sizeof(DB_LOCKOBJ) / (float)used;
+
+	newlocks = (u_int32_t)(lock_ratio *
+	    incr / ALIGN(sizeof(struct __db_lock), MUTEX_ALIGNMENT));
+	newobjs = (u_int32_t)(obj_ratio * incr / sizeof(DB_LOCKOBJ));
+	newmem = incr -
+	    (newobjs * sizeof(DB_LOCKOBJ) +
+	    newlocks * ALIGN(sizeof(struct __db_lock), MUTEX_ALIGNMENT));
+
+	/*
+	 * Make sure we allocate enough memory for the object being
+	 * requested.
+	 */
+	switch (which) {
+	case DB_LOCK_LOCK:
+		if (newlocks == 0) {
+			newlocks = 10;
+			/*
+			 * Locks are laid out below with a stride of the
+			 * MUTEX_ALIGNMENT-rounded structure size, so the
+			 * extra space must be computed with that same
+			 * stride or the new locks could run past the end
+			 * of the grown region.
+			 */
+			incr += newlocks *
+			    ALIGN(sizeof(struct __db_lock), MUTEX_ALIGNMENT);
+		}
+		break;
+	case DB_LOCK_OBJ:
+		if (newobjs == 0) {
+			newobjs = 10;
+			incr += newobjs * sizeof(DB_LOCKOBJ);
+		}
+		break;
+	case DB_LOCK_MEM:
+		if (newmem < howmuch * 2) {
+			incr += howmuch * 2 - newmem;
+			newmem = howmuch * 2;
+		}
+		break;
+	}
+
+	/* Round the increment up to size_t; the slack goes to memory. */
+	newmem += ALIGN(incr, sizeof(size_t)) - incr;
+	incr = ALIGN(incr, sizeof(size_t));
+
+	/*
+	 * Since we are going to be allocating locks at the beginning of the
+	 * new chunk, we need to make sure that the chunk is MUTEX_ALIGNMENT
+	 * aligned.  We did not guarantee this when we created the region, so
+	 * we may need to pad the old region by extra bytes to ensure this
+	 * alignment.
+	 */
+	incr += ALIGN(oldsize, MUTEX_ALIGNMENT) - oldsize;
+
+	__db_err(lt->dbenv,
+	    "Growing lock region: %lu locks %lu objs %lu bytes",
+	    (u_long)newlocks, (u_long)newobjs, (u_long)newmem);
+
+	if ((ret = __db_rgrow(&lt->reginfo, oldsize + incr)) != 0)
+		return (ret);
+
+	/* Pick up the region address again and recompute derived pointers. */
+	lt->region = lt->reginfo.addr;
+	__lock_reset_region(lt);
+
+	/* Update region parameters. */
+	lrp = lt->region;
+	lrp->increment = incr << 1;
+	lrp->maxlocks += newlocks;
+	lrp->numobjs += newobjs;
+	lrp->mem_bytes += newmem;
+
+	/* The new chunk starts at the old end, rounded up for mutexes. */
+	curaddr = (u_int8_t *)lrp + oldsize;
+	curaddr = (u_int8_t *)ALIGNP(curaddr, MUTEX_ALIGNMENT);
+
+	/* Put new locks onto the free list. */
+	lock_head = &lrp->free_locks;
+	for (i = 0; i++ < newlocks;
+	    curaddr += ALIGN(sizeof(struct __db_lock), MUTEX_ALIGNMENT)) {
+		newl = (struct __db_lock *)curaddr;
+		SH_TAILQ_INSERT_HEAD(lock_head, newl, links, __db_lock);
+	}
+
+	/* Put new objects onto the free list.  */
+	obj_head = &lrp->free_objs;
+	for (i = 0; i++ < newobjs; curaddr += sizeof(DB_LOCKOBJ)) {
+		op = (DB_LOCKOBJ *)curaddr;
+		SH_TAILQ_INSERT_HEAD(obj_head, op, links, __db_lockobj);
+	}
+
+	/*
+	 * Dress the remaining bytes up as an allocated chunk (a size_t
+	 * length followed by the data) and free it into the allocator.
+	 */
+	*((size_t *)curaddr) = newmem - sizeof(size_t);
+	curaddr += sizeof(size_t);
+	__db_shalloc_free(lt->mem, curaddr);
+
+	return (0);
+}
+
+/*
+ * __lock_reset_region --
+ *	Recompute the handle's conflict matrix, hash table and allocation
+ *	area pointers from the current value of lt->region.
+ */
+static void
+__lock_reset_region(lt)
+	DB_LOCKTAB *lt;
+{
+	u_int8_t *base;
+
+	base = (u_int8_t *)lt->region;
+
+	/* The conflict matrix sits directly behind the region header. */
+	lt->conflicts = base + sizeof(DB_LOCKREGION);
+
+	/* The other two are found through offsets stored in the region. */
+	lt->hashtab = (DB_HASHTAB *)(base + lt->region->hash_off);
+	lt->mem = (void *)(base + lt->region->mem_off);
+}
+
+/*
+ * lock_stat --
+ *	Return LOCK statistics.
+ *
+ *	A DB_LOCK_STAT is allocated with db_malloc if one is supplied and
+ *	with __db_malloc otherwise, filled in from the region while the
+ *	region lock is held, and returned through *gspp.  Returns 0 on
+ *	success; on allocation failure returns ENOMEM with *gspp NULL.
+ */
+int
+lock_stat(lt, gspp, db_malloc)
+	DB_LOCKTAB *lt;
+	DB_LOCK_STAT **gspp;
+	void *(*db_malloc) __P((size_t));
+{
+	DB_LOCKREGION *rp;
+	DB_LOCK_STAT *sp;
+
+	*gspp = NULL;
+
+	if (db_malloc == NULL)
+		sp = (DB_LOCK_STAT *)__db_malloc(sizeof(*sp));
+	else
+		sp = (DB_LOCK_STAT *)db_malloc(sizeof(*sp));
+	if (sp == NULL)
+		return (ENOMEM);
+	*gspp = sp;
+
+	/* Copy out the global statistics. */
+	LOCK_LOCKREGION(lt);
+
+	rp = lt->region;
+
+	/* Region identification and configuration. */
+	sp->st_magic = rp->magic;
+	sp->st_version = rp->version;
+	sp->st_maxlocks = rp->maxlocks;
+	sp->st_nmodes = rp->nmodes;
+	sp->st_numobjs = rp->numobjs;
+	sp->st_nlockers = rp->nlockers;
+
+	/* Activity counters. */
+	sp->st_nconflicts = rp->nconflicts;
+	sp->st_nrequests = rp->nrequests;
+	sp->st_nreleases = rp->nreleases;
+	sp->st_ndeadlocks = rp->ndeadlocks;
+
+	/* Values taken from the common region header. */
+	sp->st_region_nowait = rp->hdr.lock.mutex_set_nowait;
+	sp->st_region_wait = rp->hdr.lock.mutex_set_wait;
+	sp->st_refcnt = rp->hdr.refcnt;
+	sp->st_regsize = rp->hdr.size;
+
+	UNLOCK_LOCKREGION(lt);
+
+	return (0);
+}
+
+/*
+ * __lock_count_locks --
+ *	Return the number of locks on the region's free lock list.
+ */
+static u_int32_t
+__lock_count_locks(lrp)
+	DB_LOCKREGION *lrp;
+{
+	struct __db_lock *lp;
+	u_int32_t nfree;
+
+	nfree = 0;
+	lp = SH_TAILQ_FIRST(&lrp->free_locks, __db_lock);
+	while (lp != NULL) {
+		++nfree;
+		lp = SH_TAILQ_NEXT(lp, links, __db_lock);
+	}
+
+	return (nfree);
+}
+
+/*
+ * __lock_count_objs --
+ *	Return the number of objects on the region's free object list.
+ */
+static u_int32_t
+__lock_count_objs(lrp)
+	DB_LOCKREGION *lrp;
+{
+	DB_LOCKOBJ *op;
+	u_int32_t nfree;
+
+	nfree = 0;
+	op = SH_TAILQ_FIRST(&lrp->free_objs, __db_lockobj);
+	while (op != NULL) {
+		++nfree;
+		op = SH_TAILQ_NEXT(op, links, __db_lockobj);
+	}
+
+	return (nfree);
+}
+
+#define	LOCK_DUMP_CONF		0x001		/* Conflict matrix. */
+#define	LOCK_DUMP_FREE		0x002		/* Display lock free list. */
+#define	LOCK_DUMP_LOCKERS	0x004		/* Display lockers. */
+#define	LOCK_DUMP_MEM		0x008		/* Display region memory. */
+#define	LOCK_DUMP_OBJECTS	0x010		/* Display objects. */
+#define	LOCK_DUMP_ALL		0x01f		/* Display all. */
+
+/*
+ * __lock_dump_region --
+ *	Write a description of the lock region to a stdio stream.
+ *
+ *	lt	lock table whose region, conflict matrix, hash table and
+ *		memory are displayed.
+ *	area	string of single-character selectors; each one turns on
+ *		part of the display, any other character is ignored:
+ *		    A	everything
+ *		    c	conflict matrix
+ *		    f	lock and object free lists
+ *		    l	lockers
+ *		    m	region memory
+ *		    o	objects
+ *	fp	output stream; NULL means stderr.
+ *
+ *	The region parameters are always displayed.
+ *
+ *	NOTE(review): this function does not call LOCK_LOCKREGION itself;
+ *	presumably the caller either holds the region lock or accepts a racy
+ *	dump -- confirm.
+ *
+ * PUBLIC: void __lock_dump_region __P((DB_LOCKTAB *, char *, FILE *));
+ */
+void
+__lock_dump_region(lt, area, fp)
+	DB_LOCKTAB *lt;
+	char *area;
+	FILE *fp;
+{
+	struct __db_lock *lp;
+	DB_LOCKOBJ *op;
+	DB_LOCKREGION *lrp;
+	u_int32_t flags, i, j;
+	int label;
+
+	/* Make it easy to call from the debugger. */
+	if (fp == NULL)
+		fp = stderr;
+
+	/* Translate the selector characters into LOCK_DUMP_* flags. */
+	for (flags = 0; *area != '\0'; ++area)
+		switch (*area) {
+		case 'A':
+			LF_SET(LOCK_DUMP_ALL);
+			break;
+		case 'c':
+			LF_SET(LOCK_DUMP_CONF);
+			break;
+		case 'f':
+			LF_SET(LOCK_DUMP_FREE);
+			break;
+		case 'l':
+			LF_SET(LOCK_DUMP_LOCKERS);
+			break;
+		case 'm':
+			LF_SET(LOCK_DUMP_MEM);
+			break;
+		case 'o':
+			LF_SET(LOCK_DUMP_OBJECTS);
+			break;
+		default:
+			/* Unknown selectors are ignored. */
+			break;
+		}
+
+	lrp = lt->region;
+
+	fprintf(fp, "%s\nLock region parameters\n", DB_LINE);
+	fprintf(fp, "%s: %lu, %s: %lu, %s: %lu, %s: %lu\n%s: %lu, %s: %lu\n",
+	    "table size", (u_long)lrp->table_size,
+	    "hash_off", (u_long)lrp->hash_off,
+	    "increment", (u_long)lrp->increment,
+	    "mem_off", (u_long)lrp->mem_off,
+	    "mem_bytes", (u_long)lrp->mem_bytes,
+	    "need_dd", (u_long)lrp->need_dd);
+
+	/* The conflict matrix is an nmodes x nmodes array stored by row. */
+	if (LF_ISSET(LOCK_DUMP_CONF)) {
+		fprintf(fp, "\n%s\nConflict matrix\n", DB_LINE);
+		for (i = 0; i < lrp->nmodes; i++) {
+			for (j = 0; j < lrp->nmodes; j++)
+				fprintf(fp, "%lu\t",
+				    (u_long)lt->conflicts[i * lrp->nmodes + j]);
+			fprintf(fp, "\n");
+		}
+	}
+
+	/*
+	 * Lockers and objects share the hash table.  Walk every bucket and
+	 * display the entries of the requested types; the bucket heading is
+	 * printed once, just before the first entry displayed from it.
+	 */
+	if (LF_ISSET(LOCK_DUMP_LOCKERS | LOCK_DUMP_OBJECTS)) {
+		fprintf(fp, "%s\nLock hash buckets\n", DB_LINE);
+		for (i = 0; i < lrp->table_size; i++) {
+			label = 1;
+			for (op = SH_TAILQ_FIRST(&lt->hashtab[i], __db_lockobj);
+			    op != NULL;
+			    op = SH_TAILQ_NEXT(op, links, __db_lockobj)) {
+				if (LF_ISSET(LOCK_DUMP_LOCKERS) &&
+				    op->type == DB_LOCK_LOCKER) {
+					if (label) {
+						fprintf(fp,
+						    "Bucket %lu:\n", (u_long)i);
+						label = 0;
+					}
+					__lock_dump_locker(lt, op, fp);
+				}
+				if (LF_ISSET(LOCK_DUMP_OBJECTS) &&
+				    op->type == DB_LOCK_OBJTYPE) {
+					if (label) {
+						fprintf(fp,
+						    "Bucket %lu:\n", (u_long)i);
+						label = 0;
+					}
+					__lock_dump_object(lt, op, fp);
+				}
+			}
+		}
+	}
+
+	/*
+	 * Addresses are printed through u_long and %lx: a pointer need not
+	 * fit in a u_int, and casting it to one would drop the upper bits.
+	 */
+	if (LF_ISSET(LOCK_DUMP_FREE)) {
+		fprintf(fp, "%s\nLock free list\n", DB_LINE);
+		for (lp = SH_TAILQ_FIRST(&lrp->free_locks, __db_lock);
+		    lp != NULL;
+		    lp = SH_TAILQ_NEXT(lp, links, __db_lock))
+			fprintf(fp, "0x%lx: %lu\t%lu\t%s\t0x%lx\n", (u_long)lp,
+			    (u_long)lp->holder, (u_long)lp->mode,
+			    __lock_dump_status(lp->status), (u_long)lp->obj);
+
+		fprintf(fp, "%s\nObject free list\n", DB_LINE);
+		for (op = SH_TAILQ_FIRST(&lrp->free_objs, __db_lockobj);
+		    op != NULL;
+		    op = SH_TAILQ_NEXT(op, links, __db_lockobj))
+			fprintf(fp, "0x%lx\n", (u_long)op);
+	}
+
+	if (LF_ISSET(LOCK_DUMP_MEM))
+		__db_shalloc_dump(lt->mem, fp);
+}
+
+/*
+ * __lock_dump_locker --
+ *	Display one locker: "L" followed by the locker id in hex, then one
+ *	__lock_printlock call for each lock on the locker's heldby list.  A
+ *	locker holding nothing gets just the id and a newline.
+ *
+ *	NOTE(review): __lock_printlock is not handed fp, so the individual
+ *	lock lines presumably go wherever that function writes -- confirm.
+ */
+static void
+__lock_dump_locker(lt, op, fp)
+	DB_LOCKTAB *lt;
+	DB_LOCKOBJ *op;
+	FILE *fp;
+{
+	struct __db_lock *lp;
+	u_int32_t id;
+
+	/* The object's data is the locker id; copy it out before use. */
+	memcpy(&id, SH_DBT_PTR(&op->lockobj), sizeof(id));
+	fprintf(fp, "L %lx", (u_long)id);
+
+	lp = SH_LIST_FIRST(&op->heldby, __db_lock);
+	if (lp == NULL)
+		fprintf(fp, "\n");
+	while (lp != NULL) {
+		__lock_printlock(lt, lp, 0);
+		lp = SH_LIST_NEXT(lp, locker_links, __db_lock);
+	}
+}
+
+/*
+ * __lock_dump_object --
+ *	Display one lock object: the bytes of the object itself, then the
+ *	locks on its holders list ("H:") and, if there are any, the locks on
+ *	its waiters list ("W:").
+ *
+ *	Printable bytes are written as characters, all others as a backslash
+ *	followed by the byte's value in octal.
+ *
+ *	NOTE(review): __lock_printlock is not handed fp, so the individual
+ *	lock lines presumably go wherever that function writes -- confirm.
+ */
+static void
+__lock_dump_object(lt, op, fp)
+	DB_LOCKTAB *lt;
+	DB_LOCKOBJ *op;
+	FILE *fp;
+{
+	struct __db_lock *lp;
+	u_int32_t n;
+	u_int8_t *data;
+	u_int ch;
+
+	data = SH_DBT_PTR(&op->lockobj);
+	for (n = 0; n < op->lockobj.size; ++n) {
+		ch = data[n];
+		if (isprint(ch))
+			fprintf(fp, "%c", ch);
+		else
+			fprintf(fp, "\\%o", ch);
+	}
+	fprintf(fp, "\n");
+
+	fprintf(fp, "H:");
+	lp = SH_TAILQ_FIRST(&op->holders, __db_lock);
+	while (lp != NULL) {
+		__lock_printlock(lt, lp, 0);
+		lp = SH_TAILQ_NEXT(lp, links, __db_lock);
+	}
+
+	/* The waiters heading only appears when somebody is waiting. */
+	lp = SH_TAILQ_FIRST(&op->waiters, __db_lock);
+	if (lp == NULL)
+		return;
+
+	fprintf(fp, "\nW:");
+	do {
+		__lock_printlock(lt, lp, 0);
+		lp = SH_TAILQ_NEXT(lp, links, __db_lock);
+	} while (lp != NULL);
+}
+
+/*
+ * __lock_dump_status --
+ *	Map a lock status value to a short name for display.  Values that
+ *	match none of the DB_LSTAT_* cases below yield "unknown status".
+ */
+static const char *
+__lock_dump_status(status)
+	db_status_t status;
+{
+	const char *name;
+
+	name = "unknown status";
+	switch (status) {
+	case DB_LSTAT_ABORTED:
+		name = "aborted";
+		break;
+	case DB_LSTAT_ERR:
+		name = "err";
+		break;
+	case DB_LSTAT_FREE:
+		name = "free";
+		break;
+	case DB_LSTAT_HELD:
+		name = "held";
+		break;
+	case DB_LSTAT_NOGRANT:
+		name = "nogrant";
+		break;
+	case DB_LSTAT_PENDING:
+		name = "pending";
+		break;
+	case DB_LSTAT_WAITING:
+		name = "waiting";
+		break;
+	}
+	return (name);
+}
diff --git a/db2/lock/lock_util.c b/db2/lock/lock_util.c
index 6c1e30f27c..7274a50422 100644
--- a/db2/lock/lock_util.c
+++ b/db2/lock/lock_util.c
@@ -1,25 +1,20 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)lock_util.c	10.5 (Sleepycat) 1/8/98";
+static const char sccsid[] = "@(#)lock_util.c	10.9 (Sleepycat) 4/26/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
 #include <sys/types.h>
 
-#include <fcntl.h>
-#include <stddef.h>
-#include <stdio.h>
-#include <stdlib.h>
 #include <string.h>
-#include <unistd.h>
 #endif
 
 #include "db_int.h"
@@ -30,11 +25,13 @@ static const char sccsid[] = "@(#)lock_util.c	10.5 (Sleepycat) 1/8/98";
 #include "lock.h"
 
 /*
- * This function is used to compare a DBT that is about to be entered
- * into a hash table with an object already in the hash table.  Note
- * that it just returns true on equal and 0 on not-equal.  Therefore this
- * cannot be used as a sort function; its purpose is to be used as a
- * hash comparison function.
+ * __lock_cmp --
+ *	This function is used to compare a DBT that is about to be entered
+ *	into a hash table with an object already in the hash table.  Note
+ *	that it just returns true on equal and 0 on not-equal.  Therefore
+ *	this function cannot be used as a sort function; its purpose is to
+ *	be used as a hash comparison function.
+ *
  * PUBLIC: int __lock_cmp __P((const DBT *, DB_LOCKOBJ *));
  */
 int
@@ -46,6 +43,7 @@ __lock_cmp(dbt, lock_obj)
 
 	if (lock_obj->type != DB_LOCK_OBJTYPE)
 		return (0);
+
 	obj_data = SH_DBT_PTR(&lock_obj->lockobj);
 	return (dbt->size == lock_obj->lockobj.size &&
 		memcmp(dbt->data, obj_data, dbt->size) == 0);
@@ -69,35 +67,86 @@ __lock_locker_cmp(locker, lock_obj)
 }
 
 /*
- * PUBLIC: int __lock_ohash __P((const DBT *));
+ * The next two functions are the hash functions used to store objects in the
+ * lock hash table.  They are hashing the same items, but one (__lock_ohash)
+ * takes a DBT (used for hashing a parameter passed from the user) and the
+ * other (__lock_lhash) takes a DB_LOCKOBJ (used for hashing something that is
+ * already in the lock manager).  In both cases, we have a special check to
+ * fast path the case where we think we are doing a hash on a DB page/fileid
+ * pair.  If the size is right, then we do the fast hash.
+ *
+ * We know that DB uses struct __db_ilock for its lock objects.  The first
+ * four bytes are the 4-byte page number and the next DB_FILE_ID_LEN bytes
+ * are a unique file id, where the first 4 bytes on UNIX systems are the file
+ * inode number, and the first 4 bytes on Windows systems are the FileIndexLow
+ * bytes.  So, we use the XOR of the page number and the first four bytes of
+ * the file id to produce a 32-bit hash value.
+ *
+ * We have no particular reason to believe that this algorithm will produce
+ * a good hash, but we want a fast hash more than we want a good one, when
+ * we're coming through this code path.
  */
-int
-__lock_ohash(dbt)
-	const DBT *dbt;
-{
-	return (__ham_func5(dbt->data, dbt->size));
+#define FAST_HASH(P) {			\
+	u_int32_t __h;			\
+	u_int8_t *__cp, *__hp;		\
+	__hp = (u_int8_t *)&__h;	\
+	__cp = (u_int8_t *)(P);		\
+	__hp[0] = __cp[0] ^ __cp[4];	\
+	__hp[1] = __cp[1] ^ __cp[5];	\
+	__hp[2] = __cp[2] ^ __cp[6];	\
+	__hp[3] = __cp[3] ^ __cp[7];	\
+	return (__h);			\
 }
 
 /*
- * PUBLIC: u_int32_t __lock_locker_hash __P((u_int32_t));
+ * __lock_ohash --
+ *
+ * PUBLIC: u_int32_t __lock_ohash __P((const DBT *));
  */
 u_int32_t
-__lock_locker_hash(locker)
-	u_int32_t locker;
+__lock_ohash(dbt)
+	const DBT *dbt;
 {
-	return (__ham_func5(&locker, sizeof(locker)));
+	if (dbt->size == sizeof(struct __db_ilock))
+		FAST_HASH(dbt->data);
+
+	return (__ham_func5(dbt->data, dbt->size));
 }
 
 /*
+ * __lock_lhash --
+ *
  * PUBLIC: u_int32_t __lock_lhash __P((DB_LOCKOBJ *));
  */
 u_int32_t
 __lock_lhash(lock_obj)
 	DB_LOCKOBJ *lock_obj;
 {
+	u_int32_t tmp;
 	void *obj_data;
 
 	obj_data = SH_DBT_PTR(&lock_obj->lockobj);
+	if (lock_obj->type == DB_LOCK_LOCKER) {
+		memcpy(&tmp, obj_data, sizeof(u_int32_t));
+		return (tmp);
+	}
+
+	if (lock_obj->lockobj.size == sizeof(struct __db_ilock))
+		FAST_HASH(obj_data);
+
 	return (__ham_func5(obj_data, lock_obj->lockobj.size));
 }
 
+/*
+ * __lock_locker_hash --
+ *	Hash function for entering lockers into the hash table.  Since these
+ *	are simply 32-bit unsigned integers, just return the locker value.
+ *
+ *	This matches __lock_lhash, which returns the stored 32-bit value
+ *	unchanged for objects of type DB_LOCK_LOCKER.
+ *
+ * PUBLIC: u_int32_t __lock_locker_hash __P((u_int32_t));
+ */
+u_int32_t
+__lock_locker_hash(locker)
+	u_int32_t locker;
+{
+	/* Identity hash: the locker id is returned unchanged. */
+	return (locker);
+}
diff --git a/db2/log/log.c b/db2/log/log.c
index 8013d42aef..d642c9f9ef 100644
--- a/db2/log/log.c
+++ b/db2/log/log.c
@@ -1,21 +1,19 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)log.c	10.39 (Sleepycat) 1/17/98";
+static const char sccsid[] = "@(#)log.c	10.54 (Sleepycat) 5/31/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
 #include <sys/types.h>
-#include <sys/stat.h>
 
 #include <errno.h>
-#include <fcntl.h>
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
@@ -23,7 +21,6 @@ static const char sccsid[] = "@(#)log.c	10.39 (Sleepycat) 1/17/98";
 
 #include "db_int.h"
 #include "shqueue.h"
-#include "db_shash.h"
 #include "log.h"
 #include "db_dispatch.h"
 #include "txn_auto.h"
@@ -38,15 +35,14 @@ static int __log_recover __P((DB_LOG *));
 int
 log_open(path, flags, mode, dbenv, lpp)
 	const char *path;
-	int flags;
+	u_int32_t flags;
 	int mode;
 	DB_ENV *dbenv;
 	DB_LOG **lpp;
 {
 	DB_LOG *dblp;
 	LOG *lp;
-	size_t len;
-	int fd, newregion, ret, retry_cnt;
+	int ret;
 
 	/* Validate arguments. */
 #ifdef HAVE_SPINLOCKS
@@ -57,22 +53,13 @@ log_open(path, flags, mode, dbenv, lpp)
 	if ((ret = __db_fchk(dbenv, "log_open", flags, OKFLAGS)) != 0)
 		return (ret);
 
-	/*
-	 * We store 4-byte offsets into the file, so the maximum file
-	 * size can't be larger than that.
-	 */
-	if (dbenv != NULL && dbenv->lg_max > UINT32_T_MAX) {
-		__db_err(dbenv, "log_open: maximum file size too large");
-		return (EINVAL);
-	}
-
 	/* Create and initialize the DB_LOG structure. */
 	if ((dblp = (DB_LOG *)__db_calloc(1, sizeof(DB_LOG))) == NULL)
 		return (ENOMEM);
 
 	if (path != NULL && (dblp->dir = __db_strdup(path)) == NULL) {
-		__db_free(dblp);
-		return (ENOMEM);
+		ret = ENOMEM;
+		goto err;
 	}
 
 	dblp->dbenv = dbenv;
@@ -85,102 +72,87 @@ log_open(path, flags, mode, dbenv, lpp)
 	 * file names there.  Make it fairly large so that we don't have to
 	 * grow it.
 	 */
-	len = 30 * 1024;
+#define	DEF_LOG_SIZE	(30 * 1024)
 
 	/* Map in the region. */
-	retry_cnt = newregion = 0;
-retry:	if (LF_ISSET(DB_CREATE)) {
-		ret = __db_rcreate(dbenv, DB_APP_LOG, path,
-		    DB_DEFAULT_LOG_FILE, mode, len, 0, &fd, &dblp->maddr);
-		if (ret == 0) {
-			/* Put the LOG structure first in the region. */
-			lp = dblp->maddr;
-
-			/* Initialize the rest of the region as free space. */
-			dblp->addr = (u_int8_t *)dblp->maddr + sizeof(LOG);
-			__db_shalloc_init(dblp->addr, len - sizeof(LOG));
-
-			/* Initialize the LOG structure. */
-			lp->persist.lg_max = dbenv == NULL ? 0 : dbenv->lg_max;
-			if (lp->persist.lg_max == 0)
-				lp->persist.lg_max = DEFAULT_MAX;
-			lp->persist.magic = DB_LOGMAGIC;
-			lp->persist.version = DB_LOGVERSION;
-			lp->persist.mode = mode;
-			SH_TAILQ_INIT(&lp->fq);
-
-			/* Initialize LOG LSNs. */
-			lp->lsn.file = 1;
-			lp->lsn.offset = 0;
-
-			newregion = 1;
-		} else if (ret != EEXIST)
+	dblp->reginfo.dbenv = dbenv;
+	dblp->reginfo.appname = DB_APP_LOG;
+	if (path == NULL)
+		dblp->reginfo.path = NULL;
+	else
+		if ((dblp->reginfo.path = __db_strdup(path)) == NULL)
 			goto err;
-	}
-
-	/* If we didn't or couldn't create the region, try and join it. */
-	if (!newregion &&
-	    (ret = __db_ropen(dbenv, DB_APP_LOG,
-	    path, DB_DEFAULT_LOG_FILE, 0, &fd, &dblp->maddr)) != 0) {
-		/*
-		 * If we fail because the file isn't available, wait a
-		 * second and try again.
-		 */
-		if (ret == EAGAIN && ++retry_cnt < 3) {
-			(void)__db_sleep(1, 0);
-			goto retry;
-		}
+	dblp->reginfo.file = DB_DEFAULT_LOG_FILE;
+	dblp->reginfo.mode = mode;
+	dblp->reginfo.size = DEF_LOG_SIZE;
+	dblp->reginfo.dbflags = flags;
+	dblp->reginfo.flags = REGION_SIZEDEF;
+	if ((ret = __db_rattach(&dblp->reginfo)) != 0)
 		goto err;
-	}
 
-	/* Set up the common information. */
-	dblp->lp = dblp->maddr;
-	dblp->addr = (u_int8_t *)dblp->maddr + sizeof(LOG);
-	dblp->fd = fd;
+	/*
+	 * The LOG structure is first in the region, the rest of the region
+	 * is free space.
+	 */
+	dblp->lp = dblp->reginfo.addr;
+	dblp->addr = (u_int8_t *)dblp->lp + sizeof(LOG);
+
+	/* Initialize a created region. */
+	if (F_ISSET(&dblp->reginfo, REGION_CREATED)) {
+		__db_shalloc_init(dblp->addr, DEF_LOG_SIZE - sizeof(LOG));
+
+		/* Initialize the LOG structure. */
+		lp = dblp->lp;
+		lp->persist.lg_max = dbenv == NULL ? 0 : dbenv->lg_max;
+		if (lp->persist.lg_max == 0)
+			lp->persist.lg_max = DEFAULT_MAX;
+		lp->persist.magic = DB_LOGMAGIC;
+		lp->persist.version = DB_LOGVERSION;
+		lp->persist.mode = mode;
+		SH_TAILQ_INIT(&lp->fq);
+
+		/* Initialize LOG LSNs. */
+		lp->lsn.file = 1;
+		lp->lsn.offset = 0;
+	}
 
-	/* Initialize thread information. */
+	/* Initialize thread information, mutex. */
 	if (LF_ISSET(DB_THREAD)) {
 		F_SET(dblp, DB_AM_THREAD);
-
-		if (!newregion)
-			LOCK_LOGREGION(dblp);
 		if ((ret = __db_shalloc(dblp->addr,
-		    sizeof(db_mutex_t), MUTEX_ALIGNMENT, &dblp->mutexp)) == 0)
-			(void)__db_mutex_init(dblp->mutexp, -1);
-		if (!newregion)
-			UNLOCK_LOGREGION(dblp);
-		if (ret != 0) {
-			(void)log_close(dblp);
-			if (newregion)
-				(void)log_unlink(path, 1, dbenv);
-			return (ret);
-		}
+		    sizeof(db_mutex_t), MUTEX_ALIGNMENT, &dblp->mutexp)) != 0)
+			goto err;
+		(void)__db_mutex_init(dblp->mutexp, -1);
 	}
 
 	/*
-	 * If doing recovery, try and recover any previous log files
-	 * before releasing the lock.
+	 * If doing recovery, try and recover any previous log files before
+	 * releasing the lock.
 	 */
-	if (newregion) {
-		ret = __log_recover(dblp);
-		UNLOCK_LOGREGION(dblp);
+	if (F_ISSET(&dblp->reginfo, REGION_CREATED) &&
+	    (ret = __log_recover(dblp)) != 0)
+		goto err;
 
-		if (ret != 0) {
-			(void)log_close(dblp);
-			(void)log_unlink(path, 1, dbenv);
-			return (ret);
-		}
-	}
+	UNLOCK_LOGREGION(dblp);
 	*lpp = dblp;
 	return (0);
 
-err:	/*
-	 * We never get here with an allocated thread-mutex, so we do
-	 * not have to worry about freeing it.
-	 */
-	FREE(dblp, sizeof(DB_LOG));
-	return (ret);
+err:	if (dblp->reginfo.addr != NULL) {
+		if (dblp->mutexp != NULL)
+			__db_shalloc_free(dblp->addr, dblp->mutexp);
+
+		UNLOCK_LOGREGION(dblp);
+		(void)__db_rdetach(&dblp->reginfo);
+		if (F_ISSET(&dblp->reginfo, REGION_CREATED))
+			(void)log_unlink(path, 1, dbenv);
+	}
 
+	if (dblp->reginfo.path != NULL)
+		FREES(dblp->reginfo.path);
+	if (dblp->dir != NULL)
+		FREES(dblp->dir);
+	FREE(dblp, sizeof(*dblp));
+	return (ret);
 }
 
 /*
@@ -234,7 +206,7 @@ __log_recover(dblp)
 			continue;
 		memcpy(&chk, dbt.data, sizeof(u_int32_t));
 		if (chk == DB_txn_ckp) {
-			lp->c_lsn = lsn;
+			lp->chkpt_lsn = lsn;
 			found_checkpoint = 1;
 		}
 	}
@@ -273,7 +245,7 @@ __log_recover(dblp)
 				continue;
 			memcpy(&chk, dbt.data, sizeof(u_int32_t));
 			if (chk == DB_txn_ckp) {
-				lp->c_lsn = lsn;
+				lp->chkpt_lsn = lsn;
 				found_checkpoint = 1;
 			}
 		}
@@ -281,7 +253,7 @@ __log_recover(dblp)
 
 	/* If we never find a checkpoint, that's okay, just 0 it out. */
 	if (!found_checkpoint)
-		ZERO_LSN(lp->c_lsn);
+		ZERO_LSN(lp->chkpt_lsn);
 
 	__db_err(dblp->dbenv,
 	    "Recovering the log: last valid LSN: file: %lu offset %lu",
@@ -380,7 +352,7 @@ __log_valid(dblp, lp, cnt)
 	if ((ret = __db_open(p,
 	    DB_RDONLY | DB_SEQUENTIAL,
 	    DB_RDONLY | DB_SEQUENTIAL, 0, &fd)) != 0 ||
-	    (ret = __db_seek(fd, 0, 0, sizeof(HDR), SEEK_SET)) != 0 ||
+	    (ret = __db_seek(fd, 0, 0, sizeof(HDR), 0, SEEK_SET)) != 0 ||
 	    (ret = __db_read(fd, &persist, sizeof(LOGP), &nw)) != 0 ||
 	    nw != sizeof(LOGP)) {
 		if (ret == 0)
@@ -429,8 +401,6 @@ log_close(dblp)
 {
 	int ret, t_ret;
 
-	ret = 0;
-
 	/* Discard the per-thread pointer. */
 	if (dblp->mutexp != NULL) {
 		LOCK_LOGREGION(dblp);
@@ -439,9 +409,7 @@ log_close(dblp)
 	}
 
 	/* Close the region. */
-	if ((t_ret =
-	    __db_rclose(dblp->dbenv, dblp->fd, dblp->maddr)) != 0 && ret == 0)
-		ret = t_ret;
+	ret = __db_rdetach(&dblp->reginfo);
 
 	/* Close open files, release allocated memory. */
 	if (dblp->lfd != -1 && (t_ret = __db_close(dblp->lfd)) != 0 && ret == 0)
@@ -456,8 +424,9 @@ log_close(dblp)
 	if (dblp->dir != NULL)
 		FREES(dblp->dir);
 
-	/* Free the structure. */
-	FREE(dblp, sizeof(DB_LOG));
+	if (dblp->reginfo.path != NULL)
+		FREES(dblp->reginfo.path);
+	FREE(dblp, sizeof(*dblp));
 
 	return (ret);
 }
@@ -472,8 +441,19 @@ log_unlink(path, force, dbenv)
 	int force;
 	DB_ENV *dbenv;
 {
-	return (__db_runlink(dbenv,
-	    DB_APP_LOG, path, DB_DEFAULT_LOG_FILE, force));
+	REGINFO reginfo;
+	int ret;
+
+	memset(&reginfo, 0, sizeof(reginfo));
+	reginfo.dbenv = dbenv;
+	reginfo.appname = DB_APP_LOG;
+	if (path != NULL && (reginfo.path = __db_strdup(path)) == NULL)
+		return (ENOMEM);
+	reginfo.file = DB_DEFAULT_LOG_FILE;
+	ret = __db_runlink(&reginfo, force);
+	if (reginfo.path != NULL)
+		FREES(reginfo.path);
+	return (ret);
 }
 
 /*
@@ -511,6 +491,9 @@ log_stat(dblp, gspp, db_malloc)
 	(*gspp)->st_cur_file = lp->lsn.file;
 	(*gspp)->st_cur_offset = lp->lsn.offset;
 
+	(*gspp)->st_refcnt = lp->rlayout.refcnt;
+	(*gspp)->st_regsize = lp->rlayout.size;
+
 	UNLOCK_LOGREGION(dblp);
 
 	return (0);
diff --git a/db2/log/log.src b/db2/log/log.src
index f3d9f32b2d..12883bd1e3 100644
--- a/db2/log/log.src
+++ b/db2/log/log.src
@@ -1,43 +1,12 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  *
- *	@(#)log.src	10.4 (Sleepycat) 1/17/98
- *
- * This is the source file used to create the logging functions for the
- * log package.  Each access method (or set of routines wishing to register
- * record types with the transaction system) should have a file like this.
- * Each type of log record and its parameters is defined.  The basic
- * format of a record definition is:
- *
- * BEGIN	<RECORD_TYPE>
- * ARG|STRING|POINTER	<variable name>	<variable type> <printf format>
- * ...
- * END
- * ARG the argument is a simple parameter of the type *	specified.
- * DBT the argument is a DBT (db.h) containing a length and pointer.
- * PTR the argument is a pointer to the data type specified; the entire
- *     type should be logged.
- *
- * There are a set of shell scripts of the form xxx.sh that generate c
- * code and or h files to process these.  (This is probably better done
- * in a single PERL script, but for now, this works.)
- *
- * The DB recovery system requires the following three fields appear in
- * every record, and will assign them to the per-record-type structures
- * as well as making them the first parameters to the appropriate logging
- * call.
- * rectype:	record-type, identifies the structure and log/read call
- * txnid:	transaction id, a DBT in this implementation
- * prev:	the last LSN for this transaction
+ *	@(#)log.src	10.5 (Sleepycat) 4/10/98
  */
 
-/*
- * Use the argument of PREFIX as the prefix for all record types,
- * routines, id numbers, etc.
- */
 PREFIX	log
 
 /* Used for registering name/id translations at open or close. */
diff --git a/db2/log/log_archive.c b/db2/log/log_archive.c
index 91ae5abe0b..7db0cc3e36 100644
--- a/db2/log/log_archive.c
+++ b/db2/log/log_archive.c
@@ -1,14 +1,14 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1997
+ * Copyright (c) 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)log_archive.c	10.30 (Sleepycat) 1/8/98";
+static const char sccsid[] = "@(#)log_archive.c	10.37 (Sleepycat) 5/3/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -24,8 +24,8 @@ static const char sccsid[] = "@(#)log_archive.c	10.30 (Sleepycat) 1/8/98";
 #include "db_dispatch.h"
 #include "shqueue.h"
 #include "log.h"
-#include "clib_ext.h"
 #include "common_ext.h"
+#include "clib_ext.h"			/* XXX: needed for getcwd. */
 
 static int __absname __P((char *, char *, char **));
 static int __build_data __P((DB_LOG *, char *, char ***, void *(*)(size_t)));
@@ -40,7 +40,7 @@ int
 log_archive(dblp, listp, flags, db_malloc)
 	DB_LOG *dblp;
 	char ***listp;
-	int flags;
+	u_int32_t flags;
 	void *(*db_malloc) __P((size_t));
 {
 	DBT rec;
@@ -89,6 +89,11 @@ log_archive(dblp, listp, flags, db_malloc)
 		break;
 	case 0:
 		if ((ret = __log_findckp(dblp, &stable_lsn)) != 0) {
+			/*
+			 * A return of DB_NOTFOUND means that we didn't find
+			 * any records in the log (so we are not going to be
+			 * deleting any log files).
+			 */
 			if (ret != DB_NOTFOUND)
 				return (ret);
 			*listp = NULL;
@@ -269,7 +274,7 @@ lg_free:		if (F_ISSET(&rec, DB_DBT_MALLOC) && rec.data != NULL)
 
 		/* Get the real name. */
 		if ((ret = __db_appname(dblp->dbenv,
-		    DB_APP_DATA, NULL, array[last], NULL, &real_name)) != 0)
+		    DB_APP_DATA, NULL, array[last], 0, NULL, &real_name)) != 0)
 			goto err2;
 
 		/* If the file doesn't exist, ignore it. */
@@ -335,21 +340,25 @@ __absname(pref, name, newnamep)
 	char *pref, *name, **newnamep;
 {
 	size_t l_pref, l_name;
+	int isabspath;
 	char *newname;
 
-	l_pref = strlen(pref);
 	l_name = strlen(name);
+	isabspath = __db_abspath(name);
+	l_pref = isabspath ? 0 : strlen(pref);
 
 	/* Malloc space for concatenating the two. */
-	if ((newname = (char *)__db_malloc(l_pref + l_name + 2)) == NULL)
+	if ((*newnamep =
+	    newname = (char *)__db_malloc(l_pref + l_name + 2)) == NULL)
 		return (ENOMEM);
 
-	/* Build the name. */
-	memcpy(newname, pref, l_pref);
-	if (strchr(PATH_SEPARATOR, newname[l_pref - 1]) == NULL)
-		newname[l_pref++] = PATH_SEPARATOR[0];
+	/* Build the name.  If `name' is an absolute path, ignore any prefix. */
+	if (!isabspath) {
+		memcpy(newname, pref, l_pref);
+		if (strchr(PATH_SEPARATOR, newname[l_pref - 1]) == NULL)
+			newname[l_pref++] = PATH_SEPARATOR[0];
+	}
 	memcpy(newname + l_pref, name, l_name + 1);
-	*newnamep = newname;
 
 	return (0);
 }
@@ -409,5 +418,5 @@ static int
 __cmpfunc(p1, p2)
 	const void *p1, *p2;
 {
-	return (strcmp(*((char **)p1), *((char **)p2)));
+	return (strcmp(*((char * const *)p1), *((char * const *)p2)));
 }
diff --git a/db2/log/log_auto.c b/db2/log/log_auto.c
index 2fe17834c3..b17b1ffb2f 100644
--- a/db2/log/log_auto.c
+++ b/db2/log/log_auto.c
@@ -15,8 +15,6 @@
 #include "db_dispatch.h"
 #include "log.h"
 #include "db_am.h"
-#include "common_ext.h"
-
 /*
  * PUBLIC: int __log_register_log
  * PUBLIC:     __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
@@ -92,7 +90,7 @@ int __log_register_log(logp, txnid, ret_lsnp, flags,
 	bp += sizeof(id);
 	memcpy(bp, &ftype, sizeof(ftype));
 	bp += sizeof(ftype);
-#ifdef DEBUG
+#ifdef DIAGNOSTIC
 	if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size)
 		fprintf(stderr, "Error in log record length");
 #endif
@@ -108,22 +106,23 @@ int __log_register_log(logp, txnid, ret_lsnp, flags,
  * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
  */
 int
-__log_register_print(notused1, dbtp, lsnp, notused3, notused4)
+__log_register_print(notused1, dbtp, lsnp, notused2, notused3)
 	DB_LOG *notused1;
 	DBT *dbtp;
 	DB_LSN *lsnp;
-	int notused3;
-	void *notused4;
+	int notused2;
+	void *notused3;
 {
 	__log_register_args *argp;
 	u_int32_t i;
-	int c, ret;
+	u_int ch;
+	int ret;
 
 	i = 0;
-	c = 0;
+	ch = 0;
 	notused1 = NULL;
-	notused3 = 0;
-	notused4 = NULL;
+	notused2 = 0;
+	notused3 = NULL;
 
 	if ((ret = __log_register_read(dbtp->data, &argp)) != 0)
 		return (ret);
@@ -137,20 +136,20 @@ __log_register_print(notused1, dbtp, lsnp, notused3, notused4)
 	printf("\topcode: %lu\n", (u_long)argp->opcode);
 	printf("\tname: ");
 	for (i = 0; i < argp->name.size; i++) {
-		c = ((char *)argp->name.data)[i];
-		if (isprint(c) || c == 0xa)
-			putchar(c);
+		ch = ((u_int8_t *)argp->name.data)[i];
+		if (isprint(ch) || ch == 0xa)
+			putchar(ch);
 		else
-			printf("%#x ", c);
+			printf("%#x ", ch);
 	}
 	printf("\n");
 	printf("\tuid: ");
 	for (i = 0; i < argp->uid.size; i++) {
-		c = ((char *)argp->uid.data)[i];
-		if (isprint(c) || c == 0xa)
-			putchar(c);
+		ch = ((u_int8_t *)argp->uid.data)[i];
+		if (isprint(ch) || ch == 0xa)
+			putchar(ch);
 		else
-			printf("%#x ", c);
+			printf("%#x ", ch);
 	}
 	printf("\n");
 	printf("\tid: %lu\n", (u_long)argp->id);
diff --git a/db2/log/log_compare.c b/db2/log/log_compare.c
index 601b25c626..320b34af4d 100644
--- a/db2/log/log_compare.c
+++ b/db2/log/log_compare.c
@@ -1,13 +1,13 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)log_compare.c	10.2 (Sleepycat) 6/21/97";
+static const char sccsid[] = "@(#)log_compare.c	10.3 (Sleepycat) 4/10/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
diff --git a/db2/log/log_findckp.c b/db2/log/log_findckp.c
index 115a00e8aa..82bd5890e6 100644
--- a/db2/log/log_findckp.c
+++ b/db2/log/log_findckp.c
@@ -1,21 +1,20 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)log_findckp.c	10.12 (Sleepycat) 10/25/97";
+static const char sccsid[] = "@(#)log_findckp.c	10.15 (Sleepycat) 4/26/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
 #include <sys/types.h>
 
 #include <errno.h>
-#include <stdlib.h>
 #include <string.h>
 #endif
 
@@ -44,10 +43,10 @@ static const char sccsid[] = "@(#)log_findckp.c	10.12 (Sleepycat) 10/25/97";
  * We find one at 500.  This means that we can truncate the log before
  * 500 or run recovery beginning at 500.
  *
- * Returns 0 if we find a checkpoint.
+ * Returns 0 if we find a suitable checkpoint or we retrieved the
+ * first record in the log from which to start.
+ * Returns DB_NOTFOUND if there are no log records.
  * Returns errno on error.
- * Returns DB_NOTFOUND if we could not find a suitable start point and
- * we should start from the beginning.
  *
  * PUBLIC: int __log_findckp __P((DB_LOG *, DB_LSN *));
  */
@@ -70,9 +69,12 @@ __log_findckp(lp, lsnp)
 	memset(&data, 0, sizeof(data));
 	if (F_ISSET(lp, DB_AM_THREAD))
 		F_SET(&data, DB_DBT_MALLOC);
-	if ((ret = log_get(lp, &last_ckp, &data, DB_CHECKPOINT)) != 0)
-		return (ret == ENOENT ? DB_NOTFOUND : ret);
 	ZERO_LSN(ckp_lsn);
+	if ((ret = log_get(lp, &last_ckp, &data, DB_CHECKPOINT)) != 0)
+		if (ret == ENOENT)
+			goto get_first;
+		else
+			return (ret);
 
 	next_lsn = last_ckp;
 	do {
@@ -115,16 +117,12 @@ __log_findckp(lp, lsnp)
 	 * beginning of the log.
 	 */
 	if (log_compare(&last_ckp, &ckp_lsn) > 0) {
-		if ((ret = log_get(lp, &last_ckp, &data, DB_FIRST)) != 0)
+get_first:	if ((ret = log_get(lp, &last_ckp, &data, DB_FIRST)) != 0)
 			return (ret);
 		if (F_ISSET(lp, DB_AM_THREAD))
 			__db_free(data.data);
 	}
 	*lsnp = last_ckp;
 
-	if (verbose)
-		__db_err(lp->dbenv, "Rolling forward from [%lu][%lu]",
-			(u_long)last_ckp.file, (u_long)last_ckp.offset);
-
 	return (IS_ZERO_LSN(last_ckp) ? DB_NOTFOUND : 0);
 }
diff --git a/db2/log/log_get.c b/db2/log/log_get.c
index ab6f6247cb..9a055de0a6 100644
--- a/db2/log/log_get.c
+++ b/db2/log/log_get.c
@@ -1,21 +1,19 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)log_get.c	10.24 (Sleepycat) 1/17/98";
+static const char sccsid[] = "@(#)log_get.c	10.32 (Sleepycat) 5/6/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
 #include <sys/types.h>
 
 #include <errno.h>
-#include <fcntl.h>
-#include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
 #endif
@@ -36,9 +34,8 @@ log_get(dblp, alsn, dbt, flags)
 	DB_LOG *dblp;
 	DB_LSN *alsn;
 	DBT *dbt;
-	int flags;
+	u_int32_t flags;
 {
-	LOG *lp;
 	int ret;
 
 	/* Validate arguments. */
@@ -66,8 +63,6 @@ log_get(dblp, alsn, dbt, flags)
 			return (__db_ferr(dblp->dbenv, "threaded data", 1));
 	}
 
-	lp = dblp->lp;
-
 	LOCK_LOGREGION(dblp);
 
 	/*
@@ -97,14 +92,15 @@ log_get(dblp, alsn, dbt, flags)
  * __log_get --
  *	Get a log record; internal version.
  *
- * PUBLIC: int __log_get __P((DB_LOG *, DB_LSN *, DBT *, int, int));
+ * PUBLIC: int __log_get __P((DB_LOG *, DB_LSN *, DBT *, u_int32_t, int));
  */
 int
 __log_get(dblp, alsn, dbt, flags, silent)
 	DB_LOG *dblp;
 	DB_LSN *alsn;
 	DBT *dbt;
-	int flags, silent;
+	u_int32_t flags;
+	int silent;
 {
 	DB_LSN nlsn;
 	HDR hdr;
@@ -122,7 +118,7 @@ __log_get(dblp, alsn, dbt, flags, silent)
 	nlsn = dblp->c_lsn;
 	switch (flags) {
 	case DB_CHECKPOINT:
-		nlsn = lp->c_lsn;
+		nlsn = lp->chkpt_lsn;
 		if (IS_ZERO_LSN(nlsn)) {
 			__db_err(dblp->dbenv,
 	"log_get: unable to find checkpoint record: no checkpoint set.");
@@ -219,7 +215,8 @@ retry:
 	}
 
 	/* Seek to the header offset and read the header. */
-	if ((ret = __db_seek(dblp->c_fd, 0, 0, nlsn.offset, SEEK_SET)) != 0) {
+	if ((ret =
+	    __db_seek(dblp->c_fd, 0, 0, nlsn.offset, 0, SEEK_SET)) != 0) {
 		fail = "seek";
 		goto err1;
 	}
@@ -272,7 +269,13 @@ retry:
 		goto cksum;
 	}
 
-	/* Allocate temporary memory to hold the record. */
+	/*
+	 * Allocate temporary memory to hold the record.
+	 *
+	 * XXX
+	 * We're calling malloc(3) with a region locked.  This isn't
+	 * a good idea.
+	 */
 	if ((tbuf = (char *)__db_malloc(len)) == NULL) {
 		ret = ENOMEM;
 		goto err1;
diff --git a/db2/log/log_put.c b/db2/log/log_put.c
index 65a3990799..d00e7dde21 100644
--- a/db2/log/log_put.c
+++ b/db2/log/log_put.c
@@ -1,21 +1,19 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)log_put.c	10.24 (Sleepycat) 1/17/98";
+static const char sccsid[] = "@(#)log_put.c	10.35 (Sleepycat) 5/6/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
 #include <sys/types.h>
 
 #include <errno.h>
-#include <fcntl.h>
-#include <stdlib.h>
 #include <string.h>
 #include <time.h>
 #include <unistd.h>
@@ -43,18 +41,19 @@ log_put(dblp, lsn, dbt, flags)
 	DB_LOG *dblp;
 	DB_LSN *lsn;
 	const DBT *dbt;
-	int flags;
+	u_int32_t flags;
 {
 	int ret;
 
 	/* Validate arguments. */
-#define	OKFLAGS	(DB_CHECKPOINT | DB_FLUSH)
+#define	OKFLAGS	(DB_CHECKPOINT | DB_FLUSH | DB_CURLSN)
 	if (flags != 0) {
 		if ((ret =
 		    __db_fchk(dblp->dbenv, "log_put", flags, OKFLAGS)) != 0)
 			return (ret);
 		switch (flags) {
 		case DB_CHECKPOINT:
+		case DB_CURLSN:
 		case DB_FLUSH:
 		case 0:
 			break;
@@ -73,14 +72,14 @@ log_put(dblp, lsn, dbt, flags)
  * __log_put --
  *	Write a log record; internal version.
  *
- * PUBLIC: int __log_put __P((DB_LOG *, DB_LSN *, const DBT *, int));
+ * PUBLIC: int __log_put __P((DB_LOG *, DB_LSN *, const DBT *, u_int32_t));
  */
 int
 __log_put(dblp, lsn, dbt, flags)
 	DB_LOG *dblp;
 	DB_LSN *lsn;
 	const DBT *dbt;
-	int flags;
+	u_int32_t flags;
 {
 	DBT fid_dbt, t;
 	DB_LSN r_unused;
@@ -91,6 +90,17 @@ __log_put(dblp, lsn, dbt, flags)
 
 	lp = dblp->lp;
 
+	/*
+	 * If the application just wants to know where we are, fill in
+	 * the information.  Currently used by the transaction manager
+	 * to avoid writing TXN_begin records.
+	 */
+	if (LF_ISSET(DB_CURLSN)) {
+		lsn->file = lp->lsn.file;
+		lsn->offset = lp->lsn.offset;
+		return (0);
+	}
+
 	/* If this information won't fit in the file, swap files. */
 	if (lp->lsn.offset + sizeof(HDR) + dbt->size > lp->persist.lg_max) {
 		if (sizeof(HDR) +
@@ -151,7 +161,7 @@ __log_put(dblp, lsn, dbt, flags)
 	 *	Append the set of file name information into the log.
 	 */
 	if (flags == DB_CHECKPOINT) {
-		lp->c_lsn = *lsn;
+		lp->chkpt_lsn = *lsn;
 
 		for (fnp = SH_TAILQ_FIRST(&dblp->lp->fq, __fname);
 		    fnp != NULL; fnp = SH_TAILQ_NEXT(fnp, q, __fname)) {
@@ -159,7 +169,7 @@ __log_put(dblp, lsn, dbt, flags)
 			t.data = R_ADDR(dblp, fnp->name_off);
 			t.size = strlen(t.data) + 1;
 			memset(&fid_dbt, 0, sizeof(fid_dbt));
-			fid_dbt.data = R_ADDR(dblp, fnp->fileid_off);
+			fid_dbt.data = fnp->ufid;
 			fid_dbt.size = DB_FILE_ID_LEN;
 			if ((ret = __log_register_log(dblp, NULL, &r_unused, 0,
 			    LOG_CHECKPOINT, &t, &fid_dbt, fnp->id, fnp->s_type))
@@ -324,7 +334,11 @@ __log_flush(dblp, lsn)
 	 */
 	lp->s_lsn = lp->f_lsn;
 	if (!current)
-		--lp->s_lsn.offset;
+		if (lp->s_lsn.offset == 0) {
+			--lp->s_lsn.file;
+			lp->s_lsn.offset = lp->persist.lg_max;
+		} else
+			--lp->s_lsn.offset;
 
 	return (0);
 }
@@ -416,7 +430,7 @@ __log_write(dblp, addr, len)
 	 * Seek to the offset in the file (someone may have written it
 	 * since we last did).
 	 */
-	if ((ret = __db_seek(dblp->lfd, 0, 0, lp->w_off, SEEK_SET)) != 0)
+	if ((ret = __db_seek(dblp->lfd, 0, 0, lp->w_off, 0, SEEK_SET)) != 0)
 		return (ret);
 	if ((ret = __db_write(dblp->lfd, addr, len, &nw)) != 0)
 		return (ret);
@@ -461,7 +475,7 @@ log_file(dblp, lsn, namep, len)
 		return (ret);
 
 	/* Check to make sure there's enough room and copy the name. */
-	if (len < strlen(p)) {
+	if (len < strlen(p) + 1) {
 		*namep = '\0';
 		return (ENOMEM);
 	}
@@ -518,5 +532,5 @@ __log_name(dblp, filenumber, namep)
 
 	(void)snprintf(name, sizeof(name), LFNAME, filenumber);
 	return (__db_appname(dblp->dbenv,
-	    DB_APP_LOG, dblp->dir, name, NULL, namep));
+	    DB_APP_LOG, dblp->dir, name, 0, NULL, namep));
 }
diff --git a/db2/log/log_rec.c b/db2/log/log_rec.c
index 69334f8bc8..5deac46298 100644
--- a/db2/log/log_rec.c
+++ b/db2/log/log_rec.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 /*
@@ -40,16 +40,13 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)log_rec.c	10.16 (Sleepycat) 1/17/98";
+static const char sccsid[] = "@(#)log_rec.c	10.20 (Sleepycat) 4/28/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
 #include <sys/types.h>
 
 #include <errno.h>
-#include <fcntl.h>
-#include <stddef.h>
-#include <stdlib.h>
 #include <string.h>
 #endif
 
@@ -90,7 +87,7 @@ __log_register_recover(logp, dbtp, lsnp, redo, info)
 
 	if ((argp->opcode == LOG_CHECKPOINT && redo == TXN_OPENFILES) ||
 	    (argp->opcode == LOG_OPEN &&
-	    (redo == TXN_REDO || redo == TXN_OPENFILES || 
+	    (redo == TXN_REDO || redo == TXN_OPENFILES ||
 	     redo == TXN_FORWARD_ROLL)) ||
 	    (argp->opcode == LOG_CLOSE &&
 	    (redo == TXN_UNDO || redo == TXN_BACKWARD_ROLL))) {
@@ -121,6 +118,7 @@ __log_register_recover(logp, dbtp, lsnp, redo, info)
 			if (!logp->dbentry[argp->id].deleted)
 				ret = EINVAL;
 		} else if (--logp->dbentry[argp->id].refcount == 0) {
+			F_SET(logp->dbentry[argp->id].dbp, DB_AM_RECOVER);
 			ret = logp->dbentry[argp->id].dbp->close(
 			    logp->dbentry[argp->id].dbp, 0);
 			logp->dbentry[argp->id].dbp = NULL;
diff --git a/db2/log/log_register.c b/db2/log/log_register.c
index 9907d6e25a..a6fc4c1b3b 100644
--- a/db2/log/log_register.c
+++ b/db2/log/log_register.c
@@ -1,20 +1,19 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)log_register.c	10.14 (Sleepycat) 1/19/98";
+static const char sccsid[] = "@(#)log_register.c	10.18 (Sleepycat) 5/3/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
 #include <sys/types.h>
 
 #include <errno.h>
-#include <stdlib.h>
 #include <string.h>
 #endif
 
@@ -42,12 +41,12 @@ log_register(dblp, dbp, name, type, idp)
 	u_int32_t fid;
 	int inserted, ret;
 	char *fullname;
-	void *fidp, *namep;
+	void *namep;
 
 	fid = 0;
 	inserted = 0;
 	fullname = NULL;
-	fnp = fidp = namep = NULL;
+	fnp = namep = NULL;
 
 	/* Check the arguments. */
 	if (type != DB_BTREE && type != DB_HASH && type != DB_RECNO) {
@@ -57,7 +56,7 @@ log_register(dblp, dbp, name, type, idp)
 
 	/* Get the log file id. */
 	if ((ret = __db_appname(dblp->dbenv,
-	    DB_APP_DATA, NULL, name, NULL, &fullname)) != 0)
+	    DB_APP_DATA, NULL, name, 0, NULL, &fullname)) != 0)
 		return (ret);
 
 	LOCK_LOGREGION(dblp);
@@ -70,8 +69,7 @@ log_register(dblp, dbp, name, type, idp)
 	    fnp != NULL; fnp = SH_TAILQ_NEXT(fnp, q, __fname)) {
 		if (fid <= fnp->id)
 			fid = fnp->id + 1;
-		if (!memcmp(dbp->lock.fileid,
-		    R_ADDR(dblp, fnp->fileid_off), DB_FILE_ID_LEN)) {
+		if (!memcmp(dbp->lock.fileid, fnp->ufid, DB_FILE_ID_LEN)) {
 			++fnp->ref;
 			fid = fnp->id;
 			goto found;
@@ -84,15 +82,7 @@ log_register(dblp, dbp, name, type, idp)
 	fnp->ref = 1;
 	fnp->id = fid;
 	fnp->s_type = type;
-
-	if ((ret = __db_shalloc(dblp->addr, DB_FILE_ID_LEN, 0, &fidp)) != 0)
-		goto err;
-	/*
-	 * XXX Now that uids are fixed size, we can put them in the fnp
-	 * structure.
-	 */
-	fnp->fileid_off = R_OFFSET(dblp, fidp);
-	memcpy(fidp, dbp->lock.fileid, DB_FILE_ID_LEN);
+	memcpy(fnp->ufid, dbp->lock.fileid, DB_FILE_ID_LEN);
 
 	len = strlen(name) + 1;
 	if ((ret = __db_shalloc(dblp->addr, len, 0, &namep)) != 0)
@@ -126,8 +116,6 @@ err:		/*
 			SH_TAILQ_REMOVE(&dblp->lp->fq, fnp, q, __fname);
 		if (namep != NULL)
 			__db_shalloc_free(dblp->addr, namep);
-		if (fidp != NULL)
-			__db_shalloc_free(dblp->addr, fidp);
 		if (fnp != NULL)
 			__db_shalloc_free(dblp->addr, fnp);
 	}
@@ -176,7 +164,7 @@ log_unregister(dblp, fid)
 		r_name.data = R_ADDR(dblp, fnp->name_off);
 		r_name.size = strlen(r_name.data) + 1;
 		memset(&fid_dbt, 0, sizeof(fid_dbt));
-		fid_dbt.data =  R_ADDR(dblp, fnp->fileid_off);
+		fid_dbt.data = fnp->ufid;
 		fid_dbt.size = DB_FILE_ID_LEN;
 		if ((ret = __log_register_log(dblp, NULL, &r_unused,
 		    0, LOG_CLOSE, &r_name, &fid_dbt, fid, fnp->s_type)) != 0)
@@ -190,7 +178,6 @@ log_unregister(dblp, fid)
 	if (fnp->ref > 1)
 		--fnp->ref;
 	else {
-		__db_shalloc_free(dblp->addr, R_ADDR(dblp, fnp->fileid_off));
 		__db_shalloc_free(dblp->addr, R_ADDR(dblp, fnp->name_off));
 		SH_TAILQ_REMOVE(&dblp->lp->fq, fnp, q, __fname);
 		__db_shalloc_free(dblp->addr, fnp);
diff --git a/db2/mp/mp_bh.c b/db2/mp/mp_bh.c
index c23abdda24..d89f9c2ded 100644
--- a/db2/mp/mp_bh.c
+++ b/db2/mp/mp_bh.c
@@ -1,13 +1,13 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)mp_bh.c	10.28 (Sleepycat) 1/8/98";
+static const char sccsid[] = "@(#)mp_bh.c	10.38 (Sleepycat) 5/20/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -59,8 +59,10 @@ __memp_bhwrite(dbmp, mfp, bhp, restartp, wrotep)
 	    dbmfp != NULL; dbmfp = TAILQ_NEXT(dbmfp, q))
 		if (dbmfp->mfp == mfp) {
 			if (F_ISSET(dbmfp, MP_READONLY) &&
-			    __memp_upgrade(dbmp, dbmfp, mfp))
+			    __memp_upgrade(dbmp, dbmfp, mfp)) {
+				UNLOCKHANDLE(dbmp, dbmp->mutexp);
 				return (0);
+			}
 			break;
 		}
 	UNLOCKHANDLE(dbmp, dbmp->mutexp);
@@ -111,8 +113,8 @@ __memp_bhwrite(dbmp, mfp, bhp, restartp, wrotep)
 	if (F_ISSET(mfp, MP_TEMP))
 		return (0);
 
-	if (__memp_fopen(dbmp, mfp, R_ADDR(dbmp, mfp->path_off), mfp->ftype,
-	    0, 0, mfp->stat.st_pagesize, 0, NULL, NULL, 0, &dbmfp) != 0)
+	if (__memp_fopen(dbmp, mfp, R_ADDR(dbmp, mfp->path_off),
+	    0, 0, mfp->stat.st_pagesize, 0, NULL, &dbmfp) != 0)
 		return (0);
 
 found:	return (__memp_pgwrite(dbmfp, bhp, restartp, wrotep));
@@ -152,7 +154,7 @@ __memp_pgread(dbmfp, bhp, can_create)
 	ret = 0;
 	LOCKHANDLE(dbmp, dbmfp->mutexp);
 	if (dbmfp->fd == -1 || (ret =
-	    __db_seek(dbmfp->fd, pagesize, bhp->pgno, 0, SEEK_SET)) != 0) {
+	    __db_seek(dbmfp->fd, pagesize, bhp->pgno, 0, 0, SEEK_SET)) != 0) {
 		if (!can_create) {
 			if (dbmfp->fd == -1)
 				ret = EINVAL;
@@ -164,8 +166,17 @@ __memp_pgread(dbmfp, bhp, can_create)
 		}
 		UNLOCKHANDLE(dbmp, dbmfp->mutexp);
 
-		/* Clear any uninitialized data. */
-		memset(bhp->buf, 0, pagesize);
+		/* Clear the created page. */
+		if (mfp->clear_len == 0)
+			memset(bhp->buf, 0, pagesize);
+		else {
+			memset(bhp->buf, 0, mfp->clear_len);
+#ifdef DIAGNOSTIC
+			memset(bhp->buf + mfp->clear_len,
+			    0xff, pagesize - mfp->clear_len);
+#endif
+		}
+
 		goto pgin;
 	}
 
@@ -186,8 +197,16 @@ __memp_pgread(dbmfp, bhp, can_create)
 			goto err;
 		}
 
-		/* Clear any uninitialized data. */
-		memset(bhp->buf + nr, 0, pagesize - nr);
+		/*
+		 * If we didn't fail until we tried the read, don't clear the
+		 * whole page, it wouldn't be insane for a filesystem to just
+		 * always behave that way.  Else, clear any uninitialized data.
+		 */
+		if (nr == 0)
+			memset(bhp->buf, 0,
+			    mfp->clear_len == 0 ? pagesize : mfp->clear_len);
+		else
+			memset(bhp->buf + nr, 0, pagesize - nr);
 	}
 
 	/* Call any pgin function. */
@@ -308,31 +327,31 @@ __memp_pgwrite(dbmfp, bhp, restartp, wrotep)
 
 	/* Temporary files may not yet have been created. */
 	LOCKHANDLE(dbmp, dbmfp->mutexp);
-	if (dbmfp->fd == -1)
-		if ((ret = __db_appname(dbenv, DB_APP_TMP,
-		    NULL, NULL, &dbmfp->fd, NULL)) != 0 || dbmfp->fd == -1) {
-			UNLOCKHANDLE(dbmp, dbmfp->mutexp);
-			__db_err(dbenv,
-			    "unable to create temporary backing file");
-			goto err;
-		}
+	if (dbmfp->fd == -1 &&
+	    ((ret = __db_appname(dbenv, DB_APP_TMP, NULL, NULL,
+	    DB_CREATE | DB_EXCL | DB_TEMPORARY, &dbmfp->fd, NULL)) != 0 ||
+	    dbmfp->fd == -1)) {
+		UNLOCKHANDLE(dbmp, dbmfp->mutexp);
+		__db_err(dbenv, "unable to create temporary backing file");
+		goto err;
+	}
 
-	/* Write the page out. */
-	if ((ret = __db_seek(dbmfp->fd, pagesize, bhp->pgno, 0, SEEK_SET)) != 0)
+	/*
+	 * Write the page out.
+	 *
+	 * XXX
+	 * Shut the compiler up; it doesn't understand the correlation between
+	 * the failing clauses to __db_seek and __db_write and this ret != 0.
+	 */
+	COMPQUIET(fail, NULL);
+	if ((ret =
+	    __db_seek(dbmfp->fd, pagesize, bhp->pgno, 0, 0, SEEK_SET)) != 0)
 		fail = "seek";
 	else if ((ret = __db_write(dbmfp->fd, bhp->buf, pagesize, &nw)) != 0)
 		fail = "write";
 	UNLOCKHANDLE(dbmp, dbmfp->mutexp);
-	if (ret != 0) {
-		/*
-		 * XXX
-		 * Shut the compiler up; it doesn't understand the correlation
-		 * between the failing clauses to __db_lseek and __db_write and
-		 * this ret != 0.
-		 */
-		COMPQUIET(fail, NULL);
+	if (ret != 0)
 		goto syserr;
-	}
 
 	if (nw != (ssize_t)pagesize) {
 		ret = EIO;
@@ -548,7 +567,7 @@ __memp_upgrade(dbmp, dbmfp, mfp)
 	 * way we could have gotten a file descriptor of any kind.
 	 */
 	if ((ret = __db_appname(dbmp->dbenv, DB_APP_DATA,
-	    NULL, R_ADDR(dbmp, mfp->path_off), NULL, &rpath)) != 0)
+	    NULL, R_ADDR(dbmp, mfp->path_off), 0, NULL, &rpath)) != 0)
 		return (ret);
 	if (__db_open(rpath, 0, 0, 0, &fd) != 0) {
 		F_SET(dbmfp, MP_UPGRADE_FAIL);
diff --git a/db2/mp/mp_fget.c b/db2/mp/mp_fget.c
index f5955c4c6f..c8ae2e9d98 100644
--- a/db2/mp/mp_fget.c
+++ b/db2/mp/mp_fget.c
@@ -1,21 +1,19 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)mp_fget.c	10.33 (Sleepycat) 12/2/97";
+static const char sccsid[] = "@(#)mp_fget.c	10.48 (Sleepycat) 6/2/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
 #include <sys/types.h>
-#include <sys/stat.h>
 
 #include <errno.h>
-#include <stdlib.h>
 #include <string.h>
 #endif
 
@@ -25,8 +23,6 @@ static const char sccsid[] = "@(#)mp_fget.c	10.33 (Sleepycat) 12/2/97";
 #include "mp.h"
 #include "common_ext.h"
 
-int __sleep_on_every_page_get;		/* XXX: thread debugging option. */
-
 /*
  * memp_fget --
  *	Get a page from the file.
@@ -35,7 +31,7 @@ int
 memp_fget(dbmfp, pgnoaddr, flags, addrp)
 	DB_MPOOLFILE *dbmfp;
 	db_pgno_t *pgnoaddr;
-	int flags;
+	u_int32_t flags;
 	void *addrp;
 {
 	BH *bhp;
@@ -43,11 +39,12 @@ memp_fget(dbmfp, pgnoaddr, flags, addrp)
 	MPOOL *mp;
 	MPOOLFILE *mfp;
 	size_t bucket, mf_offset;
-	u_long cnt;
-	int b_incr, b_inserted, readonly_alloc, ret;
-	void *addr;
+	u_int32_t st_hsearch;
+	int b_incr, first, ret;
 
 	dbmp = dbmfp->dbmp;
+	mp = dbmp->mp;
+	mfp = dbmfp->mfp;
 
 	/*
 	 * Validate arguments.
@@ -79,32 +76,62 @@ memp_fget(dbmfp, pgnoaddr, flags, addrp)
 		}
 	}
 
-#ifdef DEBUG
+#ifdef DIAGNOSTIC
 	/*
 	 * XXX
 	 * We want to switch threads as often as possible.  Sleep every time
 	 * we get a new page to make it more likely.
 	 */
-	if (__sleep_on_every_page_get &&
+	if (DB_GLOBAL(db_pageyield) &&
 	    (__db_yield == NULL || __db_yield() != 0))
 		__db_sleep(0, 1);
 #endif
 
-	mp = dbmp->mp;
-	mfp = dbmfp->mfp;
+	/* Initialize remaining local variables. */
 	mf_offset = R_OFFSET(dbmp, mfp);
-	addr = NULL;
 	bhp = NULL;
-	b_incr = b_inserted = ret = 0;
+	st_hsearch = 0;
+	b_incr = ret = 0;
+
+	/* Determine the hash bucket where this page will live. */
+	bucket = BUCKET(mp, mf_offset, *pgnoaddr);
 
 	LOCKREGION(dbmp);
 
 	/*
-	 * If mmap'ing the file, just return a pointer.  However, if another
-	 * process has opened the file for writing since we mmap'd it, start
-	 * playing the game by their rules, i.e. everything goes through the
-	 * cache.  All pages previously returned should be safe, as long as
-	 * a locking protocol was observed.
+	 * Check for the last or last + 1 page requests.
+	 *
+	 * Examine and update the file's last_pgno value.  We don't care if
+	 * the last_pgno value immediately changes due to another thread --
+	 * at this instant in time, the value is correct.  We do increment the
+	 * current last_pgno value if the thread is asking for a new page,
+	 * however, to ensure that two threads creating pages don't get the
+	 * same one.
+	 */
+	if (LF_ISSET(DB_MPOOL_LAST | DB_MPOOL_NEW)) {
+		if (LF_ISSET(DB_MPOOL_NEW))
+			++mfp->last_pgno;
+		*pgnoaddr = mfp->last_pgno;
+		bucket = BUCKET(mp, mf_offset, mfp->last_pgno);
+
+		if (LF_ISSET(DB_MPOOL_NEW))
+			goto alloc;
+	}
+
+	/*
+	 * If mmap'ing the file and the page is not past the end of the file,
+	 * just return a pointer.
+	 *
+	 * The page may be past the end of the file, so check the page number
+	 * argument against the original length of the file.  If we previously
+	 * returned pages past the original end of the file, last_pgno will
+	 * have been updated to match the "new" end of the file, and checking
+	 * against it would return pointers past the end of the mmap'd region.
+	 *
+	 * If another process has opened the file for writing since we mmap'd
+	 * it, we will start playing the game by their rules, i.e. everything
+	 * goes through the cache.  All pages previously returned will be safe,
+	 * as long as the correct locking protocol was observed.
 	 *
 	 * XXX
 	 * We don't discard the map because we don't know when all of the
@@ -112,203 +139,180 @@ memp_fget(dbmfp, pgnoaddr, flags, addrp)
 	 * It would be possible to do so by reference counting the open
 	 * pages from the mmap, but it's unclear to me that it's worth it.
 	 */
-	if (dbmfp->addr != NULL && F_ISSET(dbmfp->mfp, MP_CAN_MMAP)) {
-		readonly_alloc = 0;
-		if (LF_ISSET(DB_MPOOL_LAST))
-			*pgnoaddr = mfp->last_pgno;
-		else {
+	if (dbmfp->addr != NULL && F_ISSET(mfp, MP_CAN_MMAP))
+		if (*pgnoaddr > mfp->orig_last_pgno) {
 			/*
 			 * !!!
-			 * Allocate a page that can never really exist.  See
-			 * the comment above about non-existent pages and the
-			 * hash access method.
+			 * See the comment above about non-existent pages and
+			 * the hash access method.
 			 */
-			if (LF_ISSET(DB_MPOOL_CREATE | DB_MPOOL_NEW))
-				readonly_alloc = 1;
-			else if (*pgnoaddr > mfp->last_pgno) {
+			if (!LF_ISSET(DB_MPOOL_CREATE)) {
 				__db_err(dbmp->dbenv,
 				    "%s: page %lu doesn't exist",
 				    __memp_fn(dbmfp), (u_long)*pgnoaddr);
 				ret = EINVAL;
 				goto err;
 			}
-		}
-		if (!readonly_alloc) {
-			addr = R_ADDR(dbmfp, *pgnoaddr * mfp->stat.st_pagesize);
-
+		} else {
+			*(void **)addrp =
+			    R_ADDR(dbmfp, *pgnoaddr * mfp->stat.st_pagesize);
 			++mp->stat.st_map;
 			++mfp->stat.st_map;
+			goto done;
+		}
 
-			goto mapret;
+	/* Search the hash chain for the page. */
+	for (bhp = SH_TAILQ_FIRST(&dbmp->htab[bucket], __bh);
+	    bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, hq, __bh)) {
+		++st_hsearch;
+		if (bhp->pgno != *pgnoaddr || bhp->mf_offset != mf_offset)
+			continue;
+
+		/* Refuse to overflow the reference count. */
+		if (bhp->ref == UINT16_T_MAX) {
+			__db_err(dbmp->dbenv,
+			    "%s: page %lu: reference count overflow",
+			    __memp_fn(dbmfp), (u_long)bhp->pgno);
+			ret = EINVAL;
+			goto err;
 		}
-	}
 
-	/* Check if requesting the last page or a new page. */
-	if (LF_ISSET(DB_MPOOL_LAST))
-		*pgnoaddr = mfp->last_pgno;
+		/*
+		 * Increment the reference count.  We may discard the region
+		 * lock as we evaluate and/or read the buffer, so we need to
+		 * ensure that it doesn't move and that its contents remain
+		 * unchanged.
+		 */
+		++bhp->ref;
+		b_incr = 1;
 
-	if (LF_ISSET(DB_MPOOL_NEW)) {
-		*pgnoaddr = mfp->last_pgno + 1;
-		goto alloc;
-	}
+		/*
+	 	 * Any buffer we find might be trouble.
+		 *
+		 * BH_LOCKED --
+		 * I/O is in progress.  Because we've incremented the buffer
+		 * reference count, we know the buffer can't move.  Unlock
+		 * the region lock, wait for the I/O to complete, and reacquire
+		 * the region.
+		 */
+		for (first = 1; F_ISSET(bhp, BH_LOCKED); first = 0) {
+			UNLOCKREGION(dbmp);
 
-	/* Check the BH hash bucket queue. */
-	bucket = BUCKET(mp, mf_offset, *pgnoaddr);
-	for (cnt = 0,
-	    bhp = SH_TAILQ_FIRST(&dbmp->htab[bucket], __bh);
-	    bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, hq, __bh)) {
-		++cnt;
-		if (bhp->pgno == *pgnoaddr && bhp->mf_offset == mf_offset) {
-			addr = bhp->buf;
-			++mp->stat.st_hash_searches;
-			if (cnt > mp->stat.st_hash_longest)
-				mp->stat.st_hash_longest = cnt;
-			mp->stat.st_hash_examined += cnt;
-			goto found;
+			/*
+			 * Explicitly yield the processor if it's not the first
+			 * pass through this loop -- if we don't, we might end
+			 * up running to the end of our CPU quantum as we will
+			 * simply be swapping between the two locks.
+			 */
+			if (!first && (__db_yield == NULL || __db_yield() != 0))
+				__db_sleep(0, 1);
+
+			LOCKBUFFER(dbmp, bhp);
+			/* Wait for I/O to finish... */
+			UNLOCKBUFFER(dbmp, bhp);
+			LOCKREGION(dbmp);
 		}
-	}
-	if (cnt != 0) {
-		++mp->stat.st_hash_searches;
-		if (cnt > mp->stat.st_hash_longest)
-			mp->stat.st_hash_longest = cnt;
-		mp->stat.st_hash_examined += cnt;
+
+		/*
+		 * BH_TRASH --
+		 * The contents of the buffer are garbage.  Shouldn't happen,
+		 * and this read is likely to fail, but might as well try.
+		 */
+		if (F_ISSET(bhp, BH_TRASH))
+			goto reread;
+
+		/*
+		 * BH_CALLPGIN --
+		 * The buffer was converted so it could be written, and the
+		 * contents need to be converted again.
+		 */
+		if (F_ISSET(bhp, BH_CALLPGIN)) {
+			if ((ret = __memp_pg(dbmfp, bhp, 1)) != 0)
+				goto err;
+			F_CLR(bhp, BH_CALLPGIN);
+		}
+
+		++mp->stat.st_cache_hit;
+		++mfp->stat.st_cache_hit;
+		*(void **)addrp = bhp->buf;
+		goto done;
 	}
 
-alloc:	/*
-	 * Allocate a new buffer header and data space, and mark the contents
-	 * as useless.
-	 */
+alloc:	/* Allocate new buffer header and data space. */
 	if ((ret = __memp_ralloc(dbmp, sizeof(BH) -
 	    sizeof(u_int8_t) + mfp->stat.st_pagesize, NULL, &bhp)) != 0)
 		goto err;
-	addr = bhp->buf;
-#ifdef DEBUG
-	if ((ALIGNTYPE)addr & (sizeof(size_t) - 1)) {
+
+#ifdef DIAGNOSTIC
+	if ((ALIGNTYPE)bhp->buf & (sizeof(size_t) - 1)) {
 		__db_err(dbmp->dbenv,
 		    "Internal error: BH data NOT size_t aligned.");
-		abort();
+		ret = EINVAL;
+		goto err;
 	}
 #endif
+	/* Initialize the BH fields. */
 	memset(bhp, 0, sizeof(BH));
 	LOCKINIT(dbmp, &bhp->mutex);
+	bhp->ref = 1;
+	bhp->pgno = *pgnoaddr;
+	bhp->mf_offset = mf_offset;
 
 	/*
 	 * Prepend the bucket header to the head of the appropriate MPOOL
 	 * bucket hash list.  Append the bucket header to the tail of the
 	 * MPOOL LRU chain.
-	 *
-	 * We have to do this before we read in the page so we can discard
-	 * our region lock without screwing up the world.
 	 */
-	bucket = BUCKET(mp, mf_offset, *pgnoaddr);
 	SH_TAILQ_INSERT_HEAD(&dbmp->htab[bucket], bhp, hq, __bh);
 	SH_TAILQ_INSERT_TAIL(&mp->bhq, bhp, q);
-	++mp->stat.st_page_clean;
-	b_inserted = 1;
-
-	/* Set the page number, and associated MPOOLFILE. */
-	bhp->mf_offset = mf_offset;
-	bhp->pgno = *pgnoaddr;
 
 	/*
-	 * If we know we created the page, zero it out and continue.
+	 * If we created the page, zero it out and continue.
 	 *
 	 * !!!
-	 * Note: DB_MPOOL_NEW deliberately doesn't call the pgin function.
+	 * Note: DB_MPOOL_NEW specifically doesn't call the pgin function.
 	 * If DB_MPOOL_CREATE is used, then the application's pgin function
 	 * has to be able to handle pages of 0's -- if it uses DB_MPOOL_NEW,
 	 * it can detect all of its page creates, and not bother.
 	 *
 	 * Otherwise, read the page into memory, optionally creating it if
 	 * DB_MPOOL_CREATE is set.
-	 *
-	 * Increment the reference count for created buffers, but importantly,
-	 * increment the reference count for buffers we're about to read so
-	 * that the buffer can't move.
 	 */
-	++bhp->ref;
-	b_incr = 1;
+	if (LF_ISSET(DB_MPOOL_NEW)) {
+		if (mfp->clear_len == 0)
+			memset(bhp->buf, 0, mfp->stat.st_pagesize);
+		else {
+			memset(bhp->buf, 0, mfp->clear_len);
+#ifdef DIAGNOSTIC
+			memset(bhp->buf + mfp->clear_len, 0xff,
+			    mfp->stat.st_pagesize - mfp->clear_len);
+#endif
+		}
 
-	if (LF_ISSET(DB_MPOOL_NEW))
-		memset(addr, 0, mfp->stat.st_pagesize);
-	else {
+		++mp->stat.st_page_create;
+		++mfp->stat.st_page_create;
+	} else {
 		/*
 		 * It's possible for the read function to fail, which means
-		 * that we fail as well.
+		 * that we fail as well.  Note, the __memp_pgread() function
+		 * discards the region lock, so the buffer must be pinned
+		 * down so that it cannot move and its contents are unchanged.
 		 */
 reread:		if ((ret = __memp_pgread(dbmfp,
-		    bhp, LF_ISSET(DB_MPOOL_CREATE | DB_MPOOL_NEW))) != 0)
-			goto err;
-
-		/*
-		 * !!!
-		 * The __memp_pgread call discarded and reacquired the region
-		 * lock.  Because the buffer reference count was incremented
-		 * before the region lock was discarded the buffer can't move
-		 * and its contents can't change.
-		 */
-		++mp->stat.st_cache_miss;
-		++mfp->stat.st_cache_miss;
-	}
-
-	if (0) {
-found:		/* Increment the reference count. */
-		if (bhp->ref == UINT16_T_MAX) {
-			__db_err(dbmp->dbenv,
-			    "%s: too many references to page %lu",
-			    __memp_fn(dbmfp), bhp->pgno);
-			ret = EINVAL;
-			goto err;
-		}
-		++bhp->ref;
-		b_incr = 1;
-
-		/*
-	 	 * Any found buffer might be trouble.
-		 *
-		 * BH_LOCKED --
-		 * I/O in progress, wait for it to finish.  Because the buffer
-		 * reference count was incremented before the region lock was
-		 * discarded we know the buffer can't move and its contents
-		 * can't change.
-		 */
-		for (cnt = 0; F_ISSET(bhp, BH_LOCKED); ++cnt) {
-			UNLOCKREGION(dbmp);
-
+		    bhp, LF_ISSET(DB_MPOOL_CREATE))) != 0) {
 			/*
-			 * Sleep so that we don't simply spin, switching locks.
-			 * (See the comment in include/mp.h.)
+			 * !!!
+			 * Discard the buffer unless another thread is waiting
+			 * on our I/O to complete.  Regardless, the header has
+			 * the BH_TRASH flag set.
 			 */
-			if (cnt != 0 &&
-			    (__db_yield == NULL || __db_yield() != 0))
-				__db_sleep(0, 1);
-
-			LOCKBUFFER(dbmp, bhp);
-			/* Waiting for I/O to finish... */
-			UNLOCKBUFFER(dbmp, bhp);
-			LOCKREGION(dbmp);
-		}
-
-		/*
-		 * BH_TRASH --
-		 * The buffer is garbage.
-		 */
-		if (F_ISSET(bhp, BH_TRASH))
-			goto reread;
-
-		/*
-		 * BH_CALLPGIN --
-		 * The buffer was written, and the contents need to be
-		 * converted again.
-		 */
-		if (F_ISSET(bhp, BH_CALLPGIN)) {
-			if ((ret = __memp_pg(dbmfp, bhp, 1)) != 0)
-				goto err;
-			F_CLR(bhp, BH_CALLPGIN);
+			if (bhp->ref == 1)
+				__memp_bhfree(dbmp, mfp, bhp, 1);
+			goto err;
 		}
 
-		++mp->stat.st_cache_hit;
-		++mfp->stat.st_cache_hit;
+		++mp->stat.st_cache_miss;
+		++mfp->stat.st_cache_miss;
 	}
 
 	/*
@@ -319,23 +323,30 @@ found:		/* Increment the reference count. */
 	if (bhp->pgno > mfp->last_pgno)
 		mfp->last_pgno = bhp->pgno;
 
-mapret:	LOCKHANDLE(dbmp, dbmfp->mutexp);
+	++mp->stat.st_page_clean;
+	*(void **)addrp = bhp->buf;
+
+done:	/* Update the chain search statistics. */
+	if (st_hsearch) {
+		++mp->stat.st_hash_searches;
+		if (st_hsearch > mp->stat.st_hash_longest)
+			mp->stat.st_hash_longest = st_hsearch;
+		mp->stat.st_hash_examined += st_hsearch;
+	}
+
+	UNLOCKREGION(dbmp);
+
+	LOCKHANDLE(dbmp, dbmfp->mutexp);
 	++dbmfp->pinref;
 	UNLOCKHANDLE(dbmp, dbmfp->mutexp);
 
-	if (0) {
-err:		/*
-		 * If no other process is already waiting on a created buffer,
-		 * go ahead and discard it, it's not useful.
-		 */
-		if (b_incr)
-			--bhp->ref;
-		if (b_inserted && bhp->ref == 0)
-			__memp_bhfree(dbmp, mfp, bhp, 1);
-	}
+	return (0);
 
+err:	/* Discard our reference. */
+	if (b_incr)
+		--bhp->ref;
 	UNLOCKREGION(dbmp);
 
-	*(void **)addrp = addr;
+	*(void **)addrp = NULL;
 	return (ret);
 }
diff --git a/db2/mp/mp_fopen.c b/db2/mp/mp_fopen.c
index 0f41122373..a4cbac8d4e 100644
--- a/db2/mp/mp_fopen.c
+++ b/db2/mp/mp_fopen.c
@@ -1,24 +1,20 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)mp_fopen.c	10.37 (Sleepycat) 1/18/98";
+static const char sccsid[] = "@(#)mp_fopen.c	10.47 (Sleepycat) 5/4/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
 #include <sys/types.h>
-#include <sys/mman.h>
-#include <sys/stat.h>
 
 #include <errno.h>
-#include <stdlib.h>
 #include <string.h>
-#include <unistd.h>
 #endif
 
 #include "db_int.h"
@@ -28,22 +24,21 @@ static const char sccsid[] = "@(#)mp_fopen.c	10.37 (Sleepycat) 1/18/98";
 #include "common_ext.h"
 
 static int __memp_mf_close __P((DB_MPOOL *, DB_MPOOLFILE *));
-static int __memp_mf_open __P((DB_MPOOL *, const char *,
-    int, size_t, db_pgno_t, int, DBT *, u_int8_t *, MPOOLFILE **));
+static int __memp_mf_open __P((DB_MPOOL *,
+    const char *, size_t, db_pgno_t, DB_MPOOL_FINFO *, MPOOLFILE **));
 
 /*
  * memp_fopen --
  *	Open a backing file for the memory pool.
  */
 int
-memp_fopen(dbmp, path, ftype,
-    flags, mode, pagesize, lsn_offset, pgcookie, fileid, retp)
+memp_fopen(dbmp, path, flags, mode, pagesize, finfop, retp)
 	DB_MPOOL *dbmp;
 	const char *path;
-	int ftype, flags, mode, lsn_offset;
+	u_int32_t flags;
+	int mode;
 	size_t pagesize;
-	DBT *pgcookie;
-	u_int8_t *fileid;
+	DB_MPOOL_FINFO *finfop;
 	DB_MPOOLFILE **retp;
 {
 	int ret;
@@ -59,31 +54,31 @@ memp_fopen(dbmp, path, ftype,
 		return (EINVAL);
 	}
 
-	return (__memp_fopen(dbmp, NULL, path, ftype,
-	    flags, mode, pagesize, lsn_offset, pgcookie, fileid, 1, retp));
+	return (__memp_fopen(dbmp,
+	    NULL, path, flags, mode, pagesize, 1, finfop, retp));
 }
 
 /*
  * __memp_fopen --
  *	Open a backing file for the memory pool; internal version.
  *
- * PUBLIC: int __memp_fopen __P((DB_MPOOL *, MPOOLFILE *, const char *, int,
- * PUBLIC:    int, int, size_t, int, DBT *, u_int8_t *, int, DB_MPOOLFILE **));
+ * PUBLIC: int __memp_fopen __P((DB_MPOOL *, MPOOLFILE *, const char *,
+ * PUBLIC:    u_int32_t, int, size_t, int, DB_MPOOL_FINFO *, DB_MPOOLFILE **));
  */
 int
-__memp_fopen(dbmp, mfp, path,
-    ftype, flags, mode, pagesize, lsn_offset, pgcookie, fileid, needlock, retp)
+__memp_fopen(dbmp, mfp, path, flags, mode, pagesize, needlock, finfop, retp)
 	DB_MPOOL *dbmp;
 	MPOOLFILE *mfp;
 	const char *path;
-	int ftype, flags, mode, lsn_offset, needlock;
+	u_int32_t flags;
+	int mode, needlock;
 	size_t pagesize;
-	DBT *pgcookie;
-	u_int8_t *fileid;
+	DB_MPOOL_FINFO *finfop;
 	DB_MPOOLFILE **retp;
 {
 	DB_ENV *dbenv;
 	DB_MPOOLFILE *dbmfp;
+	DB_MPOOL_FINFO finfo;
 	db_pgno_t last_pgno;
 	size_t size;
 	u_int32_t mbytes, bytes;
@@ -91,18 +86,34 @@ __memp_fopen(dbmp, mfp, path,
 	u_int8_t idbuf[DB_FILE_ID_LEN];
 	char *rpath;
 
-	/*
-	 * XXX
-	 * If mfp is provided, the following arguments do NOT need to be
-	 * specified:
-	 *      lsn_offset
-	 *      pgcookie
-	 *      fileid
-	 */
 	dbenv = dbmp->dbenv;
 	ret = 0;
 	rpath = NULL;
 
+	/*
+	 * If mfp is provided, we take the DB_MPOOL_FINFO information from
+	 * the mfp.  We don't bother initializing every field, because some
+	 * of them are expensive to acquire.  If no mfp is provided and the
+	 * finfop argument is NULL, we default the values.
+	 */
+	if (finfop == NULL) {
+		memset(&finfo, 0, sizeof(finfo));
+		if (mfp != NULL) {
+			finfo.ftype = mfp->ftype;
+			finfo.pgcookie = NULL;
+			finfo.fileid = NULL;
+			finfo.lsn_offset = mfp->lsn_off;
+			finfo.clear_len = mfp->clear_len;
+		} else {
+			finfo.ftype = 0;
+			finfo.pgcookie = NULL;
+			finfo.fileid = NULL;
+			finfo.lsn_offset = -1;
+			finfo.clear_len = 0;
+		}
+		finfop = &finfo;
+	}
+
 	/* Allocate and initialize the per-process structure. */
 	if ((dbmfp =
 	    (DB_MPOOLFILE *)__db_calloc(1, sizeof(DB_MPOOLFILE))) == NULL) {
@@ -126,11 +137,11 @@ __memp_fopen(dbmp, mfp, path,
 	} else {
 		/* Get the real name for this file and open it. */
 		if ((ret = __db_appname(dbenv,
-		    DB_APP_DATA, NULL, path, NULL, &rpath)) != 0)
+		    DB_APP_DATA, NULL, path, 0, NULL, &rpath)) != 0)
 			goto err;
 		if ((ret = __db_open(rpath,
-		    LF_ISSET(DB_CREATE | DB_RDONLY), DB_CREATE | DB_RDONLY,
-		    mode, &dbmfp->fd)) != 0) {
+		   LF_ISSET(DB_CREATE | DB_RDONLY),
+		   DB_CREATE | DB_RDONLY, mode, &dbmfp->fd)) != 0) {
 			__db_err(dbenv, "%s: %s", rpath, strerror(ret));
 			goto err;
 		}
@@ -156,12 +167,11 @@ __memp_fopen(dbmp, mfp, path,
 		 * don't use timestamps, otherwise there'd be no chance of any
 		 * other process joining the party.
 		 */
-		if (mfp == NULL && fileid == NULL) {
+		if (finfop->fileid == NULL) {
 			if ((ret = __db_fileid(dbenv, rpath, 0, idbuf)) != 0)
 				goto err;
-			fileid = idbuf;
+			finfop->fileid = idbuf;
 		}
-		FREES(rpath);
 	}
 
 	/*
@@ -173,8 +183,8 @@ __memp_fopen(dbmp, mfp, path,
 		LOCKREGION(dbmp);
 
 	if (mfp == NULL)
-		ret = __memp_mf_open(dbmp, path, ftype,
-		    pagesize, last_pgno, lsn_offset, pgcookie, fileid, &mfp);
+		ret = __memp_mf_open(dbmp,
+		    path, pagesize, last_pgno, finfop, &mfp);
 	else {
 		++mfp->ref;
 		ret = 0;
@@ -218,7 +228,7 @@ __memp_fopen(dbmp, mfp, path,
 			F_CLR(mfp, MP_CAN_MMAP);
 		if (path == NULL)
 			F_CLR(mfp, MP_CAN_MMAP);
-		if (ftype != 0)
+		if (finfop->ftype != 0)
 			F_CLR(mfp, MP_CAN_MMAP);
 		if (LF_ISSET(DB_NOMMAP))
 			F_CLR(mfp, MP_CAN_MMAP);
@@ -229,11 +239,14 @@ __memp_fopen(dbmp, mfp, path,
 	dbmfp->addr = NULL;
 	if (F_ISSET(mfp, MP_CAN_MMAP)) {
 		dbmfp->len = size;
-		if (__db_map(dbmfp->fd, dbmfp->len, 1, 1, &dbmfp->addr) != 0) {
+		if (__db_mapfile(rpath,
+		    dbmfp->fd, dbmfp->len, 1, &dbmfp->addr) != 0) {
 			dbmfp->addr = NULL;
 			F_CLR(mfp, MP_CAN_MMAP);
 		}
 	}
+	if (rpath != NULL)
+		FREES(rpath);
 
 	LOCKHANDLE(dbmp, dbmp->mutexp);
 	TAILQ_INSERT_TAIL(&dbmp->dbmfq, dbmfp, q);
@@ -260,15 +273,12 @@ err:	/*
  *	Open an MPOOLFILE.
  */
 static int
-__memp_mf_open(dbmp, path,
-    ftype, pagesize, last_pgno, lsn_offset, pgcookie, fileid, retp)
+__memp_mf_open(dbmp, path, pagesize, last_pgno, finfop, retp)
 	DB_MPOOL *dbmp;
 	const char *path;
-	int ftype, lsn_offset;
 	size_t pagesize;
 	db_pgno_t last_pgno;
-	DBT *pgcookie;
-	u_int8_t *fileid;
+	DB_MPOOL_FINFO *finfop;
 	MPOOLFILE **retp;
 {
 	MPOOLFILE *mfp;
@@ -286,12 +296,13 @@ __memp_mf_open(dbmp, path,
 		    mfp != NULL; mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile)) {
 			if (F_ISSET(mfp, MP_TEMP))
 				continue;
-			if (!memcmp(fileid,
+			if (!memcmp(finfop->fileid,
 			    R_ADDR(dbmp, mfp->fileid_off), DB_FILE_ID_LEN)) {
-				if (ftype != mfp->ftype ||
+				if (finfop->clear_len != mfp->clear_len ||
+				    finfop->ftype != mfp->ftype ||
 				    pagesize != mfp->stat.st_pagesize) {
 					__db_err(dbmp->dbenv,
-					    "%s: ftype or pagesize changed",
+			    "%s: ftype, clear length or pagesize changed",
 					    path);
 					return (EINVAL);
 				}
@@ -311,8 +322,9 @@ __memp_mf_open(dbmp, path,
 	/* Initialize the structure. */
 	memset(mfp, 0, sizeof(MPOOLFILE));
 	mfp->ref = 1;
-	mfp->ftype = ftype;
-	mfp->lsn_off = lsn_offset;
+	mfp->ftype = finfop->ftype;
+	mfp->lsn_off = finfop->lsn_offset;
+	mfp->clear_len = finfop->clear_len;
 
 	/*
 	 * If the user specifies DB_MPOOL_LAST or DB_MPOOL_NEW on a memp_fget,
@@ -320,7 +332,7 @@ __memp_mf_open(dbmp, path,
 	 * it away.
 	 */
 	mfp->stat.st_pagesize = pagesize;
-	mfp->last_pgno = last_pgno;
+	mfp->orig_last_pgno = mfp->last_pgno = last_pgno;
 
 	F_SET(mfp, MP_CAN_MMAP);
 	if (ISTEMPORARY)
@@ -336,19 +348,19 @@ __memp_mf_open(dbmp, path,
 		if ((ret = __memp_ralloc(dbmp,
 		    DB_FILE_ID_LEN, &mfp->fileid_off, &p)) != 0)
 			goto err;
-		memcpy(p, fileid, DB_FILE_ID_LEN);
+		memcpy(p, finfop->fileid, DB_FILE_ID_LEN);
 	}
 
 	/* Copy the page cookie into shared memory. */
-	if (pgcookie == NULL || pgcookie->size == 0) {
+	if (finfop->pgcookie == NULL || finfop->pgcookie->size == 0) {
 		mfp->pgcookie_len = 0;
 		mfp->pgcookie_off = 0;
 	} else {
 		if ((ret = __memp_ralloc(dbmp,
-		    pgcookie->size, &mfp->pgcookie_off, &p)) != 0)
+		    finfop->pgcookie->size, &mfp->pgcookie_off, &p)) != 0)
 			goto err;
-		memcpy(p, pgcookie->data, pgcookie->size);
-		mfp->pgcookie_len = pgcookie->size;
+		memcpy(p, finfop->pgcookie->data, finfop->pgcookie->size);
+		mfp->pgcookie_len = finfop->pgcookie->size;
 	}
 
 	/* Prepend the MPOOLFILE to the list of MPOOLFILE's. */
@@ -397,7 +409,7 @@ memp_fclose(dbmfp)
 
 	/* Discard any mmap information. */
 	if (dbmfp->addr != NULL &&
-	    (ret = __db_unmap(dbmfp->addr, dbmfp->len)) != 0)
+	    (ret = __db_unmapfile(dbmfp->addr, dbmfp->len)) != 0)
 		__db_err(dbmp->dbenv,
 		    "%s: %s", __memp_fn(dbmfp), strerror(ret));
 
@@ -480,13 +492,13 @@ __memp_mf_close(dbmp, dbmfp)
 	SH_TAILQ_REMOVE(&mp->mpfq, mfp, q, __mpoolfile);
 
 	/* Free the space. */
-	__db_shalloc_free(dbmp->addr, mfp);
 	if (mfp->path_off != 0)
 		__db_shalloc_free(dbmp->addr, R_ADDR(dbmp, mfp->path_off));
 	if (mfp->fileid_off != 0)
 		__db_shalloc_free(dbmp->addr, R_ADDR(dbmp, mfp->fileid_off));
 	if (mfp->pgcookie_off != 0)
 		__db_shalloc_free(dbmp->addr, R_ADDR(dbmp, mfp->pgcookie_off));
+	__db_shalloc_free(dbmp->addr, mfp);
 
 ret1:	UNLOCKREGION(dbmp);
 	return (0);
diff --git a/db2/mp/mp_fput.c b/db2/mp/mp_fput.c
index 335ee9ff16..5675493137 100644
--- a/db2/mp/mp_fput.c
+++ b/db2/mp/mp_fput.c
@@ -1,20 +1,19 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)mp_fput.c	10.17 (Sleepycat) 12/20/97";
+static const char sccsid[] = "@(#)mp_fput.c	10.22 (Sleepycat) 4/26/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
 #include <sys/types.h>
 
 #include <errno.h>
-#include <stdlib.h>
 #endif
 
 #include "db_int.h"
@@ -31,12 +30,11 @@ int
 memp_fput(dbmfp, pgaddr, flags)
 	DB_MPOOLFILE *dbmfp;
 	void *pgaddr;
-	int flags;
+	u_int32_t flags;
 {
 	BH *bhp;
 	DB_MPOOL *dbmp;
 	MPOOL *mp;
-	MPOOLFILE *mfp;
 	int wrote, ret;
 
 	dbmp = dbmfp->dbmp;
@@ -71,8 +69,9 @@ memp_fput(dbmfp, pgaddr, flags)
 
 	/*
 	 * If we're mapping the file, there's nothing to do.  Because we can
-	 * quit mapping at any time, we have to check on each buffer to see
-	 * if it's in the map region.
+	 * stop mapping the file at any time, we have to check on each buffer
+	 * to see if the address we gave the application was part of the map
+	 * region.
 	 */
 	if (dbmfp->addr != NULL && pgaddr >= dbmfp->addr &&
 	    (u_int8_t *)pgaddr <= (u_int8_t *)dbmfp->addr + dbmfp->len)
@@ -98,36 +97,33 @@ memp_fput(dbmfp, pgaddr, flags)
 		F_SET(bhp, BH_DISCARD);
 
 	/*
-	 * If more than one reference to the page, we're done.  Ignore discard
-	 * flags (for now) and leave it at its position in the LRU chain.  The
-	 * rest gets done at last reference close.
+	 * Check for a reference count that is already zero.  This can happen
+	 * if the application returns a page twice.
 	 */
-#ifdef DEBUG
 	if (bhp->ref == 0) {
-		__db_err(dbmp->dbenv,
-    "Unpinned page returned: reference count on page %lu went negative.",
-		    (u_long)bhp->pgno);
-		abort();
+		__db_err(dbmp->dbenv, "%s: page %lu: unpinned page returned",
+		    __memp_fn(dbmfp), (u_long)bhp->pgno);
+		UNLOCKREGION(dbmp);
+		return (EINVAL);
 	}
-#endif
+
+	/*
+	 * If more than one reference to the page, we're done.  Ignore the
+	 * discard flags (for now) and leave it at its position in the LRU
+	 * chain.  The rest gets done at last reference close.
+	 */
 	if (--bhp->ref > 0) {
 		UNLOCKREGION(dbmp);
 		return (0);
 	}
 
-	/* Move the buffer to the head/tail of the LRU chain. */
-	SH_TAILQ_REMOVE(&mp->bhq, bhp, q, __bh);
-	if (F_ISSET(bhp, BH_DISCARD))
-		SH_TAILQ_INSERT_HEAD(&mp->bhq, bhp, q, __bh);
-	else
-		SH_TAILQ_INSERT_TAIL(&mp->bhq, bhp, q);
-
 	/*
-	 * If this buffer is scheduled for writing because of a checkpoint,
-	 * write it now.  If we can't write it, set a flag so that the next
-	 * time the memp_sync function is called we try writing it there,
-	 * as the checkpoint application better be able to write all of the
-	 * files.
+	 * If this buffer is scheduled for writing because of a checkpoint, we
+	 * need to write it (if we marked it dirty), or update the checkpoint
+	 * counters (if we didn't mark it dirty).  If we try to write it and
+	 * can't, that's not necessarily an error, but set a flag so that the
+	 * next time the memp_sync function runs we try writing it there, as
+	 * the checkpoint application better be able to write all of the files.
 	 */
 	if (F_ISSET(bhp, BH_WRITE))
 		if (F_ISSET(bhp, BH_DIRTY)) {
@@ -137,12 +133,18 @@ memp_fput(dbmfp, pgaddr, flags)
 		} else {
 			F_CLR(bhp, BH_WRITE);
 
-			mfp = R_ADDR(dbmp, bhp->mf_offset);
-			--mfp->lsn_cnt;
-
+			--dbmfp->mfp->lsn_cnt;
 			--mp->lsn_cnt;
 		}
 
+	/* Move the buffer to the head/tail of the LRU chain. */
+	SH_TAILQ_REMOVE(&mp->bhq, bhp, q, __bh);
+	if (F_ISSET(bhp, BH_DISCARD))
+		SH_TAILQ_INSERT_HEAD(&mp->bhq, bhp, q, __bh);
+	else
+		SH_TAILQ_INSERT_TAIL(&mp->bhq, bhp, q);
+
+
 	UNLOCKREGION(dbmp);
 	return (0);
 }
diff --git a/db2/mp/mp_fset.c b/db2/mp/mp_fset.c
index 2eff7dd74c..3b352aa553 100644
--- a/db2/mp/mp_fset.c
+++ b/db2/mp/mp_fset.c
@@ -1,13 +1,13 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)mp_fset.c	10.12 (Sleepycat) 11/26/97";
+static const char sccsid[] = "@(#)mp_fset.c	10.15 (Sleepycat) 4/26/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -30,16 +30,14 @@ int
 memp_fset(dbmfp, pgaddr, flags)
 	DB_MPOOLFILE *dbmfp;
 	void *pgaddr;
-	int flags;
+	u_int32_t flags;
 {
 	BH *bhp;
 	DB_MPOOL *dbmp;
 	MPOOL *mp;
-	MPOOLFILE *mfp;
 	int ret;
 
 	dbmp = dbmfp->dbmp;
-	mfp = dbmfp->mfp;
 	mp = dbmp->mp;
 
 	/* Validate arguments. */
diff --git a/db2/mp/mp_open.c b/db2/mp/mp_open.c
index ca81f8d6d6..fc985bc521 100644
--- a/db2/mp/mp_open.c
+++ b/db2/mp/mp_open.c
@@ -1,23 +1,20 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)mp_open.c	10.16 (Sleepycat) 11/28/97";
+static const char sccsid[] = "@(#)mp_open.c	10.23 (Sleepycat) 5/3/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
 #include <sys/types.h>
 
 #include <errno.h>
-#include <fcntl.h>
-#include <stdlib.h>
 #include <string.h>
-#include <unistd.h>
 #endif
 
 #include "db_int.h"
@@ -33,13 +30,14 @@ static const char sccsid[] = "@(#)mp_open.c	10.16 (Sleepycat) 11/28/97";
 int
 memp_open(path, flags, mode, dbenv, retp)
 	const char *path;
-	int flags, mode;
+	u_int32_t flags;
+	int mode;
 	DB_ENV *dbenv;
 	DB_MPOOL **retp;
 {
 	DB_MPOOL *dbmp;
 	size_t cachesize;
-	int ret;
+	int is_private, ret;
 
 	/* Validate arguments. */
 #ifdef HAVE_SPINLOCKS
@@ -62,15 +60,16 @@ memp_open(path, flags, mode, dbenv, retp)
 	dbmp->dbenv = dbenv;
 
 	/* Decide if it's possible for anyone else to access the pool. */
-	if ((dbenv == NULL && path == NULL) || LF_ISSET(DB_MPOOL_PRIVATE))
-		F_SET(dbmp, MP_ISPRIVATE);
+	is_private =
+	    (dbenv == NULL && path == NULL) || LF_ISSET(DB_MPOOL_PRIVATE);
 
 	/*
 	 * Map in the region.  We do locking regardless, as portions of it are
 	 * implemented in common code (if we put the region in a file, that is).
 	 */
 	F_SET(dbmp, MP_LOCKREGION);
-	if ((ret = __memp_ropen(dbmp, path, cachesize, mode, flags)) != 0)
+	if ((ret = __memp_ropen(dbmp,
+	    path, cachesize, mode, is_private, LF_ISSET(DB_CREATE))) != 0)
 		goto err;
 	F_CLR(dbmp, MP_LOCKREGION);
 
@@ -79,7 +78,7 @@ memp_open(path, flags, mode, dbenv, retp)
 	 * If it's threaded, then we have to lock both the handles and the
 	 * region, and we need to allocate a mutex for that purpose.
 	 */
-	if (!F_ISSET(dbmp, MP_ISPRIVATE))
+	if (!is_private)
 		F_SET(dbmp, MP_LOCKREGION);
 	if (LF_ISSET(DB_THREAD)) {
 		F_SET(dbmp, MP_LOCKHANDLE | MP_LOCKREGION);
@@ -135,10 +134,11 @@ memp_close(dbmp)
 	}
 
 	/* Close the region. */
-	if ((t_ret = __memp_rclose(dbmp)) && ret == 0)
+	if ((t_ret = __db_rdetach(&dbmp->reginfo)) != 0 && ret == 0)
 		ret = t_ret;
 
-	/* Discard the structure. */
+	if (dbmp->reginfo.path != NULL)
+		FREES(dbmp->reginfo.path);
 	FREE(dbmp, sizeof(DB_MPOOL));
 
 	return (ret);
@@ -154,8 +154,19 @@ memp_unlink(path, force, dbenv)
 	int force;
 	DB_ENV *dbenv;
 {
-	return (__db_runlink(dbenv,
-	    DB_APP_NONE, path, DB_DEFAULT_MPOOL_FILE, force));
+	REGINFO reginfo;
+	int ret;
+
+	memset(&reginfo, 0, sizeof(reginfo));
+	reginfo.dbenv = dbenv;
+	reginfo.appname = DB_APP_NONE;
+	if (path != NULL && (reginfo.path = __db_strdup(path)) == NULL)
+		return (ENOMEM);
+	reginfo.file = DB_DEFAULT_MPOOL_FILE;
+	ret = __db_runlink(&reginfo, force);
+	if (reginfo.path != NULL)
+		FREES(reginfo.path);
+	return (ret);
 }
 
 /*
diff --git a/db2/mp/mp_pr.c b/db2/mp/mp_pr.c
index 13a6c62d35..e83e0f44fa 100644
--- a/db2/mp/mp_pr.c
+++ b/db2/mp/mp_pr.c
@@ -1,13 +1,13 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)mp_pr.c	10.21 (Sleepycat) 1/6/98";
+static const char sccsid[] = "@(#)mp_pr.c	10.26 (Sleepycat) 5/23/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -15,20 +15,20 @@ static const char sccsid[] = "@(#)mp_pr.c	10.21 (Sleepycat) 1/6/98";
 
 #include <errno.h>
 #include <stdio.h>
-#include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
 #endif
 
 #include "db_int.h"
+#include "db_page.h"
 #include "shqueue.h"
 #include "db_shash.h"
 #include "mp.h"
+#include "db_auto.h"
+#include "db_ext.h"
+#include "common_ext.h"
 
-static void __memp_pbh __P((FILE *, DB_MPOOL *, BH *, int));
-static void __memp_pdbmf __P((FILE *, DB_MPOOLFILE *, int));
-static void __memp_pmf __P((FILE *, MPOOLFILE *, int));
-static void __memp_pmp __P((FILE *, DB_MPOOL *, MPOOL *, int));
+static void __memp_pbh __P((DB_MPOOL *, BH *, size_t *, FILE *));
 
 /*
  * memp_stat --
@@ -64,6 +64,8 @@ memp_stat(dbmp, gspp, fspp, db_malloc)
 		    dbmp->mp->rlayout.lock.mutex_set_wait;
 		(*gspp)->st_region_nowait =
 		    dbmp->mp->rlayout.lock.mutex_set_nowait;
+		(*gspp)->st_refcnt = dbmp->mp->rlayout.refcnt;
+		(*gspp)->st_regsize = dbmp->mp->rlayout.size;
 
 		UNLOCKREGION(dbmp);
 	}
@@ -77,7 +79,8 @@ memp_stat(dbmp, gspp, fspp, db_malloc)
 		for (len = 0,
 		    mfp = SH_TAILQ_FIRST(&dbmp->mp->mpfq, __mpoolfile);
 		    mfp != NULL;
-		    ++len, mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile));
+		    ++len, mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile))
+			;
 
 		UNLOCKREGION(dbmp);
 
@@ -148,174 +151,118 @@ __memp_fns(dbmp, mfp)
 	return ((char *)R_ADDR(dbmp, mfp->path_off));
 }
 
+#define	FMAP_ENTRIES	200			/* Files we map. */
+
+#define	MPOOL_DUMP_HASH	0x01			/* Debug hash chains. */
+#define	MPOOL_DUMP_LRU	0x02			/* Debug LRU chains. */
+#define	MPOOL_DUMP_MEM	0x04			/* Debug region memory. */
+#define	MPOOL_DUMP_ALL	0x07			/* Debug all. */
+
+
 /*
- * __memp_debug --
+ * __memp_dump_region --
  *	Display MPOOL structures.
  *
- * PUBLIC: void __memp_debug __P((DB_MPOOL *, FILE *, int));
+ * PUBLIC: void __memp_dump_region __P((DB_MPOOL *, char *, FILE *));
  */
 void
-__memp_debug(dbmp, fp, data)
+__memp_dump_region(dbmp, area, fp)
 	DB_MPOOL *dbmp;
+	char *area;
 	FILE *fp;
-	int data;
 {
+	BH *bhp;
+	DB_HASHTAB *htabp;
 	DB_MPOOLFILE *dbmfp;
-	u_long cnt;
+	MPOOL *mp;
+	MPOOLFILE *mfp;
+	size_t bucket, fmap[FMAP_ENTRIES + 1];
+	u_int32_t flags;
+	int cnt;
 
 	/* Make it easy to call from the debugger. */
 	if (fp == NULL)
 		fp = stderr;
 
-	/* Welcome message. */
-	(void)fprintf(fp, "%s\nMpool per-process (%lu) statistics\n",
-	    DB_LINE, (u_long)getpid());
-
-	if (data)
-		(void)fprintf(fp, "    fd: %d; addr %lx; maddr %lx\n",
-		    dbmp->fd, (u_long)dbmp->addr, (u_long)dbmp->maddr);
-
-	/* Display the DB_MPOOLFILE structures. */
-	for (cnt = 0, dbmfp = TAILQ_FIRST(&dbmp->dbmfq);
-	    dbmfp != NULL; ++cnt, dbmfp = TAILQ_NEXT(dbmfp, q));
-	(void)fprintf(fp, "%lu process-local files\n", cnt);
-	for (dbmfp = TAILQ_FIRST(&dbmp->dbmfq);
-	    dbmfp != NULL; dbmfp = TAILQ_NEXT(dbmfp, q)) {
-		(void)fprintf(fp, "%s\n", __memp_fn(dbmfp));
-		__memp_pdbmf(fp, dbmfp, data);
-	}
+	for (flags = 0; *area != '\0'; ++area)
+		switch (*area) {
+		case 'A':
+			LF_SET(MPOOL_DUMP_ALL);
+			break;
+		case 'h':
+			LF_SET(MPOOL_DUMP_HASH);
+			break;
+		case 'l':
+			LF_SET(MPOOL_DUMP_LRU);
+			break;
+		case 'm':
+			LF_SET(MPOOL_DUMP_MEM);
+			break;
+		}
 
-	/* Switch to global statistics. */
-	(void)fprintf(fp, "\n%s\nMpool statistics\n", DB_LINE);
+	LOCKREGION(dbmp);
 
-	/* Display the MPOOL structure. */
-	__memp_pmp(fp, dbmp, dbmp->mp, data);
+	mp = dbmp->mp;
 
-	/* Flush in case we're debugging. */
-	(void)fflush(fp);
-}
-
-/*
- * __memp_pdbmf --
- *	Display a DB_MPOOLFILE structure.
- */
-static void
-__memp_pdbmf(fp, dbmfp, data)
-	FILE *fp;
-	DB_MPOOLFILE *dbmfp;
-	int data;
-{
-	if (!data)
-		return;
-
-	(void)fprintf(fp, "    fd: %d; %s\n",
-	    dbmfp->fd, F_ISSET(dbmfp, MP_READONLY) ? "readonly" : "read/write");
-}
-
-/*
- * __memp_pmp --
- *	Display the MPOOL structure.
- */
-static void
-__memp_pmp(fp, dbmp, mp, data)
-	FILE *fp;
-	DB_MPOOL *dbmp;
-	MPOOL *mp;
-	int data;
-{
-	BH *bhp;
-	MPOOLFILE *mfp;
-	DB_HASHTAB *htabp;
-	size_t bucket;
-	int cnt;
-	const char *sep;
-
-	(void)fprintf(fp, "references: %lu; cachesize: %lu\n",
-	    (u_long)mp->rlayout.refcnt, (u_long)mp->stat.st_cachesize);
-	(void)fprintf(fp,
-	    "    %lu pages created\n", (u_long)mp->stat.st_page_create);
-	(void)fprintf(fp,
-	    "    %lu mmap pages returned\n", (u_long)mp->stat.st_map);
-	(void)fprintf(fp, "    %lu I/O's (%lu read, %lu written)\n",
-	    (u_long)mp->stat.st_page_in + mp->stat.st_page_out,
-	    (u_long)mp->stat.st_page_in, (u_long)mp->stat.st_page_out);
-	if (mp->stat.st_cache_hit + mp->stat.st_cache_miss != 0)
-		(void)fprintf(fp,
-		    "    %.0f%% cache hit rate (%lu hit, %lu miss)\n",
-		    ((double)mp->stat.st_cache_hit /
-	    (mp->stat.st_cache_hit + mp->stat.st_cache_miss)) * 100,
-		    (u_long)mp->stat.st_cache_hit,
-		    (u_long)mp->stat.st_cache_miss);
+	/* Display MPOOL structures. */
+	(void)fprintf(fp, "%s\nPool (region addr 0x%lx, alloc addr 0x%lx)\n",
+	    DB_LINE, (u_long)dbmp->reginfo.addr, (u_long)dbmp->addr);
 
 	/* Display the MPOOLFILE structures. */
-	for (cnt = 0, mfp = SH_TAILQ_FIRST(&dbmp->mp->mpfq, __mpoolfile);
-	    mfp != NULL; ++cnt, mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile));
-	(void)fprintf(fp, "%d total files\n", cnt);
-	for (cnt = 1, mfp = SH_TAILQ_FIRST(&dbmp->mp->mpfq, __mpoolfile);
-	    mfp != NULL; ++cnt, mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile)) {
-		(void)fprintf(fp, "file %d\n", cnt);
-		__memp_pmf(fp, mfp, data);
+	cnt = 0;
+	for (mfp = SH_TAILQ_FIRST(&dbmp->mp->mpfq, __mpoolfile);
+	    mfp != NULL; mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile), ++cnt) {
+		(void)fprintf(fp, "file #%d: %s: %lu references: %s\n",
+		    cnt + 1, __memp_fns(dbmp, mfp), (u_long)mfp->ref,
+		    F_ISSET(mfp, MP_CAN_MMAP) ? "mmap" : "read/write");
+		    if (cnt < FMAP_ENTRIES)
+			fmap[cnt] = R_OFFSET(dbmp, mfp);
 	}
 
-	if (!data)
-		return;
+	for (dbmfp = TAILQ_FIRST(&dbmp->dbmfq);
+	    dbmfp != NULL; dbmfp = TAILQ_NEXT(dbmfp, q), ++cnt) {
+		(void)fprintf(fp, "file #%d: %s: fd: %d: per-process, %s\n",
+		    cnt + 1, __memp_fn(dbmfp), dbmfp->fd,
+		    F_ISSET(dbmfp, MP_READONLY) ? "readonly" : "read/write");
+		    if (cnt < FMAP_ENTRIES)
+			fmap[cnt] = R_OFFSET(dbmp, mfp);
+	}
+	if (cnt < FMAP_ENTRIES)
+		fmap[cnt] = INVALID;
+	else
+		fmap[FMAP_ENTRIES] = INVALID;
 
 	/* Display the hash table list of BH's. */
-	(void)fprintf(fp, "%s\nHASH table of BH's (%lu buckets):\n",
-	    DB_LINE, (u_long)mp->htab_buckets);
-	(void)fprintf(fp,
-	    "longest chain searched %lu\n", (u_long)mp->stat.st_hash_longest);
-	(void)fprintf(fp, "average chain searched %lu (total/calls: %lu/%lu)\n",
-	    (u_long)mp->stat.st_hash_examined /
-	    (mp->stat.st_hash_searches ? mp->stat.st_hash_searches : 1),
-	    (u_long)mp->stat.st_hash_examined,
-	    (u_long)mp->stat.st_hash_searches);
-	for (htabp = dbmp->htab,
-	    bucket = 0; bucket < mp->htab_buckets; ++htabp, ++bucket) {
-		if (SH_TAILQ_FIRST(&dbmp->htab[bucket], __bh) != NULL)
-			(void)fprintf(fp, "%lu:\n", (u_long)bucket);
-		for (bhp = SH_TAILQ_FIRST(&dbmp->htab[bucket], __bh);
-		    bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, hq, __bh))
-			__memp_pbh(fp, dbmp, bhp, data);
+	if (LF_ISSET(MPOOL_DUMP_HASH)) {
+		(void)fprintf(fp,
+	    "%s\nBH hash table (%lu hash slots)\npageno, file, ref, address\n",
+		    DB_LINE, (u_long)mp->htab_buckets);
+		for (htabp = dbmp->htab,
+		    bucket = 0; bucket < mp->htab_buckets; ++htabp, ++bucket) {
+			if (SH_TAILQ_FIRST(&dbmp->htab[bucket], __bh) != NULL)
+				(void)fprintf(fp, "%lu:\n", (u_long)bucket);
+			for (bhp = SH_TAILQ_FIRST(&dbmp->htab[bucket], __bh);
+			    bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, hq, __bh))
+				__memp_pbh(dbmp, bhp, fmap, fp);
+		}
 	}
 
 	/* Display the LRU list of BH's. */
-	(void)fprintf(fp, "LRU list of BH's (pgno/offset):");
-	for (sep = "\n    ", bhp = SH_TAILQ_FIRST(&dbmp->mp->bhq, __bh);
-	    bhp != NULL; sep = ", ", bhp = SH_TAILQ_NEXT(bhp, q, __bh))
-		(void)fprintf(fp, "%s%lu/%lu", sep,
-		    (u_long)bhp->pgno, (u_long)R_OFFSET(dbmp, bhp));
-	(void)fprintf(fp, "\n");
-}
+	if (LF_ISSET(MPOOL_DUMP_LRU)) {
+		(void)fprintf(fp, "%s\nBH LRU list\n", DB_LINE);
+		(void)fprintf(fp, "pageno, file, ref, address\n");
+		for (bhp = SH_TAILQ_FIRST(&dbmp->mp->bhq, __bh);
+		    bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, q, __bh))
+			__memp_pbh(dbmp, bhp, fmap, fp);
+	}
 
-/*
- * __memp_pmf --
- *	Display an MPOOLFILE structure.
- */
-static void
-__memp_pmf(fp, mfp, data)
-	FILE *fp;
-	MPOOLFILE *mfp;
-	int data;
-{
-	(void)fprintf(fp, "    %lu pages created\n",
-	    (u_long)mfp->stat.st_page_create);
-	(void)fprintf(fp, "    %lu I/O's (%lu read, %lu written)\n",
-	    (u_long)mfp->stat.st_page_in + mfp->stat.st_page_out,
-	    (u_long)mfp->stat.st_page_in, (u_long)mfp->stat.st_page_out);
-	if (mfp->stat.st_cache_hit + mfp->stat.st_cache_miss != 0)
-		(void)fprintf(fp,
-		    "    %.0f%% cache hit rate (%lu hit, %lu miss)\n",
-		    ((double)mfp->stat.st_cache_hit /
-		    (mfp->stat.st_cache_hit + mfp->stat.st_cache_miss)) * 100,
-		    (u_long)mfp->stat.st_cache_hit,
-		    (u_long)mfp->stat.st_cache_miss);
-	if (!data)
-		return;
-
-	(void)fprintf(fp, "    %d references; %s; pagesize: %lu\n", mfp->ref,
-	    F_ISSET(mfp, MP_CAN_MMAP) ? "mmap" : "read/write",
-	    (u_long)mfp->stat.st_pagesize);
+	if (LF_ISSET(MPOOL_DUMP_MEM))
+		__db_shalloc_dump(dbmp->addr, fp);
+
+	UNLOCKREGION(dbmp);
+
+	/* Flush in case we're debugging. */
+	(void)fflush(fp);
 }
 
 /*
@@ -323,28 +270,37 @@ __memp_pmf(fp, mfp, data)
  *	Display a BH structure.
  */
 static void
-__memp_pbh(fp, dbmp, bhp, data)
-	FILE *fp;
+__memp_pbh(dbmp, bhp, fmap, fp)
 	DB_MPOOL *dbmp;
 	BH *bhp;
-	int data;
+	size_t *fmap;
+	FILE *fp;
 {
-	const char *sep;
-
-	if (!data)
-		return;
-
-	(void)fprintf(fp, "    BH @ %lu (mf: %lu): page %lu; ref %lu",
-	    (u_long)R_OFFSET(dbmp, bhp),
-	    (u_long)bhp->mf_offset, (u_long)bhp->pgno, (u_long)bhp->ref);
-	sep = "; ";
-	if (F_ISSET(bhp, BH_DIRTY)) {
-		(void)fprintf(fp, "%sdirty", sep);
-		sep = ", ";
-	}
-	if (F_ISSET(bhp, BH_WRITE)) {
-		(void)fprintf(fp, "%schk_write", sep);
-		sep = ", ";
-	}
+	static const FN fn[] = {
+		{ BH_CALLPGIN,	"callpgin" },
+		{ BH_DIRTY,	"dirty" },
+		{ BH_DISCARD,	"discard" },
+		{ BH_LOCKED,	"locked" },
+		{ BH_TRASH,	"trash" },
+		{ BH_WRITE,	"write" },
+		{ 0 },
+	};
+	int i;
+
+	for (i = 0; i < FMAP_ENTRIES; ++i)
+		if (fmap[i] == INVALID || fmap[i] == bhp->mf_offset)
+			break;
+
+	if (fmap[i] == INVALID)
+		(void)fprintf(fp, "  %4lu, %lu, %2lu, %lu",
+		    (u_long)bhp->pgno, (u_long)bhp->mf_offset,
+		    (u_long)bhp->ref, (u_long)R_OFFSET(dbmp, bhp));
+	else
+		(void)fprintf(fp, "  %4lu,   #%d,  %2lu, %lu",
+		    (u_long)bhp->pgno, i + 1,
+		    (u_long)bhp->ref, (u_long)R_OFFSET(dbmp, bhp));
+
+	__db_prflags(bhp->flags, fn, fp);
+
 	(void)fprintf(fp, "\n");
 }
diff --git a/db2/mp/mp_region.c b/db2/mp/mp_region.c
index c20e669749..6b92fbdad4 100644
--- a/db2/mp/mp_region.c
+++ b/db2/mp/mp_region.c
@@ -1,24 +1,20 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)mp_region.c	10.18 (Sleepycat) 11/29/97";
+static const char sccsid[] = "@(#)mp_region.c	10.30 (Sleepycat) 5/31/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
 #include <sys/types.h>
-#include <sys/stat.h>
 
 #include <errno.h>
-#include <fcntl.h>
-#include <stdlib.h>
 #include <string.h>
-#include <unistd.h>
 #endif
 
 #include "db_int.h"
@@ -86,7 +82,7 @@ alloc:	if ((ret = __db_shalloc(dbmp->addr, len, MUTEX_ALIGNMENT, &p)) == 0) {
 
 		/*
 		 * Retry as soon as we've freed up sufficient space.  If we
-		 * have to coalesce of memory to satisfy the request, don't
+		 * will have to coalesce memory to satisfy the request, don't
 		 * try until it's likely (possible?) that we'll succeed.
 		 */
 		total += fsize = __db_shsizeof(bhp);
@@ -179,18 +175,19 @@ retry:	/* Find a buffer we can flush; pure LRU. */
  *	Attach to, and optionally create, the mpool region.
  *
  * PUBLIC: int __memp_ropen
- * PUBLIC:    __P((DB_MPOOL *, const char *, size_t, int, int));
+ * PUBLIC:    __P((DB_MPOOL *, const char *, size_t, int, int, u_int32_t));
  */
 int
-__memp_ropen(dbmp, path, cachesize, mode, flags)
+__memp_ropen(dbmp, path, cachesize, mode, is_private, flags)
 	DB_MPOOL *dbmp;
 	const char *path;
 	size_t cachesize;
-	int mode, flags;
+	int mode, is_private;
+	u_int32_t flags;
 {
 	MPOOL *mp;
 	size_t rlen;
-	int fd, newregion, ret, retry_cnt;
+	int defcache, ret;
 
 	/*
 	 * Unlike other DB subsystems, mpool can't simply grow the region
@@ -204,155 +201,107 @@ __memp_ropen(dbmp, path, cachesize, mode, flags)
 	 *
 	 * Up the user's cachesize by 25% to account for our overhead.
 	 */
+	defcache = 0;
 	if (cachesize < DB_CACHESIZE_MIN)
-		if (cachesize == 0)
+		if (cachesize == 0) {
+			defcache = 1;
 			cachesize = DB_CACHESIZE_DEF;
-		else
+		} else
 			cachesize = DB_CACHESIZE_MIN;
 	rlen = cachesize + cachesize / 4;
 
-	/* Map in the region. */
-	retry_cnt = newregion = 0;
-retry:	if (LF_ISSET(DB_CREATE)) {
-		/*
-		 * If it's a private mpool, use malloc, it's a lot faster than
-		 * instantiating a region.
-		 *
-		 * XXX
-		 * If we're doing locking and don't have spinlocks for this
-		 * architecture, we'd have to instantiate the file, we need
-		 * the file descriptor for locking.  However, it should not
-		 * be possible for DB_THREAD to be set if HAVE_SPINLOCKS aren't
-		 * defined.
-		 *
-		 * XXX
-		 * HP-UX won't permit mutexes to live in anything but shared
-		 * memory.  So, instantiate the shared mpool region file on
-		 * that architecture, regardless.  If this turns out to be a
-		 * performance problem, we could use anonymous memory instead.
-		 */
-#if !defined(__hppa)
-		if (F_ISSET(dbmp, MP_ISPRIVATE))
-			if ((dbmp->maddr = __db_malloc(rlen)) == NULL)
-				ret = ENOMEM;
-			else {
-				F_SET(dbmp, MP_MALLOC);
-				ret = __db_rinit(dbmp->dbenv,
-				    dbmp->maddr, 0, rlen, 0);
-			}
-		else
-#endif
-			ret = __db_rcreate(dbmp->dbenv, DB_APP_NONE, path,
-			    DB_DEFAULT_MPOOL_FILE, mode, rlen,
-			    F_ISSET(dbmp, MP_ISPRIVATE) ? DB_TEMPORARY : 0,
-			    &fd, &dbmp->maddr);
-		if (ret == 0) {
-			/* Put the MPOOL structure first in the region. */
-			mp = dbmp->maddr;
-
-			SH_TAILQ_INIT(&mp->bhq);
-			SH_TAILQ_INIT(&mp->bhfq);
-			SH_TAILQ_INIT(&mp->mpfq);
-
-			/* Initialize the rest of the region as free space. */
-			dbmp->addr = (u_int8_t *)dbmp->maddr + sizeof(MPOOL);
-			__db_shalloc_init(dbmp->addr, rlen - sizeof(MPOOL));
-
-			/*
-			 *
-			 * Pretend that the cache will be broken up into 4K
-			 * pages, and that we want to keep it under, say, 10
-			 * pages on each chain.  This means a 256MB cache will
-			 * allocate ~6500 offset pairs.
-			 */
-			mp->htab_buckets =
-			    __db_tablesize((cachesize / (4 * 1024)) / 10);
+	/*
+	 * Map in the region.
+	 *
+	 * If it's a private mpool, use malloc; it's a lot faster than
+	 * instantiating a region.
+	 */
+	dbmp->reginfo.dbenv = dbmp->dbenv;
+	dbmp->reginfo.appname = DB_APP_NONE;
+	if (path == NULL)
+		dbmp->reginfo.path = NULL;
+	else
+		if ((dbmp->reginfo.path = __db_strdup(path)) == NULL)
+			return (ENOMEM);
+	dbmp->reginfo.file = DB_DEFAULT_MPOOL_FILE;
+	dbmp->reginfo.mode = mode;
+	dbmp->reginfo.size = rlen;
+	dbmp->reginfo.dbflags = flags;
+	dbmp->reginfo.flags = 0;
+	if (defcache)
+		F_SET(&dbmp->reginfo, REGION_SIZEDEF);
 
-			/* Allocate hash table space and initialize it. */
-			if ((ret = __db_shalloc(dbmp->addr,
-			    mp->htab_buckets * sizeof(DB_HASHTAB),
-			    0, &dbmp->htab)) != 0)
-				goto err;
-			__db_hashinit(dbmp->htab, mp->htab_buckets);
-			mp->htab = R_OFFSET(dbmp, dbmp->htab);
+	/*
+	 * If we're creating a temporary region, don't use any standard
+	 * naming.
+	 */
+	if (is_private) {
+		dbmp->reginfo.appname = DB_APP_TMP;
+		dbmp->reginfo.file = NULL;
+		F_SET(&dbmp->reginfo, REGION_PRIVATE);
+	}
 
-			ZERO_LSN(mp->lsn);
-			mp->lsn_cnt = 0;
+	if ((ret = __db_rattach(&dbmp->reginfo)) != 0) {
+		if (dbmp->reginfo.path != NULL)
+			FREES(dbmp->reginfo.path);
+		return (ret);
+	}
 
-			memset(&mp->stat, 0, sizeof(mp->stat));
-			mp->stat.st_cachesize = cachesize;
+	/*
+	 * The MPOOL structure is first in the region; the rest of the region
+	 * is free space.
+	 */
+	dbmp->mp = dbmp->reginfo.addr;
+	dbmp->addr = (u_int8_t *)dbmp->mp + sizeof(MPOOL);
 
-			mp->flags = 0;
+	/* Initialize a created region. */
+	if (F_ISSET(&dbmp->reginfo, REGION_CREATED)) {
+		mp = dbmp->mp;
+		SH_TAILQ_INIT(&mp->bhq);
+		SH_TAILQ_INIT(&mp->bhfq);
+		SH_TAILQ_INIT(&mp->mpfq);
 
-			newregion = 1;
-		} else if (ret != EEXIST)
-			return (ret);
-	}
+		__db_shalloc_init(dbmp->addr, rlen - sizeof(MPOOL));
 
-	/* If we didn't or couldn't create the region, try and join it. */
-	if (!newregion &&
-	    (ret = __db_ropen(dbmp->dbenv, DB_APP_NONE,
-	    path, DB_DEFAULT_MPOOL_FILE, 0, &fd, &dbmp->maddr)) != 0) {
 		/*
-		 * If we failed because the file wasn't available, wait a
-		 * second and try again.
+		 * Assume we want to keep the hash chains with under 10 pages
+		 * on each chain.  We don't know the pagesize in advance, and
+		 * it may differ for different files.  Use a pagesize of 1K for
+		 * the calculation -- we walk these chains a lot, they should
+		 * be short.
 		 */
-		if (ret == EAGAIN && ++retry_cnt < 3) {
-			(void)__db_sleep(1, 0);
-			goto retry;
-		}
-		return (ret);
-	}
+		mp->htab_buckets =
+		    __db_tablesize((cachesize / (1 * 1024)) / 10);
 
-	/* Set up the common pointers. */
-	dbmp->mp = dbmp->maddr;
-	dbmp->addr = (u_int8_t *)dbmp->maddr + sizeof(MPOOL);
+		/* Allocate hash table space and initialize it. */
+		if ((ret = __db_shalloc(dbmp->addr,
+		    mp->htab_buckets * sizeof(DB_HASHTAB),
+		    0, &dbmp->htab)) != 0)
+			goto err;
+		__db_hashinit(dbmp->htab, mp->htab_buckets);
+		mp->htab = R_OFFSET(dbmp, dbmp->htab);
 
-	/*
-	 * If not already locked, lock the region -- if it's a new region,
-	 * then either __db_rcreate() locked it for us or we malloc'd it
-	 * instead of creating a region, neither of which requires locking
-	 * here.
-	 */
-	if (!newregion)
-		LOCKREGION(dbmp);
+		ZERO_LSN(mp->lsn);
+		mp->lsn_cnt = 0;
 
-	/*
-	 * Get the hash table address; it's on the shared page, so we have
-	 * to lock first.
-	 */
-	dbmp->htab = R_ADDR(dbmp, dbmp->mp->htab);
+		memset(&mp->stat, 0, sizeof(mp->stat));
+		mp->stat.st_cachesize = cachesize;
 
-	dbmp->fd = fd;
+		mp->flags = 0;
+	}
 
-	/* If we locked the region, release it now. */
-	if (!F_ISSET(dbmp, MP_MALLOC))
-		UNLOCKREGION(dbmp);
-	return (0);
+	/* Get the local hash table address. */
+	dbmp->htab = R_ADDR(dbmp, dbmp->mp->htab);
 
-err:	if (fd != -1) {
-		dbmp->fd = fd;
-		(void)__memp_rclose(dbmp);
-	}
+	UNLOCKREGION(dbmp);
+	return (0);
 
-	if (newregion)
+err:	UNLOCKREGION(dbmp);
+	(void)__db_rdetach(&dbmp->reginfo);
+	if (F_ISSET(&dbmp->reginfo, REGION_CREATED))
 		(void)memp_unlink(path, 1, dbmp->dbenv);
-	return (ret);
-}
 
-/*
- * __memp_rclose --
- *	Close the mpool region.
- *
- * PUBLIC: int __memp_rclose __P((DB_MPOOL *));
- */
-int
-__memp_rclose(dbmp)
-	DB_MPOOL *dbmp;
-{
-	if (F_ISSET(dbmp, MP_MALLOC)) {
-		__db_free(dbmp->maddr);
-		return (0);
-	}
-	return (__db_rclose(dbmp->dbenv, dbmp->fd, dbmp->maddr));
+	if (dbmp->reginfo.path != NULL)
+		FREES(dbmp->reginfo.path);
+	return (ret);
 }
diff --git a/db2/mp/mp_sync.c b/db2/mp/mp_sync.c
index 6d16cf3cd4..33218eef1a 100644
--- a/db2/mp/mp_sync.c
+++ b/db2/mp/mp_sync.c
@@ -1,13 +1,13 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)mp_sync.c	10.19 (Sleepycat) 12/3/97";
+static const char sccsid[] = "@(#)mp_sync.c	10.25 (Sleepycat) 4/26/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -15,7 +15,6 @@ static const char sccsid[] = "@(#)mp_sync.c	10.19 (Sleepycat) 12/3/97";
 
 #include <errno.h>
 #include <stdlib.h>
-#include <string.h>
 #endif
 
 #include "db_int.h"
@@ -25,6 +24,7 @@ static const char sccsid[] = "@(#)mp_sync.c	10.19 (Sleepycat) 12/3/97";
 #include "common_ext.h"
 
 static int __bhcmp __P((const void *, const void *));
+static int __memp_fsync __P((DB_MPOOLFILE *));
 
 /*
  * memp_sync --
@@ -145,7 +145,8 @@ memp_sync(dbmp, lsnp)
 				bharray[ar_cnt++] = bhp;
 			}
 		} else
-			F_CLR(bhp, BH_WRITE);
+			if (F_ISSET(bhp, BH_WRITE))
+				F_CLR(bhp, BH_WRITE);
 
 	/* If there no buffers we can write immediately, we're done. */
 	if (ar_cnt == 0) {
@@ -235,10 +236,8 @@ int
 memp_fsync(dbmfp)
 	DB_MPOOLFILE *dbmfp;
 {
-	BH *bhp, **bharray;
 	DB_MPOOL *dbmp;
-	size_t mf_offset;
-	int ar_cnt, cnt, nalloc, next, pincnt, ret, wrote;
+	int is_tmp;
 
 	dbmp = dbmfp->dbmp;
 
@@ -250,14 +249,62 @@ memp_fsync(dbmfp)
 	if (F_ISSET(dbmfp, MP_READONLY))
 		return (0);
 
-	ret = 0;
 	LOCKREGION(dbmp);
-	if (F_ISSET(dbmfp->mfp, MP_TEMP))
-		ret = 1;
+	is_tmp = F_ISSET(dbmfp->mfp, MP_TEMP);
 	UNLOCKREGION(dbmp);
-	if (ret)
+	if (is_tmp)
 		return (0);
 
+	return (__memp_fsync(dbmfp));
+}
+
+/*
+ * __mp_xxx_fd --
+ *	Return a file descriptor for DB 1.85 compatibility locking.
+ *
+ * PUBLIC: int __mp_xxx_fd __P((DB_MPOOLFILE *, int *));
+ */
+int
+__mp_xxx_fd(dbmfp, fdp)
+	DB_MPOOLFILE *dbmfp;
+	int *fdp;
+{
+	int ret;
+
+	/*
+	 * This is a truly spectacular layering violation, intended ONLY to
+	 * support compatibility for the DB 1.85 DB->fd call.
+	 *
+	 * Sync the database file to disk, creating the file as necessary.
+	 *
+	 * We skip the MP_READONLY and MP_TEMP tests done by memp_fsync(3).
+	 * The MP_READONLY test isn't interesting because we will either
+	 * already have a file descriptor (we opened the database file for
+	 * reading) or we aren't readonly (we created the database which
+	 * requires write privileges).  The MP_TEMP test isn't interesting
+	 * because we want to write to the backing file regardless so that
+	 * we get a file descriptor to return.
+	 */
+	ret = dbmfp->fd == -1 ? __memp_fsync(dbmfp) : 0;
+
+	return ((*fdp = dbmfp->fd) == -1 ? ENOENT : ret);
+}
+
+/*
+ * __memp_fsync --
+ *	Mpool file internal sync function.
+ */
+static int
+__memp_fsync(dbmfp)
+	DB_MPOOLFILE *dbmfp;
+{
+	BH *bhp, **bharray;
+	DB_MPOOL *dbmp;
+	size_t mf_offset;
+	int ar_cnt, cnt, nalloc, next, pincnt, ret, wrote;
+
+	ret = 0;
+	dbmp = dbmfp->dbmp;
 	mf_offset = R_OFFSET(dbmp, dbmfp->mfp);
 
 	/*
@@ -359,7 +406,6 @@ err:	UNLOCKREGION(dbmp);
 	if (ret == 0)
 		return (pincnt == 0 ? __db_fsync(dbmfp->fd) : DB_INCOMPLETE);
 	return (ret);
-
 }
 
 /*
@@ -453,8 +499,8 @@ __bhcmp(p1, p2)
 {
 	BH *bhp1, *bhp2;
 
-	bhp1 = *(BH **)p1;
-	bhp2 = *(BH **)p2;
+	bhp1 = *(BH * const *)p1;
+	bhp2 = *(BH * const *)p2;
 
 	/* Sort by file (shared memory pool offset). */
 	if (bhp1->mf_offset < bhp2->mf_offset)
diff --git a/db2/mutex/68020.gcc b/db2/mutex/68020.gcc
index 9d8be641d8..21410e61d4 100644
--- a/db2/mutex/68020.gcc
+++ b/db2/mutex/68020.gcc
@@ -1,5 +1,5 @@
 /*
- * @(#)68020.gcc	10.1 (Sleepycat) 4/12/97
+ * @(#)68020.gcc	10.2 (Sleepycat) 2/15/98
  *
  * For gcc/68K, 0 is clear, 1 is set.
  */
@@ -16,4 +16,3 @@
 
 #define	TSL_UNSET(tsl)	(*(tsl) = 0)
 #define	TSL_INIT(tsl)	TSL_UNSET(tsl)
-
diff --git a/db2/mutex/mutex.c b/db2/mutex/mutex.c
index 6dca323113..de0d0e23fe 100644
--- a/db2/mutex/mutex.c
+++ b/db2/mutex/mutex.c
@@ -1,14 +1,14 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)mutex.c	10.32 (Sleepycat) 1/16/98";
+static const char sccsid[] = "@(#)mutex.c	10.48 (Sleepycat) 5/23/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -16,14 +16,12 @@ static const char sccsid[] = "@(#)mutex.c	10.32 (Sleepycat) 1/16/98";
 
 #include <errno.h>
 #include <fcntl.h>
-#include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
 #endif
 
 #include "db_int.h"
-#include "common_ext.h"
 
 #ifdef HAVE_SPINLOCKS
 
@@ -43,13 +41,21 @@ static const char sccsid[] = "@(#)mutex.c	10.32 (Sleepycat) 1/16/98";
  * Should we not use MSEM_IF_NOWAIT and let the system block for us?
  * I've no idea if this will block all threads in the process or not.
  */
-#define	TSL_INIT(x)	msem_init(x, MSEM_UNLOCKED)
+#define	TSL_INIT(x)	(msem_init(x, MSEM_UNLOCKED) == NULL)
+#define	TSL_INIT_ERROR	1
 #define	TSL_SET(x)	(!msem_lock(x, MSEM_IF_NOWAIT))
 #define	TSL_UNSET(x)	msem_unlock(x, 0)
 #endif
 
+#ifdef HAVE_FUNC_RELIANT
+#define	TSL_INIT(x)	initspin(x, 1)
+#define	TSL_SET(x)	(cspinlock(x) == 0)
+#define	TSL_UNSET(x)	spinunlock(x)
+#endif
+
 #ifdef HAVE_FUNC_SGI
-#define	TSL_INIT(x)	init_lock(x)
+#define	TSL_INIT(x)	(init_lock(x) != 0)
+#define	TSL_INIT_ERROR	1
 #define	TSL_SET(x)	(!acquire_lock(x))
 #define	TSL_UNSET(x)	release_lock(x)
 #endif
@@ -58,7 +64,8 @@ static const char sccsid[] = "@(#)mutex.c	10.32 (Sleepycat) 1/16/98";
 /*
  * Semaphore calls don't work on Solaris 5.5.
  *
- * #define	TSL_INIT(x)	sema_init(x, 1, USYNC_PROCESS, NULL)
+ * #define	TSL_INIT(x)	(sema_init(x, 1, USYNC_PROCESS, NULL) != 0)
+ * #define	TSL_INIT_ERROR	1
  * #define	TSL_SET(x)	(sema_wait(x) == 0)
  * #define	TSL_UNSET(x)	sema_post(x)
  */
@@ -67,6 +74,10 @@ static const char sccsid[] = "@(#)mutex.c	10.32 (Sleepycat) 1/16/98";
 #define	TSL_UNSET(x)	_lock_clear(x)
 #endif
 
+#ifdef HAVE_ASSEM_PARISC_GCC
+#include "parisc.gcc"
+#endif
+
 #ifdef HAVE_ASSEM_SCO_CC
 #include "sco.cc"
 #endif
@@ -85,17 +96,20 @@ static const char sccsid[] = "@(#)mutex.c	10.32 (Sleepycat) 1/16/98";
 #include "x86.gcc"
 #endif
 
-#if defined(_WIN32)
-/* DBDB this needs to be byte-aligned!! */
+#ifdef WIN16
+/* Win16 spinlocks are simple because we cannot possibly be preempted. */
 #define	TSL_INIT(tsl)
-#define	TSL_SET(tsl)	(!InterlockedExchange((PLONG)tsl, 1))
+#define	TSL_SET(tsl)	(*(tsl) = 1)
 #define	TSL_UNSET(tsl)	(*(tsl) = 0)
 #endif
 
-#ifdef macintosh
-/* Mac spinlocks are simple because we cannot possibly be preempted. */
+#if defined(_WIN32)
+/*
+ * XXX
+ * DBDB this needs to be byte-aligned!!
+ */
 #define	TSL_INIT(tsl)
-#define	TSL_SET(tsl)	(*(tsl) = 1)
+#define	TSL_SET(tsl)	(!InterlockedExchange((PLONG)tsl, 1))
 #define	TSL_UNSET(tsl)	(*(tsl) = 0)
 #endif
 
@@ -105,14 +119,14 @@ static const char sccsid[] = "@(#)mutex.c	10.32 (Sleepycat) 1/16/98";
  * __db_mutex_init --
  *	Initialize a DB mutex structure.
  *
- * PUBLIC: void __db_mutex_init __P((db_mutex_t *, u_int32_t));
+ * PUBLIC: int __db_mutex_init __P((db_mutex_t *, u_int32_t));
  */
-void
+int
 __db_mutex_init(mp, off)
 	db_mutex_t *mp;
 	u_int32_t off;
 {
-#ifdef DEBUG
+#ifdef DIAGNOSTIC
 	if ((ALIGNTYPE)mp & (MUTEX_ALIGNMENT - 1)) {
 		(void)fprintf(stderr,
 		    "MUTEX ERROR: mutex NOT %d-byte aligned!\n",
@@ -125,11 +139,17 @@ __db_mutex_init(mp, off)
 #ifdef HAVE_SPINLOCKS
 	COMPQUIET(off, 0);
 
+#ifdef TSL_INIT_ERROR
+	if (TSL_INIT(&mp->tsl_resource))
+		return (errno);
+#else
 	TSL_INIT(&mp->tsl_resource);
+#endif
 	mp->spins = __os_spin();
 #else
 	mp->off = off;
 #endif
+	return (0);
 }
 
 #define	MS(n)		((n) * 1000)	/* Milliseconds to micro-seconds. */
@@ -147,17 +167,25 @@ __db_mutex_lock(mp, fd)
 	int fd;
 {
 	u_long usecs;
-
 #ifdef HAVE_SPINLOCKS
 	int nspins;
+#else
+	struct flock k_lock;
+	pid_t mypid;
+	int locked;
+#endif
+
+	if (!DB_GLOBAL(db_mutexlocks))
+		return (0);
 
+#ifdef HAVE_SPINLOCKS
 	COMPQUIET(fd, 0);
 
 	for (usecs = MS(10);;) {
 		/* Try and acquire the uncontested resource lock for N spins. */
 		for (nspins = mp->spins; nspins > 0; --nspins)
 			if (TSL_SET(&mp->tsl_resource)) {
-#ifdef DEBUG
+#ifdef DIAGNOSTIC
 				if (mp->pid != 0) {
 					(void)fprintf(stderr,
 		    "MUTEX ERROR: __db_mutex_lock: lock currently locked\n");
@@ -182,9 +210,6 @@ __db_mutex_lock(mp, fd)
 	/* NOTREACHED */
 
 #else /* !HAVE_SPINLOCKS */
-	struct flock k_lock;
-	pid_t mypid;
-	int locked;
 
 	/* Initialize the lock. */
 	k_lock.l_whence = SEEK_SET;
@@ -246,7 +271,10 @@ __db_mutex_unlock(mp, fd)
 	db_mutex_t *mp;
 	int fd;
 {
-#ifdef DEBUG
+	if (!DB_GLOBAL(db_mutexlocks))
+		return (0);
+
+#ifdef DIAGNOSTIC
 	if (mp->pid == 0) {
 		(void)fprintf(stderr,
 	    "MUTEX ERROR: __db_mutex_unlock: lock already unlocked\n");
@@ -257,7 +285,7 @@ __db_mutex_unlock(mp, fd)
 #ifdef HAVE_SPINLOCKS
 	COMPQUIET(fd, 0);
 
-#ifdef DEBUG
+#ifdef DIAGNOSTIC
 	mp->pid = 0;
 #endif
 
diff --git a/db2/mutex/parisc.gcc b/db2/mutex/parisc.gcc
index e15f6f2dba..2e4540f767 100644
--- a/db2/mutex/parisc.gcc
+++ b/db2/mutex/parisc.gcc
@@ -1,5 +1,5 @@
-/* 
- * @(#)parisc.gcc	8.5 (Sleepycat) 1/18/97
+/*
+ * @(#)parisc.gcc	8.8 (Sleepycat) 6/2/98
  *
  * Copyright (c) 1996-1997, The University of Utah and the Computer Systems
  * Laboratory at the University of Utah (CSL).  All rights reserved.
@@ -22,19 +22,15 @@
 
 /*
  * The PA-RISC has a "load and clear" instead of a "test and set" instruction.
- * The 32-bit word used by that instruction must be 16-byte aligned hence we
- * allocate 16 bytes for a tsl_t and use the word that is properly aligned.
- * We could use the "aligned" attribute in GCC but that doesn't work for stack
- * variables.
+ * The 32-bit word used by that instruction must be 16-byte aligned.  We could
+ * use the "aligned" attribute in GCC but that doesn't work for stack variables.
  */
 #define	TSL_SET(tsl) ({							\
-	int *__l = (int *)(((int)(tsl)+15)&~15);			\
+	register tsl_t *__l = (tsl);					\
 	int __r;							\
 	asm volatile("ldcws 0(%1),%0" : "=r" (__r) : "r" (__l));	\
 	__r & 1;							\
 })
 
-#define	TSL_UNSET(tsl) ({						\
-	int *__l = (int *)(((int)(tsl)+15)&~15);			\
-	*__l = -1;							\
-})
+#define	TSL_UNSET(tsl)	(*(tsl) = -1)
+#define	TSL_INIT(tsl)	TSL_UNSET(tsl)
diff --git a/db2/mutex/parisc.hp b/db2/mutex/parisc.hp
index d10807b7f1..bd0e37fc78 100644
--- a/db2/mutex/parisc.hp
+++ b/db2/mutex/parisc.hp
@@ -1,5 +1,5 @@
-/* 
- * @(#)parisc.hp	8.5 (Sleepycat) 1/18/97
+/*
+ * @(#)parisc.hp	8.6 (Sleepycat) 6/2/98
  *
  * Copyright (c) 1996-1997, The University of Utah and the Computer Systems
  * Laboratory at the University of Utah (CSL).  All rights reserved.
diff --git a/db2/mutex/sco.cc b/db2/mutex/sco.cc
new file mode 100644
index 0000000000..7c165a2072
--- /dev/null
+++ b/db2/mutex/sco.cc
@@ -0,0 +1,24 @@
+/*
+ * @(#)x86.uslc
+ *
+ * UnixWare has threads in libthread, but OpenServer doesn't (yet).
+ *
+ * For cc/x86, 0 is clear, 1 is set.
+ */
+
+#if defined(__USLC__)
+asm int
+_tsl_set(void *tsl)
+{
+%mem tsl
+	movl	tsl, %ecx
+	movl	$1, %eax
+	lock
+	xchgb	(%ecx),%al
+	xorl	$1,%eax
+}
+#endif
+
+#define	TSL_SET(tsl)	_tsl_set(tsl)
+#define	TSL_UNSET(tsl)	(*(tsl) = 0)
+#define	TSL_INIT(tsl)	TSL_UNSET(tsl)
diff --git a/db2/os/os_abs.c b/db2/os/os_abs.c
index 872e46d058..d9f4970467 100644
--- a/db2/os/os_abs.c
+++ b/db2/os/os_abs.c
@@ -1,14 +1,14 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1997
+ * Copyright (c) 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)os_abs.c	10.7 (Sleepycat) 10/24/97";
+static const char sccsid[] = "@(#)os_abs.c	10.8 (Sleepycat) 4/10/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
diff --git a/db2/os/os_alloc.c b/db2/os/os_alloc.c
index 27abffbf0d..35784476c0 100644
--- a/db2/os/os_alloc.c
+++ b/db2/os/os_alloc.c
@@ -1,26 +1,46 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1997
+ * Copyright (c) 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)os_alloc.c	10.1 (Sleepycat) 12/1/97";
+static const char sccsid[] = "@(#)os_alloc.c	10.6 (Sleepycat) 5/2/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
 #include <sys/types.h>
 
-#include <errno.h>
 #include <string.h>
 #endif
 
 #include "db_int.h"
 
 /*
+ * __db_strdup --
+ *	The strdup(3) function for DB.
+ *
+ * PUBLIC: char *__db_strdup __P((const char *));
+ */
+char *
+__db_strdup(str)
+	const char *str;
+{
+	size_t len;
+	char *copy;
+
+	len = strlen(str) + 1;
+	if ((copy = __db_malloc(len)) == NULL)
+		return (NULL);
+
+	memcpy(copy, str, len);
+	return (copy);
+}
+
+/*
  * XXX
  * Correct for systems that return NULL when you allocate 0 bytes of memory.
  * There are several places in DB where we allocate the number of bytes held
@@ -28,6 +48,10 @@ static const char sccsid[] = "@(#)os_alloc.c	10.1 (Sleepycat) 12/1/97";
  * returns a NULL for that reason (which behavior is permitted by ANSI).  We
  * could make these calls macros on non-Alpha architectures (that's where we
  * saw the problem), but it's probably not worth the autoconf complexity.
+ *
+ *	Out of memory.
+ *	We wish to hold the whole sky,
+ *	But we never will.
  */
 /*
  * __db_calloc --
@@ -42,7 +66,7 @@ __db_calloc(num, size)
 	void *p;
 
 	size *= num;
-	if ((p = __db_jump.db_malloc(size == 0 ? 1 : size)) != NULL)
+	if ((p = __db_jump.j_malloc(size == 0 ? 1 : size)) != NULL)
 		memset(p, 0, size);
 	return (p);
 }
@@ -57,7 +81,15 @@ void *
 __db_malloc(size)
 	size_t size;
 {
-	return (__db_jump.db_malloc(size == 0 ? 1 : size));
+#ifdef DIAGNOSTIC
+	void *p;
+
+	p = __db_jump.j_malloc(size == 0 ? 1 : size);
+	memset(p, 0xff, size == 0 ? 1 : size);
+	return (p);
+#else
+	return (__db_jump.j_malloc(size == 0 ? 1 : size));
+#endif
 }
 
 /*
@@ -71,5 +103,5 @@ __db_realloc(ptr, size)
 	void *ptr;
 	size_t size;
 {
-	return (__db_jump.db_realloc(ptr, size == 0 ? 1 : size));
+	return (__db_jump.j_realloc(ptr, size == 0 ? 1 : size));
 }
diff --git a/db2/os/os_config.c b/db2/os/os_config.c
index 7a89ba58ab..4150c843e4 100644
--- a/db2/os/os_config.c
+++ b/db2/os/os_config.c
@@ -1,14 +1,14 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1997
+ * Copyright (c) 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)os_config.c	10.12 (Sleepycat) 1/8/98";
+static const char sccsid[] = "@(#)os_config.c	10.26 (Sleepycat) 5/23/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -20,22 +20,6 @@ static const char sccsid[] = "@(#)os_config.c	10.12 (Sleepycat) 1/8/98";
 #include "db_int.h"
 
 /*
- * __os_oldwin --
- *	Return if Windows 95 (as opposed to Windows NT).
- *
- * PUBLIC: int __os_oldwin __P((void));
- */
-int
-__os_oldwin()
-{
-#ifdef _WIN32
-	return ((GetVersion() & 0x80000000) != 0);
-#else
-	return (0);
-#endif
-}
-
-/*
  * XXX
  * We provide our own extern declarations so that we don't collide with
  * systems that get them wrong, e.g., SunOS.
@@ -47,13 +31,20 @@ __os_oldwin()
 #define imported
 #endif
 
+/*
+ * XXX
+ * HP/UX MPE doesn't have fsync, but you can build one using FCONTROL.
+ */
+#ifdef __hp3000s900
+#define	fsync	__mpe_fsync
+#endif
+
 imported extern int	 close __P((int));
 imported extern void	 free __P((void *));
 imported extern int	 fsync __P((int));
 imported extern void    *malloc __P((size_t));
 imported extern int	 open __P((const char *, int, ...));
 imported extern ssize_t	 read __P((int, void *, size_t));
-imported extern char	*strdup __P((const char *));
 imported extern void    *realloc __P((void *, size_t));
 imported extern int	 unlink __P((const char *));
 imported extern ssize_t	 write __P((int, const void *, size_t));
@@ -63,7 +54,7 @@ imported extern ssize_t	 write __P((int, const void *, size_t));
  *	This list of interfaces that applications can replace.  In some
  *	cases, the user is permitted to replace the standard ANSI C or
  *	POSIX 1003.1 call, e.g., malloc or read.  In others, we provide
- *	a local interface to the functionality, e.g., __os_map.
+ *	a local interface to the functionality, e.g., __os_ioinfo.
  */
 struct __db_jumptab __db_jump = {
 	close,				/* DB_FUNC_CLOSE */
@@ -74,20 +65,26 @@ struct __db_jumptab __db_jump = {
 	fsync,				/* DB_FUNC_FSYNC */
 	__os_ioinfo,			/* DB_FUNC_IOINFO */
 	malloc,				/* DB_FUNC_MALLOC */
-	__os_map,			/* DB_FUNC_MAP */
+	NULL,				/* DB_FUNC_MAP */
 	open,				/* DB_FUNC_OPEN */
 	read,				/* DB_FUNC_READ */
 	realloc,			/* DB_FUNC_REALLOC */
+	NULL,				/* DB_FUNC_RUNLINK */
 	__os_seek,			/* DB_FUNC_SEEK */
 	__os_sleep,			/* DB_FUNC_SLEEP */
-	strdup,				/* DB_FUNC_STRDUP */
 	unlink,				/* DB_FUNC_UNLINK */
-	__os_unmap,			/* DB_FUNC_UNMAP */
+	NULL,				/* DB_FUNC_UNMAP */
 	write,				/* DB_FUNC_WRITE */
 	NULL				/* DB_FUNC_YIELD */
 };
 
-int __db_tsl_spins;			/* DB_TSL_SPINS */
+DB_GLOBALS __db_global_values = {
+	1,				/* DB_MUTEXLOCKS */
+	0,				/* DB_REGION_ANON, DB_REGION_NAME */
+	0,				/* DB_REGION_INIT */
+	0,				/* DB_TSL_SPINS */
+	0				/* DB_PAGEYIELD */
+};
 
 /*
  * db_jump_set --
@@ -99,74 +96,68 @@ db_jump_set(func, which)
 	int which;
 {
 	switch (which) {
-	case DB_FUNC_CALLOC:
-		/*
-		 * XXX
-		 * Obsolete, calloc is no longer called by DB.
-		 */
-		 break;
 	case DB_FUNC_CLOSE:
-		__db_jump.db_close = (int (*) __P((int)))func;
+		__db_jump.j_close = (int (*) __P((int)))func;
 		break;
 	case DB_FUNC_DIRFREE:
-		__db_jump.db_dirfree = (void (*) __P((char **, int)))func;
+		__db_jump.j_dirfree = (void (*) __P((char **, int)))func;
 		break;
 	case DB_FUNC_DIRLIST:
-		__db_jump.db_dirlist =
+		__db_jump.j_dirlist =
 		    (int (*) __P((const char *, char ***, int *)))func;
 		break;
 	case DB_FUNC_EXISTS:
-		__db_jump.db_exists = (int (*) __P((const char *, int *)))func;
+		__db_jump.j_exists = (int (*) __P((const char *, int *)))func;
 		break;
 	case DB_FUNC_FREE:
-		__db_jump.db_free = (void (*) __P((void *)))func;
+		__db_jump.j_free = (void (*) __P((void *)))func;
 		break;
 	case DB_FUNC_FSYNC:
-		__db_jump.db_fsync = (int (*) __P((int)))func;
+		__db_jump.j_fsync = (int (*) __P((int)))func;
 		break;
 	case DB_FUNC_IOINFO:
-		__db_jump.db_ioinfo = (int (*) __P((const char *,
+		__db_jump.j_ioinfo = (int (*) __P((const char *,
 		    int, u_int32_t *, u_int32_t *, u_int32_t *)))func;
 		break;
 	case DB_FUNC_MALLOC:
-		__db_jump.db_malloc = (void *(*) __P((size_t)))func;
+		__db_jump.j_malloc = (void *(*) __P((size_t)))func;
 		break;
 	case DB_FUNC_MAP:
-		__db_jump.db_map =
-		    (int (*) __P((int, size_t, int, int, void **)))func;
+		__db_jump.j_map = (int (*)
+		    __P((char *, int, size_t, int, int, int, void **)))func;
 		break;
 	case DB_FUNC_OPEN:
-		__db_jump.db_open = (int (*) __P((const char *, int, ...)))func;
+		__db_jump.j_open = (int (*) __P((const char *, int, ...)))func;
 		break;
 	case DB_FUNC_READ:
-		__db_jump.db_read =
+		__db_jump.j_read =
 		    (ssize_t (*) __P((int, void *, size_t)))func;
 		break;
 	case DB_FUNC_REALLOC:
-		__db_jump.db_realloc = (void *(*) __P((void *, size_t)))func;
+		__db_jump.j_realloc = (void *(*) __P((void *, size_t)))func;
+		break;
+	case DB_FUNC_RUNLINK:
+		__db_jump.j_runlink = (int (*) __P((char *)))func;
 		break;
 	case DB_FUNC_SEEK:
-		__db_jump.db_seek =
-		    (int (*) __P((int, size_t, db_pgno_t, u_long, int)))func;
+		__db_jump.j_seek = (int (*)
+		    __P((int, size_t, db_pgno_t, u_int32_t, int, int)))func;
 		break;
 	case DB_FUNC_SLEEP:
-		__db_jump.db_sleep = (int (*) __P((u_long, u_long)))func;
-		break;
-	case DB_FUNC_STRDUP:
-		__db_jump.db_strdup = (char *(*) __P((const char *)))func;
+		__db_jump.j_sleep = (int (*) __P((u_long, u_long)))func;
 		break;
 	case DB_FUNC_UNLINK:
-		__db_jump.db_unlink = (int (*) __P((const char *)))func;
+		__db_jump.j_unlink = (int (*) __P((const char *)))func;
 		break;
 	case DB_FUNC_UNMAP:
-		__db_jump.db_unmap = (int (*) __P((void *, size_t)))func;
+		__db_jump.j_unmap = (int (*) __P((void *, size_t)))func;
 		break;
 	case DB_FUNC_WRITE:
-		__db_jump.db_write =
+		__db_jump.j_write =
 		    (ssize_t (*) __P((int, const void *, size_t)))func;
 		break;
 	case DB_FUNC_YIELD:
-		__db_jump.db_yield = (int (*) __P((void)))func;
+		__db_jump.j_yield = (int (*) __P((void)))func;
 		break;
 	default:
 		return (EINVAL);
@@ -182,11 +173,32 @@ int
 db_value_set(value, which)
 	int value, which;
 {
+	int ret;
+
 	switch (which) {
+	case DB_MUTEXLOCKS:
+		DB_GLOBAL(db_mutexlocks) = value;
+		break;
+	case DB_PAGEYIELD:
+		DB_GLOBAL(db_pageyield) = value;
+		break;
+	case DB_REGION_ANON:
+		if (value != 0 && (ret = __db_mapanon_ok(0)) != 0)
+			return (ret);
+		DB_GLOBAL(db_region_anon) = value;
+		break;
+	case DB_REGION_INIT:
+		DB_GLOBAL(db_region_init) = value;
+		break;
+	case DB_REGION_NAME:
+		if (value != 0 && (ret = __db_mapanon_ok(1)) != 0)
+			return (ret);
+		DB_GLOBAL(db_region_anon) = value;
+		break;
 	case DB_TSL_SPINS:
 		if (value <= 0)
 			return (EINVAL);
-		__db_tsl_spins = value;
+		DB_GLOBAL(db_tsl_spins) = value;
 		break;
 	default:
 		return (EINVAL);
diff --git a/db2/os/os_dir.c b/db2/os/os_dir.c
index 10fb8b6739..14a10ad23f 100644
--- a/db2/os/os_dir.c
+++ b/db2/os/os_dir.c
@@ -1,14 +1,14 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1997
+ * Copyright (c) 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)os_dir.c	10.13 (Sleepycat) 10/28/97";
+static const char sccsid[] = "@(#)os_dir.c	10.15 (Sleepycat) 4/26/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -32,13 +32,9 @@ static const char sccsid[] = "@(#)os_dir.c	10.13 (Sleepycat) 10/28/97";
 #endif
 
 #include <errno.h>
-#include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
 #endif
 
 #include "db_int.h"
-#include "common_ext.h"
 
 /*
  * __os_dirlist --
diff --git a/db2/os/os_fid.c b/db2/os/os_fid.c
index 6820b88786..cf48c01bd8 100644
--- a/db2/os/os_fid.c
+++ b/db2/os/os_fid.c
@@ -1,14 +1,14 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)os_fid.c	10.9 (Sleepycat) 10/24/97";
+static const char sccsid[] = "@(#)os_fid.c	10.11 (Sleepycat) 4/26/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -16,7 +16,6 @@ static const char sccsid[] = "@(#)os_fid.c	10.9 (Sleepycat) 10/24/97";
 #include <sys/stat.h>
 
 #include <errno.h>
-#include <stdlib.h>
 #include <string.h>
 #include <time.h>
 #endif
diff --git a/db2/os/os_fsync.c b/db2/os/os_fsync.c
index 7b001ceeb0..e1f271a75c 100644
--- a/db2/os/os_fsync.c
+++ b/db2/os/os_fsync.c
@@ -1,14 +1,14 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1997
+ * Copyright (c) 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)os_fsync.c	10.3 (Sleepycat) 10/25/97";
+static const char sccsid[] = "@(#)os_fsync.c	10.5 (Sleepycat) 4/19/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -32,3 +32,18 @@ __db_fsync(fd)
 {
 	return (__os_fsync(fd) ? errno : 0);
 }
+
+#ifdef __hp3000s900
+#include <fcntl.h>
+
+int
+__mpe_fsync(fd)
+	int fd;
+{
+	extern FCONTROL(short, short, void *);
+
+	FCONTROL(_MPE_FILENO(fd), 2, NULL);	/* Flush the buffers */
+	FCONTROL(_MPE_FILENO(fd), 6, NULL);	/* Write the EOF */
+	return (0);
+}
+#endif
diff --git a/db2/os/os_map.c b/db2/os/os_map.c
index b1553188dc..5f0fd790e6 100644
--- a/db2/os/os_map.c
+++ b/db2/os/os_map.c
@@ -1,47 +1,395 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)os_map.c	10.7 (Sleepycat) 10/25/97";
+static const char sccsid[] = "@(#)os_map.c	10.19 (Sleepycat) 5/3/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
 #include <sys/types.h>
+#ifdef HAVE_MMAP
 #include <sys/mman.h>
+#endif
+
+#ifdef HAVE_SHMGET
+#include <sys/ipc.h>
+#include <sys/shm.h>
+#endif
 
 #include <errno.h>
+#include <string.h>
 #endif
 
 #include "db_int.h"
+#include "common_ext.h"
+
+#ifdef HAVE_MMAP
+static int __os_map __P((char *, int, size_t, int, int, int, void **));
+#endif
+#ifdef HAVE_SHMGET
+static int __os_shmget __P((char *, REGINFO *));
+#endif
 
 /*
- * __os_map --
- *	Map in some shared memory backed by a file descriptor.
+ * __db_mapanon_ok --
+ *	Return 0 if this OS can support anonymous memory regions, else EINVAL.
+ *
+ * PUBLIC: int __db_mapanon_ok __P((int));
+ */
+int
+__db_mapanon_ok(need_names)
+	int need_names;
+{
+	int ret;
+
+	ret = EINVAL;
+
+	/*
+	 * If we don't have spinlocks, we have to have a file descriptor
+	 * for fcntl(2) locking, which implies using mmap(2) to map in a
+	 * regular file.  Theoretically, we could probably find ways to
+	 * get a file descriptor to lock other types of shared regions,
+	 * but I don't see any reason to do so.
+	 *
+	 * If need_names is set, the application wants to share anonymous
+	 * memory among multiple processes, so we have to have a way to
+	 * name it.  This requires shmget(2), on UNIX systems.
+	 */
+#ifdef HAVE_SPINLOCKS
+#ifdef HAVE_SHMGET
+	ret = 0;
+#endif
+#ifdef HAVE_MMAP
+#ifdef MAP_ANON
+	if (!need_names)
+		ret = 0;
+#endif
+#ifdef MAP_ANONYMOUS
+	if (!need_names)
+		ret = 0;
+#endif
+#else
+	COMPQUIET(need_names, 0);
+#endif /* HAVE_MMAP */
+#endif /* HAVE_SPINLOCKS */
+
+	return (ret);
+}
+
+/*
+ * __db_mapinit --
+ *	Return non-zero if shared regions need to be initialized.
+ *
+ * PUBLIC: int __db_mapinit __P((void));
+ */
+int
+__db_mapinit()
+{
+	/*
+	 * Historically, some systems required that all of the bytes of the
+	 * region be written before it could be mmapped and accessed randomly.
+	 * We have the option of setting REGION_INIT_NEEDED at configuration
+	 * time if we're running on one of those systems.
+	 */
+#ifdef REGION_INIT_NEEDED
+	return (1);
+#else
+	return (0);
+#endif
+}
+
+/*
+ * __db_mapregion --
+ *	Attach to a shared memory region.
+ *
+ * PUBLIC: int __db_mapregion __P((char *, REGINFO *));
+ */
+int
+__db_mapregion(path, infop)
+	char *path;
+	REGINFO *infop;
+{
+	int called, ret;
+
+	called = 0;
+	ret = EINVAL;
+
+	/* If the user replaces the map call, call through their interface. */
+	if (__db_jump.j_map != NULL) {
+		F_SET(infop, REGION_HOLDINGSYS);
+		return (__db_jump.j_map(path, infop->fd, infop->size,
+		    1, F_ISSET(infop, REGION_ANONYMOUS), 0, &infop->addr));
+	}
+
+	if (F_ISSET(infop, REGION_ANONYMOUS)) {
+		/*
+		 * !!!
+		 * If we're creating anonymous regions:
+		 *
+		 * If it's private, we use mmap(2).  The problem with using
+		 * shmget(2) is that we may be creating a region of which the
+		 * application isn't aware, and if the application crashes
+		 * we'll have no way to remove the system resources for the
+		 * region.
+		 *
+		 * If it's not private, we use the shmget(2) interface if it's
+		 * available, because it allows us to name anonymous memory.
+		 * If shmget(2) isn't available, use the mmap(2) calls.
+		 *
+		 * In the case of anonymous memory, using mmap(2) means the
+		 * memory isn't named and only the single process and its
+		 * threads can access the region.
+		 */
+#ifdef	HAVE_MMAP
+#ifdef	MAP_ANON
+#define	HAVE_MMAP_ANONYMOUS	1
+#else
+#ifdef	MAP_ANONYMOUS
+#define	HAVE_MMAP_ANONYMOUS	1
+#endif
+#endif
+#endif
+#ifdef HAVE_MMAP_ANONYMOUS
+		if (!called && F_ISSET(infop, REGION_PRIVATE)) {
+			called = 1;
+			ret = __os_map(path,
+			    infop->fd, infop->size, 1, 1, 0, &infop->addr);
+		}
+#endif
+#ifdef HAVE_SHMGET
+		if (!called) {
+			called = 1;
+			ret = __os_shmget(path, infop);
+		}
+#endif
+#ifdef HAVE_MMAP
+		/*
+		 * If we're trying to join an unnamed anonymous region, fail --
+		 * that's not possible.
+		 */
+		if (!called) {
+			called = 1;
+
+			if (!F_ISSET(infop, REGION_CREATED)) {
+				__db_err(infop->dbenv,
+			    "cannot join region in unnamed anonymous memory");
+				return (EINVAL);
+			}
+
+			ret = __os_map(path,
+			    infop->fd, infop->size, 1, 1, 0, &infop->addr);
+		}
+#endif
+	} else {
+		/*
+		 * !!!
+		 * If we're creating normal regions, we use the mmap(2)
+		 * interface if it's available because it's POSIX 1003.1
+		 * standard and we trust it more than we do shmget(2).
+		 */
+#ifdef HAVE_MMAP
+		if (!called) {
+			called = 1;
+
+			/* Mmap(2) regions that aren't anonymous can grow. */
+			F_SET(infop, REGION_CANGROW);
+
+			ret = __os_map(path,
+			    infop->fd, infop->size, 1, 0, 0, &infop->addr);
+		}
+#endif
+#ifdef HAVE_SHMGET
+		if (!called) {
+			called = 1;
+			ret = __os_shmget(path, infop);
+		}
+#endif
+	}
+	return (ret);
+}
+
+/*
+ * __db_unmapregion --
+ *	Detach from the shared memory region.
+ *
+ * PUBLIC: int __db_unmapregion __P((REGINFO *));
+ */
+int
+__db_unmapregion(infop)
+	REGINFO *infop;
+{
+	int called, ret;
+
+	called = 0;
+	ret = EINVAL;
+
+	if (__db_jump.j_unmap != NULL)
+		return (__db_jump.j_unmap(infop->addr, infop->size));
+
+#ifdef HAVE_SHMGET
+	if (infop->segid != INVALID_SEGID) {
+		called = 1;
+		ret = shmdt(infop->addr) ? errno : 0;
+	}
+#endif
+#ifdef HAVE_MMAP
+	if (!called) {
+		called = 1;
+		ret = munmap(infop->addr, infop->size) ? errno : 0;
+	}
+#endif
+	return (ret);
+}
+
+/*
+ * __db_unlinkregion --
+ *	Remove the shared memory region.
+ *
+ * PUBLIC: int __db_unlinkregion __P((char *, REGINFO *));
+ */
+int
+__db_unlinkregion(name, infop)
+	char *name;
+	REGINFO *infop;
+{
+	int called, ret;
+
+	called = 0;
+	ret = EINVAL;
+
+	if (__db_jump.j_runlink != NULL)
+		return (__db_jump.j_runlink(name));
+
+#ifdef HAVE_SHMGET
+	if (infop->segid != INVALID_SEGID) {
+		called = 1;
+		ret = shmctl(infop->segid, IPC_RMID, NULL) ? errno : 0;
+	}
+#else
+	COMPQUIET(infop, NULL);
+#endif
+#ifdef HAVE_MMAP
+	if (!called) {
+		called = 1;
+		ret = 0;
+	}
+#endif
+	return (ret);
+}
+
+/*
+ * __db_mapfile --
+ *	Map in a shared memory file.
+ *
+ * PUBLIC: int __db_mapfile __P((char *, int, size_t, int, void **));
+ */
+int
+__db_mapfile(path, fd, len, is_rdonly, addr)
+	char *path;
+	int fd, is_rdonly;
+	size_t len;
+	void **addr;
+{
+	if (__db_jump.j_map != NULL)
+		return (__db_jump.j_map(path, fd, len, 0, 0, is_rdonly, addr));
+
+#ifdef HAVE_MMAP
+	return (__os_map(path, fd, len, 0, 0, is_rdonly, addr));
+#else
+	return (EINVAL);
+#endif
+}
+
+/*
+ * __db_unmapfile --
+ *	Unmap the shared memory file.
  *
- * PUBLIC: int __os_map __P((int, size_t, int, int, void **));
+ * PUBLIC: int __db_unmapfile __P((void *, size_t));
  */
 int
-__os_map(fd, len, is_private, is_rdonly, addr)
-	int fd, is_private, is_rdonly;
+__db_unmapfile(addr, len)
+	void *addr;
+	size_t len;
+{
+	if (__db_jump.j_unmap != NULL)
+		return (__db_jump.j_unmap(addr, len));
+
+#ifdef HAVE_MMAP
+	return (munmap(addr, len) ? errno : 0);
+#else
+	return (EINVAL);
+#endif
+}
+
+#ifdef HAVE_MMAP
+/*
+ * __os_map --
+ *	Call the mmap(2) function.
+ */
+static int
+__os_map(path, fd, len, is_region, is_anonymous, is_rdonly, addr)
+	char *path;
+	int fd, is_region, is_anonymous, is_rdonly;
 	size_t len;
 	void **addr;
 {
 	void *p;
 	int flags, prot;
 
-	flags = is_private ? MAP_PRIVATE : MAP_SHARED;
+	COMPQUIET(path, NULL);
+
+	/*
+	 * If it's read-only, it's private, and if it's not, it's shared.
+	 * Don't bother with an additional parameter.
+	 */
+	flags = is_rdonly ? MAP_PRIVATE : MAP_SHARED;
+
+	if (is_region && is_anonymous) {
+		/*
+		 * BSD derived systems use MAP_ANON; Digital Unix and HP/UX
+		 * use MAP_ANONYMOUS.
+		 */
+#ifdef MAP_ANON
+		flags |= MAP_ANON;
+#endif
+#ifdef MAP_ANONYMOUS
+		flags |= MAP_ANONYMOUS;
+#endif
+		fd = -1;
+	}
+#ifdef MAP_FILE
+	if (!is_region || !is_anonymous) {
+		/*
+		 * Historically, MAP_FILE was required for mapping regular
+		 * files, even though it was the default.  Some systems have
+		 * it, some don't, some that have it set it to 0.
+		 */
+		flags |= MAP_FILE;
+	}
+#endif
+
+	/*
+	 * I know of no systems that implement the flag to tell the system
+	 * that the region contains semaphores, but it's not an unreasonable
+	 * thing to do, and has been part of the design since forever.  I
+	 * don't think anyone will object, but don't set it for read-only
+	 * files, it doesn't make sense.
+	 */
 #ifdef MAP_HASSEMAPHORE
-	flags |= MAP_HASSEMAPHORE;
+	if (!is_rdonly)
+		flags |= MAP_HASSEMAPHORE;
 #endif
+
 	prot = PROT_READ | (is_rdonly ? 0 : PROT_WRITE);
 
-#ifndef MAP_FAILED			/* XXX: Mmap(2) failure return. */
+	/* MAP_FAILED was not defined in early mmap implementations. */
+#ifndef MAP_FAILED
 #define	MAP_FAILED	-1
 #endif
 	if ((p =
@@ -51,21 +399,67 @@ __os_map(fd, len, is_private, is_rdonly, addr)
 	*addr = p;
 	return (0);
 }
+#endif
 
+#ifdef HAVE_SHMGET
 /*
- * __os_unmap --
- *	Release the specified shared memory.
- *
- * PUBLIC: int __os_unmap __P((void *, size_t));
+ * __os_shmget --
+ *	Call the shmget(2) family of functions.
  */
-int
-__os_unmap(addr, len)
-	void *addr;
-	size_t len;
+static int
+__os_shmget(path, infop)
+	REGINFO *infop;
+	char *path;
 {
-	/*
-	 * !!!
-	 * The argument len is always the same length as was mapped.
-	 */
-	return (munmap(addr, len) ? errno : 0);
+	key_t key;
+	int shmflg;
+
+	if (F_ISSET(infop, REGION_CREATED)) {
+		/*
+		 * The return key from ftok(3) is not guaranteed to be unique.
+		 * The nice thing about the shmget(2) interface is that it
+		 * allows you to name anonymous pieces of memory.  The evil
+		 * thing about it is that the name space is separate from the
+		 * filesystem.
+		 */
+#ifdef __hp3000s900
+		{char mpe_path[MAXPATHLEN];
+		/*
+		 * MPE ftok() is broken as of 5.5pp4.  If the file path does
+		 * not start with '/' or '.', then ftok() tries to interpret
+		 * the file path in MPE syntax instead of POSIX HFS syntax.
+		 * The workaround is to prepend "./" to these paths.  See HP
+		 * SR 5003416081 for details.
+		 */
+		if (*path != '/' && *path != '.') {
+			if (strlen(path) + strlen("./") + 1 > sizeof(mpe_path))
+				return (ENAMETOOLONG);
+			mpe_path[0] = '.';
+			mpe_path[1] = '/';
+			(void)strcpy(mpe_path + 2, path);
+			path = mpe_path;
+		}
+		}
+#endif
+		if ((key = ftok(path, 1)) == (key_t)-1)
+			return (errno);
+
+		shmflg = IPC_CREAT | 0600;
+		if ((infop->segid = shmget(key, infop->size, shmflg)) == -1)
+			return (errno);
+	}
+
+	if ((infop->addr = shmat(infop->segid, NULL, 0)) == (void *)-1) {
+		/*
+		 * If we're trying to join the region and failing, assume
+		 * that there was a reboot and the region no longer exists.
+		 */
+		if (!F_ISSET(infop, REGION_CREATED))
+			errno = EAGAIN;
+		return (errno);
+	}
+
+	F_SET(infop, REGION_HOLDINGSYS);
+	return (0);
 }
+#endif
diff --git a/db2/os/os_oflags.c b/db2/os/os_oflags.c
index 3656eef1c4..388c1c6faa 100644
--- a/db2/os/os_oflags.c
+++ b/db2/os/os_oflags.c
@@ -1,18 +1,19 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1997
+ * Copyright (c) 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)os_oflags.c	10.2 (Sleepycat) 10/24/97";
+static const char sccsid[] = "@(#)os_oflags.c	10.6 (Sleepycat) 4/19/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
 #include <sys/types.h>
+#include <sys/stat.h>
 
 #include <fcntl.h>
 #endif
@@ -23,13 +24,13 @@ static const char sccsid[] = "@(#)os_oflags.c	10.2 (Sleepycat) 10/24/97";
  * __db_oflags --
  *	Convert open(2) flags to DB flags.
  *
- * PUBLIC: int __db_oflags __P((int));
+ * PUBLIC: u_int32_t __db_oflags __P((int));
  */
-int
+u_int32_t
 __db_oflags(oflags)
 	int oflags;
 {
-	int dbflags;
+	u_int32_t dbflags;
 
 	/*
 	 * XXX
@@ -46,3 +47,48 @@ __db_oflags(oflags)
 		dbflags |= DB_TRUNCATE;
 	return (dbflags);
 }
+
+/*
+ * __db_omode --
+ *	Convert a permission string to the correct open(2) flags.
+ *
+ * PUBLIC: int __db_omode __P((const char *));
+ */
+int
+__db_omode(perm)
+	const char *perm;
+{
+	int mode;
+
+#ifndef	S_IRUSR
+#if defined(_WIN32) || defined(WIN16)
+#define	S_IRUSR	S_IREAD		/* R for owner */
+#define	S_IWUSR	S_IWRITE	/* W for owner */
+#define	S_IRGRP	0		/* R for group */
+#define	S_IWGRP	0		/* W for group */
+#define	S_IROTH	0		/* R for other */
+#define	S_IWOTH	0		/* W for other */
+#else
+#define	S_IRUSR	0000400		/* R for owner */
+#define	S_IWUSR	0000200		/* W for owner */
+#define	S_IRGRP	0000040		/* R for group */
+#define	S_IWGRP	0000020		/* W for group */
+#define	S_IROTH	0000004		/* R for other */
+#define	S_IWOTH	0000002		/* W for other */
+#endif /* _WIN32 || WIN16 */
+#endif
+	mode = 0;
+	if (perm[0] == 'r')
+		mode |= S_IRUSR;
+	if (perm[1] == 'w')
+		mode |= S_IWUSR;
+	if (perm[2] == 'r')
+		mode |= S_IRGRP;
+	if (perm[3] == 'w')
+		mode |= S_IWGRP;
+	if (perm[4] == 'r')
+		mode |= S_IROTH;
+	if (perm[5] == 'w')
+		mode |= S_IWOTH;
+	return (mode);
+}
diff --git a/db2/os/os_open.c b/db2/os/os_open.c
index a628765556..e960377ebb 100644
--- a/db2/os/os_open.c
+++ b/db2/os/os_open.c
@@ -1,14 +1,14 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1997
+ * Copyright (c) 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)os_open.c	10.20 (Sleepycat) 11/27/97";
+static const char sccsid[] = "@(#)os_open.c	10.26 (Sleepycat) 5/4/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -25,12 +25,13 @@ static const char sccsid[] = "@(#)os_open.c	10.20 (Sleepycat) 11/27/97";
  * __db_open --
  *	Open a file descriptor.
  *
- * PUBLIC: int __db_open __P((const char *, int, int, int, int *));
+ * PUBLIC: int __db_open __P((const char *, u_int32_t, u_int32_t, int, int *));
  */
 int
 __db_open(name, arg_flags, ok_flags, mode, fdp)
 	const char *name;
-	int arg_flags, ok_flags, mode, *fdp;
+	u_int32_t arg_flags, ok_flags;
+	int mode, *fdp;
 {
 	int fd, flags;
 
@@ -54,7 +55,7 @@ __db_open(name, arg_flags, ok_flags, mode, fdp)
 	else
 		flags |= O_RDWR;
 
-#ifdef _WIN32
+#if defined(_WIN32) || defined(WIN16)
 #ifdef _MSC_VER
 	if (arg_flags & DB_SEQUENTIAL)
 		flags |= _O_SEQUENTIAL;
@@ -80,7 +81,7 @@ __db_open(name, arg_flags, ok_flags, mode, fdp)
 		(void)__os_unlink(name);
 #endif
 
-#if !defined(_WIN32) && !defined(macintosh)
+#if !defined(_WIN32) && !defined(WIN16)
 	/*
 	 * Deny access to any child process; done for Win32 by O_NOINHERIT,
 	 * MacOS has neither child processes nor fd inheritance.
diff --git a/db2/os/os_rpath.c b/db2/os/os_rpath.c
index 44fd4ec9f4..23867b35ac 100644
--- a/db2/os/os_rpath.c
+++ b/db2/os/os_rpath.c
@@ -1,14 +1,14 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1997
+ * Copyright (c) 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)os_rpath.c	10.2 (Sleepycat) 10/24/97";
+static const char sccsid[] = "@(#)os_rpath.c	10.3 (Sleepycat) 4/10/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
diff --git a/db2/os/os_rw.c b/db2/os/os_rw.c
index 48f7fdc5b1..7591041981 100644
--- a/db2/os/os_rw.c
+++ b/db2/os/os_rw.c
@@ -1,14 +1,14 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1997
+ * Copyright (c) 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)os_rw.c	10.6 (Sleepycat) 10/25/97";
+static const char sccsid[] = "@(#)os_rw.c	10.7 (Sleepycat) 4/10/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
diff --git a/db2/os/os_seek.c b/db2/os/os_seek.c
index e27044b626..159425cc27 100644
--- a/db2/os/os_seek.c
+++ b/db2/os/os_seek.c
@@ -1,14 +1,14 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1997
+ * Copyright (c) 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)os_seek.c	10.6 (Sleepycat) 10/25/97";
+static const char sccsid[] = "@(#)os_seek.c	10.9 (Sleepycat) 4/19/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -24,19 +24,21 @@ static const char sccsid[] = "@(#)os_seek.c	10.6 (Sleepycat) 10/25/97";
  * __os_seek --
  *	Seek to a page/byte offset in the file.
  *
- * PUBLIC: int __os_seek __P((int, size_t, db_pgno_t, u_long, int));
+ * PUBLIC: int __os_seek __P((int, size_t, db_pgno_t, u_int32_t, int, int));
  */
 int
-__os_seek(fd, pgsize, pageno, relative, whence)
+__os_seek(fd, pgsize, pageno, relative, isrewind, whence)
 	int fd;
 	size_t pgsize;
 	db_pgno_t pageno;
-	u_long relative;
-	int whence;
+	u_int32_t relative;
+	int isrewind, whence;
 {
 	off_t offset;
 
-	offset = pgsize * pageno + relative;
+	offset = (off_t)pgsize * pageno + relative;
+	if (isrewind)
+		offset = -offset;
 
 	return (lseek(fd, offset, whence) == -1 ? errno : 0);
 }
diff --git a/db2/os/os_sleep.c b/db2/os/os_sleep.c
index 2d2cb71f6d..6a5b91f5c4 100644
--- a/db2/os/os_sleep.c
+++ b/db2/os/os_sleep.c
@@ -1,14 +1,14 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1997
+ * Copyright (c) 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)os_sleep.c	10.8 (Sleepycat) 10/25/97";
+static const char sccsid[] = "@(#)os_sleep.c	10.10 (Sleepycat) 4/27/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -42,7 +42,8 @@ __os_sleep(secs, usecs)
 	struct timeval t;
 
 	/* Don't require that the values be normalized. */
-	for (; usecs >= 1000000; ++secs, usecs -= 1000000);
+	for (; usecs >= 1000000; ++secs, usecs -= 1000000)
+		;
 
 	/*
 	 * It's important that we yield the processor here so that other
diff --git a/db2/os/os_spin.c b/db2/os/os_spin.c
index fb693c2848..2fd21d018b 100644
--- a/db2/os/os_spin.c
+++ b/db2/os/os_spin.c
@@ -1,14 +1,14 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1997
+ * Copyright (c) 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)os_spin.c	10.3 (Sleepycat) 11/25/97";
+static const char sccsid[] = "@(#)os_spin.c	10.7 (Sleepycat) 5/20/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -29,28 +29,33 @@ static const char sccsid[] = "@(#)os_spin.c	10.3 (Sleepycat) 11/25/97";
 int
 __os_spin()
 {
-	extern int __db_tsl_spins;
+	static long sys_val;
 
 	/* If the application specified the spins, use its value. */
-	if (__db_tsl_spins != 0)
-		return (__db_tsl_spins);
+	if (DB_GLOBAL(db_tsl_spins) != 0)
+		return (DB_GLOBAL(db_tsl_spins));
+
+	/* If we've already figured this out, return the value. */
+	if (sys_val != 0)
+		return (sys_val);
 
 	/*
 	 * XXX
-	 * Sysconf: Solaris uses _SC_NPROCESSORS_ONLN to return the number
-	 * of online processors.  I don't know if this call is portable or
-	 * not.
+	 * Solaris and Linux use _SC_NPROCESSORS_ONLN to return the number of
+	 * online processors.  We don't want to repeatedly call sysconf because
+	 * it's quite expensive (requiring multiple filesystem accesses) under
+	 * Debian Linux.
+	 *
+	 * Spin 50 times per processor -- we have anecdotal evidence that this
+	 * is a reasonable value.
 	 */
 #if defined(HAVE_SYSCONF) && defined(_SC_NPROCESSORS_ONLN)
-	{
-		long sys_val;
-
-		sys_val = sysconf(_SC_NPROCESSORS_ONLN);
-		if (sys_val > 0)
-			return (sys_val * 50);
-	}
+	if ((sys_val = sysconf(_SC_NPROCESSORS_ONLN)) > 1)
+		sys_val *= 50;
+	else
+		sys_val = 1;
+#else
+	sys_val = 1;
 #endif
-
-	/* Default to a single processor. */
-	return (1);
+	return (sys_val);
 }
diff --git a/db2/os/os_stat.c b/db2/os/os_stat.c
index 73600b6336..e7d3f24174 100644
--- a/db2/os/os_stat.c
+++ b/db2/os/os_stat.c
@@ -1,14 +1,14 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1997
+ * Copyright (c) 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)os_stat.c	10.11 (Sleepycat) 1/8/98";
+static const char sccsid[] = "@(#)os_stat.c	10.15 (Sleepycat) 4/27/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -16,11 +16,9 @@ static const char sccsid[] = "@(#)os_stat.c	10.11 (Sleepycat) 1/8/98";
 #include <sys/stat.h>
 
 #include <errno.h>
-#include <string.h>
 #endif
 
 #include "db_int.h"
-#include "common_ext.h"
 
 /*
  * __os_exists --
@@ -37,8 +35,17 @@ __os_exists(path, isdirp)
 
 	if (stat(path, &sb) != 0)
 		return (errno);
+
+#if !defined(S_ISDIR) || defined(STAT_MACROS_BROKEN)
+#if defined(_WIN32) || defined(WIN16)
+#define	S_ISDIR(m)	(_S_IFDIR & (m))
+#else
+#define	S_ISDIR(m)	(((m) & 0170000) == 0040000)
+#endif
+#endif
 	if (isdirp != NULL)
 		*isdirp = S_ISDIR(sb.st_mode);
+
 	return (0);
 }
 
@@ -69,10 +76,16 @@ __os_ioinfo(path, fd, mbytesp, bytesp, iosizep)
 	if (bytesp != NULL)
 		*bytesp = sb.st_size % MEGABYTE;
 
-	/* Return the underlying filesystem blocksize, if available. */
+	/*
+	 * Return the underlying filesystem blocksize, if available.
+	 *
+	 * XXX
+	 * Check for a 0 size -- HP's MPE architecture has st_blksize,
+	 * but it's always 0.
+	 */
 #ifdef HAVE_ST_BLKSIZE
-	if (iosizep != NULL)
-		*iosizep = sb.st_blksize;
+	if (iosizep != NULL && (*iosizep = sb.st_blksize) == 0)
+		*iosizep = DB_DEF_IOSIZE;
 #else
 	if (iosizep != NULL)
 		*iosizep = DB_DEF_IOSIZE;
diff --git a/db2/os/os_unlink.c b/db2/os/os_unlink.c
index 473ce77d39..3a1fa3ff99 100644
--- a/db2/os/os_unlink.c
+++ b/db2/os/os_unlink.c
@@ -1,14 +1,14 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1997
+ * Copyright (c) 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)os_unlink.c	10.4 (Sleepycat) 10/28/97";
+static const char sccsid[] = "@(#)os_unlink.c	10.5 (Sleepycat) 4/10/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
diff --git a/db2/progs/db_archive/db_archive.c b/db2/progs/db_archive/db_archive.c
index a9c6c28e70..691824c2ab 100644
--- a/db2/progs/db_archive/db_archive.c
+++ b/db2/progs/db_archive/db_archive.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 
@@ -9,9 +9,9 @@
 
 #ifndef lint
 static const char copyright[] =
-"@(#) Copyright (c) 1997\n\
+"@(#) Copyright (c) 1996, 1997, 1998\n\
 	Sleepycat Software Inc.  All rights reserved.\n";
-static const char sccsid[] = "@(#)db_archive.c	10.15 (Sleepycat) 8/27/97";
+static const char sccsid[] = "@(#)db_archive.c	10.17 (Sleepycat) 4/10/98";
 #endif
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -50,7 +50,8 @@ main(argc, argv)
 	extern char *optarg;
 	extern int optind;
 	DB_ENV *dbenv;
-	int ch, flags, verbose;
+	u_int32_t flags;
+	int ch, verbose;
 	char *home, **list;
 
 	flags = verbose = 0;
diff --git a/db2/progs/db_checkpoint/db_checkpoint.c b/db2/progs/db_checkpoint/db_checkpoint.c
index 3157a52666..74f95ccce2 100644
--- a/db2/progs/db_checkpoint/db_checkpoint.c
+++ b/db2/progs/db_checkpoint/db_checkpoint.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 
@@ -9,9 +9,9 @@
 
 #ifndef lint
 static const char copyright[] =
-"@(#) Copyright (c) 1997\n\
+"@(#) Copyright (c) 1996, 1997, 1998\n\
 	Sleepycat Software Inc.  All rights reserved.\n";
-static const char sccsid[] = "@(#)db_checkpoint.c	10.14 (Sleepycat) 1/17/98";
+static const char sccsid[] = "@(#)db_checkpoint.c	10.17 (Sleepycat) 5/3/98";
 #endif
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -37,7 +37,6 @@ static const char sccsid[] = "@(#)db_checkpoint.c	10.14 (Sleepycat) 1/17/98";
 #include "common_ext.h"
 
 char	*check __P((DB_ENV *, long, long));
-int	 checkpoint __P((DB_ENV *, char *, int));
 DB_ENV	*db_init __P((char *));
 int	 logpid __P((char *, int));
 int	 main __P((int, char *[]));
@@ -58,26 +57,39 @@ main(argc, argv)
 	extern int optind;
 	DB_ENV *dbenv;
 	time_t now;
-	long kbytes, minutes, seconds;
-	int ch, eval, verbose;
+	long argval;
+	u_int32_t kbytes, minutes, seconds;
+	int ch, eval, once, verbose;
 	char *home, *logfile;
 
-	home = logfile = NULL;
+	/*
+	 * XXX
+	 * Don't allow a fully unsigned 32-bit number, some compilers get
+	 * upset and require it to be specified in hexadecimal and so on.
+	 */
+#define	MAX_UINT32_T	2147483647
+
 	kbytes = minutes = 0;
-	verbose = 0;
-	while ((ch = getopt(argc, argv, "h:k:L:p:v")) != EOF)
+	once = verbose = 0;
+	home = logfile = NULL;
+	while ((ch = getopt(argc, argv, "1h:k:L:p:v")) != EOF)
 		switch (ch) {
+		case '1':
+			once = 1;
+			break;
 		case 'h':
 			home = optarg;
 			break;
 		case 'k':
-			get_long(optarg, 1, LONG_MAX, &kbytes);
+			get_long(optarg, 1, (long)MAX_UINT32_T, &argval);
+			kbytes = argval;
 			break;
 		case 'L':
 			logfile = optarg;
 			break;
 		case 'p':
-			get_long(optarg, 1, LONG_MAX, &minutes);
+			get_long(optarg, 1, (long)MAX_UINT32_T, &argval);
+			minutes = argval;
 			break;
 		case 'v':
 			verbose = 1;
@@ -92,8 +104,8 @@ main(argc, argv)
 	if (argc != 0)
 		usage();
 
-	if (kbytes == 0 && minutes == 0) {
-		warnx("at least one of -k and -p must be specified");
+	if (once == 0 && kbytes == 0 && minutes == 0) {
+		warnx("at least one of -1, -k and -p must be specified");
 		usage();
 	}
 
@@ -113,8 +125,6 @@ main(argc, argv)
 	eval = 0;
 	seconds = kbytes != 0 ? 30 : minutes * 60;
 	while (!interrupted) {
-		(void)__db_sleep(seconds, 0);
-
 		if (verbose) {
 			(void)time(&now);
 			printf("checkpoint: %s", ctime(&now));
@@ -134,6 +144,11 @@ main(argc, argv)
 			__db_err(dbenv, "checkpoint: %s", strerror(errno));
 			break;
 		}
+
+		if (once)
+			break;
+
+		(void)__db_sleep(seconds, 0);
 	}
 
 	if (logfile != NULL && logpid(logfile, 0))
@@ -244,6 +259,6 @@ void
 usage()
 {
 	(void)fprintf(stderr,
-    "usage: db_checkpoint [-v] [-h home] [-k kbytes] [-L file] [-p min]\n");
+    "usage: db_checkpoint [-1v] [-h home] [-k kbytes] [-L file] [-p min]\n");
 	exit(1);
 }
diff --git a/db2/progs/db_deadlock/db_deadlock.c b/db2/progs/db_deadlock/db_deadlock.c
index 97fa8ca4f6..49a52416dd 100644
--- a/db2/progs/db_deadlock/db_deadlock.c
+++ b/db2/progs/db_deadlock/db_deadlock.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 
@@ -9,9 +9,9 @@
 
 #ifndef lint
 static const char copyright[] =
-"@(#) Copyright (c) 1997\n\
+"@(#) Copyright (c) 1996, 1997, 1998\n\
 	Sleepycat Software Inc.  All rights reserved.\n";
-static const char sccsid[] = "@(#)db_deadlock.c	10.17 (Sleepycat) 1/15/98";
+static const char sccsid[] = "@(#)db_deadlock.c	10.19 (Sleepycat) 4/10/98";
 #endif
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -54,13 +54,15 @@ main(argc, argv)
 	u_int32_t atype;
 	time_t now;
 	long usecs;
-	int ch, flags, verbose;
+	u_int32_t flags;
+	int ch, verbose;
 	char *home, *logfile;
 
 	atype = DB_LOCK_DEFAULT;
 	home = logfile = NULL;
 	usecs = 0;
-	flags = verbose = 0;
+	flags = 0;
+	verbose = 0;
 	while ((ch = getopt(argc, argv, "a:h:L:t:vw")) != EOF)
 		switch (ch) {
 		case 'a':
diff --git a/db2/progs/db_dump/db_dump.c b/db2/progs/db_dump/db_dump.c
index c09719059b..f532bc2779 100644
--- a/db2/progs/db_dump/db_dump.c
+++ b/db2/progs/db_dump/db_dump.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 
@@ -9,9 +9,9 @@
 
 #ifndef lint
 static const char copyright[] =
-"@(#) Copyright (c) 1997\n\
+"@(#) Copyright (c) 1996, 1997, 1998\n\
 	Sleepycat Software Inc.  All rights reserved.\n";
-static const char sccsid[] = "@(#)db_dump.c	10.16 (Sleepycat) 8/27/97";
+static const char sccsid[] = "@(#)db_dump.c	10.19 (Sleepycat) 5/23/98";
 #endif
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -35,8 +35,6 @@ static const char sccsid[] = "@(#)db_dump.c	10.16 (Sleepycat) 8/27/97";
 
 void	configure __P((char *));
 DB_ENV *db_init __P((char *));
-void	dbt_dump __P((DBT *));
-void	dbt_print __P((DBT *));
 int	main __P((int, char *[]));
 void	pheader __P((DB *, int));
 void	usage __P((void));
@@ -55,11 +53,11 @@ main(argc, argv)
 	DBC *dbcp;
 	DBT key, data;
 	DB_ENV *dbenv;
-	int ch, dflag, pflag;
+	int ch, checkprint, dflag;
 	char *home;
 
 	home = NULL;
-	dflag = pflag = 0;
+	checkprint = dflag = 0;
 	while ((ch = getopt(argc, argv, "df:h:p")) != EOF)
 		switch (ch) {
 		case 'd':
@@ -73,7 +71,7 @@ main(argc, argv)
 			home = optarg;
 			break;
 		case 'p':
-			pflag = 1;
+			checkprint = 1;
 			break;
 		case '?':
 		default:
@@ -89,7 +87,7 @@ main(argc, argv)
 		if (home != NULL)
 			errx(1,
 			    "the -d and -h options may not both be specified");
-		if (pflag)
+		if (checkprint)
 			errx(1,
 			    "the -d and -p options may not both be specified");
 	}
@@ -116,23 +114,19 @@ main(argc, argv)
 	}
 
 	/* Print out the header. */
-	pheader(dbp, pflag);
+	pheader(dbp, checkprint);
 
 	/* Print out the key/data pairs. */
 	memset(&key, 0, sizeof(key));
 	memset(&data, 0, sizeof(data));
-	if (pflag)
-		while ((errno = dbcp->c_get(dbcp, &key, &data, DB_NEXT)) == 0) {
-			if (dbp->type != DB_RECNO)
-				dbt_print(&key);
-			dbt_print(&data);
-		}
-	else
-		while ((errno = dbcp->c_get(dbcp, &key, &data, DB_NEXT)) == 0) {
-			if (dbp->type != DB_RECNO)
-				dbt_dump(&key);
-			dbt_dump(&data);
-		}
+	while ((errno = dbcp->c_get(dbcp, &key, &data, DB_NEXT)) == 0) {
+		if (dbp->type != DB_RECNO &&
+		    (errno = __db_prdbt(&key, checkprint, stdout)) != 0)
+			break;
+		if ((errno = __db_prdbt(&data, checkprint, stdout)) != 0)
+			break;
+	}
+
 	if (errno != DB_NOTFOUND)
 		err(1, "cursor get");
 
@@ -229,47 +223,6 @@ pheader(dbp, pflag)
 	printf("HEADER=END\n");
 }
 
-static char hex[] = "0123456789abcdef";
-
-/*
- * dbt_dump --
- *	Write out a key or data item using byte values.
- */
-void
-dbt_dump(dbtp)
-	DBT *dbtp;
-{
-	u_int32_t len;
-	u_int8_t *p;
-
-	for (len = dbtp->size, p = dbtp->data; len--; ++p)
-		(void)printf("%c%c",
-		    hex[(u_int8_t)(*p & 0xf0) >> 4], hex[*p & 0x0f]);
-	printf("\n");
-}
-
-/*
- * dbt_print --
- *	Write out a key or data item using printable characters.
- */
-void
-dbt_print(dbtp)
-	DBT *dbtp;
-{
-	u_int32_t len;
-	u_int8_t *p;
-
-	for (len = dbtp->size, p = dbtp->data; len--; ++p)
-		if (isprint(*p)) {
-			if (*p == '\\')
-				(void)printf("\\");
-			(void)printf("%c", *p);
-		} else
-			(void)printf("\\%c%c",
-			    hex[(u_int8_t)(*p & 0xf0) >> 4], hex[*p & 0x0f]);
-	printf("\n");
-}
-
 /*
  * usage --
  *	Display the usage message.
diff --git a/db2/progs/db_dump185/db_dump185.c b/db2/progs/db_dump185/db_dump185.c
index 5ec7673f1b..17451100f9 100644
--- a/db2/progs/db_dump185/db_dump185.c
+++ b/db2/progs/db_dump185/db_dump185.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 
@@ -9,9 +9,9 @@
 
 #ifndef lint
 static const char copyright[] =
-"@(#) Copyright (c) 1997\n\
+"@(#) Copyright (c) 1996, 1997, 1998\n\
 	Sleepycat Software Inc.  All rights reserved.\n";
-static const char sccsid[] = "@(#)db_dump185.c	10.8 (Sleepycat) 9/21/97";
+static const char sccsid[] = "@(#)db_dump185.c	10.10 (Sleepycat) 4/10/98";
 #endif
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -30,7 +30,7 @@ static const char sccsid[] = "@(#)db_dump185.c	10.8 (Sleepycat) 9/21/97";
 #include "clib_ext.h"
 
 /* Hash Table Information */
-typedef struct hashhdr {		/* Disk resident portion */
+typedef struct hashhdr185 {		/* Disk resident portion */
 	int		magic;		/* Magic NO for hash tables */
 	int		version;	/* Version ID */
 	u_int32_t	lorder;		/* Byte Order */
@@ -48,11 +48,34 @@ typedef struct hashhdr {		/* Disk resident portion */
 					 * table */
 	int		ffactor;	/* Fill factor */
 	int		nkeys;		/* Number of keys in hash table */
-} HASHHDR;
+} HASHHDR185;
+typedef struct htab185	 {		/* Memory resident data structure */
+	HASHHDR185 	hdr;		/* Header */
+} HTAB185;
 
-typedef struct htab	 {		/* Memory resident data structure */
-	HASHHDR 	hdr;		/* Header */
-} HTAB;
+/* Hash Table Information */
+typedef struct hashhdr186 {	/* Disk resident portion */
+	int32_t	magic;		/* Magic NO for hash tables */
+	int32_t	version;	/* Version ID */
+	int32_t	lorder;		/* Byte Order */
+	int32_t	bsize;		/* Bucket/Page Size */
+	int32_t	bshift;		/* Bucket shift */
+	int32_t	ovfl_point;	/* Where overflow pages are being allocated */
+	int32_t	last_freed;	/* Last overflow page freed */
+	int32_t	max_bucket;	/* ID of Maximum bucket in use */
+	int32_t	high_mask;	/* Mask to modulo into entire table */
+	int32_t	low_mask;	/* Mask to modulo into lower half of table */
+	int32_t	ffactor;	/* Fill factor */
+	int32_t	nkeys;		/* Number of keys in hash table */
+	int32_t	hdrpages;	/* Size of table header */
+	int32_t	h_charkey;	/* value of hash(CHARKEY) */
+#define NCACHED	32		/* number of bit maps and spare points */
+	int32_t	spares[NCACHED];/* spare pages for overflow */
+	u_int16_t	bitmaps[NCACHED];	/* address of overflow page bitmaps */
+} HASHHDR186;
+typedef struct htab186	 {		/* Memory resident data structure */
+	HASHHDR186 	hdr;		/* Header */
+} HTAB186;
 
 typedef struct _epgno {
 	u_int32_t pgno;			/* the page number */
@@ -149,8 +172,8 @@ typedef struct _btree {
 	u_int32_t flags;
 } BTREE;
 
-void	db_185_btree __P((DB *, int));
-void	db_185_hash __P((DB *, int));
+void	db_btree __P((DB *, int));
+void	db_hash __P((DB *, int));
 void	dbt_dump __P((DBT *));
 void	dbt_print __P((DBT *));
 int	main __P((int, char *[]));
@@ -193,9 +216,9 @@ main(argc, argv)
 	if ((dbp = dbopen(argv[0], O_RDONLY, 0, DB_BTREE, NULL)) == NULL) {
 		if ((dbp = dbopen(argv[0], O_RDONLY, 0, DB_HASH, NULL)) == NULL)
 			err(1, "%s", argv[0]);
-		db_185_hash(dbp, pflag);
+		db_hash(dbp, pflag);
 	} else
-		db_185_btree(dbp, pflag);
+		db_btree(dbp, pflag);
 
 	/*
 	 * !!!
@@ -219,36 +242,43 @@ main(argc, argv)
 }
 
 /*
- * db_185_hash --
+ * db_hash --
  *	Dump out hash header information.
  */
 void
-db_185_hash(dbp, pflag)
+db_hash(dbp, pflag)
 	DB *dbp;
 	int pflag;
 {
-	HTAB *hashp;
-
-	hashp = dbp->internal;
+	HTAB185 *hash185p;
+	HTAB186 *hash186p;
 
 	printf("format=%s\n", pflag ? "print" : "bytevalue");
 	printf("type=hash\n");
-	printf("h_ffactor=%lu\n", (u_long)hashp->hdr.ffactor);
-#ifdef NOT_AVAILABLE_IN_DB_185
-	printf("h_nelem=%lu\n", (u_long)hashp->hdr.nelem);
-#endif
-	if (hashp->hdr.lorder != 0)
-		printf("db_lorder=%lu\n", (u_long)hashp->hdr.lorder);
-	printf("db_pagesize=%lu\n", (u_long)hashp->hdr.bsize);
+
+	/* DB 1.85 was version 2, DB 1.86 was version 3. */
+	hash185p = dbp->internal;
+	if (hash185p->hdr.version > 2) {
+		hash186p = dbp->internal;
+		printf("h_ffactor=%lu\n", (u_long)hash186p->hdr.ffactor);
+		if (hash186p->hdr.lorder != 0)
+			printf("db_lorder=%lu\n", (u_long)hash186p->hdr.lorder);
+		printf("db_pagesize=%lu\n", (u_long)hash186p->hdr.bsize);
+	} else {
+		printf("h_ffactor=%lu\n", (u_long)hash185p->hdr.ffactor);
+		if (hash185p->hdr.lorder != 0)
+			printf("db_lorder=%lu\n", (u_long)hash185p->hdr.lorder);
+		printf("db_pagesize=%lu\n", (u_long)hash185p->hdr.bsize);
+	}
 	printf("HEADER=END\n");
 }
 
 /*
- * db_185_btree --
+ * db_btree --
  *	Dump out btree header information.
  */
 void
-db_185_btree(dbp, pflag)
+db_btree(dbp, pflag)
 	DB *dbp;
 	int pflag;
 {
diff --git a/db2/progs/db_load/db_load.c b/db2/progs/db_load/db_load.c
index afa5730c25..5ac17753f5 100644
--- a/db2/progs/db_load/db_load.c
+++ b/db2/progs/db_load/db_load.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 
@@ -9,14 +9,13 @@
 
 #ifndef lint
 static const char copyright[] =
-"@(#) Copyright (c) 1997\n\
+"@(#) Copyright (c) 1996, 1997, 1998\n\
 	Sleepycat Software Inc.  All rights reserved.\n";
-static const char sccsid[] = "@(#)db_load.c	10.15 (Sleepycat) 12/29/97";
+static const char sccsid[] = "@(#)db_load.c	10.20 (Sleepycat) 6/2/98";
 #endif
 
 #ifndef NO_SYSTEM_INCLUDES
 #include <sys/types.h>
-#include <sys/stat.h>
 
 #include <errno.h>
 #include <limits.h>
@@ -27,6 +26,8 @@ static const char sccsid[] = "@(#)db_load.c	10.15 (Sleepycat) 12/29/97";
 #endif
 
 #include "db_int.h"
+#include "db_page.h"
+#include "db_am.h"
 #include "clib_ext.h"
 
 void	badnum __P((void));
@@ -55,7 +56,8 @@ main(argc, argv)
 	DB_ENV *dbenv;
 	DB_INFO dbinfo;
 	db_recno_t recno;
-	int ch, no_header, pflag;
+	u_int32_t db_nooverwrite;
+	int ch, checkprint, existed, no_header;
 	char **clist, **clp, *home;
 
 	/* Allocate enough room for configuration arguments. */
@@ -63,9 +65,10 @@ main(argc, argv)
 		err(1, NULL);
 
 	home = NULL;
-	no_header = 0;
+	db_nooverwrite = 0;
+	existed = checkprint = no_header = 0;
 	argtype = dbtype = DB_UNKNOWN;
-	while ((ch = getopt(argc, argv, "c:f:h:Tt:")) != EOF)
+	while ((ch = getopt(argc, argv, "c:f:h:nTt:")) != EOF)
 		switch (ch) {
 		case 'c':
 			*clp++ = optarg;
@@ -77,8 +80,11 @@ main(argc, argv)
 		case 'h':
 			home = optarg;
 			break;
+		case 'n':
+			db_nooverwrite = DB_NOOVERWRITE;
+			break;
 		case 'T':
-			no_header = pflag = 1;
+			no_header = checkprint = 1;
 			break;
 		case 't':
 			if (strcmp(optarg, "btree") == 0) {
@@ -105,18 +111,18 @@ main(argc, argv)
 	if (argc != 1)
 		usage();
 
-	/* Initialize the environment. */
-	dbenv = db_init(home);
-	memset(&dbinfo, 0, sizeof(DB_INFO));
+	/* Initialize the environment if the user specified one. */
+	dbenv = home == NULL ? NULL : db_init(home);
 
 	/*
 	 * Read the header.  If there isn't any header, we're expecting flat
-	 * text, set the pflag appropriately.
+	 * text, set the checkprint flag appropriately.
 	 */
+	memset(&dbinfo, 0, sizeof(DB_INFO));
 	if (no_header)
 		dbtype = argtype;
 	else {
-		rheader(&dbtype, &pflag, &dbinfo);
+		rheader(&dbtype, &checkprint, &dbinfo);
 		if (argtype != DB_UNKNOWN) {
 			/* Conversion to/from recno is prohibited. */
 			if ((dbtype == DB_RECNO && argtype != DB_RECNO) ||
@@ -133,17 +139,20 @@ main(argc, argv)
 	configure(&dbinfo, clist);
 
 	/* Open the DB file. */
-	if ((errno = db_open(argv[0], dbtype, DB_CREATE | DB_TRUNCATE,
-	    S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH,
-	    dbenv, &dbinfo, &dbp)) != 0)
+	if ((errno = db_open(argv[0], dbtype, DB_CREATE,
+	    __db_omode("rwrwrw"), dbenv, &dbinfo, &dbp)) != 0)
 		err(1, "%s", argv[0]);
 
 	/* Initialize the key/data pair. */
 	memset(&key, 0, sizeof(DBT));
-	if ((key.data = (void *)malloc(key.ulen = 1024)) == NULL) {
-		errno = ENOMEM;
-		err(1, NULL);
-	}
+	if (dbtype == DB_RECNO) {
+		key.data = &recno;
+		key.size = sizeof(recno);
+	} else
+		if ((key.data = (void *)malloc(key.ulen = 1024)) == NULL) {
+			errno = ENOMEM;
+			err(1, NULL);
+		}
 	memset(&data, 0, sizeof(DBT));
 	if ((data.data = (void *)malloc(data.ulen = 1024)) == NULL) {
 		errno = ENOMEM;
@@ -151,22 +160,17 @@ main(argc, argv)
 	}
 
 	/* Get each key/data pair and add them to the database. */
-	if (dbtype == DB_RECNO) {
-		key.data = &recno;
-		key.size = sizeof(recno);
-		for (recno = 1;; ++recno) {
-			if (pflag) {
+	for (recno = 1;; ++recno) {
+		if (dbtype == DB_RECNO)
+			if (checkprint) {
 				if (dbt_rprint(&data))
 					break;
-			} else
+			} else {
 				if (dbt_rdump(&data))
 					break;
-			if ((errno = dbp->put(dbp, NULL, &key, &data, 0)) != 0)
-				err(1, "%s", argv[0]);
-		}
-	} else
-		for (;;) {
-			if (pflag) {
+			}
+		else
+			if (checkprint) {
 				if (dbt_rprint(&key))
 					break;
 				if (dbt_rprint(&data))
@@ -177,13 +181,26 @@ main(argc, argv)
 				if (dbt_rdump(&data))
 fmt:					err(1, "odd number of key/data pairs");
 			}
-			if ((errno = dbp->put(dbp, NULL, &key, &data, 0)) != 0)
-				err(1, "%s", argv[0]);
+		switch (errno =
+		    dbp->put(dbp, NULL, &key, &data, db_nooverwrite)) {
+		case 0:
+			break;
+		case DB_KEYEXIST:
+			existed = 1;
+			warnx("%s: line %d: key already exists, not loaded:",
+			    argv[0],
+			    dbtype == DB_RECNO ? recno : recno * 2 - 1);
+			(void)__db_prdbt(&key, checkprint, stderr);
+			break;
+		default:
+			err(1, "%s", argv[0]);
+			/* NOTREACHED */
 		}
+	}
 
 	if ((errno = dbp->close(dbp, 0)) != 0)
 		err(1, "%s", argv[0]);
-	return (0);
+	return (existed ? 1 : 0);
 }
 
 /*
@@ -200,13 +217,26 @@ db_init(home)
 		errno = ENOMEM;
 		err(1, NULL);
 	}
-	dbenv->db_errfile = stderr;
-	dbenv->db_errpfx = progname;
 
-	if ((errno =
-	    db_appinit(home, NULL, dbenv, DB_CREATE | DB_USE_ENVIRON)) != 0)
-		err(1, "db_appinit");
-	return (dbenv);
+	/*
+	 * The database may be live; try to use the shared regions.
+	 *
+	 * If it works, we're done.  Set the error output options so that
+	 * future errors are correctly reported.
+	 */
+	if ((errno = db_appinit(home, NULL, dbenv, DB_INIT_LOCK |
+	    DB_INIT_LOG | DB_INIT_MPOOL | DB_INIT_TXN | DB_USE_ENVIRON)) == 0) {
+		dbenv->db_errfile = stderr;
+		dbenv->db_errpfx = progname;
+		return (dbenv);
+	}
+
+	/*
+	 * If the db_appinit fails, assume the database isn't live, and don't
+	 * bother with an environment.
+	 */
+	free(dbenv);
+	return (NULL);
 }
 
 #define	FLAG(name, value, keyword, flag)				\
@@ -279,16 +309,16 @@ configure(dbinfop, clp)
  *	Read the header message.
  */
 void
-rheader(dbtypep, pflagp, dbinfop)
+rheader(dbtypep, checkprintp, dbinfop)
 	DBTYPE *dbtypep;
-	int *pflagp;
+	int *checkprintp;
 	DB_INFO *dbinfop;
 {
 	long lineno, val;
 	char name[256], value[256];
 
 	*dbtypep = DB_UNKNOWN;
-	*pflagp = 0;
+	*checkprintp = 0;
 
 	for (lineno = 1;; ++lineno) {
 		/* If we don't see the expected information, it's an error. */
@@ -301,11 +331,11 @@ rheader(dbtypep, pflagp, dbinfop)
 
 		if (strcmp(name, "format") == 0) {
 			if (strcmp(value, "bytevalue") == 0) {
-				*pflagp = 0;
+				*checkprintp = 0;
 				continue;
 			}
 			if (strcmp(value, "print") == 0) {
-				*pflagp = 1;
+				*checkprintp = 1;
 				continue;
 			}
 			errx(1, "line %d: unknown format", lineno);
@@ -390,39 +420,6 @@ dbt_rprint(dbtp)
 }
 
 /*
- * digitize --
- *	Convert a character to an integer.
- */
-int
-digitize(c)
-	int c;
-{
-	switch (c) {			/* Don't depend on ASCII ordering. */
-	case '0': return (0);
-	case '1': return (1);
-	case '2': return (2);
-	case '3': return (3);
-	case '4': return (4);
-	case '5': return (5);
-	case '6': return (6);
-	case '7': return (7);
-	case '8': return (8);
-	case '9': return (9);
-	case 'a': return (10);
-	case 'b': return (11);
-	case 'c': return (12);
-	case 'd': return (13);
-	case 'e': return (14);
-	case 'f': return (15);
-	}
-
-	err(1, "unexpected hexadecimal value");
-	/* NOTREACHED */
-
-	return (0);
-}
-
-/*
  * dbt_rdump --
  *	Read a byte dump line into a DBT structure.
  */
@@ -459,6 +456,39 @@ dbt_rdump(dbtp)
 }
 
 /*
+ * digitize --
+ *	Convert a character to an integer.
+ */
+int
+digitize(c)
+	int c;
+{
+	switch (c) {			/* Don't depend on ASCII ordering. */
+	case '0': return (0);
+	case '1': return (1);
+	case '2': return (2);
+	case '3': return (3);
+	case '4': return (4);
+	case '5': return (5);
+	case '6': return (6);
+	case '7': return (7);
+	case '8': return (8);
+	case '9': return (9);
+	case 'a': return (10);
+	case 'b': return (11);
+	case 'c': return (12);
+	case 'd': return (13);
+	case 'e': return (14);
+	case 'f': return (15);
+	}
+
+	err(1, "unexpected hexadecimal value");
+	/* NOTREACHED */
+
+	return (0);
+}
+
+/*
  * badnum --
  *	Display the bad number message.
  */
@@ -475,7 +505,8 @@ badnum()
 void
 usage()
 {
-	(void)fprintf(stderr,
-"usage: db_load [-T]\n\t[-c name=value] [-f file] [-h home] [-t btree | hash] db_file\n");
+	(void)fprintf(stderr, "%s\n\t%s\n",
+	    "usage: db_load [-nT]",
+    "[-c name=value] [-f file] [-h home] [-t btree | hash | recno] db_file");
 	exit(1);
 }
diff --git a/db2/progs/db_printlog/db_printlog.c b/db2/progs/db_printlog/db_printlog.c
index 24554bcd14..3b48ad9643 100644
--- a/db2/progs/db_printlog/db_printlog.c
+++ b/db2/progs/db_printlog/db_printlog.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 
@@ -9,9 +9,9 @@
 
 #ifndef lint
 static const char copyright[] =
-"@(#) Copyright (c) 1997\n\
+"@(#) Copyright (c) 1996, 1997, 1998\n\
 	Sleepycat Software Inc.  All rights reserved.\n";
-static const char sccsid[] = "@(#)db_printlog.c	10.11 (Sleepycat) 1/8/98";
+static const char sccsid[] = "@(#)db_printlog.c	10.12 (Sleepycat) 4/10/98";
 #endif
 
 #ifndef NO_SYSTEM_INCLUDES
diff --git a/db2/progs/db_recover/db_recover.c b/db2/progs/db_recover/db_recover.c
index f902fed8c0..a2845725b8 100644
--- a/db2/progs/db_recover/db_recover.c
+++ b/db2/progs/db_recover/db_recover.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 
@@ -9,9 +9,9 @@
 
 #ifndef lint
 static const char copyright[] =
-"@(#) Copyright (c) 1997\n\
+"@(#) Copyright (c) 1996, 1997, 1998\n\
 	Sleepycat Software Inc.  All rights reserved.\n";
-static const char sccsid[] = "@(#)db_recover.c	10.17 (Sleepycat) 1/15/98";
+static const char sccsid[] = "@(#)db_recover.c	10.19 (Sleepycat) 4/10/98";
 #endif
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -29,7 +29,7 @@ static const char sccsid[] = "@(#)db_recover.c	10.17 (Sleepycat) 1/15/98";
 #include "common_ext.h"
 #include "clib_ext.h"
 
-DB_ENV	*db_init __P((char *, int, int));
+DB_ENV	*db_init __P((char *, u_int32_t, int));
 int	 main __P((int, char *[]));
 void	 usage __P((void));
 
@@ -45,7 +45,8 @@ main(argc, argv)
 	extern int optind;
 	DB_ENV *dbenv;
 	time_t now;
-	int ch, flags, verbose;
+	u_int32_t flags;
+	int ch, verbose;
 	char *home;
 
 	home = NULL;
@@ -88,10 +89,11 @@ main(argc, argv)
 DB_ENV *
 db_init(home, flags, verbose)
 	char *home;
-	int flags, verbose;
+	u_int32_t flags;
+	int verbose;
 {
 	DB_ENV *dbenv;
-	int local_flags;
+	u_int32_t local_flags;
 
 	if ((dbenv = (DB_ENV *)calloc(sizeof(DB_ENV), 1)) == NULL) {
 		errno = ENOMEM;
diff --git a/db2/progs/db_stat/db_stat.c b/db2/progs/db_stat/db_stat.c
index 5295f011a6..f2551805b0 100644
--- a/db2/progs/db_stat/db_stat.c
+++ b/db2/progs/db_stat/db_stat.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 
@@ -9,9 +9,9 @@
 
 #ifndef lint
 static const char copyright[] =
-"@(#) Copyright (c) 1997\n\
+"@(#) Copyright (c) 1996, 1997, 1998\n\
 	Sleepycat Software Inc.  All rights reserved.\n";
-static const char sccsid[] = "@(#)db_stat.c	8.30 (Sleepycat) 1/8/98";
+static const char sccsid[] = "@(#)db_stat.c	8.38 (Sleepycat) 5/30/98";
 #endif
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -27,25 +27,35 @@ static const char sccsid[] = "@(#)db_stat.c	8.30 (Sleepycat) 1/8/98";
 #endif
 
 #include "db_int.h"
+#include "shqueue.h"
+#include "db_shash.h"
+#include "lock.h"
+#include "mp.h"
 #include "clib_ext.h"
 
 #undef stat
 
-typedef enum { T_NOTSET, T_DB, T_LOG, T_MPOOL, T_TXN } test_t;
+typedef enum { T_NOTSET, T_DB, T_LOCK, T_LOG, T_MPOOL, T_TXN } test_t;
 
+int	argcheck __P((char *, const char *));
 void	btree_stats __P((DB *));
 DB_ENV *db_init __P((char *, test_t));
+void	dl __P((const char *, u_long));
 void	hash_stats __P((DB *));
-int	main __P((int, char *[]));
+int	lock_ok __P((char *));
+void	lock_stats __P((DB_ENV *));
 void	log_stats __P((DB_ENV *));
+int	main __P((int, char *[]));
+int	mpool_ok __P((char *));
 void	mpool_stats __P((DB_ENV *));
 void	onint __P((int));
 void	prflags __P((u_int32_t, const FN *));
-void	txn_stats __P((DB_ENV *));
 int	txn_compare __P((const void *, const void *));
+void	txn_stats __P((DB_ENV *));
 void	usage __P((void));
 
 int	 interrupted;
+char	*internal;
 const char
 	*progname = "db_stat";				/* Program name. */
 
@@ -64,8 +74,16 @@ main(argc, argv)
 
 	ttype = T_NOTSET;
 	db = home = NULL;
-	while ((ch = getopt(argc, argv, "d:h:lmt")) != EOF)
+	while ((ch = getopt(argc, argv, "C:cd:h:lM:mNt")) != EOF)
 		switch (ch) {
+		case 'C':
+			ttype = T_LOCK;
+			if (!argcheck(internal = optarg, "Acflmo"))
+				usage();
+			break;
+		case 'c':
+			ttype = T_LOCK;
+			break;
 		case 'd':
 			db = optarg;
 			ttype = T_DB;
@@ -76,9 +94,17 @@ main(argc, argv)
 		case 'l':
 			ttype = T_LOG;
 			break;
+		case 'M':
+			ttype = T_MPOOL;
+			if (!argcheck(internal = optarg, "Ahlm"))
+				usage();
+			break;
 		case 'm':
 			ttype = T_MPOOL;
 			break;
+		case 'N':
+			(void)db_value_set(0, DB_MUTEXLOCKS);
+			break;
 		case 't':
 			ttype = T_TXN;
 			break;
@@ -115,6 +141,9 @@ main(argc, argv)
 		}
 		(void)dbp->close(dbp, 0);
 		break;
+	case T_LOCK:
+		lock_stats(dbenv);
+		break;
 	case T_LOG:
 		log_stats(dbenv);
 		break;
@@ -168,13 +197,12 @@ btree_stats(dbp)
 	prflags(sp->bt_flags, fn);
 	if (dbp->type == DB_BTREE) {
 #ifdef NOT_IMPLEMENTED
-		printf("%lu\tMaximum keys per-page.\n", (u_long)sp->bt_maxkey);
+		dl("Maximum keys per-page.\n", (u_long)sp->bt_maxkey);
 #endif
-		printf("%lu\tMinimum keys per-page.\n", (u_long)sp->bt_minkey);
+		dl("Minimum keys per-page.\n", (u_long)sp->bt_minkey);
 	}
 	if (dbp->type == DB_RECNO) {
-		printf("%lu\tFixed-length record size.\n",
-		    (u_long)sp->bt_re_len);
+		dl("Fixed-length record size.\n", (u_long)sp->bt_re_len);
 		if (isprint(sp->bt_re_pad))
 			printf("%c\tFixed-length record pad.\n",
 			    (int)sp->bt_re_pad);
@@ -182,43 +210,38 @@ btree_stats(dbp)
 			printf("0x%x\tFixed-length record pad.\n",
 			    (int)sp->bt_re_pad);
 	}
-	printf("%lu\tUnderlying tree page size.\n", (u_long)sp->bt_pagesize);
-	printf("%lu\tNumber of levels in the tree.\n", (u_long)sp->bt_levels);
-	printf("%lu\tNumber of keys in the tree.\n", (u_long)sp->bt_nrecs);
-	printf("%lu\tNumber of tree internal pages.\n", (u_long)sp->bt_int_pg);
-	printf("%lu\tNumber of tree leaf pages.\n", (u_long)sp->bt_leaf_pg);
-	printf("%lu\tNumber of tree duplicate pages.\n",
-	    (u_long)sp->bt_dup_pg);
-	printf("%lu\tNumber of tree overflow pages.\n",
-	    (u_long)sp->bt_over_pg);
-	printf("%lu\tNumber of pages on the free list.\n",
-	    (u_long)sp->bt_free);
-	printf("%lu\tNumber of pages freed for reuse.\n",
-	    (u_long)sp->bt_freed);
-	printf("%lu\tNumber of bytes free in tree internal pages (%.0f%% ff)\n",
-	    (u_long)sp->bt_int_pgfree,
-	    PCT(sp->bt_int_pgfree, sp->bt_int_pg));
-	printf("%lu\tNumber of bytes free in tree leaf pages (%.0f%% ff).\n",
-	    (u_long)sp->bt_leaf_pgfree,
-	    PCT(sp->bt_leaf_pgfree, sp->bt_leaf_pg));
-printf("%lu\tNumber of bytes free in tree duplicate pages (%.0f%% ff).\n",
-	    (u_long)sp->bt_dup_pgfree,
-	    PCT(sp->bt_dup_pgfree, sp->bt_dup_pg));
-printf("%lu\tNumber of bytes free in tree overflow pages (%.0f%% ff).\n",
-	    (u_long)sp->bt_over_pgfree,
-	    PCT(sp->bt_over_pgfree, sp->bt_over_pg));
-	printf("%lu\tNumber of bytes saved by prefix compression.\n",
+	dl("Underlying tree page size.\n", (u_long)sp->bt_pagesize);
+	dl("Number of levels in the tree.\n", (u_long)sp->bt_levels);
+	dl("Number of keys in the tree.\n", (u_long)sp->bt_nrecs);
+	dl("Number of tree internal pages.\n", (u_long)sp->bt_int_pg);
+	dl("Number of tree leaf pages.\n", (u_long)sp->bt_leaf_pg);
+	dl("Number of tree duplicate pages.\n", (u_long)sp->bt_dup_pg);
+	dl("Number of tree overflow pages.\n", (u_long)sp->bt_over_pg);
+	dl("Number of pages on the free list.\n", (u_long)sp->bt_free);
+	dl("Number of pages freed for reuse.\n", (u_long)sp->bt_freed);
+	dl("Number of bytes free in tree internal pages",
+	    (u_long)sp->bt_int_pgfree);
+	printf(" (%.0f%% ff).\n", PCT(sp->bt_int_pgfree, sp->bt_int_pg));
+	dl("Number of bytes free in tree leaf pages",
+	    (u_long)sp->bt_leaf_pgfree);
+	printf(" (%.0f%% ff).\n", PCT(sp->bt_leaf_pgfree, sp->bt_leaf_pg));
+	dl("Number of bytes free in tree duplicate pages",
+	    (u_long)sp->bt_dup_pgfree);
+	printf(" (%.0f%% ff).\n", PCT(sp->bt_dup_pgfree, sp->bt_dup_pg));
+	dl("Number of bytes free in tree overflow pages",
+	    (u_long)sp->bt_over_pgfree);
+	printf(" (%.0f%% ff).\n", PCT(sp->bt_over_pgfree, sp->bt_over_pg));
+	dl("Number of bytes saved by prefix compression.\n",
 	    (u_long)sp->bt_pfxsaved);
-	printf("%lu\tTotal number of tree page splits.\n",
-	    (u_long)sp->bt_split);
-	printf("%lu\tNumber of root page splits.\n", (u_long)sp->bt_rootsplit);
-	printf("%lu\tNumber of fast splits.\n", (u_long)sp->bt_fastsplit);
-	printf("%lu\tNumber of hits in tree fast-insert code.\n",
+	dl("Total number of tree page splits.\n", (u_long)sp->bt_split);
+	dl("Number of root page splits.\n", (u_long)sp->bt_rootsplit);
+	dl("Number of fast splits.\n", (u_long)sp->bt_fastsplit);
+	dl("Number of hits in tree fast-insert code.\n",
 	    (u_long)sp->bt_cache_hit);
-	printf("%lu\tNumber of misses in tree fast-insert code.\n",
+	dl("Number of misses in tree fast-insert code.\n",
 	    (u_long)sp->bt_cache_miss);
-	printf("%lu\tNumber of keys added.\n", (u_long)sp->bt_added);
-	printf("%lu\tNumber of keys deleted.\n", (u_long)sp->bt_deleted);
+	dl("Number of keys added.\n", (u_long)sp->bt_added);
+	dl("Number of keys deleted.\n", (u_long)sp->bt_deleted);
 }
 
 /*
@@ -231,10 +254,47 @@ hash_stats(dbp)
 {
 	COMPQUIET(dbp, NULL);
 
+	printf("Hash statistics not currently available.\n");
 	return;
 }
 
 /*
+ * lock_stats --
+ *	Display lock statistics.
+ */
+void
+lock_stats(dbenv)
+	DB_ENV *dbenv;
+{
+	DB_LOCK_STAT *sp;
+
+	if (internal != NULL) {
+		__lock_dump_region(dbenv->lk_info, internal, stdout);
+		return;
+	}
+
+	if (lock_stat(dbenv->lk_info, &sp, NULL))
+		err(1, NULL);
+
+	printf("%#lx\tLock magic number.\n", (u_long)sp->st_magic);
+	printf("%lu\tLock version number.\n", (u_long)sp->st_version);
+	dl("Lock region reference count.\n", (u_long)sp->st_refcnt);
+	dl("Lock region size.\n", (u_long)sp->st_regsize);
+	dl("Maximum number of locks.\n", (u_long)sp->st_maxlocks);
+	dl("Number of lock modes.\n", (u_long)sp->st_nmodes);
+	dl("Number of lock objects.\n", (u_long)sp->st_numobjs);
+	dl("Number of lockers.\n", (u_long)sp->st_nlockers);
+	dl("Number of lock conflicts.\n", (u_long)sp->st_nconflicts);
+	dl("Number of lock requests.\n", (u_long)sp->st_nrequests);
+	dl("Number of lock releases.\n", (u_long)sp->st_nreleases);
+	dl("Number of deadlocks.\n", (u_long)sp->st_ndeadlocks);
+	dl("The number of region locks granted without waiting.\n",
+	    (u_long)sp->st_region_nowait);
+	dl("The number of region locks granted after waiting.\n",
+	    (u_long)sp->st_region_wait);
+}
+
+/*
  * log_stats --
  *	Display log statistics.
  */
@@ -249,6 +309,8 @@ log_stats(dbenv)
 
 	printf("%#lx\tLog magic number.\n", (u_long)sp->st_magic);
 	printf("%lu\tLog version number.\n", (u_long)sp->st_version);
+	dl("Log region reference count.\n", (u_long)sp->st_refcnt);
+	dl("Log region size.\n", (u_long)sp->st_regsize);
 	printf("%#o\tLog file mode.\n", sp->st_mode);
 	if (sp->st_lg_max % MEGABYTE == 0)
 		printf("%luMb\tLog file size.\n",
@@ -261,13 +323,13 @@ log_stats(dbenv)
 	    (u_long)sp->st_w_mbytes, (u_long)sp->st_w_bytes);
 	printf("%luMb\tLog bytes written since last checkpoint (+%lu bytes).\n",
 	    (u_long)sp->st_wc_mbytes, (u_long)sp->st_wc_bytes);
-	printf("%lu\tTotal log file writes.\n", (u_long)sp->st_wcount);
-	printf("%lu\tTotal log file flushes.\n", (u_long)sp->st_scount);
+	dl("Total log file writes.\n", (u_long)sp->st_wcount);
+	dl("Total log file flushes.\n", (u_long)sp->st_scount);
 	printf("%lu\tCurrent log file number.\n", (u_long)sp->st_cur_file);
 	printf("%lu\tCurrent log file offset.\n", (u_long)sp->st_cur_offset);
-	printf("%lu\tThe number of region locks granted without waiting.\n",
+	dl("The number of region locks granted without waiting.\n",
 	    (u_long)sp->st_region_nowait);
-	printf("%lu\tThe number of region locks granted after waiting.\n",
+	dl("The number of region locks granted after waiting.\n",
 	    (u_long)sp->st_region_wait);
 }
 
@@ -282,70 +344,74 @@ mpool_stats(dbenv)
 	DB_MPOOL_FSTAT **fsp;
 	DB_MPOOL_STAT *gsp;
 
+	if (internal != NULL) {
+		__memp_dump_region(dbenv->mp_info, internal, stdout);
+		return;
+	}
+
 	if (memp_stat(dbenv->mp_info, &gsp, &fsp, NULL))
 		err(1, NULL);
 
-	printf("%lu\tCache size (%luK).\n",
-	    (u_long)gsp->st_cachesize, (u_long)gsp->st_cachesize / 1024);
-	printf("%lu\tRequested pages found in the cache",
-	    (u_long)gsp->st_cache_hit);
+	dl("Pool region reference count.\n", (u_long)gsp->st_refcnt);
+	dl("Pool region size.\n", (u_long)gsp->st_regsize);
+	dl("Cache size", (u_long)gsp->st_cachesize);
+	printf(" (%luK).\n", (u_long)gsp->st_cachesize / 1024);
+	dl("Requested pages found in the cache", (u_long)gsp->st_cache_hit);
 	if (gsp->st_cache_hit + gsp->st_cache_miss != 0)
 		printf(" (%.0f%%)", ((double)gsp->st_cache_hit /
 		    (gsp->st_cache_hit + gsp->st_cache_miss)) * 100);
 	printf(".\n");
-	printf("%lu\tRequested pages mapped into the process' address space.\n",
+	dl("Requested pages mapped into the process' address space.\n",
 	    (u_long)gsp->st_map);
-	printf("%lu\tRequested pages not found in the cache.\n",
+	dl("Requested pages not found in the cache.\n",
 	    (u_long)gsp->st_cache_miss);
-	printf("%lu\tPages created in the cache.\n",
-	    (u_long)gsp->st_page_create);
-	printf("%lu\tPages read into the cache.\n", (u_long)gsp->st_page_in);
-	printf("%lu\tPages written from the cache to the backing file.\n",
+	dl("Pages created in the cache.\n", (u_long)gsp->st_page_create);
+	dl("Pages read into the cache.\n", (u_long)gsp->st_page_in);
+	dl("Pages written from the cache to the backing file.\n",
 	    (u_long)gsp->st_page_out);
-	printf("%lu\tClean pages forced from the cache.\n",
+	dl("Clean pages forced from the cache.\n",
 	    (u_long)gsp->st_ro_evict);
-	printf("%lu\tDirty pages forced from the cache.\n",
+	dl("Dirty pages forced from the cache.\n",
 	    (u_long)gsp->st_rw_evict);
-	printf("%lu\tDirty buffers written by trickle-sync thread.\n",
+	dl("Dirty buffers written by trickle-sync thread.\n",
 	    (u_long)gsp->st_page_trickle);
-	printf("%lu\tCurrent clean buffer count.\n",
+	dl("Current clean buffer count.\n",
 	    (u_long)gsp->st_page_clean);
-	printf("%lu\tCurrent dirty buffer count.\n",
+	dl("Current dirty buffer count.\n",
 	    (u_long)gsp->st_page_dirty);
-	printf("%lu\tNumber of hash buckets used for page location.\n",
+	dl("Number of hash buckets used for page location.\n",
 	    (u_long)gsp->st_hash_buckets);
-	printf("%lu\tTotal number of times hash chains searched for a page.\n",
+	dl("Total number of times hash chains searched for a page.\n",
 	    (u_long)gsp->st_hash_searches);
-	printf("%lu\tThe longest hash chain searched for a page.\n",
+	dl("The longest hash chain searched for a page.\n",
 	    (u_long)gsp->st_hash_longest);
-	printf(
-	    "%lu\tTotal number of hash buckets examined for page location.\n",
+	dl("Total number of hash buckets examined for page location.\n",
 	    (u_long)gsp->st_hash_examined);
-	printf("%lu\tThe number of region locks granted without waiting.\n",
+	dl("The number of region locks granted without waiting.\n",
 	    (u_long)gsp->st_region_nowait);
-	printf("%lu\tThe number of region locks granted after waiting.\n",
+	dl("The number of region locks granted after waiting.\n",
 	    (u_long)gsp->st_region_wait);
 
 	for (; fsp != NULL && *fsp != NULL; ++fsp) {
 		printf("%s\n", DB_LINE);
 		printf("%s\n", (*fsp)->file_name);
-		printf("%lu\tPage size.\n", (u_long)(*fsp)->st_pagesize);
-		printf("%lu\tRequested pages found in the cache",
+		dl("Page size.\n", (u_long)(*fsp)->st_pagesize);
+		dl("Requested pages found in the cache",
 		    (u_long)(*fsp)->st_cache_hit);
 		if ((*fsp)->st_cache_hit + (*fsp)->st_cache_miss != 0)
 			printf(" (%.0f%%)", ((double)(*fsp)->st_cache_hit /
 			    ((*fsp)->st_cache_hit + (*fsp)->st_cache_miss)) *
 			    100);
 		printf(".\n");
-	printf("%lu\tRequested pages mapped into the process' address space.\n",
+		dl("Requested pages mapped into the process' address space.\n",
 		    (u_long)(*fsp)->st_map);
-		printf("%lu\tRequested pages not found in the cache.\n",
+		dl("Requested pages not found in the cache.\n",
 		    (u_long)(*fsp)->st_cache_miss);
-		printf("%lu\tPages created in the cache.\n",
+		dl("Pages created in the cache.\n",
 		    (u_long)(*fsp)->st_page_create);
-		printf("%lu\tPages read into the cache.\n",
+		dl("Pages read into the cache.\n",
 		    (u_long)(*fsp)->st_page_in);
-	printf("%lu\tPages written from the cache to the backing file.\n",
+		dl("Pages written from the cache to the backing file.\n",
 		    (u_long)(*fsp)->st_page_out);
 	}
 }
@@ -358,46 +424,48 @@ void
 txn_stats(dbenv)
 	DB_ENV *dbenv;
 {
-	DB_TXN_STAT *tstat;
+	DB_TXN_STAT *sp;
 	u_int32_t i;
 	const char *p;
 
-	if (txn_stat(dbenv->tx_info, &tstat, NULL))
+	if (txn_stat(dbenv->tx_info, &sp, NULL))
 		err(1, NULL);
 
-	p = tstat->st_last_ckp.file == 0 ?
+	dl("Txn region reference count.\n", (u_long)sp->st_refcnt);
+	dl("Txn region size.\n", (u_long)sp->st_regsize);
+	p = sp->st_last_ckp.file == 0 ?
 	    "No checkpoint LSN." : "File/offset for last checkpoint LSN.";
-	printf("%lu/%lu\t%s\n", (u_long)tstat->st_last_ckp.file,
-	    (u_long)tstat->st_last_ckp.offset, p);
-	p = tstat->st_pending_ckp.file == 0 ?
+	printf("%lu/%lu\t%s\n",
+	    (u_long)sp->st_last_ckp.file, (u_long)sp->st_last_ckp.offset, p);
+	p = sp->st_pending_ckp.file == 0 ?
 	    "No pending checkpoint LSN." :
 	    "File/offset for last pending checkpoint LSN.";
 	printf("%lu/%lu\t%s\n",
-	    (u_long)tstat->st_pending_ckp.file,
-	    (u_long)tstat->st_pending_ckp.offset, p);
-	if (tstat->st_time_ckp == 0)
+	    (u_long)sp->st_pending_ckp.file,
+	    (u_long)sp->st_pending_ckp.offset, p);
+	if (sp->st_time_ckp == 0)
 		printf("0\tNo checkpoint timestamp.\n");
 	else
 		printf("%.24s\tCheckpoint timestamp.\n",
-		    ctime(&tstat->st_time_ckp));
+		    ctime(&sp->st_time_ckp));
 	printf("%lx\tLast transaction ID allocated.\n",
-	    (u_long)tstat->st_last_txnid);
-	printf("%lu\tMaximum number of active transactions.\n",
-	    (u_long)tstat->st_maxtxns);
-	printf("%lu\tNumber of transactions begun.\n",
-	    (u_long)tstat->st_nbegins);
-	printf("%lu\tNumber of transactions aborted.\n",
-	    (u_long)tstat->st_naborts);
-	printf("%lu\tNumber of transactions committed.\n",
-	    (u_long)tstat->st_ncommits);
-	printf("%lu\tActive transactions.\n", (u_long)tstat->st_nactive);
-	qsort(tstat->st_txnarray,
-	    tstat->st_nactive, sizeof(tstat->st_txnarray[0]), txn_compare);
-	for (i = 0; i < tstat->st_nactive; ++i)
+	    (u_long)sp->st_last_txnid);
+	dl("Maximum number of active transactions.\n", (u_long)sp->st_maxtxns);
+	dl("Number of transactions begun.\n", (u_long)sp->st_nbegins);
+	dl("Number of transactions aborted.\n", (u_long)sp->st_naborts);
+	dl("Number of transactions committed.\n", (u_long)sp->st_ncommits);
+	dl("The number of region locks granted without waiting.\n",
+	    (u_long)sp->st_region_nowait);
+	dl("The number of region locks granted after waiting.\n",
+	    (u_long)sp->st_region_wait);
+	dl("Active transactions.\n", (u_long)sp->st_nactive);
+	qsort(sp->st_txnarray,
+	    sp->st_nactive, sizeof(sp->st_txnarray[0]), txn_compare);
+	for (i = 0; i < sp->st_nactive; ++i)
 		printf("\tid: %lx; initial LSN file/offest %lu/%lu\n",
-		    (u_long)tstat->st_txnarray[i].txnid,
-		    (u_long)tstat->st_txnarray[i].lsn.file,
-		    (u_long)tstat->st_txnarray[i].lsn.offset);
+		    (u_long)sp->st_txnarray[i].txnid,
+		    (u_long)sp->st_txnarray[i].lsn.file,
+		    (u_long)sp->st_txnarray[i].lsn.offset);
 }
 
 int
@@ -417,25 +485,41 @@ txn_compare(a1, b1)
 }
 
 /*
+ * dl --
+ *	Display a big value.
+ */
+void
+dl(msg, value)
+	const char *msg;
+	u_long value;
+{
+	/*
+	 * Two formats: if less than 10 million, display the number itself;
+	 * if 10 million or more, display it in millions as ###M.
+	 */
+	if (value < 10000000)
+		printf("%lu\t%s", value, msg);
+	else
+		printf("%luM\t%s", value / 1000000, msg);
+}
+
+/*
  * prflags --
  *	Print out flag values.
  */
 void
-prflags(flags, fn)
+prflags(flags, fnp)
 	u_int32_t flags;
-	FN const *fn;
-{
 	const FN *fnp;
-	int found;
+{
 	const char *sep;
 
 	sep = " ";
 	printf("Flags:");
-	for (found = 0, fnp = fn; fnp->mask != 0; ++fnp)
+	for (; fnp->mask != 0; ++fnp)
 		if (fnp->mask & flags) {
 			printf("%s%s", sep, fnp->name);
 			sep = ", ";
-			found = 1;
 		}
 	printf("\n");
 }
@@ -450,7 +534,7 @@ db_init(home, ttype)
 	test_t ttype;
 {
 	DB_ENV *dbenv;
-	int flags;
+	u_int32_t flags;
 
 	if ((dbenv = (DB_ENV *)malloc(sizeof(DB_ENV))) == NULL) {
 		errno = ENOMEM;
@@ -467,13 +551,16 @@ db_init(home, ttype)
 	switch (ttype) {
 	case T_DB:
 	case T_MPOOL:
-		flags |= DB_INIT_MPOOL;
+		LF_SET(DB_INIT_MPOOL);
+		break;
+	case T_LOCK:
+		LF_SET(DB_INIT_LOCK);
 		break;
 	case T_LOG:
-		flags |= DB_INIT_LOG;
+		LF_SET(DB_INIT_LOG);
 		break;
 	case T_TXN:
-		flags |= DB_INIT_TXN;
+		LF_SET(DB_INIT_TXN);
 		break;
 	case T_NOTSET:
 		abort();
@@ -493,7 +580,7 @@ db_init(home, ttype)
 
 	/* Turn off the DB_INIT_MPOOL flag if it's a database. */
 	if (ttype == T_DB)
-		flags &= ~DB_INIT_MPOOL;
+		LF_CLR(DB_INIT_MPOOL);
 
 	/* Set the error output options -- this time we want a message. */
 	memset(dbenv, 0, sizeof(*dbenv));
@@ -508,6 +595,21 @@ db_init(home, ttype)
 }
 
 /*
+ * argcheck --
+ *	Return non-zero if all of the argument flags are okay.
+ */
+int
+argcheck(arg, ok_args)
+	char *arg;
+	const char *ok_args;
+{
+	for (; *arg != '\0'; ++arg)
+		if (strchr(ok_args, *arg) == NULL)
+			return (0);
+	return (1);
+}
+
+/*
  * oninit --
  *	Interrupt signal handler.
  */
@@ -523,6 +625,7 @@ onint(signo)
 void
 usage()
 {
-	fprintf(stderr, "usage: db_stat [-mlt] [-d file] [-h home]\n");
+	fprintf(stderr,
+    "usage: db_stat [-clmNt] [-C Acflmo] [-d file] [-h home] [-M Ahlm]\n");
 	exit (1);
 }
diff --git a/db2/txn/txn.c b/db2/txn/txn.c
index 2a2e3da97b..4f3ffd8ed2 100644
--- a/db2/txn/txn.c
+++ b/db2/txn/txn.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 /*
@@ -43,27 +43,20 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)txn.c	10.39 (Sleepycat) 1/8/98";
+static const char sccsid[] = "@(#)txn.c	10.58 (Sleepycat) 5/31/98";
 #endif /* not lint */
 
 
 #ifndef NO_SYSTEM_INCLUDES
 #include <sys/types.h>
-#include <sys/mman.h>
-#include <sys/stat.h>
 
 #include <errno.h>
-#include <fcntl.h>
-#include <stddef.h>
-#include <stdio.h>
-#include <stdlib.h>
 #include <string.h>
 #include <time.h>
-#include <unistd.h>
 #endif
 
-#include "shqueue.h"
 #include "db_int.h"
+#include "shqueue.h"
 #include "db_page.h"
 #include "db_shash.h"
 #include "txn.h"
@@ -74,9 +67,9 @@ static const char sccsid[] = "@(#)txn.c	10.39 (Sleepycat) 1/8/98";
 #include "common_ext.h"
 
 static int __txn_check_running __P((const DB_TXN *));
-static int __txn_create __P((DB_ENV *, const char *, u_int));
 static int __txn_end __P((DB_TXN *, int));
 static int __txn_grow_region __P((DB_TXNMGR *));
+static int __txn_init __P((DB_TXNREGION *));
 static int __txn_undo __P((DB_TXN *));
 static int __txn_validate_region __P((DB_TXNMGR *));
 
@@ -85,30 +78,20 @@ static int __txn_validate_region __P((DB_TXNMGR *));
  * It assumes that a lock manager and log manager that conform to the db_log(3)
  * and db_lock(3) interfaces exist.
  *
- * Create and initialize a transaction region in shared memory.
+ * Initialize a transaction region in shared memory.
  * Return 0 on success, errno on failure.
  */
 static int
-__txn_create(dbenv, path, mode)
-	DB_ENV *dbenv;
-	const char *path;
-	u_int mode;
-{
+__txn_init(txn_region)
 	DB_TXNREGION *txn_region;
+{
 	time_t now;
-	int fd, maxtxns, ret;
 
-	maxtxns = dbenv->tx_max != 0 ? dbenv->tx_max : 1000;
 	(void)time(&now);
 
-	/* Region may have existed.  If it didn't, the open will fail. */
-	if ((ret = __db_rcreate(dbenv, DB_APP_NONE, path, DEFAULT_TXN_FILE,
-	    mode, TXN_REGION_SIZE(maxtxns), 0, &fd, &txn_region)) != 0)
-		return (ret);
-
+	/* maxtxns is already initialized. */
 	txn_region->magic = DB_TXNMAGIC;
 	txn_region->version = DB_TXNVERSION;
-	txn_region->maxtxns = maxtxns;
 	txn_region->last_txnid = TXN_MINIMUM;
 	/* XXX If we ever do more types of locking and logging, this changes. */
 	txn_region->logtype = 0;
@@ -118,33 +101,22 @@ __txn_create(dbenv, path, mode)
 	ZERO_LSN(txn_region->pending_ckp);
 	SH_TAILQ_INIT(&txn_region->active_txn);
 	__db_shalloc_init((void *)&txn_region[1],
-	    TXN_REGION_SIZE(maxtxns) - sizeof(DB_TXNREGION));
-
-	/* Unlock the region. */
-	(void)__db_mutex_unlock(&txn_region->hdr.lock, fd);
+	    TXN_REGION_SIZE(txn_region->maxtxns) - sizeof(DB_TXNREGION));
 
-	/* Now unmap and close the region. */
-	if ((ret = __db_rclose(dbenv, fd, txn_region)) != 0) {
-		(void)txn_unlink(path, 1 /* force */, dbenv);
-		return (ret);
-	}
 	return (0);
 }
 
 int
 txn_open(path, flags, mode, dbenv, mgrpp)
 	const char *path;
-	int flags, mode;
+	u_int32_t flags;
+	int mode;
 	DB_ENV *dbenv;
 	DB_TXNMGR **mgrpp;
 {
 	DB_TXNMGR *tmgrp;
-	DB_TXNREGION *txn_regionp;
-	int fd, ret, retry_cnt;
-
-	tmgrp = NULL;
-	txn_regionp = NULL;
-	fd = -1;
+	u_int32_t maxtxns;
+	int ret;
 
 	/* Validate arguments. */
 	if (dbenv == NULL)
@@ -157,52 +129,57 @@ txn_open(path, flags, mode, dbenv, mgrpp)
 	if ((ret = __db_fchk(dbenv, "txn_open", flags, OKFLAGS)) != 0)
 		return (ret);
 
-	retry_cnt = 0;
-retry:	if (LF_ISSET(DB_CREATE) && (ret = __txn_create(dbenv, path, mode)) != 0)
-		if (ret == EAGAIN && ++retry_cnt < 0) {
-			(void)__db_sleep(1, 0);
-			goto retry;
-		} else	/* We did not really create the region */
-			flags &= ~DB_CREATE;
-
-	retry_cnt = 0;
-retry1:	if ((ret = __db_ropen(dbenv, DB_APP_NONE, path, DEFAULT_TXN_FILE,
-	    flags & ~(DB_CREATE | DB_THREAD | DB_TXN_NOSYNC),
-	    &fd, &txn_regionp)) != 0) {
-		if (ret == EAGAIN && ++retry_cnt < 3) {
-			(void)__db_sleep(1, 0);
-			goto retry1;
-		}
-		goto out;
-	}
-
-
-	/* Check if valid region. */
-	if (txn_regionp->magic != DB_TXNMAGIC) {
-		__db_err(dbenv, "txn_open: Bad magic number");
-		ret = EINVAL;
-		goto out;
-	}
+	maxtxns = dbenv->tx_max != 0 ? dbenv->tx_max : 20;
 
 	/* Now, create the transaction manager structure and set its fields. */
-	if ((tmgrp = (DB_TXNMGR *)__db_malloc(sizeof(DB_TXNMGR))) == NULL) {
+	if ((tmgrp = (DB_TXNMGR *)__db_calloc(1, sizeof(DB_TXNMGR))) == NULL) {
 		__db_err(dbenv, "txn_open: %s", strerror(ENOMEM));
-		ret = ENOMEM;
-		goto out;
+		return (ENOMEM);
 	}
 
+	/* Initialize the transaction manager structure. */
+	tmgrp->mutexp = NULL;
 	tmgrp->dbenv = dbenv;
 	tmgrp->recover =
 	    dbenv->tx_recover == NULL ? __db_dispatch : dbenv->tx_recover;
-	tmgrp->region = txn_regionp;
-	tmgrp->reg_size = txn_regionp->hdr.size;
-	tmgrp->fd = fd;
 	tmgrp->flags = LF_ISSET(DB_TXN_NOSYNC | DB_THREAD);
-	tmgrp->mem = &txn_regionp[1];
-	tmgrp->mutexp = NULL;
 	TAILQ_INIT(&tmgrp->txn_chain);
+
+	/* Join/create the txn region. */
+	tmgrp->reginfo.dbenv = dbenv;
+	tmgrp->reginfo.appname = DB_APP_NONE;
+	if (path == NULL)
+		tmgrp->reginfo.path = NULL;
+	else
+		if ((tmgrp->reginfo.path = (char *)__db_strdup(path)) == NULL)
+			goto err;
+	tmgrp->reginfo.file = DEFAULT_TXN_FILE;
+	tmgrp->reginfo.mode = mode;
+	tmgrp->reginfo.size = TXN_REGION_SIZE(maxtxns);
+	tmgrp->reginfo.dbflags = flags;
+	tmgrp->reginfo.addr = NULL;
+	tmgrp->reginfo.fd = -1;
+	tmgrp->reginfo.flags = dbenv->tx_max == 0 ? REGION_SIZEDEF : 0;
+	if ((ret = __db_rattach(&tmgrp->reginfo)) != 0)
+		goto err;
+
+	/* Fill in region-related fields. */
+	tmgrp->region = tmgrp->reginfo.addr;
+	tmgrp->mem = &tmgrp->region[1];
+
+	if (F_ISSET(&tmgrp->reginfo, REGION_CREATED)) {
+		tmgrp->region->maxtxns = maxtxns;
+		if ((ret = __txn_init(tmgrp->region)) != 0)
+			goto err;
+
+	} else if (tmgrp->region->magic != DB_TXNMAGIC) {
+		/* Check if valid region. */
+		__db_err(dbenv, "txn_open: Bad magic number");
+		ret = EINVAL;
+		goto err;
+	}
+
 	if (LF_ISSET(DB_THREAD)) {
-		LOCK_TXNREGION(tmgrp);
 		if ((ret = __db_shalloc(tmgrp->mem, sizeof(db_mutex_t),
 		    MUTEX_ALIGNMENT, &tmgrp->mutexp)) == 0)
 			/*
@@ -211,25 +188,27 @@ retry1:	if ((ret = __db_ropen(dbenv, DB_APP_NONE, path, DEFAULT_TXN_FILE,
 			 * to be ignored.  We put 0 here as a valid placeholder.
 			 */
 			__db_mutex_init(tmgrp->mutexp, 0);
-		UNLOCK_TXNREGION(tmgrp);
 		if (ret != 0)
-			goto out;
+			goto err;
 	}
+
+	UNLOCK_TXNREGION(tmgrp);
 	*mgrpp = tmgrp;
 	return (0);
 
-out:	if (txn_regionp != NULL)
-		(void)__db_rclose(dbenv, fd, txn_regionp);
-	if (flags & DB_CREATE)
-		(void)txn_unlink(path, 1, dbenv);
-	if (tmgrp != NULL) {
-		if (tmgrp->mutexp != NULL) {
-			LOCK_TXNREGION(tmgrp);
+err:	if (tmgrp->reginfo.addr != NULL) {
+		if (tmgrp->mutexp != NULL)
 			__db_shalloc_free(tmgrp->mem, tmgrp->mutexp);
-			UNLOCK_TXNREGION(tmgrp);
-		}
-		__db_free(tmgrp);
+
+		UNLOCK_TXNREGION(tmgrp);
+		(void)__db_rdetach(&tmgrp->reginfo);
+		if (F_ISSET(&tmgrp->reginfo, REGION_CREATED))
+			(void)txn_unlink(path, 1, dbenv);
 	}
+
+	if (tmgrp->reginfo.path != NULL)
+		FREES(tmgrp->reginfo.path);
+	FREE(tmgrp, sizeof(*tmgrp));
 	return (ret);
 }
 
@@ -244,77 +223,83 @@ txn_begin(tmgrp, parent, txnpp)
 	DB_TXN *parent;
 	DB_TXN **txnpp;
 {
-	TXN_DETAIL *txnp;
+	DB_LSN begin_lsn;
 	DB_TXN *retp;
-	int id, ret;
+	TXN_DETAIL *txnp;
+	size_t off;
+	u_int32_t id;
+	int ret;
+
+	txnp = NULL;
+	*txnpp = NULL;
+
+	if ((retp = (DB_TXN *)__db_malloc(sizeof(DB_TXN))) == NULL) {
+		__db_err(tmgrp->dbenv, "txn_begin : %s", strerror(ENOMEM));
+		return (ENOMEM);
+	}
+
+	/*
+	 * We do not have to write begin records (and if we do not, then we
+	 * need never write records for read-only transactions).  However,
+	 * we do need to find the current LSN so that we can store it in the
+	 * transaction structure, so we can know where to take checkpoints.
+	 */
+	if (tmgrp->dbenv->lg_info != NULL && (ret =
+	    log_put(tmgrp->dbenv->lg_info, &begin_lsn, NULL, DB_CURLSN)) != 0)
+		goto err2;
 
 	LOCK_TXNREGION(tmgrp);
 
+	/* Make sure that last_txnid is not going to wrap around. */
+	if (tmgrp->region->last_txnid == TXN_INVALID) {
+		__db_err(tmgrp->dbenv, "txn_begin: %s  %s",
+		    "Transaction ID wrapping.",
+		    "Snapshot your database and start a new log.");
+		ret = EINVAL;
+		goto err1;
+	}
+
 	if ((ret = __txn_validate_region(tmgrp)) != 0)
-		goto err;
+		goto err1;
 
 	/* Allocate a new transaction detail structure. */
 	if ((ret = __db_shalloc(tmgrp->mem, sizeof(TXN_DETAIL), 0, &txnp)) != 0
 	    && ret == ENOMEM && (ret = __txn_grow_region(tmgrp)) == 0)
 	    	ret = __db_shalloc(tmgrp->mem, sizeof(TXN_DETAIL), 0, &txnp);
-
 	if (ret != 0)
-		goto err;
-
-	/* Make sure that last_txnid is not going to wrap around. */
-	if (tmgrp->region->last_txnid == TXN_INVALID)
-		return (EINVAL);
-
-	if ((retp = (DB_TXN *)__db_malloc(sizeof(DB_TXN))) == NULL) {
-		__db_err(tmgrp->dbenv, "txn_begin : %s", strerror(ENOMEM));
-		ret = ENOMEM;
 		goto err1;
-	}
+
+	/* Place transaction on active transaction list. */
+	SH_TAILQ_INSERT_HEAD(&tmgrp->region->active_txn,
+	    txnp, links, __txn_detail);
 
 	id = ++tmgrp->region->last_txnid;
 	tmgrp->region->nbegins++;
 
 	txnp->txnid = id;
+	txnp->begin_lsn = begin_lsn;
 	ZERO_LSN(txnp->last_lsn);
-	ZERO_LSN(txnp->begin_lsn);
 	txnp->last_lock = 0;
 	txnp->status = TXN_RUNNING;
-	SH_TAILQ_INSERT_HEAD(&tmgrp->region->active_txn,
-	    txnp, links, __txn_detail);
-
+	off = (u_int8_t *)txnp - (u_int8_t *)tmgrp->region;
 	UNLOCK_TXNREGION(tmgrp);
 
 	ZERO_LSN(retp->last_lsn);
 	retp->txnid = id;
 	retp->parent = parent;
-	retp->off = (u_int8_t *)txnp - (u_int8_t *)tmgrp->region;
 	retp->mgrp = tmgrp;
-
-	if (tmgrp->dbenv->lg_info != NULL &&
-	    (ret = __txn_regop_log(tmgrp->dbenv->lg_info,
-	        retp, &txnp->begin_lsn, 0, TXN_BEGIN)) != 0) {
-
-		/* Deallocate transaction. */
-		LOCK_TXNREGION(tmgrp);
-		SH_TAILQ_REMOVE(&tmgrp->region->active_txn,
-		    txnp, links, __txn_detail);
-		__db_shalloc_free(tmgrp->mem, txnp);
-		UNLOCK_TXNREGION(tmgrp);
-		__db_free(retp);
-		return (ret);
-	}
+	retp->off = off;
 
 	LOCK_TXNTHREAD(tmgrp);
 	TAILQ_INSERT_TAIL(&tmgrp->txn_chain, retp, links);
 	UNLOCK_TXNTHREAD(tmgrp);
 
-	*txnpp  = retp;
+	*txnpp = retp;
 	return (0);
 
-err1:
-	__db_shalloc_free(tmgrp->mem, txnp);
-err:
-	UNLOCK_TXNREGION(tmgrp);
+err1:	UNLOCK_TXNREGION(tmgrp);
+
+err2:	__db_free(retp);
 	return (ret);
 }
 
@@ -332,12 +317,15 @@ txn_commit(txnp)
 	if ((ret = __txn_check_running(txnp)) != 0)
 		return (ret);
 
-	/* Sync the log. */
+	/*
+	 * If the transaction has a non-zero last LSN, log a commit record
+	 * (flushing the log unless DB_TXN_NOSYNC is set); else write nothing.
+	 */
 	if ((logp = txnp->mgrp->dbenv->lg_info) != NULL &&
-	    (ret = __txn_regop_log(logp,
-	    txnp, &txnp->last_lsn,
-	    F_ISSET(txnp->mgrp, DB_TXN_NOSYNC) ? 0 : DB_FLUSH, TXN_COMMIT))
-	    != 0)
+	    !IS_ZERO_LSN(txnp->last_lsn) &&
+	    (ret = __txn_regop_log(logp, txnp, &txnp->last_lsn,
+	    F_ISSET(txnp->mgrp, DB_TXN_NOSYNC) ? 0 : DB_FLUSH,
+	    TXN_COMMIT)) != 0)
 		return (ret);
 
 	return (__txn_end(txnp, 1));
@@ -371,8 +359,8 @@ int
 txn_prepare(txnp)
 	DB_TXN *txnp;
 {
-	int ret;
 	TXN_DETAIL *tp;
+	int ret;
 
 	if ((ret = __txn_check_running(txnp)) != 0)
 		return (ret);
@@ -414,21 +402,23 @@ txn_close(tmgrp)
 	DB_TXN *txnp;
 	int ret, t_ret;
 
+	ret = 0;
+
 	/*
 	 * This function had better only be called once per process
 	 * (i.e., not per thread), so there should be no synchronization
 	 * required.
 	 */
-	for (ret = 0, txnp = TAILQ_FIRST(&tmgrp->txn_chain);
-	    txnp != TAILQ_END(&tmgrp->txn_chain);
-	    txnp = TAILQ_FIRST(&tmgrp->txn_chain)) {
-		if ((t_ret = txn_abort(txnp)) != 0 && ret == 0)
-			ret = t_ret;
-	}
+	while ((txnp =
+	    TAILQ_FIRST(&tmgrp->txn_chain)) != TAILQ_END(&tmgrp->txn_chain))
+		if ((t_ret = txn_abort(txnp)) != 0) {
+			__txn_end(txnp, 0);
+			if (ret == 0)
+				ret = t_ret;
+		}
 
-	if (tmgrp->dbenv->lg_info && (t_ret =
-	    log_flush(tmgrp->dbenv->lg_info, NULL)) != 0 &&
-	    ret == 0)
+	if (tmgrp->dbenv->lg_info &&
+	    (t_ret = log_flush(tmgrp->dbenv->lg_info, NULL)) != 0 && ret == 0)
 		ret = t_ret;
 
 	if (tmgrp->mutexp != NULL) {
@@ -437,12 +427,12 @@ txn_close(tmgrp)
 		UNLOCK_TXNREGION(tmgrp);
 	}
 
-	if ((t_ret = __db_rclose(tmgrp->dbenv, tmgrp->fd, tmgrp->region)) != 0
-	    && ret == 0)
+	if ((t_ret = __db_rdetach(&tmgrp->reginfo)) != 0 && ret == 0)
 		ret = t_ret;
 
-	if (ret == 0)
-		__db_free(tmgrp);
+	if (tmgrp->reginfo.path != NULL)
+		FREES(tmgrp->reginfo.path);
+	FREE(tmgrp, sizeof(*tmgrp));
 
 	return (ret);
 }
@@ -457,8 +447,19 @@ txn_unlink(path, force, dbenv)
 	int force;
 	DB_ENV *dbenv;
 {
-	return (__db_runlink(dbenv,
-	    DB_APP_NONE, path, DEFAULT_TXN_FILE, force));
+	REGINFO reginfo;
+	int ret;
+
+	memset(&reginfo, 0, sizeof(reginfo));
+	reginfo.dbenv = dbenv;
+	reginfo.appname = DB_APP_NONE;
+	if (path != NULL && (reginfo.path = (char *)__db_strdup(path)) == NULL)
+		return (ENOMEM);
+	reginfo.file = DEFAULT_TXN_FILE;
+	ret = __db_runlink(&reginfo, force);
+	if (reginfo.path != NULL)
+		FREES(reginfo.path);
+	return (ret);
 }
 
 /* Internal routines. */
@@ -540,10 +541,10 @@ static int
 __txn_undo(txnp)
 	DB_TXN *txnp;
 {
-	DB_TXNMGR *mgr;
-	DB_LOG *logp;
 	DBT rdbt;
+	DB_LOG *logp;
 	DB_LSN key_lsn;
+	DB_TXNMGR *mgr;
 	int ret;
 
 	mgr = txnp->mgrp;
@@ -594,7 +595,7 @@ __txn_undo(txnp)
 int
 txn_checkpoint(mgr, kbytes, minutes)
 	const DB_TXNMGR *mgr;
-	int kbytes, minutes;
+	u_int32_t kbytes, minutes;
 {
 	TXN_DETAIL *txnp;
 	DB_LSN ckp_lsn, last_ckp;
@@ -603,10 +604,6 @@ txn_checkpoint(mgr, kbytes, minutes)
 	time_t last_ckp_time, now;
 	int ret;
 
-	/* Check usage. */
-	if (kbytes < 0 || minutes < 0)
-		return (EINVAL);
-
 	/*
 	 * Check if we need to run recovery.
 	 */
@@ -678,8 +675,8 @@ do_ckp:
 	if (mgr->dbenv->mp_info != NULL &&
 	    (ret = memp_sync(mgr->dbenv->mp_info, &ckp_lsn)) != 0) {
 		/*
-		 * ret < 0 means that there are still buffers to flush;
-		 * the checkpoint is not complete. Back off and try again.
+		 * ret == DB_INCOMPLETE means that there are still buffers to
+		 * flush; the checkpoint is not complete.  Wait and try again.
 		 */
 		if (ret > 0)
 			__db_err(mgr->dbenv,
@@ -711,9 +708,9 @@ do_ckp:
 }
 
 /*
- * This is called at every interface to verify if the region
- * has changed size, and if so, to remap the region in and
- * reset the process pointers.
+ * __txn_validate_region --
+ *	Called at every interface to verify if the region has changed size,
+ *	and if so, to remap the region in and reset the process' pointers.
  */
 static int
 __txn_validate_region(tp)
@@ -721,15 +718,15 @@ __txn_validate_region(tp)
 {
 	int ret;
 
-	if (tp->reg_size == tp->region->hdr.size)
+	if (tp->reginfo.size == tp->region->hdr.size)
 		return (0);
 
-	/* Grow the region. */
-	if ((ret = __db_rremap(tp->dbenv, tp->region,
-	    tp->reg_size, tp->region->hdr.size, tp->fd, &tp->region)) != 0)
+	/* Detach/reattach the region. */
+	if ((ret = __db_rreattach(&tp->reginfo, tp->region->hdr.size)) != 0)
 		return (ret);
 
-	tp->reg_size = tp->region->hdr.size;
+	/* Reset region information. */
+	tp->region = tp->reginfo.addr;
 	tp->mem = &tp->region[1];
 
 	return (0);
@@ -739,27 +736,26 @@ static int
 __txn_grow_region(tp)
 	DB_TXNMGR *tp;
 {
-	size_t incr;
+	size_t incr, oldsize;
 	u_int32_t mutex_offset, oldmax;
 	u_int8_t *curaddr;
 	int ret;
 
 	oldmax = tp->region->maxtxns;
 	incr = oldmax * sizeof(DB_TXN);
-	mutex_offset = (u_int8_t *)tp->mutexp - (u_int8_t *)tp->region;
+	mutex_offset = tp->mutexp != NULL ?
+	    (u_int8_t *)tp->mutexp - (u_int8_t *)tp->region : 0;
 
-	if ((ret = __db_rgrow(tp->dbenv, tp->fd, incr)) != 0)
-		return (ret);
-
-	if ((ret = __db_rremap(tp->dbenv, tp->region,
-	    tp->reg_size, tp->reg_size + incr, tp->fd, &tp->region)) != 0)
+	oldsize = tp->reginfo.size;
+	if ((ret = __db_rgrow(&tp->reginfo, oldsize + incr)) != 0)
 		return (ret);
+	tp->region = tp->reginfo.addr;
 
 	/* Throw the new space on the free list. */
-	curaddr = (u_int8_t *)tp->region + tp->reg_size;
+	curaddr = (u_int8_t *)tp->region + oldsize;
 	tp->mem = &tp->region[1];
-	tp->reg_size += incr;
-	tp->mutexp = (db_mutex_t *)((u_int8_t *)tp->region + mutex_offset);
+	tp->mutexp = mutex_offset != 0 ?
+	    (db_mutex_t *)((u_int8_t *)tp->region + mutex_offset) : NULL;
 
 	*((size_t *)curaddr) = incr - sizeof(size_t);
 	curaddr += sizeof(size_t);
@@ -826,6 +822,11 @@ txn_stat(mgr, statp, db_malloc)
 			break;
 	}
 
+	stats->st_region_wait = mgr->region->hdr.lock.mutex_set_wait;
+	stats->st_region_nowait = mgr->region->hdr.lock.mutex_set_nowait;
+	stats->st_refcnt = mgr->region->hdr.refcnt;
+	stats->st_regsize = mgr->region->hdr.size;
+
 	UNLOCK_TXNREGION(mgr);
 	*statp = stats;
 	return (0);
diff --git a/db2/txn/txn.src b/db2/txn/txn.src
index 40bb63ecb6..04809b69d6 100644
--- a/db2/txn/txn.src
+++ b/db2/txn/txn.src
@@ -1,14 +1,12 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  *
- *	@(#)txn.src	10.1 (Sleepycat) 4/12/97
- *
- * This is the source file used to create the logging functions for the
- * transaction system.
+ *	@(#)txn.src	10.3 (Sleepycat) 4/10/98
  */
+
 PREFIX	txn
 
 /*
diff --git a/db2/txn/txn_auto.c b/db2/txn/txn_auto.c
index 38627466a8..f03a52991f 100644
--- a/db2/txn/txn_auto.c
+++ b/db2/txn/txn_auto.c
@@ -15,8 +15,6 @@
 #include "db_dispatch.h"
 #include "txn.h"
 #include "db_am.h"
-#include "common_ext.h"
-
 /*
  * PUBLIC: int __txn_regop_log
  * PUBLIC:     __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
@@ -58,7 +56,7 @@ int __txn_regop_log(logp, txnid, ret_lsnp, flags,
 	bp += sizeof(DB_LSN);
 	memcpy(bp, &opcode, sizeof(opcode));
 	bp += sizeof(opcode);
-#ifdef DEBUG
+#ifdef DIAGNOSTIC
 	if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size)
 		fprintf(stderr, "Error in log record length");
 #endif
@@ -74,22 +72,23 @@ int __txn_regop_log(logp, txnid, ret_lsnp, flags,
  * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
  */
 int
-__txn_regop_print(notused1, dbtp, lsnp, notused3, notused4)
+__txn_regop_print(notused1, dbtp, lsnp, notused2, notused3)
 	DB_LOG *notused1;
 	DBT *dbtp;
 	DB_LSN *lsnp;
-	int notused3;
-	void *notused4;
+	int notused2;
+	void *notused3;
 {
 	__txn_regop_args *argp;
 	u_int32_t i;
-	int c, ret;
+	u_int ch;
+	int ret;
 
 	i = 0;
-	c = 0;
+	ch = 0;
 	notused1 = NULL;
-	notused3 = 0;
-	notused4 = NULL;
+	notused2 = 0;
+	notused3 = NULL;
 
 	if ((ret = __txn_regop_read(dbtp->data, &argp)) != 0)
 		return (ret);
@@ -186,7 +185,7 @@ int __txn_ckp_log(logp, txnid, ret_lsnp, flags,
 	else
 		memset(bp, 0, sizeof(*last_ckp));
 	bp += sizeof(*last_ckp);
-#ifdef DEBUG
+#ifdef DIAGNOSTIC
 	if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size)
 		fprintf(stderr, "Error in log record length");
 #endif
@@ -202,22 +201,23 @@ int __txn_ckp_log(logp, txnid, ret_lsnp, flags,
  * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
  */
 int
-__txn_ckp_print(notused1, dbtp, lsnp, notused3, notused4)
+__txn_ckp_print(notused1, dbtp, lsnp, notused2, notused3)
 	DB_LOG *notused1;
 	DBT *dbtp;
 	DB_LSN *lsnp;
-	int notused3;
-	void *notused4;
+	int notused2;
+	void *notused3;
 {
 	__txn_ckp_args *argp;
 	u_int32_t i;
-	int c, ret;
+	u_int ch;
+	int ret;
 
 	i = 0;
-	c = 0;
+	ch = 0;
 	notused1 = NULL;
-	notused3 = 0;
-	notused4 = NULL;
+	notused2 = 0;
+	notused3 = NULL;
 
 	if ((ret = __txn_ckp_read(dbtp->data, &argp)) != 0)
 		return (ret);
diff --git a/db2/txn/txn_rec.c b/db2/txn/txn_rec.c
index 679cffb567..e53dc5f3b7 100644
--- a/db2/txn/txn_rec.c
+++ b/db2/txn/txn_rec.c
@@ -1,7 +1,7 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 /*
@@ -40,24 +40,20 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)txn_rec.c	10.6 (Sleepycat) 10/25/97";
+static const char sccsid[] = "@(#)txn_rec.c	10.11 (Sleepycat) 5/3/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
 #include <sys/types.h>
 
-#include <stddef.h>
-#include <stdlib.h>
-#include <string.h>
+#include <errno.h>
 #endif
 
 #include "db_int.h"
 #include "db_page.h"
 #include "shqueue.h"
 #include "txn.h"
-#include "db_dispatch.h"
 #include "db_am.h"
-#include "common_ext.h"
 
 /*
  * PUBLIC: int __txn_regop_recover
@@ -69,7 +65,7 @@ __txn_regop_recover(logp, dbtp, lsnp, redo, info)
 	DBT *dbtp;
 	DB_LSN *lsnp;
 	int redo;
-	 void *info;
+	void *info;
 {
 	__txn_regop_args *argp;
 	int ret;
@@ -77,8 +73,8 @@ __txn_regop_recover(logp, dbtp, lsnp, redo, info)
 #ifdef DEBUG_RECOVER
 	(void)__txn_regop_print(logp, dbtp, lsnp, redo, info);
 #endif
-	logp = logp;			/* XXX: Shut the compiler up. */
-	redo = redo;
+	COMPQUIET(redo, 0);
+	COMPQUIET(logp, NULL);
 
 	if ((ret = __txn_regop_read(dbtp->data, &argp)) != 0)
 		return (ret);
@@ -90,10 +86,12 @@ __txn_regop_recover(logp, dbtp, lsnp, redo, info)
 			__db_txnlist_add(info, argp->txnid->txnid);
 		break;
 	case TXN_PREPARE:	/* Nothing to do. */
-	case TXN_BEGIN:
-		/* Call find so that we update the maxid. */
+		/* Call __db_txnlist_find so that we update the maxid. */
 		(void)__db_txnlist_find(info, argp->txnid->txnid);
 		break;
+	default:
+		ret = EINVAL;
+		break;
 	}
 
 	*lsnp = argp->prev_lsn;
@@ -118,13 +116,20 @@ __txn_ckp_recover(logp, dbtp, lsnp, redo, info)
 #ifdef DEBUG_RECOVER
 	__txn_ckp_print(logp, dbtp, lsnp, redo, info);
 #endif
-	logp = logp;			/* XXX: Shut the compiler up. */
-	redo = redo;
-	info = info;
+	COMPQUIET(logp, NULL);
 
 	if ((ret = __txn_ckp_read(dbtp->data, &argp)) != 0)
 		return (ret);
 
+	/*
+	 * Check for 'restart' checkpoint record.  This occurs when the
+	 * checkpoint lsn is equal to the lsn of the checkpoint record
+	 * and means that we could set the transaction ID back to 1, so
+	 * that we don't exhaust the transaction ID name space.
+	 */
+	if (argp->ckp_lsn.file == lsnp->file &&
+	    argp->ckp_lsn.offset == lsnp->offset)
+		__db_txnlist_gen(info, redo ? -1 : 1);
 	*lsnp = argp->last_ckp;
 	__db_free(argp);
 	return (DB_TXN_CKP);