about summary refs log tree commit diff
path: root/db2
diff options
context:
space:
mode:
Diffstat (limited to 'db2')
-rw-r--r-- db2/Makefile | 4
-rw-r--r-- db2/btree/bt_cursor.c | 62
-rw-r--r-- db2/btree/bt_delete.c | 15
-rw-r--r-- db2/btree/bt_put.c | 131
-rw-r--r-- db2/btree/bt_search.c | 14
-rw-r--r-- db2/btree/bt_split.c | 9
-rw-r--r-- db2/btree/btree_auto.c | 8
-rw-r--r-- db2/common/db_appinit.c | 21
-rw-r--r-- db2/common/db_apprec.c | 29
-rw-r--r-- db2/common/db_err.c | 12
-rw-r--r-- db2/common/db_region.c | 23
-rw-r--r-- db2/config.h | 3
-rw-r--r-- db2/db.h | 54
-rw-r--r-- db2/db/db_auto.c | 8
-rw-r--r-- db2/db/db_dispatch.c | 31
-rw-r--r-- db2/db/db_ret.c | 33
-rw-r--r-- db2/db_int.h | 3
-rw-r--r-- db2/dbm/dbm.c | 27
-rw-r--r-- db2/hash/hash_auto.c | 8
-rw-r--r-- db2/include/btree_ext.h | 2
-rw-r--r-- db2/include/common_ext.h | 2
-rw-r--r-- db2/include/db.h.src | 54
-rw-r--r-- db2/include/db_cxx.h | 4
-rw-r--r-- db2/include/db_ext.h | 1
-rw-r--r-- db2/include/db_int.h.src | 3
-rw-r--r-- db2/include/lock.h | 8
-rw-r--r-- db2/include/log.h | 23
-rw-r--r-- db2/include/mp.h | 23
-rw-r--r-- db2/include/mp_ext.h | 6
-rw-r--r-- db2/include/os_ext.h | 4
-rw-r--r-- db2/include/os_func.h | 6
-rw-r--r-- db2/lock/lock.c | 90
-rw-r--r-- db2/lock/lock_deadlock.c | 5
-rw-r--r-- db2/log/log.c | 4
-rw-r--r-- db2/log/log_archive.c | 10
-rw-r--r-- db2/log/log_auto.c | 2
-rw-r--r-- db2/log/log_get.c | 33
-rw-r--r-- db2/log/log_put.c | 106
-rw-r--r-- db2/mp/mp_bh.c | 92
-rw-r--r-- db2/mp/mp_fget.c | 114
-rw-r--r-- db2/mp/mp_fopen.c | 224
-rw-r--r-- db2/mp/mp_fput.c | 6
-rw-r--r-- db2/mp/mp_fset.c | 29
-rw-r--r-- db2/mp/mp_open.c | 13
-rw-r--r-- db2/mp/mp_pr.c | 39
-rw-r--r-- db2/mp/mp_region.c | 27
-rw-r--r-- db2/mp/mp_sync.c | 32
-rw-r--r-- db2/mutex/README | 8
-rw-r--r-- db2/mutex/mutex.c | 16
-rw-r--r-- db2/os/os_config.c (renamed from db2/os/os_func.c) | 131
-rw-r--r-- db2/os/os_open.c | 7
-rw-r--r-- db2/os/os_spin.c | 56
-rw-r--r-- db2/txn/txn.c | 51
-rw-r--r-- db2/txn/txn_auto.c | 2
54 files changed, 1093 insertions, 635 deletions
diff --git a/db2/Makefile b/db2/Makefile
index 8083ee3117..0ae06a3089 100644
--- a/db2/Makefile
+++ b/db2/Makefile
@@ -59,9 +59,9 @@ libdb-routines := bt_close bt_compare bt_conv bt_cursor bt_delete \
 	bt_split bt_stat btree_auto db db_appinit db_apprec \
 	db_auto \
 	db_byteorder db_conv db_dispatch db_dup db_err db_log2 \
-	os_abs os_dir os_fid os_fsync os_func os_map os_oflags \
+	os_abs os_config os_dir os_fid os_fsync os_map os_oflags \
 	os_open os_rpath os_rw os_seek os_sleep os_stat os_unlink \
-	db_overflow db_pr db_rec db_region db_ret db_salloc \
+	os_spin db_overflow db_pr db_rec db_region db_ret db_salloc \
 	db_shash db_thread hash hash_auto hash_conv hash_debug \
 	hash_dup hash_func hash_page hash_rec hash_stat lock \
 	lock_conflict lock_deadlock lock_util log log_archive \
diff --git a/db2/btree/bt_cursor.c b/db2/btree/bt_cursor.c
index e5f3faeb70..47ecd7c66d 100644
--- a/db2/btree/bt_cursor.c
+++ b/db2/btree/bt_cursor.c
@@ -8,7 +8,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)bt_cursor.c	10.35 (Sleepycat) 10/25/97";
+static const char sccsid[] = "@(#)bt_cursor.c	10.37 (Sleepycat) 11/22/97";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -33,7 +33,7 @@ static int __bam_c_next __P((DB *, CURSOR *, int));
 static int __bam_c_physdel __P((DB *, CURSOR *, PAGE *));
 static int __bam_c_prev __P((DB *, CURSOR *));
 static int __bam_c_put __P((DBC *, DBT *, DBT *, int));
-static int __bam_c_rget __P((DB *, CURSOR *, DBT *, DBT *, int));
+static int __bam_c_rget __P((DB *, CURSOR *, DBT *, int));
 static int __bam_c_search __P((DB *, CURSOR *, const DBT *, u_int, int, int *));
 
 /* Discard the current page/lock held by a cursor. */
@@ -229,7 +229,7 @@ __bam_c_del(dbc, flags)
 		B_DSET(GET_BKEYDATA(h, indx + O_INDX)->type);
 	else
 		B_DSET(GET_BKEYDATA(h, indx)->type);
-	(void)__bam_ca_delete(dbp, pgno, indx, NULL);
+	(void)__bam_ca_delete(dbp, pgno, indx, NULL, 0);
 
 	ret = memp_fput(dbp->mpf, h, DB_MPOOL_DIRTY);
 
@@ -313,7 +313,7 @@ __bam_c_get(dbc, key, data, flags)
 	 * been rammed into the interface.
 	 */
 	if (LF_ISSET(DB_GET_RECNO)) {
-		ret = __bam_c_rget(dbp, cp, key, data, flags);
+		ret = __bam_c_rget(dbp, cp, data, flags);
 		PUTHANDLE(dbp);
 		return (ret);
 	}
@@ -441,10 +441,10 @@ err:		if (cp->page != NULL)
  *	Return the record number for a cursor.
  */
 static int
-__bam_c_rget(dbp, cp, key, data, flags)
+__bam_c_rget(dbp, cp, data, flags)
 	DB *dbp;
 	CURSOR *cp;
-	DBT *key, *data;
+	DBT *data;
 	int flags;
 {
 	BTREE *t;
@@ -1113,18 +1113,18 @@ __bam_cprint(dbp)
 
 /*
  * __bam_ca_delete --
- * 	Check if any of the cursors refer to the item we are about to delete.
- *	We'll return the number of cursors that refer to the item in question.
- *	If a cursor does refer to the item, then we set its deleted bit.
+ * 	Check if any of the cursors refer to the item we are about to delete,
+ *	returning the number of cursors that refer to the item in question.
  *
- * PUBLIC: int __bam_ca_delete __P((DB *, db_pgno_t, u_int32_t, CURSOR *));
+ * PUBLIC: int __bam_ca_delete __P((DB *, db_pgno_t, u_int32_t, CURSOR *, int));
  */
 int
-__bam_ca_delete(dbp, pgno, indx, curs)
+__bam_ca_delete(dbp, pgno, indx, curs, key_delete)
 	DB *dbp;
 	db_pgno_t pgno;
 	u_int32_t indx;
 	CURSOR *curs;
+	int key_delete;
 {
 	DBC *dbc;
 	CURSOR *cp;
@@ -1140,22 +1140,40 @@ __bam_ca_delete(dbp, pgno, indx, curs)
 	 * It's possible for multiple cursors within the thread to have write
 	 * locks on the same page, but, cursors within a thread must be single
 	 * threaded, so all we're locking here is the cursor linked list.
-	 *
-	 * indx refers to the first of what might be a duplicate set.  The
-	 * cursor passed in is the one initiating the delete, so we don't
-	 * want to count it.
 	 */
 	DB_THREAD_LOCK(dbp);
+
 	for (count = 0, dbc = TAILQ_FIRST(&dbp->curs_queue);
 	    dbc != NULL; dbc = TAILQ_NEXT(dbc, links)) {
 		cp = (CURSOR *)dbc->internal;
-		if ((curs != cp &&
-		    cp->pgno == pgno && cp->indx == indx) ||
-		    (cp->dpgno == pgno && cp->dindx == indx)) {
-			++count;
-			F_SET(cp, C_DELETED);
-		}
+
+		/*
+		 * If a cursor was passed in, it is the one initiating the
+		 * delete, so we don't want to count it or set its deleted
+		 * flag.  Otherwise, if a cursor refers to the item, then we
+		 * set its deleted flag.
+		 */
+		if (curs == cp)
+			continue;
+
+		/*
+		 * If we're deleting the key itself and not just one of its
+		 * duplicates, repoint the cursor to the main-page key/data
+		 * pair; everything else is about to be discarded.
+		 */
+		if (key_delete || cp->dpgno == PGNO_INVALID) {
+			if (cp->pgno == pgno && cp->indx == indx) {
+				cp->dpgno = PGNO_INVALID;
+				++count;
+				F_SET(cp, C_DELETED);
+			}
+		} else
+			if (cp->dpgno == pgno && cp->dindx == indx) {
+				++count;
+				F_SET(cp, C_DELETED);
+			}
 	}
+
 	DB_THREAD_UNLOCK(dbp);
 	return (count);
 }
@@ -1440,7 +1458,7 @@ __bam_c_physdel(dbp, cp, h)
 	 * If the item is referenced by another cursor, leave it up to that
 	 * cursor to do the delete.
 	 */
-	if (__bam_ca_delete(dbp, pgno, indx, cp) != 0)
+	if (__bam_ca_delete(dbp, pgno, indx, cp, 0) != 0)
 		return (0);
 
 	/*
diff --git a/db2/btree/bt_delete.c b/db2/btree/bt_delete.c
index 9593d0109c..dbd1995f89 100644
--- a/db2/btree/bt_delete.c
+++ b/db2/btree/bt_delete.c
@@ -47,7 +47,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)bt_delete.c	10.22 (Sleepycat) 11/2/97";
+static const char sccsid[] = "@(#)bt_delete.c	10.23 (Sleepycat) 11/22/97";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -101,17 +101,20 @@ __bam_delete(argdbp, txn, key, flags)
 	h = t->bt_csp->page;
 	indx = t->bt_csp->indx;
 
-	/* Delete the key/data pair, including any duplicates. */
+	/* Delete the key/data pair, including any on-or-off page duplicates. */
 	for (cnt = 1, i = indx;; ++cnt)
 		if ((i += P_INDX) >= NUM_ENT(h) || h->inp[i] != h->inp[indx])
 			break;
 	for (; cnt > 0; --cnt, ++t->lstat.bt_deleted)
-		if (__bam_ca_delete(dbp, h->pgno, indx, NULL) != 0) {
+		if (__bam_ca_delete(dbp, h->pgno, indx, NULL, 1) == 0) {
+			if ((ret = __bam_ditem(dbp, h, indx)) != 0)
+				goto err;
+			if ((ret = __bam_ditem(dbp, h, indx)) != 0)
+				goto err;
+		} else {
 			B_DSET(GET_BKEYDATA(h, indx + O_INDX)->type);
 			indx += P_INDX;
-		} else if ((ret = __bam_ditem(dbp, h, indx)) != 0 ||
-		    (ret = __bam_ditem(dbp, h, indx)) != 0)
-			goto err;
+		}
 
 	/* If we're using record numbers, update internal page record counts. */
 	if (F_ISSET(dbp, DB_BT_RECNUM) && (ret = __bam_adjust(dbp, t, -1)) != 0)
diff --git a/db2/btree/bt_put.c b/db2/btree/bt_put.c
index b3d775bb0f..3161b02b55 100644
--- a/db2/btree/bt_put.c
+++ b/db2/btree/bt_put.c
@@ -47,7 +47,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)bt_put.c	10.31 (Sleepycat) 10/26/97";
+static const char sccsid[] = "@(#)bt_put.c	10.35 (Sleepycat) 11/22/97";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -64,6 +64,7 @@ static const char sccsid[] = "@(#)bt_put.c	10.31 (Sleepycat) 10/26/97";
 #include "btree.h"
 
 static int __bam_fixed __P((BTREE *, DBT *));
+static int __bam_isdeleted __P((DB *, PAGE *, u_int32_t, int *));
 static int __bam_lookup __P((DB *, DBT *, int *));
 static int __bam_ndup __P((DB *, PAGE *, u_int32_t));
 static int __bam_ovput __P((DB *, PAGE *, u_int32_t, DBT *));
@@ -89,7 +90,7 @@ __bam_put(argdbp, txn, key, data, flags)
 	DB *dbp;
 	PAGE *h;
 	db_indx_t indx;
-	int exact, iflags, newkey, replace, ret, stack;
+	int exact, iflags, isdeleted, newkey, replace, ret, stack;
 
 	DEBUG_LWRITE(argdbp, txn, "bam_put", key, data, flags);
 
@@ -114,21 +115,25 @@ retry:	/*
 	stack = 1;
 
 	/*
-	 * If an identical key is already in the tree, and DB_NOOVERWRITE is
-	 * set, an error is returned.  If an identical key is already in the
-	 * tree and DB_NOOVERWRITE is not set, the key is either added (when
-	 * duplicates are permitted) or an error is returned.  The exception
-	 * is when the item located is referenced by a cursor and marked for
-	 * deletion, in which case we permit the overwrite and flag the cursor.
+	 * If DB_NOOVERWRITE is set and there's an identical key in the tree,
+	 * return an error unless the data item has already been marked for
+	 * deletion, or, all the remaining data items have already been marked
+	 * for deletion in the case of duplicates.  If all the data items have
+	 * been marked for deletion, we do a replace, otherwise, it has to be
+	 * a set of duplicates, and we simply append a new one to the set.
 	 */
-	replace = 0;
-	if (exact && flags == DB_NOOVERWRITE) {
-		if (!B_DISSET(GET_BKEYDATA(h, indx + O_INDX)->type)) {
-			ret = DB_KEYEXIST;
+	isdeleted = replace = 0;
+	if (exact) {
+		if ((ret = __bam_isdeleted(dbp, h, indx, &isdeleted)) != 0)
 			goto err;
-		}
-		replace = 1;
-		__bam_ca_replace(dbp, h->pgno, indx, REPLACE_SETUP);
+		if (isdeleted) {
+			replace = 1;
+			__bam_ca_replace(dbp, h->pgno, indx, REPLACE_SETUP);
+		} else
+			if (flags == DB_NOOVERWRITE) {
+				ret = DB_KEYEXIST;
+				goto err;
+			}
 	}
 
 	/*
@@ -151,7 +156,7 @@ retry:	/*
 	 */
 	newkey = dbp->type == DB_BTREE && !exact;
 	if (exact) {
-		if (F_ISSET(dbp, DB_AM_DUP)) {
+		if (!isdeleted && F_ISSET(dbp, DB_AM_DUP)) {
 			/*
 			 * Make sure that we're not looking at a page of
 			 * duplicates -- if so, move to the last entry on
@@ -234,6 +239,88 @@ err:	if (stack)
 }
 
 /*
+ * __bam_isdeleted --
+ *	Set *isdeletedp to 1 if every remaining data item for the key has
+ *	been marked deleted, else 0; returns 0 or a non-zero error.
+ */
+static int
+__bam_isdeleted(dbp, h, indx, isdeletedp)
+	DB *dbp;
+	PAGE *h;
+	u_int32_t indx;
+	int *isdeletedp;
+{
+	BKEYDATA *bk;
+	db_pgno_t pgno;
+	int ret;
+
+	*isdeletedp = 1;
+	for (;;) {
+		bk = GET_BKEYDATA(h, indx + O_INDX);
+		switch (B_TYPE(bk->type)) {
+		case B_KEYDATA:
+		case B_OVERFLOW:
+			if (!B_DISSET(bk->type)) {
+				*isdeletedp = 0;
+				return (0);
+			}
+			break;
+		case B_DUPLICATE:
+			/*
+			 * If the data item referencing the off-page duplicates
+			 * is flagged as deleted, we're done.  Else, we have to
+			 * walk the chain of duplicate pages.
+			 */
+			if (B_DISSET(bk->type))
+				return (0);
+			goto dupchk;
+		default:
+			return (__db_pgfmt(dbp, h->pgno));
+		}
+
+		/*
+		 * If there are no more on-page duplicate items, then every
+		 * data item for this key must have been deleted.
+		 */
+		if (indx + P_INDX >= (u_int32_t)NUM_ENT(h))
+			return (0);
+		if (h->inp[indx] != h->inp[indx + P_INDX])
+			return (0);
+
+		/* Check the next item. */
+		indx += P_INDX;
+	}
+	/* NOTREACHED */
+
+dupchk:	/* Check a chain of duplicate pages. */
+	pgno = ((BOVERFLOW *)bk)->pgno;
+	for (;;) {
+		/* Acquire the next page in the duplicate chain. */
+		if ((ret = memp_fget(dbp->mpf, &pgno, 0, &h)) != 0)
+			return (ret);
+
+		/* Check each item for a delete flag. */
+		for (indx = 0; indx < NUM_ENT(h); ++indx)
+			if (!B_DISSET(GET_BKEYDATA(h, indx)->type)) {
+				*isdeletedp = 0;
+				goto done;
+			}
+		/*
+		 * If we reach the end of the duplicate pages, then every
+		 * item we reviewed must have been deleted.
+		 */
+		if ((pgno = NEXT_PGNO(h)) == PGNO_INVALID)
+			goto done;
+
+		(void)memp_fput(dbp->mpf, h, 0);
+	}
+	/* NOTREACHED */
+
+done:	(void)memp_fput(dbp->mpf, h, 0);
+	return (0);
+}
+
+/*
  * __bam_lookup --
  *	Find the right location in the tree for the key.
  */
@@ -425,10 +512,10 @@ __bam_iitem(dbp, hp, indxp, key, data, op, flags)
 		if (op == DB_CURRENT) {
 			bk = GET_BKEYDATA(h,
 			    indx + (TYPE(h) == P_LBTREE ? O_INDX : 0));
-			if (B_TYPE(bk->type) == B_OVERFLOW)
-				have_bytes = BOVERFLOW_PSIZE;
-			else
+			if (B_TYPE(bk->type) == B_KEYDATA)
 				have_bytes = BKEYDATA_PSIZE(bk->len);
+			else
+				have_bytes = BOVERFLOW_PSIZE;
 			need_bytes = 0;
 		} else {
 			have_bytes = 0;
@@ -542,7 +629,7 @@ __bam_iitem(dbp, hp, indxp, key, data, op, flags)
 			 * If we're dealing with offpage items, we have to 
 			 * delete and then re-add the item.
 			 */
-			if (bigdata || B_TYPE(bk->type) == B_OVERFLOW) {
+			if (bigdata || B_TYPE(bk->type) != B_KEYDATA) {
 				if ((ret = __bam_ditem(dbp, h, indx)) != 0)
 					return (ret);
 				break;
@@ -704,9 +791,9 @@ __bam_ritem(dbp, h, indx, data)
 {
 	BKEYDATA *bk;
 	DBT orig, repl;
-	db_indx_t lo, ln, min, off, prefix, suffix;
+	db_indx_t cnt, lo, ln, min, off, prefix, suffix;
 	int32_t nbytes;
-	int cnt, ret;
+	int ret;
 	u_int8_t *p, *t;
 
 	/*
diff --git a/db2/btree/bt_search.c b/db2/btree/bt_search.c
index a21a8208bc..c39c9af322 100644
--- a/db2/btree/bt_search.c
+++ b/db2/btree/bt_search.c
@@ -47,7 +47,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)bt_search.c	10.8 (Sleepycat) 10/25/97";
+static const char sccsid[] = "@(#)bt_search.c	10.9 (Sleepycat) 11/18/97";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -119,12 +119,20 @@ __bam_search(dbp, key, flags, stop, recnop, exactp)
 		return (ret);
 	}
 
-	/* Decide if we need to save this page; if we do, write lock it. */
+	/*
+	 * Decide if we need to save this page; if we do, write lock it.
+	 * We deliberately don't lock-couple on this call.  If the tree
+	 * is tiny, i.e., one page, and two threads are busily updating
+	 * the root page, we're almost guaranteed deadlocks galore, as
+	 * each one gets a read lock and then blocks the other's attempt
+	 * for a write lock.
+	 */
 	if (!stack &&
 	    ((LF_ISSET(S_PARENT) && (u_int8_t)(stop + 1) >= h->level) ||
 	    (LF_ISSET(S_WRITE) && h->level == LEAFLEVEL))) {
 		(void)memp_fput(dbp->mpf, h, 0);
-		if ((ret = __bam_lget(dbp, 1, pg, DB_LOCK_WRITE, &lock)) != 0)
+		(void)__BT_LPUT(dbp, lock);
+		if ((ret = __bam_lget(dbp, 0, pg, DB_LOCK_WRITE, &lock)) != 0)
 			return (ret);
 		if ((ret = __bam_pget(dbp, &h, &pg, 0)) != 0) {
 			(void)__BT_LPUT(dbp, lock);
diff --git a/db2/btree/bt_split.c b/db2/btree/bt_split.c
index bc09131b00..219d486dc5 100644
--- a/db2/btree/bt_split.c
+++ b/db2/btree/bt_split.c
@@ -44,7 +44,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)bt_split.c	10.17 (Sleepycat) 11/2/97";
+static const char sccsid[] = "@(#)bt_split.c	10.18 (Sleepycat) 11/23/97";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -396,14 +396,14 @@ __bam_broot(dbp, rootp, lp, rp)
 	 * The btree comparison code guarantees that the left-most key on any
 	 * level of the tree is never used, so it doesn't need to be filled in.
 	 */
+	memset(&bi, 0, sizeof(bi));
 	bi.len = 0;
 	B_TSET(bi.type, B_KEYDATA, 0);
 	bi.pgno = lp->pgno;
 	if (F_ISSET(dbp, DB_BT_RECNUM)) {
 		bi.nrecs = __bam_total(lp);
 		RE_NREC_SET(rootp, bi.nrecs);
-	} else
-		bi.nrecs = 0;
+	}
 	hdr.data = &bi;
 	hdr.size = SSZA(BINTERNAL, data);
 	if ((ret =
@@ -591,6 +591,7 @@ __bam_pinsert(dbp, parent, lchild, rchild)
 			return (DB_NEEDSPLIT);
 
 		/* Add a new record for the right page. */
+		memset(&bi, 0, sizeof(bi));
 		bi.len = child_bi->len;
 		B_TSET(bi.type, child_bi->type, 0);
 		bi.pgno = rchild->pgno;
@@ -640,6 +641,7 @@ noprefix:			nksize = child_bk->len;
 			if (P_FREESPACE(ppage) < nbytes)
 				return (DB_NEEDSPLIT);
 
+			memset(&bi, 0, sizeof(bi));
 			bi.len = nksize;
 			B_TSET(bi.type, child_bk->type, 0);
 			bi.pgno = rchild->pgno;
@@ -661,6 +663,7 @@ noprefix:			nksize = child_bk->len;
 			if (P_FREESPACE(ppage) < nbytes)
 				return (DB_NEEDSPLIT);
 
+			memset(&bi, 0, sizeof(bi));
 			bi.len = BOVERFLOW_SIZE;
 			B_TSET(bi.type, child_bk->type, 0);
 			bi.pgno = rchild->pgno;
diff --git a/db2/btree/btree_auto.c b/db2/btree/btree_auto.c
index 45232bbc41..18b9b34975 100644
--- a/db2/btree/btree_auto.c
+++ b/db2/btree/btree_auto.c
@@ -100,7 +100,6 @@ int __bam_pg_alloc_log(logp, txnid, ret_lsnp, flags,
  * PUBLIC: int __bam_pg_alloc_print
  * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
  */
-
 int
 __bam_pg_alloc_print(notused1, dbtp, lsnp, notused3, notused4)
 	DB_LOG *notused1;
@@ -265,7 +264,6 @@ int __bam_pg_free_log(logp, txnid, ret_lsnp, flags,
  * PUBLIC: int __bam_pg_free_print
  * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
  */
-
 int
 __bam_pg_free_print(notused1, dbtp, lsnp, notused3, notused4)
 	DB_LOG *notused1;
@@ -460,7 +458,6 @@ int __bam_split_log(logp, txnid, ret_lsnp, flags,
  * PUBLIC: int __bam_split_print
  * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
  */
-
 int
 __bam_split_print(notused1, dbtp, lsnp, notused3, notused4)
 	DB_LOG *notused1;
@@ -657,7 +654,6 @@ int __bam_rsplit_log(logp, txnid, ret_lsnp, flags,
  * PUBLIC: int __bam_rsplit_print
  * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
  */
-
 int
 __bam_rsplit_print(notused1, dbtp, lsnp, notused3, notused4)
 	DB_LOG *notused1;
@@ -836,7 +832,6 @@ int __bam_adj_log(logp, txnid, ret_lsnp, flags,
  * PUBLIC: int __bam_adj_print
  * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
  */
-
 int
 __bam_adj_print(notused1, dbtp, lsnp, notused3, notused4)
 	DB_LOG *notused1;
@@ -995,7 +990,6 @@ int __bam_cadjust_log(logp, txnid, ret_lsnp, flags,
  * PUBLIC: int __bam_cadjust_print
  * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
  */
-
 int
 __bam_cadjust_print(notused1, dbtp, lsnp, notused3, notused4)
 	DB_LOG *notused1;
@@ -1145,7 +1139,6 @@ int __bam_cdel_log(logp, txnid, ret_lsnp, flags,
  * PUBLIC: int __bam_cdel_print
  * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
  */
-
 int
 __bam_cdel_print(notused1, dbtp, lsnp, notused3, notused4)
 	DB_LOG *notused1;
@@ -1329,7 +1322,6 @@ int __bam_repl_log(logp, txnid, ret_lsnp, flags,
  * PUBLIC: int __bam_repl_print
  * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
  */
-
 int
 __bam_repl_print(notused1, dbtp, lsnp, notused3, notused4)
 	DB_LOG *notused1;
diff --git a/db2/common/db_appinit.c b/db2/common/db_appinit.c
index 74ba9ff426..05fc7cc084 100644
--- a/db2/common/db_appinit.c
+++ b/db2/common/db_appinit.c
@@ -8,7 +8,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)db_appinit.c	10.36 (Sleepycat) 10/28/97";
+static const char sccsid[] = "@(#)db_appinit.c	10.37 (Sleepycat) 11/25/97";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -108,10 +108,23 @@ db_appinit(db_home, db_config, dbenv, flags)
 		if ((ret = __db_parse(dbenv, *p)) != 0)
 			goto err;
 
-	/* Parse the config file. */
+	/*
+	 * Parse the config file.
+	 *
+	 * XXX
+	 * Don't use sprintf(3)/snprintf(3) -- the former is dangerous, and
+	 * the latter isn't standard, and we're manipulating strings handed
+	 * us by the application.
+	 */
 	if (dbenv->db_home != NULL) {
-		(void)snprintf(buf,
-		    sizeof(buf), "%s/DB_CONFIG", dbenv->db_home);
+#define	CONFIG_NAME	"/DB_CONFIG"
+		if (strlen(dbenv->db_home) +
+		    strlen(CONFIG_NAME) + 1 > sizeof(buf)) {
+			ret = ENAMETOOLONG;
+			goto err;
+		}
+		(void)strcpy(buf, dbenv->db_home);
+		(void)strcat(buf, CONFIG_NAME);
 		if ((fp = fopen(buf, "r")) != NULL) {
 			while (fgets(buf, sizeof(buf), fp) != NULL) {
 				if ((lp = strchr(buf, '\n')) != NULL)
diff --git a/db2/common/db_apprec.c b/db2/common/db_apprec.c
index ac0176d70f..188c6b9f95 100644
--- a/db2/common/db_apprec.c
+++ b/db2/common/db_apprec.c
@@ -11,7 +11,7 @@
 static const char copyright[] =
 "@(#) Copyright (c) 1997\n\
 	Sleepycat Software Inc.  All rights reserved.\n";
-static const char sccsid[] = "@(#)db_apprec.c	10.18 (Sleepycat) 9/30/97";
+static const char sccsid[] = "@(#)db_apprec.c	10.19 (Sleepycat) 11/23/97";
 #endif
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -72,10 +72,8 @@ __db_apprec(dbenv, flags)
 	if (LF_ISSET(DB_RECOVER_FATAL))
 		first_flag = DB_FIRST;
 	else {
-		if ((ret = __log_findckp(lp, &lsn)) == DB_NOTFOUND) {
-			F_SET(lp, is_thread);
-			return (0);
-		}
+		if ((ret = __log_findckp(lp, &lsn)) == DB_NOTFOUND)
+			goto out;
 		first_flag = DB_SET;
 	}
 
@@ -88,7 +86,7 @@ __db_apprec(dbenv, flags)
 			    (u_long)lsn.file, (u_long)lsn.offset);
 		else
 			__db_err(dbenv, "Retrieving first LSN");
-		goto err;
+		goto out;
 	}
 
 	first_lsn = lsn;
@@ -99,7 +97,7 @@ __db_apprec(dbenv, flags)
 		if ((ret =
 		    log_get(dbenv->lg_info, &lsn, &data, DB_NEXT)) != 0) {
 			if (ret != DB_NOTFOUND)
-				goto err;
+				goto out;
 			break;
 		}
 	}
@@ -123,7 +121,7 @@ __db_apprec(dbenv, flags)
 			goto msgerr;
 	}
 	if (ret != 0 && ret != DB_NOTFOUND)
-		goto err;
+		goto out;
 
 	for (ret = log_get(lp, &lsn, &data, DB_NEXT);
 	    ret == 0; ret = log_get(lp, &lsn, &data, DB_NEXT)) {
@@ -134,7 +132,7 @@ __db_apprec(dbenv, flags)
 			goto msgerr;
 	}
 	if (ret != DB_NOTFOUND)
-		goto err;
+		goto out;
 
 	/* Now close all the db files that are open. */
 	__log_close_files(lp);
@@ -148,7 +146,7 @@ __db_apprec(dbenv, flags)
 	dbenv->tx_info->region->last_ckp = ckp_lsn;
 	dbenv->tx_info->region->time_ckp = (u_int32_t)now;
 	if ((ret = txn_checkpoint(dbenv->tx_info, 0, 0)) != 0)
-		goto err;
+		goto out;
 
 	if (dbenv->db_verbose) {
 		__db_err(lp->dbenv, "Recovery complete at %s", ctime(&now));
@@ -160,12 +158,13 @@ __db_apprec(dbenv, flags)
 		    (u_long)dbenv->tx_info->region->last_ckp.offset);
 	}
 
-	F_SET(lp, is_thread);
-	return (0);
+	if (0) {
+msgerr:		__db_err(dbenv, "Recovery function for LSN %lu %lu failed",
+		    (u_long)lsn.file, (u_long)lsn.offset);
+	}
 
-msgerr:	__db_err(dbenv, "Recovery function for LSN %lu %lu failed",
-	    (u_long)lsn.file, (u_long)lsn.offset);
+out:	F_SET(lp, is_thread);
+	__db_txnlist_end(txninfo);
 
-err:	F_SET(lp, is_thread);
 	return (ret);
 }
diff --git a/db2/common/db_err.c b/db2/common/db_err.c
index f3e7b40448..13f2cb5dc3 100644
--- a/db2/common/db_err.c
+++ b/db2/common/db_err.c
@@ -8,7 +8,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)db_err.c	10.18 (Sleepycat) 8/27/97";
+static const char sccsid[] = "@(#)db_err.c	10.19 (Sleepycat) 11/9/97";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -302,10 +302,13 @@ __db_cgetchk(dbp, key, data, flags, isvalid)
 		break;
 	case DB_SET:
 		break;
-	case DB_SET_RECNO:
 	case DB_GET_RECNO:
 		if (!F_ISSET(dbp, DB_BT_RECNUM))
 			goto err;
+		break;
+	case DB_SET_RECNO:
+		if (!F_ISSET(dbp, DB_BT_RECNUM))
+			goto err;
 		check_key = 1;
 		break;
 	default:
@@ -313,8 +316,9 @@ err:		return (__db_ferr(dbp->dbenv, "c_get", 0));
 	}
 
 	/* Check for invalid key/data flags. */
-	DB_CHECK_FLAGS(dbp->dbenv, "key", key->flags,
-	    DB_DBT_MALLOC | DB_DBT_USERMEM | DB_DBT_PARTIAL);
+	if (check_key)
+		DB_CHECK_FLAGS(dbp->dbenv, "key", key->flags,
+		    DB_DBT_MALLOC | DB_DBT_USERMEM | DB_DBT_PARTIAL);
 	DB_CHECK_FLAGS(dbp->dbenv, "data", data->flags,
 	    DB_DBT_MALLOC | DB_DBT_USERMEM | DB_DBT_PARTIAL);
 
diff --git a/db2/common/db_region.c b/db2/common/db_region.c
index 3e8cd2dc66..4fc603a2b0 100644
--- a/db2/common/db_region.c
+++ b/db2/common/db_region.c
@@ -43,7 +43,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)db_region.c	10.15 (Sleepycat) 10/25/97";
+static const char sccsid[] = "@(#)db_region.c	10.18 (Sleepycat) 11/28/97";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -80,14 +80,14 @@ static int __db_rmap __P((DB_ENV *, int, size_t, void *));
  * into memory, NULL on error.
  *
  * PUBLIC: int __db_rcreate __P((DB_ENV *, APPNAME,
- * PUBLIC:    const char *, const char *, int, size_t, int *, void *));
+ * PUBLIC:    const char *, const char *, int, size_t, int, int *, void *));
  */
 int
-__db_rcreate(dbenv, appname, path, file, mode, size, fdp, retp)
+__db_rcreate(dbenv, appname, path, file, mode, size, oflags, fdp, retp)
 	DB_ENV *dbenv;
 	APPNAME appname;
 	const char *path, *file;
-	int mode, *fdp;
+	int mode, oflags, *fdp;
 	size_t size;
 	void *retp;
 {
@@ -110,12 +110,13 @@ __db_rcreate(dbenv, appname, path, file, mode, size, fdp, retp)
 	/*
 	 * Now open the file. We need to make sure that multiple processes
 	 * that attempt to create the region at the same time are properly
-	 * ordered, so we open it O_EXCL and O_CREAT so two simultaneous
+	 * ordered, so we open it DB_EXCL and DB_CREATE so two simultaneous
 	 * attempts to create the region will return failure in one of the
 	 * attempts.
 	 */
-	if (fd == -1 && (ret = __db_open(name,
-	    DB_CREATE | DB_EXCL, DB_CREATE | DB_EXCL, mode, &fd)) != 0) {
+	oflags |= DB_CREATE | DB_EXCL;
+	if (fd == -1 &&
+	    (ret = __db_open(name, oflags, oflags, mode, &fd)) != 0) {
 		if (ret != EEXIST)
 			__db_err(dbenv,
 			    "region create: %s: %s", name, strerror(ret));
@@ -398,7 +399,7 @@ __db_runlink(dbenv, appname, path, file, force)
 
 	/* If the file doesn't exist, we're done. */
 	if (__db_exists(name, NULL))
-		return (0);		/* XXX: ENOENT? */
+		goto done;
 
 	/*
 	 * If we're called with a force flag, try and unlink the file.  This
@@ -412,8 +413,7 @@ __db_runlink(dbenv, appname, path, file, force)
 	if (force) {
 		if ((ret = __db_unlink(name)) != 0 && ret != ENOENT)
 			goto err1;
-		FREES(name);
-		return (0);
+		goto done;
 	}
 
 	/* Open and lock the region. */
@@ -453,7 +453,7 @@ __db_runlink(dbenv, appname, path, file, force)
 		(void)__db_sleep(0, 250000);
 	}
 	if (ret == 0) {
-		FREES(name);
+done:		FREES(name);
 		return (0);
 	}
 
@@ -467,6 +467,7 @@ __db_runlink(dbenv, appname, path, file, force)
 err2:	(void)__db_mutex_unlock(&rp->lock, fd);
 	(void)__db_rclose(dbenv, fd, rp);
 err1:	__db_err(dbenv, "region unlink: %s: %s", name, strerror(ret));
+
 	FREES(name);
 	return (ret);
 }
diff --git a/db2/config.h b/db2/config.h
index 27dbdaa439..7f784a0d9b 100644
--- a/db2/config.h
+++ b/db2/config.h
@@ -114,6 +114,9 @@
 /* Define if you have the strsep function.  */
 #define HAVE_STRSEP 1
 
+/* Define if you have the sysconf function.  */
+#define HAVE_SYSCONF 1
+
 /* Define if you have the vsnprintf function.  */
 #define HAVE_VSNPRINTF 1
 
diff --git a/db2/db.h b/db2/db.h
index fb2d6bb3da..f976acafb7 100644
--- a/db2/db.h
+++ b/db2/db.h
@@ -4,7 +4,7 @@
  * Copyright (c) 1996, 1997
  *	Sleepycat Software.  All rights reserved.
  *
- *	@(#)db.h.src	10.91 (Sleepycat) 11/3/97
+ *	@(#)db.h.src	10.97 (Sleepycat) 11/28/97
  */
 
 #ifndef _DB_H_
@@ -73,8 +73,8 @@
 
 #define	DB_VERSION_MAJOR	2
 #define	DB_VERSION_MINOR	3
-#define	DB_VERSION_PATCH	12
-#define	DB_VERSION_STRING	"Sleepycat Software: DB 2.3.12: (11/3/97)"
+#define	DB_VERSION_PATCH	14
+#define	DB_VERSION_STRING	"Sleepycat Software: DB 2.3.14: (11/28/97)"
 
 typedef	u_int32_t	db_pgno_t;	/* Page number type. */
 typedef	u_int16_t	db_indx_t;	/* Page offset type. */
@@ -129,8 +129,10 @@ struct __db_dbt {
 };
 
 /*
- * DB configuration.  There are a set of functions which the application
- * can replace with its own versions.
+ * DB internal configuration.
+ *
+ * There are a set of functions that the application can replace with its
+ * own versions, and some other knobs which can be turned at run-time.
  */
 #define	DB_FUNC_CALLOC	 1		/* ANSI C calloc. */
 #define	DB_FUNC_CLOSE	 2		/* POSIX 1003.1 close. */
@@ -147,11 +149,12 @@ struct __db_dbt {
 #define	DB_FUNC_REALLOC	13		/* ANSI C realloc. */
 #define	DB_FUNC_SEEK	14		/* POSIX 1003.1 lseek. */
 #define	DB_FUNC_SLEEP	15		/* DB: sleep secs/usecs. */
-#define	DB_FUNC_STRDUP	16		/* ANSI C strdup. */
+#define	DB_FUNC_STRDUP	16		/* DB: strdup(3). */
 #define	DB_FUNC_UNLINK	17		/* POSIX 1003.1 unlink. */
 #define	DB_FUNC_UNMAP	18		/* DB: unmap shared memory file. */
 #define	DB_FUNC_WRITE	19		/* POSIX 1003.1 write. */
 #define	DB_FUNC_YIELD	20		/* DB: yield thread to scheduler. */
+#define	DB_TSL_SPINS	21		/* DB: initialize spin count. */
 
 /*
  * Database configuration and initialization.
@@ -211,10 +214,10 @@ struct __db_dbt {
  * locking subsystem.
  */
 #define	DB_LOCK_NORUN		0x0
-#define	DB_LOCK_DEFAULT		0x1
-#define	DB_LOCK_OLDEST		0x2
-#define	DB_LOCK_RANDOM		0x3
-#define	DB_LOCK_YOUNGEST	0x4
+#define	DB_LOCK_DEFAULT		0x1	/* Default policy. */
+#define	DB_LOCK_OLDEST		0x2	/* Abort oldest transaction. */
+#define	DB_LOCK_RANDOM		0x3	/* Abort random transaction. */
+#define	DB_LOCK_YOUNGEST	0x4	/* Abort youngest transaction. */
 
 struct __db_env {
 	int		 db_lorder;	/* Byte order. */
@@ -265,6 +268,10 @@ struct __db_env {
 /*******************************************************
  * Access methods.
  *******************************************************/
+/*
+ * XXX
+ * Changes here must be reflected in java/src/com/sleepycat/db/Db.java.
+ */
 typedef enum {
 	DB_BTREE=1,			/* B+tree. */
 	DB_HASH,			/* Extended Linear Hashing. */
@@ -347,7 +354,13 @@ struct __db_info {
 #define	DB_SET_RANGE	0x020000	/* c_get() */
 #define	DB_SET_RECNO	0x040000	/* c_get() */
 
-/* DB (user visible) error return codes. */
+/*
+ * DB (user visible) error return codes.
+ *
+ * XXX
+ * Changes to any of the user visible error return codes must be reflected
+ * in java/src/com/sleepycat/db/Db.java.
+ */
 #define	DB_INCOMPLETE		( -1)	/* Sync didn't finish. */
 #define	DB_KEYEMPTY		( -2)	/* The key/data pair was deleted or
 					   was never created by the user. */
@@ -516,6 +529,7 @@ int   db_appinit __P((const char *, char * const *, DB_ENV *, int));
 int   db_appexit __P((DB_ENV *));
 int   db_jump_set __P((void *, int));
 int   db_open __P((const char *, DBTYPE, int, int, DB_ENV *, DB_INFO *, DB **));
+int   db_value_set __P((int, int));
 char *db_version __P((int *, int *, int *));
 #if defined(__cplusplus)
 };
@@ -533,16 +547,26 @@ char *db_version __P((int *, int *, int *));
 /* Flag values for lock_detect(). */
 #define	DB_LOCK_CONFLICT	0x01	/* Run on any conflict. */
 
-/* Request types. */
+/*
+ * Request types.
+ *
+ * XXX
+ * Changes here must be reflected in java/src/com/sleepycat/db/Db.java.
+ */
 typedef enum {
-	DB_LOCK_DUMP,			/* Display held locks. */
+	DB_LOCK_DUMP=0,			/* Display held locks. */
 	DB_LOCK_GET,			/* Get the lock. */
 	DB_LOCK_PUT,			/* Release the lock. */
 	DB_LOCK_PUT_ALL,		/* Release locker's locks. */
 	DB_LOCK_PUT_OBJ			/* Release locker's locks on obj. */
 } db_lockop_t;
 
-/* Simple R/W lock modes and for multi-granularity intention locking. */
+/*
+ * Simple R/W lock modes and for multi-granularity intention locking.
+ *
+ * XXX
+ * Changes here must be reflected in java/src/com/sleepycat/db/Db.java.
+ */
 typedef enum {
 	DB_LOCK_NG=0,			/* Not granted. */
 	DB_LOCK_READ,			/* Shared/read. */
@@ -577,7 +601,7 @@ extern const u_int8_t db_riw_conflicts[];
 extern "C" {
 #endif
 int	  lock_close __P((DB_LOCKTAB *));
-int	  lock_detect __P((DB_LOCKTAB *, int, u_int32_t));
+int	  lock_detect __P((DB_LOCKTAB *, int, int));
 int	  lock_get __P((DB_LOCKTAB *,
 	    u_int32_t, int, const DBT *, db_lockmode_t, DB_LOCK *));
 int	  lock_id __P((DB_LOCKTAB *, u_int32_t *));
diff --git a/db2/db/db_auto.c b/db2/db/db_auto.c
index d40d964542..88bca7b583 100644
--- a/db2/db/db_auto.c
+++ b/db2/db/db_auto.c
@@ -122,7 +122,6 @@ int __db_addrem_log(logp, txnid, ret_lsnp, flags,
  * PUBLIC: int __db_addrem_print
  * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
  */
-
 int
 __db_addrem_print(notused1, dbtp, lsnp, notused3, notused4)
 	DB_LOG *notused1;
@@ -312,7 +311,6 @@ int __db_split_log(logp, txnid, ret_lsnp, flags,
  * PUBLIC: int __db_split_print
  * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
  */
-
 int
 __db_split_print(notused1, dbtp, lsnp, notused3, notused4)
 	DB_LOG *notused1;
@@ -507,7 +505,6 @@ int __db_big_log(logp, txnid, ret_lsnp, flags,
  * PUBLIC: int __db_big_print
  * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
  */
-
 int
 __db_big_print(notused1, dbtp, lsnp, notused3, notused4)
 	DB_LOG *notused1;
@@ -678,7 +675,6 @@ int __db_ovref_log(logp, txnid, ret_lsnp, flags,
  * PUBLIC: int __db_ovref_print
  * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
  */
-
 int
 __db_ovref_print(notused1, dbtp, lsnp, notused3, notused4)
 	DB_LOG *notused1;
@@ -842,7 +838,6 @@ int __db_relink_log(logp, txnid, ret_lsnp, flags,
  * PUBLIC: int __db_relink_print
  * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
  */
-
 int
 __db_relink_print(notused1, dbtp, lsnp, notused3, notused4)
 	DB_LOG *notused1;
@@ -1005,7 +1000,6 @@ int __db_addpage_log(logp, txnid, ret_lsnp, flags,
  * PUBLIC: int __db_addpage_print
  * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
  */
-
 int
 __db_addpage_print(notused1, dbtp, lsnp, notused3, notused4)
 	DB_LOG *notused1;
@@ -1180,7 +1174,6 @@ int __db_debug_log(logp, txnid, ret_lsnp, flags,
  * PUBLIC: int __db_debug_print
  * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
  */
-
 int
 __db_debug_print(notused1, dbtp, lsnp, notused3, notused4)
 	DB_LOG *notused1;
@@ -1335,7 +1328,6 @@ int __db_noop_log(logp, txnid, ret_lsnp, flags)
  * PUBLIC: int __db_noop_print
  * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
  */
-
 int
 __db_noop_print(notused1, dbtp, lsnp, notused3, notused4)
 	DB_LOG *notused1;
diff --git a/db2/db/db_dispatch.c b/db2/db/db_dispatch.c
index a4bcdb7628..4f89d2b917 100644
--- a/db2/db/db_dispatch.c
+++ b/db2/db/db_dispatch.c
@@ -43,7 +43,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)db_dispatch.c	10.6 (Sleepycat) 10/25/97";
+static const char sccsid[] = "@(#)db_dispatch.c	10.7 (Sleepycat) 11/23/97";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -236,8 +236,8 @@ __db_txnlist_find(listp, txnid)
 	void *listp;
 	u_int32_t txnid;
 {
-	__db_txnlist *p;
 	__db_txnhead *hp;
+	__db_txnlist *p;
 
 	if ((hp = (struct __db_txnhead *)listp) == NULL)
 		return (DB_NOTFOUND);
@@ -255,12 +255,16 @@ __db_txnlist_find(listp, txnid)
 }
 
 #ifdef DEBUG
+/*
+ * __db_txnlist_print --
+ *	Print out the transaction list.
+ */
 void
 __db_txnlist_print(listp)
 	void *listp;
 {
-	__db_txnlist *p;
 	__db_txnhead *hp;
+	__db_txnlist *p;
 
 	hp = (struct __db_txnhead *)listp;
 	printf("Maxid: %lu\n", (u_long)hp->maxid);
@@ -268,3 +272,24 @@ __db_txnlist_print(listp)
 		printf("TXNID: %lu\n", (u_long)p->txnid);
 }
 #endif
+
+/*
+ * __db_txnlist_end --
+ *	Discard transaction linked list.
+ *
+ * PUBLIC: void __db_txnlist_end __P((void *));
+ */
+void
+__db_txnlist_end(listp)
+	void *listp;
+{
+	__db_txnhead *hp;
+	__db_txnlist *p;
+
+	hp = (struct __db_txnhead *)listp;
+	while ((p = LIST_FIRST(&hp->head)) != LIST_END(&hp->head)) {
+		LIST_REMOVE(p, links);
+		__db_free(p);
+	}
+	__db_free(listp);
+}
diff --git a/db2/db/db_ret.c b/db2/db/db_ret.c
index bcec308b95..65441aa45a 100644
--- a/db2/db/db_ret.c
+++ b/db2/db/db_ret.c
@@ -8,7 +8,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)db_ret.c	10.8 (Sleepycat) 10/25/97";
+static const char sccsid[] = "@(#)db_ret.c	10.10 (Sleepycat) 11/28/97";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -116,35 +116,48 @@ __db_retcopy(dbt, data, len, memp, memsize, db_malloc)
 	dbt->size = len;
 
 	/*
-	 * Allocate any necessary memory.
+	 * Allocate memory to be owned by the application: DB_DBT_MALLOC.
 	 *
-	 * XXX: Never allocate 0 bytes.
+	 * !!!
+	 * We always allocate memory, even if we're copying out 0 bytes. This
+	 * guarantees consistency, i.e., the application can always free memory
+	 * without concern as to how many bytes of the record were requested.
+	 *
+	 * XXX
+	 * Never allocate 0 bytes, it's known to make malloc/realloc unhappy.
+	 *
+	 * Use the memory specified by the application: DB_DBT_USERMEM.
+	 *
+	 * !!!
+	 * If the length we're going to copy is 0, the application-supplied
+	 * memory pointer is allowed to be NULL.
 	 */
 	if (F_ISSET(dbt, DB_DBT_MALLOC)) {
 		dbt->data = db_malloc == NULL ?
-		    (void *)__db_malloc(len + 1) :
+		    (void *)__db_malloc(len) :
 		    (void *)db_malloc(len + 1);
 		if (dbt->data == NULL)
 			return (ENOMEM);
 	} else if (F_ISSET(dbt, DB_DBT_USERMEM)) {
-		if (dbt->ulen < len)
+		if (len != 0 && (dbt->data == NULL || dbt->ulen < len))
 			return (ENOMEM);
 	} else if (memp == NULL || memsize == NULL) {
 		return (EINVAL);
 	} else {
-		if (*memsize == 0 || *memsize < len) {
+		if (len != 0 && (*memsize == 0 || *memsize < len)) {
 			*memp = *memp == NULL ?
-			    (void *)__db_malloc(len + 1) :
-			    (void *)__db_realloc(*memp, len + 1);
+			    (void *)__db_malloc(len) :
+			    (void *)__db_realloc(*memp, len);
 			if (*memp == NULL) {
 				*memsize = 0;
 				return (ENOMEM);
 			}
-			*memsize = len + 1;
+			*memsize = len;
 		}
 		dbt->data = *memp;
 	}
 
-	memcpy(dbt->data, data, len);
+	if (len != 0)
+		memcpy(dbt->data, data, len);
 	return (0);
 }
diff --git a/db2/db_int.h b/db2/db_int.h
index 1f6c790345..21460722a3 100644
--- a/db2/db_int.h
+++ b/db2/db_int.h
@@ -4,7 +4,7 @@
  * Copyright (c) 1996, 1997
  *	Sleepycat Software.  All rights reserved.
  *
- *	@(#)db_int.h.src	10.36 (Sleepycat) 10/31/97
+ *	@(#)db_int.h.src	10.37 (Sleepycat) 11/25/97
  */
 
 #ifndef _DB_INTERNAL_H_
@@ -168,6 +168,7 @@ typedef struct _db_mutex_t {
 	off_t	off;			/* Backing file offset. */
 	u_long	pid;			/* Lock holder: 0 or process pid. */
 #endif
+	u_int32_t spins;		/* Spins before block. */
 	u_int32_t mutex_set_wait;	/* Granted after wait. */
 	u_int32_t mutex_set_nowait;	/* Granted without waiting. */
 } db_mutex_t;
diff --git a/db2/dbm/dbm.c b/db2/dbm/dbm.c
index 1fa92ce1fa..bd7c7a6636 100644
--- a/db2/dbm/dbm.c
+++ b/db2/dbm/dbm.c
@@ -47,7 +47,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)dbm.c	10.6 (Sleepycat) 8/27/97";
+static const char sccsid[] = "@(#)dbm.c	10.7 (Sleepycat) 11/25/97";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -198,9 +198,20 @@ dbm_open(file, oflags, mode)
 	dbinfo.h_ffactor = 40;
 	dbinfo.h_nelem = 1;
 
-	(void)snprintf(path, sizeof(path), "%s%s", file, DBM_SUFFIX);
-	if ((__set_errno(db_open(path,
-	    DB_HASH, __db_oflags(oflags), mode, NULL, &dbinfo, &dbp))) != 0)
+	/*
+	 * XXX
+	 * Don't use sprintf(3)/snprintf(3) -- the former is dangerous, and
+	 * the latter isn't standard, and we're manipulating strings handed
+	 * us by the application.
+	 */
+	if (strlen(file) + strlen(DBM_SUFFIX) + 1 > sizeof(path)) {
+		errno = ENAMETOOLONG;
+		return (NULL);
+	}
+	(void)strcpy(path, file);
+	(void)strcat(path, DBM_SUFFIX);
+	if ((errno = db_open(path,
+	    DB_HASH, __db_oflags(oflags), mode, NULL, &dbinfo, &dbp)) != 0)
 		return (NULL);
 	return ((DBM *)dbp);
 }
@@ -261,7 +272,7 @@ dbm_firstkey(db)
 	DBC *cp;
 
 	if ((cp = TAILQ_FIRST(&db->curs_queue)) == NULL)
-		if ((__set_errno(db->cursor(db, NULL, &cp))) != 0) {
+		if ((errno = db->cursor(db, NULL, &cp)) != 0) {
 			memset(&key, 0, sizeof(key));
 			return (key);
 		}
@@ -294,7 +305,7 @@ dbm_nextkey(db)
 	int status;
 
 	if ((cp = TAILQ_FIRST(&db->curs_queue)) == NULL)
-		if ((__set_errno(db->cursor(db, NULL, &cp))) != 0) {
+		if ((errno = db->cursor(db, NULL, &cp)) != 0) {
 			memset(&key, 0, sizeof(key));
 			return (key);
 		}
@@ -330,9 +341,9 @@ dbm_delete(db, key)
 	_key.size = key.dsize;
 	ret = (((DB *)db)->del)((DB *)db, NULL, &_key, 0);
 	if (ret < 0)
-		__set_errno(ENOENT);
+		errno = ENOENT;
 	else if (ret > 0) {
-		__set_errno(ret);
+		errno = ret;
 		ret = -1;
 	}
 	return (ret);
diff --git a/db2/hash/hash_auto.c b/db2/hash/hash_auto.c
index 4820eb8611..787ee04ddb 100644
--- a/db2/hash/hash_auto.c
+++ b/db2/hash/hash_auto.c
@@ -119,7 +119,6 @@ int __ham_insdel_log(logp, txnid, ret_lsnp, flags,
  * PUBLIC: int __ham_insdel_print
  * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
  */
-
 int
 __ham_insdel_print(notused1, dbtp, lsnp, notused3, notused4)
 	DB_LOG *notused1;
@@ -316,7 +315,6 @@ int __ham_newpage_log(logp, txnid, ret_lsnp, flags,
  * PUBLIC: int __ham_newpage_print
  * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
  */
-
 int
 __ham_newpage_print(notused1, dbtp, lsnp, notused3, notused4)
 	DB_LOG *notused1;
@@ -479,7 +477,6 @@ int __ham_splitmeta_log(logp, txnid, ret_lsnp, flags,
  * PUBLIC: int __ham_splitmeta_print
  * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
  */
-
 int
 __ham_splitmeta_print(notused1, dbtp, lsnp, notused3, notused4)
 	DB_LOG *notused1;
@@ -640,7 +637,6 @@ int __ham_splitdata_log(logp, txnid, ret_lsnp, flags,
  * PUBLIC: int __ham_splitdata_print
  * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
  */
-
 int
 __ham_splitdata_print(notused1, dbtp, lsnp, notused3, notused4)
 	DB_LOG *notused1;
@@ -832,7 +828,6 @@ int __ham_replace_log(logp, txnid, ret_lsnp, flags,
  * PUBLIC: int __ham_replace_print
  * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
  */
-
 int
 __ham_replace_print(notused1, dbtp, lsnp, notused3, notused4)
 	DB_LOG *notused1;
@@ -1034,7 +1029,6 @@ int __ham_newpgno_log(logp, txnid, ret_lsnp, flags,
  * PUBLIC: int __ham_newpgno_print
  * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
  */
-
 int
 __ham_newpgno_print(notused1, dbtp, lsnp, notused3, notused4)
 	DB_LOG *notused1;
@@ -1203,7 +1197,6 @@ int __ham_ovfl_log(logp, txnid, ret_lsnp, flags,
  * PUBLIC: int __ham_ovfl_print
  * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
  */
-
 int
 __ham_ovfl_print(notused1, dbtp, lsnp, notused3, notused4)
 	DB_LOG *notused1;
@@ -1386,7 +1379,6 @@ int __ham_copypage_log(logp, txnid, ret_lsnp, flags,
  * PUBLIC: int __ham_copypage_print
  * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
  */
-
 int
 __ham_copypage_print(notused1, dbtp, lsnp, notused3, notused4)
 	DB_LOG *notused1;
diff --git a/db2/include/btree_ext.h b/db2/include/btree_ext.h
index bbe0d971b2..46f2227bdd 100644
--- a/db2/include/btree_ext.h
+++ b/db2/include/btree_ext.h
@@ -11,7 +11,7 @@ int __bam_cursor __P((DB *, DB_TXN *, DBC **));
 int __bam_c_iclose __P((DB *, DBC *));
 int __bam_get __P((DB *, DB_TXN *, DBT *, DBT *, int));
 int __bam_ovfl_chk __P((DB *, CURSOR *, u_int32_t, int));
-int __bam_ca_delete __P((DB *, db_pgno_t, u_int32_t, CURSOR *));
+int __bam_ca_delete __P((DB *, db_pgno_t, u_int32_t, CURSOR *, int));
 void __bam_ca_di __P((DB *, db_pgno_t, u_int32_t, int));
 void __bam_ca_dup __P((DB *,
    db_pgno_t, u_int32_t, u_int32_t, db_pgno_t, u_int32_t));
diff --git a/db2/include/common_ext.h b/db2/include/common_ext.h
index b814582abd..29bc9aa4e2 100644
--- a/db2/include/common_ext.h
+++ b/db2/include/common_ext.h
@@ -23,7 +23,7 @@ int __db_syncchk __P((const DB *, int));
 int __db_ferr __P((const DB_ENV *, const char *, int));
 u_int32_t __db_log2 __P((u_int32_t));
 int __db_rcreate __P((DB_ENV *, APPNAME,
-   const char *, const char *, int, size_t, int *, void *));
+   const char *, const char *, int, size_t, int, int *, void *));
 int __db_rinit __P((DB_ENV *, RLAYOUT *, int, size_t, int));
 int __db_ropen __P((DB_ENV *,
    APPNAME, const char *, const char *, int, int *, void *));
diff --git a/db2/include/db.h.src b/db2/include/db.h.src
index 3cc2bfd4fc..654eb16425 100644
--- a/db2/include/db.h.src
+++ b/db2/include/db.h.src
@@ -4,7 +4,7 @@
  * Copyright (c) 1996, 1997
  *	Sleepycat Software.  All rights reserved.
  *
- *	@(#)db.h.src	10.91 (Sleepycat) 11/3/97
+ *	@(#)db.h.src	10.97 (Sleepycat) 11/28/97
  */
 
 #ifndef _DB_H_
@@ -73,8 +73,8 @@
 
 #define	DB_VERSION_MAJOR	2
 #define	DB_VERSION_MINOR	3
-#define	DB_VERSION_PATCH	12
-#define	DB_VERSION_STRING	"Sleepycat Software: DB 2.3.12: (11/3/97)"
+#define	DB_VERSION_PATCH	14
+#define	DB_VERSION_STRING	"Sleepycat Software: DB 2.3.14: (11/28/97)"
 
 typedef	u_int32_t	db_pgno_t;	/* Page number type. */
 typedef	u_int16_t	db_indx_t;	/* Page offset type. */
@@ -129,8 +129,10 @@ struct __db_dbt {
 };
 
 /*
- * DB configuration.  There are a set of functions which the application
- * can replace with its own versions.
+ * DB internal configuration.
+ *
+ * There are a set of functions that the application can replace with its
+ * own versions, and some other knobs which can be turned at run-time.
  */
 #define	DB_FUNC_CALLOC	 1		/* ANSI C calloc. */
 #define	DB_FUNC_CLOSE	 2		/* POSIX 1003.1 close. */
@@ -147,11 +149,12 @@ struct __db_dbt {
 #define	DB_FUNC_REALLOC	13		/* ANSI C realloc. */
 #define	DB_FUNC_SEEK	14		/* POSIX 1003.1 lseek. */
 #define	DB_FUNC_SLEEP	15		/* DB: sleep secs/usecs. */
-#define	DB_FUNC_STRDUP	16		/* ANSI C strdup. */
+#define	DB_FUNC_STRDUP	16		/* DB: strdup(3). */
 #define	DB_FUNC_UNLINK	17		/* POSIX 1003.1 unlink. */
 #define	DB_FUNC_UNMAP	18		/* DB: unmap shared memory file. */
 #define	DB_FUNC_WRITE	19		/* POSIX 1003.1 write. */
 #define	DB_FUNC_YIELD	20		/* DB: yield thread to scheduler. */
+#define	DB_TSL_SPINS	21		/* DB: initialize spin count. */
 
 /*
  * Database configuration and initialization.
@@ -211,10 +214,10 @@ struct __db_dbt {
  * locking subsystem.
  */
 #define	DB_LOCK_NORUN		0x0
-#define	DB_LOCK_DEFAULT		0x1
-#define	DB_LOCK_OLDEST		0x2
-#define	DB_LOCK_RANDOM		0x3
-#define	DB_LOCK_YOUNGEST	0x4
+#define	DB_LOCK_DEFAULT		0x1	/* Default policy. */
+#define	DB_LOCK_OLDEST		0x2	/* Abort oldest transaction. */
+#define	DB_LOCK_RANDOM		0x3	/* Abort random transaction. */
+#define	DB_LOCK_YOUNGEST	0x4	/* Abort youngest transaction. */
 
 struct __db_env {
 	int		 db_lorder;	/* Byte order. */
@@ -265,6 +268,10 @@ struct __db_env {
 /*******************************************************
  * Access methods.
  *******************************************************/
+/*
+ * XXX
+ * Changes here must be reflected in java/src/com/sleepycat/db/Db.java.
+ */
 typedef enum {
 	DB_BTREE=1,			/* B+tree. */
 	DB_HASH,			/* Extended Linear Hashing. */
@@ -347,7 +354,13 @@ struct __db_info {
 #define	DB_SET_RANGE	0x020000	/* c_get() */
 #define	DB_SET_RECNO	0x040000	/* c_get() */
 
-/* DB (user visible) error return codes. */
+/*
+ * DB (user visible) error return codes.
+ *
+ * XXX
+ * Changes to any of the user visible error return codes must be reflected
+ * in java/src/com/sleepycat/db/Db.java.
+ */
 #define	DB_INCOMPLETE		( -1)	/* Sync didn't finish. */
 #define	DB_KEYEMPTY		( -2)	/* The key/data pair was deleted or
 					   was never created by the user. */
@@ -516,6 +529,7 @@ int   db_appinit __P((const char *, char * const *, DB_ENV *, int));
 int   db_appexit __P((DB_ENV *));
 int   db_jump_set __P((void *, int));
 int   db_open __P((const char *, DBTYPE, int, int, DB_ENV *, DB_INFO *, DB **));
+int   db_value_set __P((int, int));
 char *db_version __P((int *, int *, int *));
 #if defined(__cplusplus)
 };
@@ -533,16 +547,26 @@ char *db_version __P((int *, int *, int *));
 /* Flag values for lock_detect(). */
 #define	DB_LOCK_CONFLICT	0x01	/* Run on any conflict. */
 
-/* Request types. */
+/*
+ * Request types.
+ *
+ * XXX
+ * Changes here must be reflected in java/src/com/sleepycat/db/Db.java.
+ */
 typedef enum {
-	DB_LOCK_DUMP,			/* Display held locks. */
+	DB_LOCK_DUMP=0,			/* Display held locks. */
 	DB_LOCK_GET,			/* Get the lock. */
 	DB_LOCK_PUT,			/* Release the lock. */
 	DB_LOCK_PUT_ALL,		/* Release locker's locks. */
 	DB_LOCK_PUT_OBJ			/* Release locker's locks on obj. */
 } db_lockop_t;
 
-/* Simple R/W lock modes and for multi-granularity intention locking. */
+/*
+ * Simple R/W lock modes and for multi-granularity intention locking.
+ *
+ * XXX
+ * Changes here must be reflected in java/src/com/sleepycat/db/Db.java.
+ */
 typedef enum {
 	DB_LOCK_NG=0,			/* Not granted. */
 	DB_LOCK_READ,			/* Shared/read. */
@@ -577,7 +601,7 @@ extern const u_int8_t db_riw_conflicts[];
 extern "C" {
 #endif
 int	  lock_close __P((DB_LOCKTAB *));
-int	  lock_detect __P((DB_LOCKTAB *, int, u_int32_t));
+int	  lock_detect __P((DB_LOCKTAB *, int, int));
 int	  lock_get __P((DB_LOCKTAB *,
 	    u_int32_t, int, const DBT *, db_lockmode_t, DB_LOCK *));
 int	  lock_id __P((DB_LOCKTAB *, u_int32_t *));
diff --git a/db2/include/db_cxx.h b/db2/include/db_cxx.h
index 01d1231092..83523c5559 100644
--- a/db2/include/db_cxx.h
+++ b/db2/include/db_cxx.h
@@ -4,7 +4,7 @@
  * Copyright (c) 1997
  *	Sleepycat Software.  All rights reserved.
  *
- *	@(#)db_cxx.h	10.12 (Sleepycat) 10/25/97
+ *	@(#)db_cxx.h	10.13 (Sleepycat) 11/25/97
  */
 
 #ifndef _DB_CXX_H_
@@ -202,7 +202,7 @@ class _exported DbLockTab
 friend DbEnv;
 public:
     int close();
-    int detect(int atype, u_int32_t flags);
+    int detect(int flags, int atype);
     int get(u_int32_t locker, int flags, const Dbt *obj,
             db_lockmode_t lock_mode, DbLock *lock);
     int id(u_int32_t *idp);
diff --git a/db2/include/db_ext.h b/db2/include/db_ext.h
index f9b3b3a214..15eeaf50a3 100644
--- a/db2/include/db_ext.h
+++ b/db2/include/db_ext.h
@@ -65,6 +65,7 @@ int __db_add_recovery __P((DB_ENV *,
 int __db_txnlist_init __P((void *));
 int __db_txnlist_add __P((void *, u_int32_t));
 int __db_txnlist_find __P((void *, u_int32_t));
+void __db_txnlist_end __P((void *));
 int __db_dput __P((DB *,
    DBT *, PAGE **, db_indx_t *, int (*)(DB *, u_int32_t, PAGE **)));
 int __db_drem __P((DB *,
diff --git a/db2/include/db_int.h.src b/db2/include/db_int.h.src
index abd93a6e8e..03a882fded 100644
--- a/db2/include/db_int.h.src
+++ b/db2/include/db_int.h.src
@@ -4,7 +4,7 @@
  * Copyright (c) 1996, 1997
  *	Sleepycat Software.  All rights reserved.
  *
- *	@(#)db_int.h.src	10.36 (Sleepycat) 10/31/97
+ *	@(#)db_int.h.src	10.37 (Sleepycat) 11/25/97
  */
 
 #ifndef _DB_INTERNAL_H_
@@ -168,6 +168,7 @@ typedef struct _db_mutex_t {
 	off_t	off;			/* Backing file offset. */
 	u_long	pid;			/* Lock holder: 0 or process pid. */
 #endif
+	u_int32_t spins;		/* Spins before block. */
 	u_int32_t mutex_set_wait;	/* Granted after wait. */
 	u_int32_t mutex_set_nowait;	/* Granted without waiting. */
 } db_mutex_t;
diff --git a/db2/include/lock.h b/db2/include/lock.h
index 8a927f076e..5031b65d06 100644
--- a/db2/include/lock.h
+++ b/db2/include/lock.h
@@ -4,7 +4,7 @@
  * Copyright (c) 1996, 1997
  *	Sleepycat Software.  All rights reserved.
  *
- *	@(#)lock.h	10.9 (Sleepycat) 10/25/97
+ *	@(#)lock.h	10.10 (Sleepycat) 11/13/97
  */
 
 typedef struct __db_lockobj	DB_LOCKOBJ;
@@ -85,10 +85,14 @@ struct __db_lockobj {
 	} dlinks;
 #define	DB_LOCK_OBJTYPE		1
 #define	DB_LOCK_LOCKER		2
+					/* Allocate room in the object to
+					 * hold typical DB lock structures
+					 * so that we do not have to
+					 * allocate them from shalloc. */
+	u_int8_t objdata[sizeof(struct __db_ilock)];
 	u_int8_t type;			/* Real object or locker id. */
 };
 
-
 #define dd_id	wlinks._dd_id
 #define	waiters	wlinks._waiters
 #define	holders	dlinks._holders
diff --git a/db2/include/log.h b/db2/include/log.h
index a192a38136..405daf4148 100644
--- a/db2/include/log.h
+++ b/db2/include/log.h
@@ -4,7 +4,7 @@
  * Copyright (c) 1996, 1997
  *	Sleepycat Software.  All rights reserved.
  *
- *	@(#)log.h	10.15 (Sleepycat) 11/2/97
+ *	@(#)log.h	10.16 (Sleepycat) 11/9/97
  */
 
 #ifndef _LOG_H_
@@ -117,20 +117,35 @@ struct __log {
 
 	SH_TAILQ_HEAD(__fq) fq;		/* List of file names. */
 
+	/*
+	 * The lsn LSN is the file offset that we're about to write and which
+	 * we will return to the user.
+	 */
 	DB_LSN	  lsn;			/* LSN at current file offset. */
-	DB_LSN	  c_lsn;		/* LSN of the last checkpoint. */
+
+	/*
+	 * The s_lsn LSN is the last LSN that we know is on disk, not just
+	 * written, but synced.
+	 */
 	DB_LSN	  s_lsn;		/* LSN of the last sync. */
-	DB_LSN	  uw_lsn;		/* LSN of 1st rec not fully on disk. */
 
 	u_int32_t len;			/* Length of the last record. */
 
-	size_t	  b_off;		/* Current offset in the buffer. */
 	u_int32_t w_off;		/* Current write offset in the file. */
 
+	DB_LSN	  c_lsn;		/* LSN of the last checkpoint. */
 	time_t	  chkpt;		/* Time of the last checkpoint. */
 
 	DB_LOG_STAT stat;		/* Log statistics. */
 
+	/*
+	 * The f_lsn LSN is the LSN (returned to the user) that "owns" the
+	 * first byte of the buffer.  If the record associated with the LSN
+	 * spans buffers, it may not reflect the physical file location of
+	 * the first byte of the buffer.
+	 */
+	DB_LSN	  f_lsn;		/* LSN of first byte in the buffer. */
+	size_t	  b_off;		/* Current offset in the buffer. */
 	u_int8_t buf[4 * 1024];		/* Log buffer. */
 };
 
diff --git a/db2/include/mp.h b/db2/include/mp.h
index f68f42b144..f108246f2c 100644
--- a/db2/include/mp.h
+++ b/db2/include/mp.h
@@ -4,7 +4,7 @@
  * Copyright (c) 1996, 1997
  *	Sleepycat Software.  All rights reserved.
  *
- *	@(#)mp.h	10.19 (Sleepycat) 10/25/97
+ *	@(#)mp.h	10.22 (Sleepycat) 11/28/97
  */
 
 struct __bh;		typedef struct __bh BH;
@@ -23,7 +23,6 @@ struct __mpoolfile;	typedef struct __mpoolfile MPOOLFILE;
 #define	DB_CACHESIZE_MIN	( 20 * 1024)
 
 #define	INVALID		0		/* Invalid shared memory offset. */
-#define	TEMPORARY	"<tmp>"		/* Temporary file name. */
 
 /*
  * There are three ways we do locking in the mpool code:
@@ -122,10 +121,10 @@ struct __db_mpool {
 
 	int	    fd;			/* Underlying mmap'd fd. */
 
-
 #define	MP_ISPRIVATE	0x01		/* Private, so local memory. */
 #define	MP_LOCKHANDLE	0x02		/* Threaded, lock handles and region. */
 #define	MP_LOCKREGION	0x04		/* Concurrent access, lock region. */
+#define	MP_MALLOC	0x08		/* If region in allocated memory. */
 	u_int32_t  flags;
 };
 
@@ -157,7 +156,6 @@ struct __db_mpoolfile {
 /* These fields are not protected. */
 	TAILQ_ENTRY(__db_mpoolfile) q;	/* Linked list of DB_MPOOLFILE's. */
 
-	char	  *path;		/* Initial file path. */
 	DB_MPOOL  *dbmp;		/* Overlying DB_MPOOL. */
 	MPOOLFILE *mfp;			/* Underlying MPOOLFILE. */
 
@@ -165,11 +163,9 @@ struct __db_mpoolfile {
 	size_t	   len;			/* Length of mmap'd region. */
 
 /* These fields need to be protected for multi-threaded support. */
-#define	MP_PATH_ALLOC	0x001		/* Path is allocated memory. */
-#define	MP_PATH_TEMP	0x002		/* Backing file is a temporary. */
-#define	MP_READONLY	0x004		/* File is readonly. */
-#define	MP_UPGRADE	0x008		/* File descriptor is readwrite. */
-#define	MP_UPGRADE_FAIL	0x010		/* Upgrade wasn't possible. */
+#define	MP_READONLY	0x01		/* File is readonly. */
+#define	MP_UPGRADE	0x02		/* File descriptor is readwrite. */
+#define	MP_UPGRADE_FAIL	0x04		/* Upgrade wasn't possible. */
 	u_int32_t  flags;
 };
 
@@ -220,12 +216,9 @@ struct __mpoolfile {
 	u_int32_t ref;			/* Reference count. */
 
 	int	  ftype;		/* File type. */
-	int	  can_mmap;		/* If the file can be mmap'd. */
-
 	int	  lsn_off;		/* Page's LSN offset. */
 
 	size_t	  path_off;		/* File name location. */
-
 	size_t	  fileid_off;		/* File identification location. */
 
 	size_t	  pgcookie_len;		/* Pgin/pgout cookie length. */
@@ -233,6 +226,12 @@ struct __mpoolfile {
 
 	int	  lsn_cnt;		/* Checkpoint buffers left to write. */
 
+	db_pgno_t last_pgno;		/* Last page in the file. */
+
+#define	MP_CAN_MMAP	0x01		/* If the file can be mmap'd. */
+#define	MP_TEMP		0x02		/* Backing file is a temporary. */
+	u_int32_t  flags;
+
 	DB_MPOOL_FSTAT stat;		/* Per-file mpool statistics. */
 };
 
diff --git a/db2/include/mp_ext.h b/db2/include/mp_ext.h
index 49d86ba2e5..b78b3423cd 100644
--- a/db2/include/mp_ext.h
+++ b/db2/include/mp_ext.h
@@ -5,8 +5,10 @@ int __memp_pgread __P((DB_MPOOLFILE *, BH *, int));
 int __memp_pgwrite __P((DB_MPOOLFILE *, BH *, int *, int *));
 int __memp_pg __P((DB_MPOOLFILE *, BH *, int));
 void __memp_bhfree __P((DB_MPOOL *, MPOOLFILE *, BH *, int));
-int __memp_fopen __P((DB_MPOOL *, const char *, int, int,
-   int, size_t, int, DBT *, u_int8_t *, int, DB_MPOOLFILE **));
+int __memp_fopen __P((DB_MPOOL *, MPOOLFILE *, const char *, int,
+   int, int, size_t, int, DBT *, u_int8_t *, int, DB_MPOOLFILE **));
+char * __memp_fn __P((DB_MPOOLFILE *));
+char * __memp_fns __P((DB_MPOOL *, MPOOLFILE *));
 void __memp_debug __P((DB_MPOOL *, FILE *, int));
 int __memp_ralloc __P((DB_MPOOL *, size_t, size_t *, void *));
 int __memp_ropen
diff --git a/db2/include/os_ext.h b/db2/include/os_ext.h
index e48a1e9407..2edf2e257d 100644
--- a/db2/include/os_ext.h
+++ b/db2/include/os_ext.h
@@ -1,5 +1,8 @@
 /* DO NOT EDIT: automatically built by dist/distrib. */
 int __db_abspath __P((const char *));
+void *__db_calloc __P((size_t, size_t));
+void *__db_malloc __P((size_t));
+void *__db_realloc __P((void *, size_t));
 int __os_dirlist __P((const char *, char ***, int *));
 void __os_dirfree __P((char **, int));
 int __db_fileid __P((DB_ENV *, const char *, int, u_int8_t *));
@@ -14,6 +17,7 @@ int __db_read __P((int, void *, size_t, ssize_t *));
 int __db_write __P((int, void *, size_t, ssize_t *));
 int __os_seek __P((int, size_t, db_pgno_t, u_long, int));
 int __os_sleep __P((u_long, u_long));
+int __os_spin __P((void));
 int __os_exists __P((const char *, int *));
 int __os_ioinfo __P((const char *, int, off_t *, off_t *));
 int __db_unlink __P((const char *));
diff --git a/db2/include/os_func.h b/db2/include/os_func.h
index 0a72942903..54b64ffaa2 100644
--- a/db2/include/os_func.h
+++ b/db2/include/os_func.h
@@ -4,12 +4,11 @@
  * Copyright (c) 1997
  *	Sleepycat Software.  All rights reserved.
  *
- *	@(#)os_func.h	10.2 (Sleepycat) 10/28/97
+ *	@(#)os_func.h	10.4 (Sleepycat) 11/28/97
  */
 
 /* Calls which can be replaced by the application. */
 struct __db_jumptab {
-	void   *(*db_calloc) __P((size_t, size_t));	/* DB_FUNC_CALLOC */
 	int	(*db_close) __P((int));			/* DB_FUNC_CLOSE */
 	void	(*db_dirfree) __P((char **, int));	/* DB_FUNC_DIRFREE */
 	int	(*db_dirlist)				/* DB_FUNC_DIRLIST */
@@ -54,7 +53,6 @@ extern struct __db_jumptab __db_jump;
  * part of DB is the only code that should use the __os_XXX names, all other
  * parts of DB should be calling __db_XXX functions.
  */
-#define	__db_calloc	__db_jump.db_calloc
 #define	__os_close	__db_jump.db_close	/* __db_close is a wrapper. */
 #define	__db_dirfree	__db_jump.db_dirfree
 #define	__db_dirlist	__db_jump.db_dirlist
@@ -62,11 +60,9 @@ extern struct __db_jumptab __db_jump;
 #define	__db_free	__db_jump.db_free
 #define	__os_fsync	__db_jump.db_fsync	/* __db_fsync is a wrapper. */
 #define	__db_ioinfo	__db_jump.db_ioinfo
-#define	__db_malloc	__db_jump.db_malloc
 #define	__db_map	__db_jump.db_map
 #define	__os_open	__db_jump.db_open	/* __db_open is a wrapper. */
 #define	__os_read	__db_jump.db_read	/* __db_read is a wrapper. */
-#define	__db_realloc	__db_jump.db_realloc
 #define	__db_seek	__db_jump.db_seek
 #define	__db_sleep	__db_jump.db_sleep
 #define	__db_strdup	__db_jump.db_strdup
diff --git a/db2/lock/lock.c b/db2/lock/lock.c
index f1223a9fa6..9b1cbc8a08 100644
--- a/db2/lock/lock.c
+++ b/db2/lock/lock.c
@@ -8,7 +8,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)lock.c	10.38 (Sleepycat) 10/25/97";
+static const char sccsid[] = "@(#)lock.c	10.41 (Sleepycat) 11/28/97";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -91,7 +91,7 @@ __lock_create(path, mode, dbenv)
 	if ((ret =
 	    __db_rcreate(dbenv, DB_APP_NONE, path, DB_DEFAULT_LOCK_FILE, mode,
 	    LOCK_REGION_SIZE(lock_modes, maxlocks, __db_tablesize(maxlocks)),
-	    &fd, &lrp)) != 0)
+	    0, &fd, &lrp)) != 0)
 		return (ret);
 
 	/* Region exists; now initialize it. */
@@ -600,7 +600,9 @@ __lock_put_internal(lt, lockp, do_all)
 	if (SH_TAILQ_FIRST(&sh_obj->holders, __db_lock) == NULL) {
 		HASHREMOVE_EL(lt->hashtab, __db_lockobj,
 		    links, sh_obj, lt->region->table_size, __lock_lhash);
-		__db_shalloc_free(lt->mem, SH_DBT_PTR(&sh_obj->lockobj));
+		if (sh_obj->lockobj.size > sizeof(sh_obj->objdata))
+			__db_shalloc_free(lt->mem,
+			    SH_DBT_PTR(&sh_obj->lockobj));
 		SH_TAILQ_INSERT_HEAD(&lt->region->free_objs, sh_obj, links,
 		    __db_lockobj);
 		state_changed = 1;
@@ -633,7 +635,7 @@ __lock_get_internal(lt, locker, flags, obj, lock_mode, lockp)
 	DB_LOCKOBJ *sh_obj, *sh_locker;
 	DB_LOCKREGION *lrp;
 	size_t newl_off;
-	int ret;
+	int ihold, ret;
 
 	ret = 0;
 	/*
@@ -680,29 +682,40 @@ __lock_get_internal(lt, locker, flags, obj, lock_mode, lockp)
 	 * new lock if it does not conflict with anyone on the holders list
 	 * OR anyone on the waiters list.  The reason that we don't grant if
 	 * there's a conflict is that this can lead to starvation (a writer
-	 * waiting on a popularly read item will never ben granted).  The
+	 * waiting on a popularly read item will never be granted).  The
 	 * downside of this is that a waiting reader can prevent an upgrade
-	 * from reader to writer, which is not uncommon.  In case of conflict,
-	 * we put the new lock on the end of the waiters list.
+	 * from reader to writer, which is not uncommon.
+	 *
+	 * There is one exception to the no-conflict rule.  If a lock is held
+	 * by the requesting locker AND the new lock does not conflict with
+	 * any other holders, then we grant the lock.  The most common place
+	 * this happens is when the holder has a WRITE lock and a READ lock
+	 * request comes in for the same locker.  If we do not grant the read
+	 * lock, then we guarantee deadlock.
+	 *
+	 * In case of conflict, we put the new lock on the end of the waiters
+	 * list.
 	 */
+	ihold = 0;
 	for (lp = SH_TAILQ_FIRST(&sh_obj->holders, __db_lock);
 	    lp != NULL;
 	    lp = SH_TAILQ_NEXT(lp, links, __db_lock)) {
-		if (CONFLICTS(lt, lp->mode, lock_mode) &&
-		    locker != lp->holder)
+		if (locker == lp->holder) {
+			if (lp->mode == lock_mode &&
+			    lp->status == DB_LSTAT_HELD) {
+				/* Lock is held, just inc the ref count. */
+				lp->refcount++;
+				SH_TAILQ_INSERT_HEAD(&lrp->free_locks,
+				    newl, links, __db_lock);
+				*lockp = lp;
+				return (0);
+			} else
+				ihold = 1;
+		} else if (CONFLICTS(lt, lp->mode, lock_mode))
 			break;
-		else if (lp->holder == locker && lp->mode == lock_mode &&
-		    lp->status == DB_LSTAT_HELD) {
-			/* Lock is already held, just inc the ref count. */
-			lp->refcount++;
-			SH_TAILQ_INSERT_HEAD(&lrp->free_locks, newl, links,
-			    __db_lock);
-			*lockp = lp;
-			return (0);
-		}
     	}
 
-	if (lp == NULL)
+	if (lp == NULL && !ihold)
 		for (lp = SH_TAILQ_FIRST(&sh_obj->waiters, __db_lock);
 		    lp != NULL;
 		    lp = SH_TAILQ_NEXT(lp, links, __db_lock)) {
@@ -1261,25 +1274,37 @@ __lock_getobj(lt, locker, dbt, type, objp)
 	 */
 	if (sh_obj == NULL) {
 		/* Create new object and then insert it into hash table. */
-		if ((sh_obj = SH_TAILQ_FIRST(&lrp->free_objs, __db_lockobj))
-		    == NULL) {
+		if ((sh_obj =
+		    SH_TAILQ_FIRST(&lrp->free_objs, __db_lockobj)) == NULL) {
 			if ((ret = __lock_grow_region(lt, DB_LOCK_OBJ, 0)) != 0)
 				return (ret);
 			lrp = lt->region;
 			sh_obj = SH_TAILQ_FIRST(&lrp->free_objs, __db_lockobj);
 		}
-		if ((ret = __db_shalloc(lt->mem, obj_size, 0, &p)) != 0) {
-			if ((ret = __lock_grow_region(lt,
-			    DB_LOCK_MEM, obj_size)) != 0)
-				return (ret);
-			lrp = lt->region;
-			/* Reacquire the head of the list. */
-			sh_obj = SH_TAILQ_FIRST(&lrp->free_objs, __db_lockobj);
-			(void)__db_shalloc(lt->mem, obj_size, 0, &p);
-		}
-		sh_obj->type = type;
+
+		/*
+		 * If we can fit this object in the structure, do so instead
+		 * of shalloc-ing space for it.
+		 */
+		if (obj_size <= sizeof(sh_obj->objdata))
+			p = sh_obj->objdata;
+		else
+			if ((ret =
+			    __db_shalloc(lt->mem, obj_size, 0, &p)) != 0) {
+				if ((ret = __lock_grow_region(lt,
+				    DB_LOCK_MEM, obj_size)) != 0)
+					return (ret);
+				lrp = lt->region;
+				/* Reacquire the head of the list. */
+				sh_obj = SH_TAILQ_FIRST(&lrp->free_objs,
+				    __db_lockobj);
+				(void)__db_shalloc(lt->mem, obj_size, 0, &p);
+			}
+
 		src = type == DB_LOCK_OBJTYPE ? dbt->data : (void *)&locker;
 		memcpy(p, src, obj_size);
+
+		sh_obj->type = type;
 		SH_TAILQ_REMOVE(&lrp->free_objs, sh_obj, links, __db_lockobj);
 
 		SH_TAILQ_INIT(&sh_obj->waiters);
@@ -1329,7 +1354,8 @@ __lock_freeobj(lt, obj)
 {
 	HASHREMOVE_EL(lt->hashtab,
 	    __db_lockobj, links, obj, lt->region->table_size, __lock_lhash);
-	__db_shalloc_free(lt->mem, SH_DBT_PTR(&obj->lockobj));
+	if (obj->lockobj.size > sizeof(obj->objdata))
+		__db_shalloc_free(lt->mem, SH_DBT_PTR(&obj->lockobj));
 	SH_TAILQ_INSERT_HEAD(&lt->region->free_objs, obj, links, __db_lockobj);
 }
 
diff --git a/db2/lock/lock_deadlock.c b/db2/lock/lock_deadlock.c
index 566021fe89..93c438ca36 100644
--- a/db2/lock/lock_deadlock.c
+++ b/db2/lock/lock_deadlock.c
@@ -11,7 +11,7 @@
 static const char copyright[] =
 "@(#) Copyright (c) 1997\n\
 	Sleepycat Software Inc.  All rights reserved.\n";
-static const char sccsid[] = "@(#)lock_deadlock.c	10.25 (Sleepycat) 11/1/97";
+static const char sccsid[] = "@(#)lock_deadlock.c	10.26 (Sleepycat) 11/25/97";
 #endif
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -66,8 +66,7 @@ static void __dd_debug __P((DB_ENV *, locker_info *, u_int32_t *, u_int32_t));
 int
 lock_detect(lt, flags, atype)
 	DB_LOCKTAB *lt;
-	int flags;
-	u_int32_t atype;
+	int flags, atype;
 {
 	DB_ENV *dbenv;
 	locker_info *idmap;
diff --git a/db2/log/log.c b/db2/log/log.c
index 17681f8e0f..a9bf7a95ab 100644
--- a/db2/log/log.c
+++ b/db2/log/log.c
@@ -7,7 +7,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)log.c	10.33 (Sleepycat) 11/2/97";
+static const char sccsid[] = "@(#)log.c	10.34 (Sleepycat) 11/28/97";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -91,7 +91,7 @@ log_open(path, flags, mode, dbenv, lpp)
 	retry_cnt = newregion = 0;
 retry:	if (LF_ISSET(DB_CREATE)) {
 		ret = __db_rcreate(dbenv, DB_APP_LOG, path,
-		    DB_DEFAULT_LOG_FILE, mode, len, &fd, &dblp->maddr);
+		    DB_DEFAULT_LOG_FILE, mode, len, 0, &fd, &dblp->maddr);
 		if (ret == 0) {
 			/* Put the LOG structure first in the region. */
 			lp = dblp->maddr;
diff --git a/db2/log/log_archive.c b/db2/log/log_archive.c
index 140ea31fd1..0248e2815c 100644
--- a/db2/log/log_archive.c
+++ b/db2/log/log_archive.c
@@ -8,7 +8,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)log_archive.c	10.28 (Sleepycat) 10/28/97";
+static const char sccsid[] = "@(#)log_archive.c	10.29 (Sleepycat) 11/12/97";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -360,9 +360,9 @@ __absname(pref, name, newnamep)
  *	If the user has their own malloc routine, use it.
  */
 static int
-__usermem(listp, func)
+__usermem(listp, cmpfunc)
 	char ***listp;
-	void *(*func) __P((size_t));
+	void *(*cmpfunc) __P((size_t));
 {
 	size_t len;
 	char **array, **arrayp, **orig, *strp;
@@ -378,10 +378,10 @@ __usermem(listp, func)
 	 * XXX
 	 * Don't simplify this expression, SunOS compilers don't like it.
 	 */
-	if (func == NULL)
+	if (cmpfunc == NULL)
 		array = (char **)__db_malloc(len);
 	else
-		array = (char **)func(len);
+		array = (char **)cmpfunc(len);
 	if (array == NULL)
 		return (ENOMEM);
 	strp = (char *)(array + (orig - *listp) + 1);
diff --git a/db2/log/log_auto.c b/db2/log/log_auto.c
index d5dbfe1f5f..61626b090e 100644
--- a/db2/log/log_auto.c
+++ b/db2/log/log_auto.c
@@ -102,7 +102,6 @@ int __log_register_log(logp, txnid, ret_lsnp, flags,
  * PUBLIC: int __log_register_print
  * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
  */
-
 int
 __log_register_print(notused1, dbtp, lsnp, notused3, notused4)
 	DB_LOG *notused1;
@@ -250,7 +249,6 @@ int __log_unregister_log(logp, txnid, ret_lsnp, flags,
  * PUBLIC: int __log_unregister_print
  * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
  */
-
 int
 __log_unregister_print(notused1, dbtp, lsnp, notused3, notused4)
 	DB_LOG *notused1;
diff --git a/db2/log/log_get.c b/db2/log/log_get.c
index ed35d57f82..2d1512c6b9 100644
--- a/db2/log/log_get.c
+++ b/db2/log/log_get.c
@@ -7,7 +7,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)log_get.c	10.21 (Sleepycat) 10/25/97";
+static const char sccsid[] = "@(#)log_get.c	10.22 (Sleepycat) 11/22/97";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -44,24 +44,21 @@ log_get(dblp, alsn, dbt, flags)
 	/* Validate arguments. */
 #define	OKFLAGS	(DB_CHECKPOINT | \
     DB_CURRENT | DB_FIRST | DB_LAST | DB_NEXT | DB_PREV | DB_SET)
-	if (flags != 0) {
-		if ((ret =
-		    __db_fchk(dblp->dbenv, "log_get", flags, OKFLAGS)) != 0)
-			return (ret);
-		switch (flags) {
-		case DB_CHECKPOINT:
-		case DB_CURRENT:
-		case DB_FIRST:
-		case DB_LAST:
-		case DB_NEXT:
-		case DB_PREV:
-		case DB_SET:
-		case 0:
-			break;
-		default:
-			return (__db_ferr(dblp->dbenv, "log_get", 1));
-		}
+	if ((ret = __db_fchk(dblp->dbenv, "log_get", flags, OKFLAGS)) != 0)
+		return (ret);
+	switch (flags) {
+	case DB_CHECKPOINT:
+	case DB_CURRENT:
+	case DB_FIRST:
+	case DB_LAST:
+	case DB_NEXT:
+	case DB_PREV:
+	case DB_SET:
+		break;
+	default:
+		return (__db_ferr(dblp->dbenv, "log_get", 1));
 	}
+
 	if (F_ISSET(dblp, DB_AM_THREAD)) {
 		if (LF_ISSET(DB_NEXT | DB_PREV | DB_CURRENT))
 			return (__db_ferr(dblp->dbenv, "log_get", 1));
diff --git a/db2/log/log_put.c b/db2/log/log_put.c
index 92d9563301..42fec88a7d 100644
--- a/db2/log/log_put.c
+++ b/db2/log/log_put.c
@@ -7,7 +7,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)log_put.c	10.20 (Sleepycat) 11/2/97";
+static const char sccsid[] = "@(#)log_put.c	10.22 (Sleepycat) 11/12/97";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -28,10 +28,10 @@ static const char sccsid[] = "@(#)log_put.c	10.20 (Sleepycat) 11/2/97";
 #include "hash.h"
 #include "common_ext.h"
 
-static int __log_fill __P((DB_LOG *, void *, u_int32_t));
+static int __log_fill __P((DB_LOG *, DB_LSN *, void *, u_int32_t));
 static int __log_flush __P((DB_LOG *, const DB_LSN *));
 static int __log_newfd __P((DB_LOG *));
-static int __log_putr __P((DB_LOG *, const DBT *, u_int32_t));
+static int __log_putr __P((DB_LOG *, DB_LSN *, const DBT *, u_int32_t));
 static int __log_write __P((DB_LOG *, void *, u_int32_t));
 
 /*
@@ -117,12 +117,13 @@ __log_put(dblp, lsn, dbt, flags)
 
 		/* Reset the file write offset. */
 		lp->w_off = 0;
-
-		/* Reset the first-unwritten LSN for the buffer. */
-		lp->uw_lsn = lp->lsn;
 	} else
 		lastoff = 0;
 
+	/* Initialize the LSN information returned to the user. */
+	lsn->file = lp->lsn.file;
+	lsn->offset = lp->lsn.offset;
+
 	/*
 	 * Insert persistent information as the first record in every file.
 	 * Note that the previous length is wrong for the very first record
@@ -131,17 +132,17 @@ __log_put(dblp, lsn, dbt, flags)
 	if (lp->lsn.offset == 0) {
 		t.data = &lp->persist;
 		t.size = sizeof(LOGP);
-		if ((ret = __log_putr(dblp,
+		if ((ret = __log_putr(dblp, lsn,
 		    &t, lastoff == 0 ? 0 : lastoff - lp->len)) != 0)
 			return (ret);
-	}
 
-	/* Initialize the LSN information returned to the user. */
-	lsn->file = lp->lsn.file;
-	lsn->offset = lp->lsn.offset;
+		/* Update the LSN information returned to the user. */
+		lsn->file = lp->lsn.file;
+		lsn->offset = lp->lsn.offset;
+	}
 
-	/* Put out the user's record. */
-	if ((ret = __log_putr(dblp, dbt, lp->lsn.offset - lp->len)) != 0)
+	/* Write the application's log record. */
+	if ((ret = __log_putr(dblp, lsn, dbt, lp->lsn.offset - lp->len)) != 0)
 		return (ret);
 
 	/*
@@ -184,19 +185,6 @@ __log_put(dblp, lsn, dbt, flags)
 		(void)time(&lp->chkpt);
 		lp->stat.st_wc_bytes = lp->stat.st_wc_mbytes = 0;
 	}
-
-	/*
-	 * When an application calls the log_flush routine, we need to figure
-	 * out if the current buffer needs to be flushed.  The problem is that
-	 * if a record spans buffers, it's possible for the record continued
-	 * in the current buffer to have begun in a previous buffer.  Each time
-	 * we write a buffer, we update the first-unwritten LSN to point to the
-	 * first LSN after that written buffer.  If we have a spanning record,
-	 * correct that value to be the LSN that started it all, here.
-	 */
-	if (lsn->offset < lp->w_off && lsn->offset + lp->len > lp->w_off)
-		lp->uw_lsn = *lsn;
-
 	return (0);
 }
 
@@ -205,8 +193,9 @@ __log_put(dblp, lsn, dbt, flags)
  *	Actually put a record into the log.
  */
 static int
-__log_putr(dblp, dbt, prev)
+__log_putr(dblp, lsn, dbt, prev)
 	DB_LOG *dblp;
+	DB_LSN *lsn;
 	const DBT *dbt;
 	u_int32_t prev;
 {
@@ -225,15 +214,15 @@ __log_putr(dblp, dbt, prev)
 	hdr.len = sizeof(HDR) + dbt->size;
 	hdr.cksum = __ham_func4(dbt->data, dbt->size);
 
-	if ((ret = __log_fill(dblp, &hdr, sizeof(HDR))) != 0)
+	if ((ret = __log_fill(dblp, lsn, &hdr, sizeof(HDR))) != 0)
 		return (ret);
+	lp->len = sizeof(HDR);
 	lp->lsn.offset += sizeof(HDR);
 
-	if ((ret = __log_fill(dblp, dbt->data, dbt->size)) != 0)
+	if ((ret = __log_fill(dblp, lsn, dbt->data, dbt->size)) != 0)
 		return (ret);
+	lp->len += dbt->size;
 	lp->lsn.offset += dbt->size;
-
-	lp->len = sizeof(HDR) + dbt->size;
 	return (0);
 }
 
@@ -266,7 +255,7 @@ __log_flush(dblp, lsn)
 {
 	DB_LSN t_lsn;
 	LOG *lp;
-	int ret;
+	int current, ret;
 
 	ret = 0;
 	lp = dblp->lp;
@@ -292,23 +281,27 @@ __log_flush(dblp, lsn)
 	/*
 	 * If the LSN is less than the last-sync'd LSN, we're done.  Note,
 	 * the last-sync LSN saved in s_lsn is the LSN of the first byte 
-	 * that has not yet been written to disk, so the test is <, not <=.
+	 * we absolutely know has been written to disk, so the test is <=.
 	 */
 	if (lsn->file < lp->s_lsn.file ||
-	    (lsn->file == lp->s_lsn.file && lsn->offset < lp->s_lsn.offset))
+	    (lsn->file == lp->s_lsn.file && lsn->offset <= lp->s_lsn.offset))
 		return (0);
 
 	/*
 	 * We may need to write the current buffer.  We have to write the
 	 * current buffer if the flush LSN is greater than or equal to the
-	 * first-unwritten LSN (uw_lsn).  If we write the buffer, then we
-	 * update the first-unwritten LSN.
+	 * buffer's starting LSN.
 	 */
+	current = 0;
 	if (lp->b_off != 0 &&
-	    lsn->file >= lp->uw_lsn.file && lsn->offset >= lp->uw_lsn.offset)
+	    lsn->file >= lp->f_lsn.file && lsn->offset >= lp->f_lsn.offset) {
 		if ((ret = __log_write(dblp, lp->buf, lp->b_off)) != 0)
 			return (ret);
 
+		lp->b_off = 0;
+		current = 1;
+	}
+
 	/*
 	 * It's possible that this thread may never have written to this log
 	 * file.  Acquire a file descriptor if we don't already have one.
@@ -323,10 +316,14 @@ __log_flush(dblp, lsn)
 	++lp->stat.st_scount;
 
 	/*
-	 * Set the last-synced LSN, the first LSN after the last record
-	 * that we know is on disk.
+	 * Set the last-synced LSN, using the LSN of the current buffer.  If
+	 * the current buffer was flushed, we know the LSN of the first byte
+	 * of the buffer is on disk, otherwise, we only know that the LSN of
+	 * the record before the one beginning the current buffer is on disk.
 	 */
-	lp->s_lsn = lp->uw_lsn;
+	lp->s_lsn = lp->f_lsn;
+	if (!current)
+		--lp->s_lsn.offset;
 
 	return (0);
 }
@@ -336,8 +333,9 @@ __log_flush(dblp, lsn)
  *	Write information into the log.
  */
 static int
-__log_fill(dblp, addr, len)
+__log_fill(dblp, lsn, addr, len)
 	DB_LOG *dblp;
+	DB_LSN *lsn;
 	void *addr;
 	u_int32_t len;
 {
@@ -349,6 +347,15 @@ __log_fill(dblp, addr, len)
 	/* Copy out the data. */
 	for (lp = dblp->lp; len > 0;) {
 		/*
+		 * If we're beginning a new buffer, note the user LSN to which
+		 * the first byte of the buffer belongs.  We have to know this
+		 * when flushing the buffer so that we know if the in-memory
+		 * buffer needs to be flushed.
+		 */
+		if (lp->b_off == 0)
+			lp->f_lsn = *lsn;
+
+		/*
 		 * If we're on a buffer boundary and the data is big enough,
 		 * copy as many records as we can directly from the data.
 		 */
@@ -371,9 +378,12 @@ __log_fill(dblp, addr, len)
 		lp->b_off += nw;
 
 		/* If we fill the buffer, flush it. */
-		if (lp->b_off == sizeof(lp->buf) &&
-		    (ret = __log_write(dblp, lp->buf, sizeof(lp->buf))) != 0)
-			return (ret);
+		if (lp->b_off == sizeof(lp->buf)) {
+			if ((ret =
+			    __log_write(dblp, lp->buf, sizeof(lp->buf))) != 0)
+				return (ret);
+			lp->b_off = 0;
+		}
 	}
 	return (0);
 }
@@ -412,14 +422,8 @@ __log_write(dblp, addr, len)
 	if (nw != (int32_t)len)
 		return (EIO);
 
-	/*
-	 * Reset the buffer offset, update the seek offset, and update the
-	 * first-unwritten LSN.
-	 */
-	lp->b_off = 0;
+	/* Update the seek offset. */
 	lp->w_off += len;
-	lp->uw_lsn.file = lp->lsn.file;
-	lp->uw_lsn.offset = lp->w_off;
 
 	/* Update written statistics. */
 	if ((lp->stat.st_w_bytes += len) >= MEGABYTE) {
diff --git a/db2/mp/mp_bh.c b/db2/mp/mp_bh.c
index a707603eec..578abedcb6 100644
--- a/db2/mp/mp_bh.c
+++ b/db2/mp/mp_bh.c
@@ -7,7 +7,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)mp_bh.c	10.21 (Sleepycat) 10/25/97";
+static const char sccsid[] = "@(#)mp_bh.c	10.23 (Sleepycat) 11/26/97";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -40,7 +40,6 @@ __memp_bhwrite(dbmp, mfp, bhp, restartp, wrotep)
 	BH *bhp;
 	int *restartp, *wrotep;
 {
-	DBT dbt;
 	DB_MPOOLFILE *dbmfp;
 	DB_MPREG *mpreg;
 
@@ -53,7 +52,7 @@ __memp_bhwrite(dbmp, mfp, bhp, restartp, wrotep)
 	 * Walk the process' DB_MPOOLFILE list and find a file descriptor for
 	 * the file.  We also check that the descriptor is open for writing.
 	 * If we find a descriptor on the file that's not open for writing, we
-	 * try and upgrade it to make it writeable.
+	 * try and upgrade it to make it writeable.  If that fails, we're done.
 	 */
 	LOCKHANDLE(dbmp, dbmp->mutexp);
 	for (dbmfp = TAILQ_FIRST(&dbmp->dbmfq);
@@ -86,18 +85,34 @@ __memp_bhwrite(dbmp, mfp, bhp, restartp, wrotep)
 	}
 
 	/*
-	 * Try and open the file; ignore any error, assume it's a permissions
-	 * problem.
+	 * Try and open the file, attaching to the underlying shared area.
 	 *
 	 * XXX
-	 * There's no negative cache here, so we may repeatedly try and open
-	 * files that we have previously tried (and failed) to open.
+	 * Don't try to attach to temporary files.  There are two problems in
+	 * trying to do that.  First, if we have different privileges than the
+	 * process that "owns" the temporary file, we might create the backing
+	 * disk file such that the owning process couldn't read/write its own
+	 * buffers, e.g., memp_trickle() running as root creating a file owned
+	 * as root, mode 600.  Second, if the temporary file has already been
+	 * created, we don't have any way of finding out what its real name is,
+	 * and, even if we did, it was already unlinked (so that it won't be
+	 * left if the process dies horribly).  This decision causes a problem,
+	 * however: if the temporary file consumes the entire buffer cache,
+	 * and the owner doesn't flush the buffers to disk, we could end up
+	 * with resource starvation, and the memp_trickle() thread couldn't do
+	 * anything about it.  That's a pretty unlikely scenario, though.
+	 *
+	 * XXX
+	 * There's no negative cache, so we may repeatedly try and open files
+	 * that we have previously tried (and failed) to open.
+	 *
+	 * Ignore any error, assume it's a permissions problem.
 	 */
-	dbt.size = mfp->pgcookie_len;
-	dbt.data = R_ADDR(dbmp, mfp->pgcookie_off);
-	if (__memp_fopen(dbmp, R_ADDR(dbmp, mfp->path_off),
-	    mfp->ftype, 0, 0, mfp->stat.st_pagesize,
-	    mfp->lsn_off, &dbt, R_ADDR(dbmp, mfp->fileid_off), 0, &dbmfp) != 0)
+	if (F_ISSET(mfp, MP_TEMP))
+		return (0);
+
+	if (__memp_fopen(dbmp, mfp, R_ADDR(dbmp, mfp->path_off), mfp->ftype,
+	    0, 0, mfp->stat.st_pagesize, 0, NULL, NULL, 0, &dbmfp) != 0)
 		return (0);
 
 found:	return (__memp_pgwrite(dbmfp, bhp, restartp, wrotep));
@@ -144,7 +159,7 @@ __memp_pgread(dbmfp, bhp, can_create)
 			UNLOCKHANDLE(dbmp, dbmfp->mutexp);
 			__db_err(dbmp->dbenv,
 			    "%s: page %lu doesn't exist, create flag not set",
-			    dbmfp->path, (u_long)bhp->pgno);
+			    __memp_fn(dbmfp), (u_long)bhp->pgno);
 			goto err;
 		}
 		UNLOCKHANDLE(dbmp, dbmfp->mutexp);
@@ -270,12 +285,14 @@ __memp_pgwrite(dbmfp, bhp, restartp, wrotep)
 
 	/* Temporary files may not yet have been created. */
 	LOCKHANDLE(dbmp, dbmfp->mutexp);
-	if (dbmfp->fd == -1 && ((ret = __db_appname(dbenv, DB_APP_TMP,
-	    NULL, NULL, &dbmfp->fd, NULL)) != 0 || dbmfp->fd == -1)) {
-		UNLOCKHANDLE(dbmp, dbmfp->mutexp);
-		__db_err(dbenv, "unable to create temporary backing file");
-		goto err;
-	}
+	if (dbmfp->fd == -1)
+		if ((ret = __db_appname(dbenv, DB_APP_TMP,
+		    NULL, NULL, &dbmfp->fd, NULL)) != 0 || dbmfp->fd == -1) {
+			UNLOCKHANDLE(dbmp, dbmfp->mutexp);
+			__db_err(dbenv,
+			    "unable to create temporary backing file");
+			goto err;
+		}
 
 	/* Write the page out. */
 	if ((ret = __db_seek(dbmfp->fd, pagesize, bhp->pgno, 0, SEEK_SET)) != 0)
@@ -350,8 +367,8 @@ __memp_pgwrite(dbmfp, bhp, restartp, wrotep)
 
 	return (0);
 
-syserr:	__db_err(dbenv,
-	    "%s: %s failed for page %lu", dbmfp->path, fail, (u_long)bhp->pgno);
+syserr:	__db_err(dbenv, "%s: %s failed for page %lu",
+	    __memp_fn(dbmfp), fail, (u_long)bhp->pgno);
 
 err:	UNLOCKBUFFER(dbmp, bhp);
 	LOCKREGION(dbmp);
@@ -416,7 +433,7 @@ __memp_pg(dbmfp, bhp, is_pgin)
 
 err:	UNLOCKHANDLE(dbmp, dbmp->mutexp);
 	__db_err(dbmp->dbenv, "%s: %s failed for page %lu",
-	    dbmfp->path, is_pgin ? "pgin" : "pgout", (u_long)bhp->pgno);
+	    __memp_fn(dbmfp), is_pgin ? "pgin" : "pgout", (u_long)bhp->pgno);
 	return (ret);
 }
 
@@ -462,7 +479,8 @@ __memp_upgrade(dbmp, dbmfp, mfp)
 	DB_MPOOLFILE *dbmfp;
 	MPOOLFILE *mfp;
 {
-	int fd;
+	int fd, ret;
+	char *rpath;
 
 	/*
 	 * !!!
@@ -477,16 +495,24 @@ __memp_upgrade(dbmp, dbmfp, mfp)
 	if (F_ISSET(dbmfp, MP_UPGRADE_FAIL))
 		return (1);
 
-	/* Try the open. */
-	if (__db_open(R_ADDR(dbmp, mfp->path_off), 0, 0, 0, &fd) != 0) {
+	/*
+	 * Calculate the real name for this file and try to open it read/write.
+	 * We know we have a valid pathname for the file because it's the only
+	 * way we could have gotten a file descriptor of any kind.
+	 */
+	if ((ret = __db_appname(dbmp->dbenv, DB_APP_DATA,
+	    NULL, R_ADDR(dbmp, mfp->path_off), NULL, &rpath)) != 0)
+		return (ret);
+	if (__db_open(rpath, 0, 0, 0, &fd) != 0) {
 		F_SET(dbmfp, MP_UPGRADE_FAIL);
-		return (1);
+		ret = 1;
+	} else {
+		/* Swap the descriptors and set the upgrade flag. */
+		(void)__db_close(dbmfp->fd);
+		dbmfp->fd = fd;
+		F_SET(dbmfp, MP_UPGRADE);
+		ret = 0;
 	}
-
-	/* Swap the descriptors and set the upgrade flag. */
-	(void)__db_close(dbmfp->fd);
-	dbmfp->fd = fd;
-	F_SET(dbmfp, MP_UPGRADE);
-
-	return (0);
+	FREES(rpath);
+	return (ret);
 }
diff --git a/db2/mp/mp_fget.c b/db2/mp/mp_fget.c
index 3f99e60505..1010751c92 100644
--- a/db2/mp/mp_fget.c
+++ b/db2/mp/mp_fget.c
@@ -7,7 +7,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)mp_fget.c	10.30 (Sleepycat) 10/25/97";
+static const char sccsid[] = "@(#)mp_fget.c	10.32 (Sleepycat) 11/26/97";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -38,13 +38,11 @@ memp_fget(dbmfp, pgnoaddr, flags, addrp)
 	int flags;
 	void *addrp;
 {
-	BH *bhp, *tbhp;
+	BH *bhp;
 	DB_MPOOL *dbmp;
 	MPOOL *mp;
 	MPOOLFILE *mfp;
-	db_pgno_t lastpgno;
 	size_t bucket, mf_offset;
-	off_t size;
 	u_long cnt;
 	int b_incr, b_inserted, readonly_alloc, ret;
 	void *addr;
@@ -97,7 +95,7 @@ memp_fget(dbmfp, pgnoaddr, flags, addrp)
 	mf_offset = R_OFFSET(dbmp, mfp);
 	addr = NULL;
 	bhp = NULL;
-	b_incr = b_inserted = readonly_alloc = ret = 0;
+	b_incr = b_inserted = ret = 0;
 
 	LOCKREGION(dbmp);
 
@@ -114,11 +112,10 @@ memp_fget(dbmfp, pgnoaddr, flags, addrp)
 	 * It would be possible to do so by reference counting the open
 	 * pages from the mmap, but it's unclear to me that it's worth it.
 	 */
-	if (dbmfp->addr != NULL && dbmfp->mfp->can_mmap) {
-		lastpgno = dbmfp->len == 0 ?
-		    0 : (dbmfp->len - 1) / mfp->stat.st_pagesize;
+	if (dbmfp->addr != NULL && F_ISSET(dbmfp->mfp, MP_CAN_MMAP)) {
+		readonly_alloc = 0;
 		if (LF_ISSET(DB_MPOOL_LAST))
-			*pgnoaddr = lastpgno;
+			*pgnoaddr = mfp->last_pgno;
 		else {
 			/*
 			 * !!!
@@ -128,10 +125,10 @@ memp_fget(dbmfp, pgnoaddr, flags, addrp)
 			 */
 			if (LF_ISSET(DB_MPOOL_CREATE | DB_MPOOL_NEW))
 				readonly_alloc = 1;
-			else if (*pgnoaddr > lastpgno) {
+			else if (*pgnoaddr > mfp->last_pgno) {
 				__db_err(dbmp->dbenv,
 				    "%s: page %lu doesn't exist",
-				    dbmfp->path, (u_long)*pgnoaddr);
+				    __memp_fn(dbmfp), (u_long)*pgnoaddr);
 				ret = EINVAL;
 				goto err;
 			}
@@ -146,79 +143,38 @@ memp_fget(dbmfp, pgnoaddr, flags, addrp)
 		}
 	}
 
-	/*
-	 * If requesting the last page or a new page, find the last page.  The
-	 * tricky thing is that the user may have created a page already that's
-	 * after any page that exists in the file.
-	 */
-	if (LF_ISSET(DB_MPOOL_LAST | DB_MPOOL_NEW)) {
-		/*
-		 * Temporary files may not yet have been created.
-		 *
-		 * Don't lock -- there are no atomicity issues for stat(2).
-		 */
-		if (dbmfp->fd == -1)
-			size = 0;
-		else if ((ret =
-		    __db_ioinfo(dbmfp->path, dbmfp->fd, &size, NULL)) != 0) {
-			__db_err(dbmp->dbenv,
-			    "%s: %s", dbmfp->path, strerror(ret));
-			goto err;
-		}
-
-		*pgnoaddr = size == 0 ? 0 : (size - 1) / mfp->stat.st_pagesize;
+	/* Check if requesting the last page or a new page. */
+	if (LF_ISSET(DB_MPOOL_LAST))
+		*pgnoaddr = mfp->last_pgno;
 
-		/*
-		 * Walk the list of BH's, looking for later pages.  Save the
-		 * pointer if a later page is found so that we don't have to
-		 * search the list twice.
-		 *
-		 * If requesting a new page, return the page one after the last
-		 * page -- which we'll have to create.
-		 */
-		for (tbhp = SH_TAILQ_FIRST(&mp->bhq, __bh);
-		    tbhp != NULL; tbhp = SH_TAILQ_NEXT(tbhp, q, __bh))
-			if (tbhp->pgno >= *pgnoaddr &&
-			    tbhp->mf_offset == mf_offset) {
-				bhp = tbhp;
-				*pgnoaddr = bhp->pgno;
-			}
-		if (LF_ISSET(DB_MPOOL_NEW))
-			++*pgnoaddr;
-	}
-
-	/* If we already found the right buffer, return it. */
-	if (LF_ISSET(DB_MPOOL_LAST) && bhp != NULL) {
-		addr = bhp->buf;
-		goto found;
+	if (LF_ISSET(DB_MPOOL_NEW)) {
+		*pgnoaddr = mfp->last_pgno + 1;
+		goto alloc;
 	}
 
-	/* If we haven't checked the BH hash bucket queue, do the search. */
-	if (!LF_ISSET(DB_MPOOL_LAST | DB_MPOOL_NEW)) {
-		bucket = BUCKET(mp, mf_offset, *pgnoaddr);
-		for (cnt = 0,
-		    bhp = SH_TAILQ_FIRST(&dbmp->htab[bucket], __bh);
-		    bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, hq, __bh)) {
-			++cnt;
-			if (bhp->pgno == *pgnoaddr &&
-			    bhp->mf_offset == mf_offset) {
-				addr = bhp->buf;
-				++mp->stat.st_hash_searches;
-				if (cnt > mp->stat.st_hash_longest)
-					mp->stat.st_hash_longest = cnt;
-				mp->stat.st_hash_examined += cnt;
-				goto found;
-			}
-		}
-		if (cnt != 0) {
+	/* Check the BH hash bucket queue. */
+	bucket = BUCKET(mp, mf_offset, *pgnoaddr);
+	for (cnt = 0,
+	    bhp = SH_TAILQ_FIRST(&dbmp->htab[bucket], __bh);
+	    bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, hq, __bh)) {
+		++cnt;
+		if (bhp->pgno == *pgnoaddr && bhp->mf_offset == mf_offset) {
+			addr = bhp->buf;
 			++mp->stat.st_hash_searches;
 			if (cnt > mp->stat.st_hash_longest)
 				mp->stat.st_hash_longest = cnt;
 			mp->stat.st_hash_examined += cnt;
+			goto found;
 		}
 	}
+	if (cnt != 0) {
+		++mp->stat.st_hash_searches;
+		if (cnt > mp->stat.st_hash_longest)
+			mp->stat.st_hash_longest = cnt;
+		mp->stat.st_hash_examined += cnt;
+	}
 
-	/*
+alloc:	/*
 	 * Allocate a new buffer header and data space, and mark the contents
 	 * as useless.
 	 */
@@ -300,7 +256,7 @@ found:		/* Increment the reference count. */
 		if (bhp->ref == UINT16_T_MAX) {
 			__db_err(dbmp->dbenv,
 			    "%s: too many references to page %lu",
-			    dbmfp->path, bhp->pgno);
+			    __memp_fn(dbmfp), bhp->pgno);
 			ret = EINVAL;
 			goto err;
 		}
@@ -346,6 +302,14 @@ found:		/* Increment the reference count. */
 		++mfp->stat.st_cache_hit;
 	}
 
+	/*
+	 * If we're returning a page after our current notion of the last-page,
+	 * update our information.  Note, there's no way to un-instantiate this
+	 * page, it's going to exist whether it's returned to us dirty or not.
+	 */
+	if (bhp->pgno > mfp->last_pgno)
+		mfp->last_pgno = bhp->pgno;
+
 mapret:	LOCKHANDLE(dbmp, dbmfp->mutexp);
 	++dbmfp->pinref;
 	UNLOCKHANDLE(dbmp, dbmfp->mutexp);
diff --git a/db2/mp/mp_fopen.c b/db2/mp/mp_fopen.c
index de59c9ea9b..bdc4713863 100644
--- a/db2/mp/mp_fopen.c
+++ b/db2/mp/mp_fopen.c
@@ -7,7 +7,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)mp_fopen.c	10.30 (Sleepycat) 10/25/97";
+static const char sccsid[] = "@(#)mp_fopen.c	10.32 (Sleepycat) 11/26/97";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -28,8 +28,8 @@ static const char sccsid[] = "@(#)mp_fopen.c	10.30 (Sleepycat) 10/25/97";
 #include "common_ext.h"
 
 static int __memp_mf_close __P((DB_MPOOL *, DB_MPOOLFILE *));
-static int __memp_mf_open __P((DB_MPOOL *,
-    DB_MPOOLFILE *, int, size_t, int, DBT *, u_int8_t *, int, MPOOLFILE **));
+static int __memp_mf_open __P((DB_MPOOL *, DB_MPOOLFILE *,
+    const char *, int, size_t, off_t, int, DBT *, u_int8_t *, MPOOLFILE **));
 
 /*
  * memp_fopen --
@@ -53,7 +53,13 @@ memp_fopen(dbmp, path, ftype,
 	    "memp_fopen", flags, DB_CREATE | DB_NOMMAP | DB_RDONLY)) != 0)
 		return (ret);
 
-	return (__memp_fopen(dbmp, path, ftype,
+	/* Require a non-zero pagesize. */
+	if (pagesize == 0) {
+		__db_err(dbmp->dbenv, "memp_fopen: pagesize not specified");
+		return (EINVAL);
+	}
+
+	return (__memp_fopen(dbmp, NULL, path, ftype,
 	    flags, mode, pagesize, lsn_offset, pgcookie, fileid, 1, retp));
 }
 
@@ -61,13 +67,14 @@ memp_fopen(dbmp, path, ftype,
  * __memp_fopen --
  *	Open a backing file for the memory pool; internal version.
  *
- * PUBLIC: int __memp_fopen __P((DB_MPOOL *, const char *, int, int,
- * PUBLIC:    int, size_t, int, DBT *, u_int8_t *, int, DB_MPOOLFILE **));
+ * PUBLIC: int __memp_fopen __P((DB_MPOOL *, MPOOLFILE *, const char *, int,
+ * PUBLIC:    int, int, size_t, int, DBT *, u_int8_t *, int, DB_MPOOLFILE **));
  */
 int
-__memp_fopen(dbmp, path,
+__memp_fopen(dbmp, mfp, path,
     ftype, flags, mode, pagesize, lsn_offset, pgcookie, fileid, needlock, retp)
 	DB_MPOOL *dbmp;
+	MPOOLFILE *mfp;
 	const char *path;
 	int ftype, flags, mode, lsn_offset, needlock;
 	size_t pagesize;
@@ -77,24 +84,27 @@ __memp_fopen(dbmp, path,
 {
 	DB_ENV *dbenv;
 	DB_MPOOLFILE *dbmfp;
-	MPOOLFILE *mfp;
 	off_t size;
 	int ret;
+	u_int8_t idbuf[DB_FILE_ID_LEN];
+	char *rpath;
 
+	/*
+	 * XXX
+	 * If mfp is provided, the following arguments do NOT need to be
+	 * specified:
+	 *      lsn_offset
+	 *      pgcookie
+	 *      fileid
+	 */
 	dbenv = dbmp->dbenv;
 	ret = 0;
-
-	/* Require a non-zero pagesize. */
-	if (pagesize == 0) {
-		__db_err(dbenv, "memp_fopen: pagesize not specified");
-		return (EINVAL);
-	}
+	rpath = NULL;
 
 	/* Allocate and initialize the per-process structure. */
 	if ((dbmfp =
 	    (DB_MPOOLFILE *)__db_calloc(1, sizeof(DB_MPOOLFILE))) == NULL) {
-		__db_err(dbenv, "%s: %s",
-		    path == NULL ? TEMPORARY : path, strerror(ENOMEM));
+		__db_err(dbenv, "memp_fopen: %s", strerror(ENOMEM));
 		return (ENOMEM);
 	}
 	dbmfp->dbmp = dbmp;
@@ -109,54 +119,66 @@ __memp_fopen(dbmp, path,
 			ret = EINVAL;
 			goto err;
 		}
-		dbmfp->path = (char *)TEMPORARY;
-		F_SET(dbmfp, MP_PATH_TEMP);
+		size = 0;
 	} else {
-		/* Calculate the real name for this file. */
+		/* Get the real name for this file and open it. */
 		if ((ret = __db_appname(dbenv,
-		    DB_APP_DATA, NULL, path, NULL, &dbmfp->path)) != 0)
+		    DB_APP_DATA, NULL, path, NULL, &rpath)) != 0)
 			goto err;
-		F_SET(dbmfp, MP_PATH_ALLOC);
-
-
-		/* Open the file. */
-		if ((ret = __db_open(dbmfp->path,
+		if ((ret = __db_open(rpath,
 		    LF_ISSET(DB_CREATE | DB_RDONLY), DB_CREATE | DB_RDONLY,
 		    mode, &dbmfp->fd)) != 0) {
-			__db_err(dbenv, "%s: %s", dbmfp->path, strerror(ret));
+			__db_err(dbenv, "%s: %s", rpath, strerror(ret));
 			goto err;
 		}
 
 		/* Don't permit files that aren't a multiple of the pagesize. */
-		if ((ret =
-		    __db_ioinfo(dbmfp->path, dbmfp->fd, &size, NULL)) != 0) {
-			__db_err(dbenv, "%s: %s", dbmfp->path, strerror(ret));
+		if ((ret = __db_ioinfo(rpath, dbmfp->fd, &size, NULL)) != 0) {
+			__db_err(dbenv, "%s: %s", rpath, strerror(ret));
 			goto err;
 		}
 		if (size % pagesize) {
 			__db_err(dbenv,
 			    "%s: file size not a multiple of the pagesize",
-			    dbmfp->path);
+			    rpath);
 			ret = EINVAL;
 			goto err;
 		}
+
+		/*
+		 * Get the file id if we weren't given one.  Generated file id's
+		 * don't use timestamps, otherwise there'd be no chance of any
+		 * other process joining the party.
+		 */
+		if (mfp == NULL && fileid == NULL) {
+			if ((ret = __db_fileid(dbenv, rpath, 0, idbuf)) != 0)
+				goto err;
+			fileid = idbuf;
+		}
 	}
 
 	/*
-	 * Find/allocate the shared file objects.  This includes allocating
-	 * space for the per-process thread lock.
+	 * If we weren't provided an underlying shared object to join with,
+	 * find/allocate the shared file objects.  Also allocate space for
+	 * the per-process thread lock.
 	 */
 	if (needlock)
 		LOCKREGION(dbmp);
-	ret = __memp_mf_open(dbmp, dbmfp, ftype, pagesize,
-	    lsn_offset, pgcookie, fileid, F_ISSET(dbmfp, MP_PATH_TEMP), &mfp);
+
+	if (mfp == NULL)
+		ret = __memp_mf_open(dbmp, dbmfp, path,
+		    ftype, pagesize, size, lsn_offset, pgcookie, fileid, &mfp);
+	else {
+		++mfp->ref;
+		ret = 0;
+	}
 	if (ret == 0 &&
 	    F_ISSET(dbmp, MP_LOCKHANDLE) && (ret =
 	    __memp_ralloc(dbmp, sizeof(db_mutex_t), NULL, &dbmfp->mutexp)) == 0)
 		LOCKINIT(dbmp, dbmfp->mutexp);
+
 	if (needlock)
 		UNLOCKREGION(dbmp);
-
 	if (ret != 0)
 		goto err;
 
@@ -184,25 +206,25 @@ __memp_fopen(dbmp, path,
 	 * flatly impossible.  Hope that mmap fails if the file is too large.
 	 */
 #define	DB_MAXMMAPSIZE	(10 * 1024 * 1024)	/* 10 Mb. */
-	if (mfp->can_mmap) {
+	if (F_ISSET(mfp, MP_CAN_MMAP)) {
 		if (!F_ISSET(dbmfp, MP_READONLY))
-			mfp->can_mmap = 0;
+			F_CLR(mfp, MP_CAN_MMAP);
 		if (path == NULL)
-			mfp->can_mmap = 0;
+			F_CLR(mfp, MP_CAN_MMAP);
 		if (ftype != 0)
-			mfp->can_mmap = 0;
+			F_CLR(mfp, MP_CAN_MMAP);
 		if (LF_ISSET(DB_NOMMAP))
-			mfp->can_mmap = 0;
+			F_CLR(mfp, MP_CAN_MMAP);
 		if (size > (dbenv == NULL || dbenv->mp_mmapsize == 0 ?
 		    DB_MAXMMAPSIZE : (off_t)dbenv->mp_mmapsize))
-			mfp->can_mmap = 0;
+			F_CLR(mfp, MP_CAN_MMAP);
 	}
 	dbmfp->addr = NULL;
-	if (mfp->can_mmap) {
+	if (F_ISSET(mfp, MP_CAN_MMAP)) {
 		dbmfp->len = size;
 		if (__db_map(dbmfp->fd, dbmfp->len, 1, 1, &dbmfp->addr) != 0) {
-			mfp->can_mmap = 0;
 			dbmfp->addr = NULL;
+			F_CLR(mfp, MP_CAN_MMAP);
 		}
 	}
 
@@ -217,8 +239,8 @@ err:	/*
 	 * Note that we do not have to free the thread mutex, because we
 	 * never get to here after we have successfully allocated it.
 	 */
-	if (F_ISSET(dbmfp, MP_PATH_ALLOC))
-		FREES(dbmfp->path);
+	if (rpath != NULL)
+		FREES(rpath);
 	if (dbmfp->fd != -1)
 		(void)__db_close(dbmfp->fd);
 	if (dbmfp != NULL)
@@ -231,78 +253,80 @@ err:	/*
  *	Open an MPOOLFILE.
  */
 static int
-__memp_mf_open(dbmp, dbmfp,
-    ftype, pagesize, lsn_offset, pgcookie, fileid, istemp, retp)
+__memp_mf_open(dbmp, dbmfp, path,
+    ftype, pagesize, size, lsn_offset, pgcookie, fileid, retp)
 	DB_MPOOL *dbmp;
 	DB_MPOOLFILE *dbmfp;
-	int ftype, lsn_offset, istemp;
+	const char *path;
+	int ftype, lsn_offset;
 	size_t pagesize;
+	off_t size;
 	DBT *pgcookie;
 	u_int8_t *fileid;
 	MPOOLFILE **retp;
 {
 	MPOOLFILE *mfp;
 	int ret;
-	u_int8_t idbuf[DB_FILE_ID_LEN];
 	void *p;
 
-	/* Temporary files can't match previous files. */
-	if (istemp)
-		goto alloc;
+#define	ISTEMPORARY	(path == NULL)
 
 	/*
-	 * Get the file id if we weren't give one.  Generated file id's don't
-	 * use timestamps, otherwise there'd be no chance of anyone joining
-	 * the party.
+	 * Walk the list of MPOOLFILE's, looking for a matching file.
+	 * Temporary files can't match previous files.
 	 */
-	if (fileid == NULL) {
-		if ((ret =
-		    __db_fileid(dbmp->dbenv, dbmfp->path, 0, idbuf)) != 0)
-			return (ret);
-		fileid = idbuf;
-	}
-
-	/* Walk the list of MPOOLFILE's, looking for a matching file. */
-	for (mfp = SH_TAILQ_FIRST(&dbmp->mp->mpfq, __mpoolfile);
-	    mfp != NULL; mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile))
-		if (!memcmp(fileid,
-		    R_ADDR(dbmp, mfp->fileid_off), DB_FILE_ID_LEN)) {
-			if (ftype != mfp->ftype ||
-			    pagesize != mfp->stat.st_pagesize) {
-				__db_err(dbmp->dbenv,
-				    "%s: ftype or pagesize changed",
-				    dbmfp->path);
-				ret = EINVAL;
-				mfp = NULL;
-				goto ret1;
+	if (!ISTEMPORARY)
+		for (mfp = SH_TAILQ_FIRST(&dbmp->mp->mpfq, __mpoolfile);
+		    mfp != NULL; mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile)) {
+			if (F_ISSET(mfp, MP_TEMP))
+				continue;
+			if (!memcmp(fileid,
+			    R_ADDR(dbmp, mfp->fileid_off), DB_FILE_ID_LEN)) {
+				if (ftype != mfp->ftype ||
+				    pagesize != mfp->stat.st_pagesize) {
+					__db_err(dbmp->dbenv,
+					    "%s: ftype or pagesize changed",
+					    path);
+					return (EINVAL);
+				}
+
+				/* Found it: increment the reference count. */
+				++mfp->ref;
+				*retp = mfp;
+				return (0);
 			}
-			/* Found it: increment the reference count. */
-			++mfp->ref;
-			goto ret1;
 		}
 
 	/* Allocate a new MPOOLFILE. */
-alloc:	if ((ret = __memp_ralloc(dbmp, sizeof(MPOOLFILE), NULL, &mfp)) != 0)
-		goto ret1;
+	if ((ret = __memp_ralloc(dbmp, sizeof(MPOOLFILE), NULL, &mfp)) != 0)
+		return (ret);
+	*retp = mfp;
 
 	/* Initialize the structure. */
 	memset(mfp, 0, sizeof(MPOOLFILE));
 	mfp->ref = 1;
 	mfp->ftype = ftype;
-	mfp->can_mmap = 1;
 	mfp->lsn_off = lsn_offset;
-	mfp->stat.st_pagesize = pagesize;
 
-	/* Copy the file path into shared memory. */
-	if ((ret = __memp_ralloc(dbmp,
-	    strlen(dbmfp->path) + 1, &mfp->path_off, &p)) != 0)
-		goto err;
-	memcpy(p, dbmfp->path, strlen(dbmfp->path) + 1);
+	/*
+	 * If the user specifies DB_MPOOL_LAST or DB_MPOOL_NEW on a memp_fget,
+	 * we have to know the last page in the file.  Figure it out and save
+	 * it away.
+	 */
+	mfp->stat.st_pagesize = pagesize;
+	mfp->last_pgno = size == 0 ? 0 : (size - 1) / mfp->stat.st_pagesize;
 
-	/* Copy the file identification string into shared memory. */
-	if (istemp)
-		mfp->fileid_off = 0;
+	F_SET(mfp, MP_CAN_MMAP);
+	if (ISTEMPORARY)
+		F_SET(mfp, MP_TEMP);
 	else {
+		/* Copy the file path into shared memory. */
+		if ((ret = __memp_ralloc(dbmp,
+		    strlen(path) + 1, &mfp->path_off, &p)) != 0)
+			goto err;
+		memcpy(p, path, strlen(path) + 1);
+
+		/* Copy the file identification string into shared memory. */
 		if ((ret = __memp_ralloc(dbmp,
 		    DB_FILE_ID_LEN, &mfp->fileid_off, &p)) != 0)
 			goto err;
@@ -328,15 +352,13 @@ alloc:	if ((ret = __memp_ralloc(dbmp, sizeof(MPOOLFILE), NULL, &mfp)) != 0)
 err:		if (mfp->path_off != 0)
 			__db_shalloc_free(dbmp->addr,
 			    R_ADDR(dbmp, mfp->path_off));
-		if (!istemp)
+		if (mfp->fileid_off != 0)
 			__db_shalloc_free(dbmp->addr,
 			    R_ADDR(dbmp, mfp->fileid_off));
 		if (mfp != NULL)
 			__db_shalloc_free(dbmp->addr, mfp);
 		mfp = NULL;
 	}
-
-ret1:	*retp = mfp;
 	return (0);
 }
 
@@ -357,7 +379,7 @@ memp_fclose(dbmfp)
 	/* Complain if pinned blocks never returned. */
 	if (dbmfp->pinref != 0)
 		__db_err(dbmp->dbenv, "%s: close: %lu blocks left pinned",
-		    dbmfp->path, (u_long)dbmfp->pinref);
+		    __memp_fn(dbmfp), (u_long)dbmfp->pinref);
 
 	/* Remove the DB_MPOOLFILE structure from the list. */
 	LOCKHANDLE(dbmp, dbmp->mutexp);
@@ -370,18 +392,18 @@ memp_fclose(dbmfp)
 	/* Discard any mmap information. */
 	if (dbmfp->addr != NULL &&
 	    (ret = __db_unmap(dbmfp->addr, dbmfp->len)) != 0)
-		__db_err(dbmp->dbenv, "%s: %s", dbmfp->path, strerror(ret));
+		__db_err(dbmp->dbenv,
+		    "%s: %s", __memp_fn(dbmfp), strerror(ret));
 
 	/* Close the file; temporary files may not yet have been created. */
 	if (dbmfp->fd != -1 && (t_ret = __db_close(dbmfp->fd)) != 0) {
-		__db_err(dbmp->dbenv, "%s: %s", dbmfp->path, strerror(t_ret));
+		__db_err(dbmp->dbenv,
+		    "%s: %s", __memp_fn(dbmfp), strerror(t_ret));
 		if (ret != 0)
 			t_ret = ret;
 	}
 
 	/* Free memory. */
-	if (F_ISSET(dbmfp, MP_PATH_ALLOC))
-		FREES(dbmfp->path);
 	if (dbmfp->mutexp != NULL) {
 		LOCKREGION(dbmp);
 		__db_shalloc_free(dbmp->addr, dbmfp->mutexp);
@@ -434,7 +456,8 @@ __memp_mf_close(dbmp, dbmfp)
 		if (F_ISSET(bhp, BH_DIRTY))
 			__db_err(dbmp->dbenv,
 			    "%s: close: pgno %lu left dirty; ref %lu",
-			    dbmfp->path, (u_long)bhp->pgno, (u_long)bhp->ref);
+			    __memp_fn(dbmfp),
+			    (u_long)bhp->pgno, (u_long)bhp->ref);
 #endif
 
 		if (bhp->mf_offset == mf_offset) {
@@ -452,7 +475,8 @@ __memp_mf_close(dbmp, dbmfp)
 
 	/* Free the space. */
 	__db_shalloc_free(dbmp->addr, mfp);
-	__db_shalloc_free(dbmp->addr, R_ADDR(dbmp, mfp->path_off));
+	if (mfp->path_off != 0)
+		__db_shalloc_free(dbmp->addr, R_ADDR(dbmp, mfp->path_off));
 	if (mfp->fileid_off != 0)
 		__db_shalloc_free(dbmp->addr, R_ADDR(dbmp, mfp->fileid_off));
 	if (mfp->pgcookie_off != 0)
diff --git a/db2/mp/mp_fput.c b/db2/mp/mp_fput.c
index 892f179d3a..38e86b8ac5 100644
--- a/db2/mp/mp_fput.c
+++ b/db2/mp/mp_fput.c
@@ -7,7 +7,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)mp_fput.c	10.14 (Sleepycat) 10/5/97";
+static const char sccsid[] = "@(#)mp_fput.c	10.16 (Sleepycat) 11/26/97";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -54,7 +54,7 @@ memp_fput(dbmfp, pgaddr, flags)
 		if (LF_ISSET(DB_MPOOL_DIRTY) && F_ISSET(dbmfp, MP_READONLY)) {
 			__db_err(dbmp->dbenv,
 			    "%s: dirty flag set for readonly file page",
-			    dbmfp->path);
+			    __memp_fn(dbmfp));
 			return (EACCES);
 		}
 	}
@@ -64,7 +64,7 @@ memp_fput(dbmfp, pgaddr, flags)
 	if (dbmfp->pinref == 0)
 		__db_err(dbmp->dbenv,
 		    "%s: put: more blocks returned than retrieved",
-		    dbmfp->path);
+		    __memp_fn(dbmfp));
 	else
 		--dbmfp->pinref;
 	UNLOCKHANDLE(dbmp, dbmfp->mutexp);
diff --git a/db2/mp/mp_fset.c b/db2/mp/mp_fset.c
index a7d2706008..2eff7dd74c 100644
--- a/db2/mp/mp_fset.c
+++ b/db2/mp/mp_fset.c
@@ -7,7 +7,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)mp_fset.c	10.10 (Sleepycat) 10/5/97";
+static const char sccsid[] = "@(#)mp_fset.c	10.12 (Sleepycat) 11/26/97";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -43,20 +43,21 @@ memp_fset(dbmfp, pgaddr, flags)
 	mp = dbmp->mp;
 
 	/* Validate arguments. */
-	if (flags != 0) {
-		if ((ret = __db_fchk(dbmp->dbenv, "memp_fset", flags,
-		    DB_MPOOL_DIRTY | DB_MPOOL_CLEAN | DB_MPOOL_DISCARD)) != 0)
-			return (ret);
-		if ((ret = __db_fcchk(dbmp->dbenv, "memp_fset",
-		    flags, DB_MPOOL_CLEAN, DB_MPOOL_DIRTY)) != 0)
-			return (ret);
+	if (flags == 0)
+		return (__db_ferr(dbmp->dbenv, "memp_fset", 1));
 
-		if (LF_ISSET(DB_MPOOL_DIRTY) && F_ISSET(dbmfp, MP_READONLY)) {
-			__db_err(dbmp->dbenv,
-			    "%s: dirty flag set for readonly file page",
-			    dbmfp->path);
-			return (EACCES);
-		}
+	if ((ret = __db_fchk(dbmp->dbenv, "memp_fset", flags,
+	    DB_MPOOL_DIRTY | DB_MPOOL_CLEAN | DB_MPOOL_DISCARD)) != 0)
+		return (ret);
+	if ((ret = __db_fcchk(dbmp->dbenv, "memp_fset",
+	    flags, DB_MPOOL_CLEAN, DB_MPOOL_DIRTY)) != 0)
+		return (ret);
+
+	if (LF_ISSET(DB_MPOOL_DIRTY) && F_ISSET(dbmfp, MP_READONLY)) {
+		__db_err(dbmp->dbenv,
+		    "%s: dirty flag set for readonly file page",
+		    __memp_fn(dbmfp));
+		return (EACCES);
 	}
 
 	/* Convert the page address to a buffer header. */
diff --git a/db2/mp/mp_open.c b/db2/mp/mp_open.c
index 4c19739ebd..ca81f8d6d6 100644
--- a/db2/mp/mp_open.c
+++ b/db2/mp/mp_open.c
@@ -7,7 +7,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)mp_open.c	10.15 (Sleepycat) 10/25/97";
+static const char sccsid[] = "@(#)mp_open.c	10.16 (Sleepycat) 11/28/97";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -66,17 +66,6 @@ memp_open(path, flags, mode, dbenv, retp)
 		F_SET(dbmp, MP_ISPRIVATE);
 
 	/*
-	 * XXX
-	 * HP-UX won't permit mutexes to live in anything but shared memory.
-	 * So, we have to instantiate the shared mpool region file on that
-	 * architecture, regardless.  If this turns out to be a performance
-	 * problem, we could probably use anonymous memory instead.
-	 */
-#if defined(__hppa)
-	F_CLR(dbmp, MP_ISPRIVATE);
-#endif
-
-	/*
 	 * Map in the region.  We do locking regardless, as portions of it are
 	 * implemented in common code (if we put the region in a file, that is).
 	 */
diff --git a/db2/mp/mp_pr.c b/db2/mp/mp_pr.c
index 01f0920df4..6ff1131b6e 100644
--- a/db2/mp/mp_pr.c
+++ b/db2/mp/mp_pr.c
@@ -7,7 +7,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)mp_pr.c	10.18 (Sleepycat) 11/1/97";
+static const char sccsid[] = "@(#)mp_pr.c	10.20 (Sleepycat) 11/26/97";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -100,7 +100,7 @@ memp_stat(dbmp, gspp, fspp, db_malloc)
 		    mfp = SH_TAILQ_FIRST(&dbmp->mp->mpfq, __mpoolfile);
 		    mfp != NULL;
 		    ++tfsp, mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile)) {
-			name = R_ADDR(dbmp, mfp->path_off);
+			name = __memp_fns(dbmp, mfp);
 			nlen = strlen(name);
 			len = sizeof(DB_MPOOL_FSTAT) + nlen + 1;
 			if ((*tfsp = db_malloc == NULL ?
@@ -120,6 +120,37 @@ memp_stat(dbmp, gspp, fspp, db_malloc)
 }
 
 /*
+ * __memp_fn --
+ *	Return the printable file name for a handle, via __memp_fns.
+ *
+ * PUBLIC: char * __memp_fn __P((DB_MPOOLFILE *));
+ */
+char *
+__memp_fn(dbmfp)
+	DB_MPOOLFILE *dbmfp;
+{
+	return (__memp_fns(dbmfp->dbmp, dbmfp->mfp));
+}
+
+/*
+ * __memp_fns --
+ *	Return the path stored at path_off, or "temporary" if path_off is 0.
+ *
+ * PUBLIC: char * __memp_fns __P((DB_MPOOL *, MPOOLFILE *));
+ *
+ */
+char *
+__memp_fns(dbmp, mfp)
+	DB_MPOOL *dbmp;
+	MPOOLFILE *mfp;
+{
+	if (mfp->path_off == 0)
+		return ((char *)"temporary");
+
+	return ((char *)R_ADDR(dbmp, mfp->path_off));
+}
+
+/*
  * __memp_debug --
  *	Display MPOOL structures.
  *
@@ -152,7 +183,7 @@ __memp_debug(dbmp, fp, data)
 	(void)fprintf(fp, "%lu process-local files\n", cnt);
 	for (dbmfp = TAILQ_FIRST(&dbmp->dbmfq);
 	    dbmfp != NULL; dbmfp = TAILQ_NEXT(dbmfp, q)) {
-		(void)fprintf(fp, "%s\n", dbmfp->path);
+		(void)fprintf(fp, "%s\n", __memp_fn(dbmfp));
 		__memp_pdbmf(fp, dbmfp, data);
 	}
 
@@ -285,7 +316,7 @@ __memp_pmf(fp, mfp, data)
 		return;
 
 	(void)fprintf(fp, "    %d references; %s; pagesize: %lu\n", mfp->ref,
-	    mfp->can_mmap ? "mmap" : "read/write",
+	    F_ISSET(mfp, MP_CAN_MMAP) ? "mmap" : "read/write",
 	    (u_long)mfp->stat.st_pagesize);
 }
 
diff --git a/db2/mp/mp_region.c b/db2/mp/mp_region.c
index 6b2f93125c..c20e669749 100644
--- a/db2/mp/mp_region.c
+++ b/db2/mp/mp_region.c
@@ -7,7 +7,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)mp_region.c	10.16 (Sleepycat) 10/25/97";
+static const char sccsid[] = "@(#)mp_region.c	10.18 (Sleepycat) 11/29/97";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -224,17 +224,28 @@ retry:	if (LF_ISSET(DB_CREATE)) {
 		 * the file descriptor for locking.  However, it should not
 		 * be possible for DB_THREAD to be set if HAVE_SPINLOCKS aren't
 		 * defined.
+		 *
+		 * XXX
+		 * HP-UX won't permit mutexes to live in anything but shared
+		 * memory.  So, instantiate the shared mpool region file on
+		 * that architecture, regardless.  If this turns out to be a
+		 * performance problem, we could use anonymous memory instead.
 		 */
-		if (F_ISSET(dbmp, MP_ISPRIVATE)) {
+#if !defined(__hppa)
+		if (F_ISSET(dbmp, MP_ISPRIVATE))
 			if ((dbmp->maddr = __db_malloc(rlen)) == NULL)
 				ret = ENOMEM;
-			else
+			else {
+				F_SET(dbmp, MP_MALLOC);
 				ret = __db_rinit(dbmp->dbenv,
 				    dbmp->maddr, 0, rlen, 0);
-		} else
+			}
+		else
+#endif
 			ret = __db_rcreate(dbmp->dbenv, DB_APP_NONE, path,
-			    DB_DEFAULT_MPOOL_FILE, mode, rlen, &fd,
-			    &dbmp->maddr);
+			    DB_DEFAULT_MPOOL_FILE, mode, rlen,
+			    F_ISSET(dbmp, MP_ISPRIVATE) ? DB_TEMPORARY : 0,
+			    &fd, &dbmp->maddr);
 		if (ret == 0) {
 			/* Put the MPOOL structure first in the region. */
 			mp = dbmp->maddr;
@@ -315,7 +326,7 @@ retry:	if (LF_ISSET(DB_CREATE)) {
 	dbmp->fd = fd;
 
 	/* If we locked the region, release it now. */
-	if (!F_ISSET(dbmp, MP_ISPRIVATE))
+	if (!F_ISSET(dbmp, MP_MALLOC))
 		UNLOCKREGION(dbmp);
 	return (0);
 
@@ -339,7 +350,7 @@ int
 __memp_rclose(dbmp)
 	DB_MPOOL *dbmp;
 {
-	if (F_ISSET(dbmp, MP_ISPRIVATE)) {
+	if (F_ISSET(dbmp, MP_MALLOC)) {
 		__db_free(dbmp->maddr);
 		return (0);
 	}
diff --git a/db2/mp/mp_sync.c b/db2/mp/mp_sync.c
index 2f042df9e1..47a7f2ebca 100644
--- a/db2/mp/mp_sync.c
+++ b/db2/mp/mp_sync.c
@@ -7,7 +7,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)mp_sync.c	10.15 (Sleepycat) 11/1/97";
+static const char sccsid[] = "@(#)mp_sync.c	10.17 (Sleepycat) 11/26/97";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -201,7 +201,7 @@ memp_sync(dbmp, lsnp)
 			 */
 			if (!wrote) {
 				__db_err(dbenv, "%s: unable to flush page: %lu",
-				    R_ADDR(dbmp, mfp->path_off),
+				    __memp_fns(dbmp, mfp),
 				    (u_long)bharray[next]->pgno);
 				ret = EPERM;
 				goto err;
@@ -244,16 +244,24 @@ memp_fsync(dbmfp)
 	size_t mf_offset;
 	int ar_cnt, cnt, nalloc, next, pincnt, notused, ret, wrote;
 
+	dbmp = dbmfp->dbmp;
+
 	/*
 	 * If this handle doesn't have a file descriptor that's open for
 	 * writing, or if the file is a temporary, there's no reason to
 	 * proceed further.
 	 */
-	if (F_ISSET(dbmfp, MP_READONLY | MP_PATH_TEMP))
+	if (F_ISSET(dbmfp, MP_READONLY))
 		return (0);
 
 	ret = 0;
-	dbmp = dbmfp->dbmp;
+	LOCKREGION(dbmp);
+	if (F_ISSET(dbmfp->mfp, MP_TEMP))
+		ret = 1;
+	UNLOCKREGION(dbmp);
+	if (ret)
+		return (0);
+
 	mf_offset = R_OFFSET(dbmp, dbmfp->mfp);
 
 	/*
@@ -407,18 +415,26 @@ loop:	total = mp->stat.st_page_clean + mp->stat.st_page_dirty;
 			continue;
 
 		mfp = R_ADDR(dbmp, bhp->mf_offset);
+
+		/*
+		 * We can't write to temporary files -- see the comment in
+		 * mp_bh.c:__memp_bhwrite().
+		 */
+		if (F_ISSET(mfp, MP_TEMP))
+			continue;
+
 		if ((ret =
 		    __memp_bhwrite(dbmp, mfp, bhp, &notused, &wrote)) != 0)
 			goto err;
 
 		/*
-		 * Any process syncing the shared memory buffer pool
-		 * had better be able to write to any underlying file.
-		 * Be understanding, but firm, on this point.
+		 * Any process syncing the shared memory buffer pool had better
+		 * be able to write to any underlying file.  Be understanding,
+		 * but firm, on this point.
 		 */
 		if (!wrote) {
 			__db_err(dbmp->dbenv, "%s: unable to flush page: %lu",
-			    R_ADDR(dbmp, mfp->path_off), (u_long)bhp->pgno);
+			    __memp_fns(dbmp, mfp), (u_long)bhp->pgno);
 			ret = EPERM;
 			goto err;
 		}
diff --git a/db2/mutex/README b/db2/mutex/README
index 30d6b6a7d1..fceeef7ed8 100644
--- a/db2/mutex/README
+++ b/db2/mutex/README
@@ -1,4 +1,4 @@
-# @(#)README	10.1 (Sleepycat) 4/12/97
+# @(#)README	10.2 (Sleepycat) 11/25/97
 
 Resource locking routines: lock based on a db_mutex_t.  All this gunk
 (including trying to make assembly code portable), is necessary because
@@ -11,9 +11,9 @@ information.
 
 If HAVE_SPINLOCKS is defined (i.e. we know how to do test-and-sets for
 this compiler/architecture combination), we try and lock the resource tsl
-TSL_DEFAULT_SPINS times.  If we can't acquire the lock that way, we use
-a system call to sleep for 10ms, 20ms, 40ms, etc.  (The time is bounded
-at 1 second, just in case.)  Using the timer backoff means that there are
+__db_tsl_spins times.  If we can't acquire the lock that way, we use a
+system call to sleep for 10ms, 20ms, 40ms, etc.  (The time is bounded at
+1 second, just in case.)  Using the timer backoff means that there are
 two assumptions: that locks are held for brief periods (never over system
 calls or I/O) and that locks are not hotly contested.
 
diff --git a/db2/mutex/mutex.c b/db2/mutex/mutex.c
index 7c8ea6ebd1..6e87c5f215 100644
--- a/db2/mutex/mutex.c
+++ b/db2/mutex/mutex.c
@@ -8,7 +8,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)mutex.c	10.28 (Sleepycat) 10/31/97";
+static const char sccsid[] = "@(#)mutex.c	10.29 (Sleepycat) 11/25/97";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -101,12 +101,6 @@ static const char sccsid[] = "@(#)mutex.c	10.28 (Sleepycat) 10/31/97";
 
 #endif /* HAVE_SPINLOCKS */
 
-#ifdef	MORE_THAN_ONE_PROCESSOR
-#define	TSL_DEFAULT_SPINS	5	/* Default spins before block. */
-#else
-#define	TSL_DEFAULT_SPINS	1	/* Default spins before block. */
-#endif
-
 /*
  * __db_mutex_init --
  *	Initialize a DB mutex structure.
@@ -130,6 +124,7 @@ __db_mutex_init(mp, off)
 
 #ifdef HAVE_SPINLOCKS
 	TSL_INIT(&mp->tsl_resource);
+	mp->spins = __os_spin();
 #else
 	mp->off = off;
 #endif
@@ -155,11 +150,8 @@ __db_mutex_lock(mp, fd)
 	int nspins;
 
 	for (usecs = MS(10);;) {
-		/*
-		 * Try and acquire the uncontested resource lock for
-		 * TSL_DEFAULT_SPINS.
-		 */
-		for (nspins = TSL_DEFAULT_SPINS; nspins > 0; --nspins)
+		/* Try and acquire the uncontested resource lock for N spins. */
+		for (nspins = mp->spins; nspins > 0; --nspins)
 			if (TSL_SET(&mp->tsl_resource)) {
 #ifdef DEBUG
 				if (mp->pid != 0) {
diff --git a/db2/os/os_func.c b/db2/os/os_config.c
index afd40f4624..ecb4f1c2e7 100644
--- a/db2/os/os_func.c
+++ b/db2/os/os_config.c
@@ -8,7 +8,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)os_func.c	10.4 (Sleepycat) 10/28/97";
+static const char sccsid[] = "@(#)os_config.c	10.9 (Sleepycat) 11/28/97";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -31,7 +31,6 @@ static const char sccsid[] = "@(#)os_func.c	10.4 (Sleepycat) 10/28/97";
 #define imported
 #endif
 
-imported extern void    *calloc __P((size_t, size_t));
 imported extern int	 close __P((int));
 imported extern void	 free __P((void *));
 imported extern int	 fsync __P((int));
@@ -42,16 +41,16 @@ imported extern char	*strdup __P((const char *));
 imported extern void    *realloc __P((void *, size_t));
 imported extern int	 unlink __P((const char *));
 imported extern ssize_t	 write __P((int, const void *, size_t));
+imported extern void	*memset __P((void *, int, size_t));
 
 /*
  * __db_jump --
  *	This list of interfaces that applications can replace.  In some
  *	cases, the user is permitted to replace the standard ANSI C or
- *	POSIX 1003.1 call, e.g., calloc or read.  In others, we provide
+ *	POSIX 1003.1 call, e.g., malloc or read.  In others, we provide
  *	a local interface to the functionality, e.g., __os_map.
  */
 struct __db_jumptab __db_jump = {
-	calloc,				/* DB_FUNC_CALLOC */
 	close,				/* DB_FUNC_CLOSE */
 	__os_dirfree,			/* DB_FUNC_DIRFREE */
 	__os_dirlist,			/* DB_FUNC_DIRLIST */
@@ -73,9 +72,11 @@ struct __db_jumptab __db_jump = {
 	NULL				/* DB_FUNC_YIELD */
 };
 
+int __db_tsl_spins;			/* DB_TSL_SPINS */
+
 /*
  * db_jump_set --
- *	Replace an interface.
+ *	Replace functions for the DB package.
  */
 int
 db_jump_set(func, which)
@@ -84,70 +85,148 @@ db_jump_set(func, which)
 {
 	switch (which) {
 	case DB_FUNC_CALLOC:
-		__db_calloc = (void *(*) __P((size_t, size_t)))func;
-		break;
+		/*
+		 * XXX
+		 * Obsolete, calloc is no longer called by DB.
+		 */
+		 break;
 	case DB_FUNC_CLOSE:
-		__os_close = (int (*) __P((int)))func;
+		__db_jump.db_close = (int (*) __P((int)))func;
 		break;
 	case DB_FUNC_DIRFREE:
-		__db_dirfree = (void (*) __P((char **, int)))func;
+		__db_jump.db_dirfree = (void (*) __P((char **, int)))func;
 		break;
 	case DB_FUNC_DIRLIST:
-		__db_dirlist =
+		__db_jump.db_dirlist =
 		    (int (*) __P((const char *, char ***, int *)))func;
 		break;
 	case DB_FUNC_EXISTS:
-		__db_exists = (int (*) __P((const char *, int *)))func;
+		__db_jump.db_exists = (int (*) __P((const char *, int *)))func;
 		break;
 	case DB_FUNC_FREE:
-		__db_free = (void (*) __P((void *)))func;
+		__db_jump.db_free = (void (*) __P((void *)))func;
 		break;
 	case DB_FUNC_FSYNC:
-		__os_fsync = (int (*) __P((int)))func;
+		__db_jump.db_fsync = (int (*) __P((int)))func;
 		break;
 	case DB_FUNC_IOINFO:
-		__db_ioinfo =
+		__db_jump.db_ioinfo =
 		    (int (*) __P((const char *, int, off_t *, off_t *)))func;
 		break;
 	case DB_FUNC_MALLOC:
-		__db_malloc = (void *(*) __P((size_t)))func;
+		__db_jump.db_malloc = (void *(*) __P((size_t)))func;
 		break;
 	case DB_FUNC_MAP:
-		__db_map = (int (*) __P((int, size_t, int, int, void **)))func;
+		__db_jump.db_map =
+		    (int (*) __P((int, size_t, int, int, void **)))func;
 		break;
 	case DB_FUNC_OPEN:
-		__os_open = (int (*) __P((const char *, int, ...)))func;
+		__db_jump.db_open = (int (*) __P((const char *, int, ...)))func;
 		break;
 	case DB_FUNC_READ:
-		__os_read = (ssize_t (*) __P((int, void *, size_t)))func;
+		__db_jump.db_read =
+		    (ssize_t (*) __P((int, void *, size_t)))func;
 		break;
 	case DB_FUNC_REALLOC:
-		__db_realloc = (void *(*) __P((void *, size_t)))func;
+		__db_jump.db_realloc = (void *(*) __P((void *, size_t)))func;
 		break;
 	case DB_FUNC_SEEK:
-		__db_seek =
+		__db_jump.db_seek =
 		    (int (*) __P((int, size_t, db_pgno_t, u_long, int)))func;
 		break;
 	case DB_FUNC_SLEEP:
-		__db_sleep = (int (*) __P((u_long, u_long)))func;
+		__db_jump.db_sleep = (int (*) __P((u_long, u_long)))func;
 		break;
 	case DB_FUNC_STRDUP:
-		__db_strdup = (char *(*) __P((const char *)))func;
+		__db_jump.db_strdup = (char *(*) __P((const char *)))func;
 		break;
 	case DB_FUNC_UNLINK:
-		__os_unlink = (int (*) __P((const char *)))func;
+		__db_jump.db_unlink = (int (*) __P((const char *)))func;
 		break;
 	case DB_FUNC_UNMAP:
-		__db_unmap = (int (*) __P((void *, size_t)))func;
+		__db_jump.db_unmap = (int (*) __P((void *, size_t)))func;
 		break;
 	case DB_FUNC_WRITE:
-		__os_write = (ssize_t (*) __P((int, const void *, size_t)))func;
+		__db_jump.db_write =
+		    (ssize_t (*) __P((int, const void *, size_t)))func;
 		break;
 	case DB_FUNC_YIELD:
-		__db_yield = (int (*) __P((void)))func;
+		__db_jump.db_yield = (int (*) __P((void)))func;
+		break;
+	default:
+		return (EINVAL);
+	}
+	return (0);
+}
+
+/*
+ * db_value_set --
+ *	Set a DB tunable: DB_TSL_SPINS needs value > 0; others get EINVAL.
+ */
+int
+db_value_set(value, which)
+	int value, which;
+{
+	switch (which) {
+	case DB_TSL_SPINS:
+		if (value <= 0)
+			return (EINVAL);
+		__db_tsl_spins = value;
 		break;
 	default:
 		return (EINVAL);
 	}
 	return (0);
 }
+
+/*
+ * XXX
+ * Correct for systems that return NULL when you allocate 0 bytes of memory.
+ * There are several places in DB where we allocate the number of bytes held
+ * by the key/data item, and it can be 0.  Correct here so that malloc never
+ * returns a NULL for that reason.
+ */
+/*
+ * __db_calloc --
+ *	The calloc(3) function for DB; NULL if num * size overflows size_t.
+ *
+ * PUBLIC: void *__db_calloc __P((size_t, size_t));
+ */
+void *
+__db_calloc(num, size)
+	size_t num, size;
+{
+	void *p = NULL;
+
+	if ((num == 0 || size <= (size_t)-1 / num) &&	/* No overflow. */
+	    (p = __db_jump.db_malloc((size *= num) == 0 ? 1 : size)) != NULL)
+		memset(p, 0, size);
+	return (p);
+}
+
+/*
+ * __db_malloc --
+ *	The malloc(3) function for DB; a 0-byte request allocates 1 byte.
+ *
+ * PUBLIC: void *__db_malloc __P((size_t));
+ */
+void *
+__db_malloc(size)
+	size_t size;
+{
+	return (__db_jump.db_malloc(size == 0 ? 1 : size));
+}
+
+/*
+ * __db_realloc --
+ *	The realloc(3) function for DB; a size of 0 is passed on as 1 byte.
+ *
+ * PUBLIC: void *__db_realloc __P((void *, size_t));
+ */
+void *
+__db_realloc(ptr, size)
+	void *ptr;
+	size_t size;
+{
+	return (__db_jump.db_realloc(ptr, size == 0 ? 1 : size));
+}
diff --git a/db2/os/os_open.c b/db2/os/os_open.c
index 05784e4810..a628765556 100644
--- a/db2/os/os_open.c
+++ b/db2/os/os_open.c
@@ -8,7 +8,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)os_open.c	10.19 (Sleepycat) 10/28/97";
+static const char sccsid[] = "@(#)os_open.c	10.20 (Sleepycat) 11/27/97";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -38,6 +38,11 @@ __db_open(name, arg_flags, ok_flags, mode, fdp)
 		return (EINVAL);
 
 	flags = 0;
+
+	/*
+	 * DB requires the semantic that two files opened at the same time
+	 * with O_CREAT and O_EXCL set will return failure in at least one.
+	 */
 	if (arg_flags & DB_CREATE)
 		flags |= O_CREAT;
 
diff --git a/db2/os/os_spin.c b/db2/os/os_spin.c
new file mode 100644
index 0000000000..fb693c2848
--- /dev/null
+++ b/db2/os/os_spin.c
@@ -0,0 +1,56 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1997
+ *	Sleepycat Software.  All rights reserved.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)os_spin.c	10.3 (Sleepycat) 11/25/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <limits.h>
+#include <unistd.h>
+#endif
+
+#include "db_int.h"
+
+/*
+ * __os_spin --
+ *	Spins before blocking: application value, else 50 per CPU, else 1.
+ *
+ * PUBLIC: int __os_spin __P((void));
+ */
+int
+__os_spin()
+{
+	extern int __db_tsl_spins;
+
+	/* If the application specified the spins, use its value. */
+	if (__db_tsl_spins != 0)
+		return (__db_tsl_spins);
+
+	/*
+	 * XXX
+	 * Sysconf: Solaris uses _SC_NPROCESSORS_ONLN to return the number
+	 * of online processors.  I don't know if this call is portable or
+	 * not.
+	 */
+#if defined(HAVE_SYSCONF) && defined(_SC_NPROCESSORS_ONLN)
+	{
+		long sys_val;
+
+		sys_val = sysconf(_SC_NPROCESSORS_ONLN);
+		if (sys_val > 0)
+			return (sys_val * 50);
+	}
+#endif
+
+	/* No usable processor count: default to a single spin. */
+	return (1);
+}
diff --git a/db2/txn/txn.c b/db2/txn/txn.c
index 55423f0470..e7a1798350 100644
--- a/db2/txn/txn.c
+++ b/db2/txn/txn.c
@@ -43,7 +43,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)txn.c	10.35 (Sleepycat) 11/2/97";
+static const char sccsid[] = "@(#)txn.c	10.37 (Sleepycat) 11/28/97";
 #endif /* not lint */
 
 
@@ -101,11 +101,9 @@ __txn_create(dbenv, path, mode)
 	maxtxns = dbenv->tx_max != 0 ? dbenv->tx_max : 1000;
 	(void)time(&now);
 
-	ret = __db_rcreate(dbenv, DB_APP_NONE, path,
-	    DEFAULT_TXN_FILE, mode, TXN_REGION_SIZE(maxtxns), &fd, &txn_region);
-
 	/* Region may have existed.  If it didn't, the open will fail. */
-	if (ret != 0)
+	if ((ret = __db_rcreate(dbenv, DB_APP_NONE, path, DEFAULT_TXN_FILE,
+	    mode, TXN_REGION_SIZE(maxtxns), 0, &fd, &txn_region)) != 0)
 		return (ret);
 
 	txn_region->magic = DB_TXNMAGIC;
@@ -315,7 +313,10 @@ err:
 	return (ret);
 }
 
-/* The db_txn(3) man page describes txn_commit. */
+/*
+ * txn_commit --
+ *	Commit a transaction.
+ */
 int
 txn_commit(txnp)
 	DB_TXN *txnp;
@@ -337,7 +338,10 @@ txn_commit(txnp)
 	return (__txn_end(txnp, 1));
 }
 
-/* The db_txn(3) man page describes txn_abort. */
+/*
+ * txn_abort --
+ *	Abort a transcation.
+ */
 int
 txn_abort(txnp)
 	DB_TXN *txnp;
@@ -395,8 +399,8 @@ txn_id(txnp)
 }
 
 /*
- * The db_txn(3) man page describes txn_close. Currently the caller should
- * arrange a checkpoint before calling txn_close.
+ * txn_close --
+ *	Close the transaction region, does not imply a checkpoint.
  */
 int
 txn_close(tmgrp)
@@ -439,8 +443,8 @@ txn_close(tmgrp)
 }
 
 /*
- * The db_txn(3) man page describes txn_unlink.  Right now it is up to
- * txn_close to write the final checkpoint record.
+ * txn_unlink --
+ *	Remove the transaction region.
  */
 int
 txn_unlink(path, force, dbenv)
@@ -666,12 +670,19 @@ do_ckp:
 	mgr->region->pending_ckp = ckp_lsn;
 	UNLOCK_TXNREGION(mgr);
 
-	ret = memp_sync(mgr->dbenv->mp_info, &ckp_lsn);
-	if (ret > 0) {
-		__db_err(mgr->dbenv,
-		    "txn_checkpoint: system failure in memp_sync %s\n",
-		    strerror(ret));
-	} else if (ret == 0 && mgr->dbenv->lg_info != NULL) {
+	if (mgr->dbenv->mp_info != NULL &&
+	    (ret = memp_sync(mgr->dbenv->mp_info, &ckp_lsn)) != 0) {
+		/*
+		 * ret < 0 means that there are still buffers to flush;
+		 * the checkpoint is not complete. Back off and try again.
+		 */
+		if (ret > 0)
+			__db_err(mgr->dbenv,
+			    "txn_checkpoint: system failure in memp_sync %s\n",
+			    strerror(ret));
+		return (ret);
+	}
+	if (mgr->dbenv->lg_info != NULL) {
 		LOCK_TXNREGION(mgr);
 		last_ckp = mgr->region->last_ckp;
 		ZERO_LSN(mgr->region->pending_ckp);
@@ -691,11 +702,7 @@ do_ckp:
 		(void)time(&mgr->region->time_ckp);
 		UNLOCK_TXNREGION(mgr);
 	}
-	/*
-	 * ret < 0 means that there are still buffers to flush; the
-	 * checkpoint is not complete. Back off and try again.
-	 */
-	return (ret);
+	return (0);
 }
 
 /*
diff --git a/db2/txn/txn_auto.c b/db2/txn/txn_auto.c
index 9edbc03eab..38627466a8 100644
--- a/db2/txn/txn_auto.c
+++ b/db2/txn/txn_auto.c
@@ -73,7 +73,6 @@ int __txn_regop_log(logp, txnid, ret_lsnp, flags,
  * PUBLIC: int __txn_regop_print
  * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
  */
-
 int
 __txn_regop_print(notused1, dbtp, lsnp, notused3, notused4)
 	DB_LOG *notused1;
@@ -202,7 +201,6 @@ int __txn_ckp_log(logp, txnid, ret_lsnp, flags,
  * PUBLIC: int __txn_ckp_print
  * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
  */
-
 int
 __txn_ckp_print(notused1, dbtp, lsnp, notused3, notused4)
 	DB_LOG *notused1;