about summary refs log tree commit diff
path: root/db2/mp
diff options
context:
space:
mode:
author Ulrich Drepper <drepper@redhat.com> 1998-06-09 15:16:55 +0000
committer Ulrich Drepper <drepper@redhat.com> 1998-06-09 15:16:55 +0000
commit bf7997b65c7887d2acda95f5201d818a19d81711 (patch)
tree da3583de3a0b5892f90a4b1eb773a87b554ae37e /db2/mp
parent 7646e67e6cc4c738a7b402c60fed39d52db0433b (diff)
download glibc-bf7997b65c7887d2acda95f5201d818a19d81711.tar.gz
glibc-bf7997b65c7887d2acda95f5201d818a19d81711.tar.xz
glibc-bf7997b65c7887d2acda95f5201d818a19d81711.zip
Update.
1998-06-09  Ulrich Drepper  <drepper@cygnus.com>

	* sysdeps/unix/sysv/linux/netinet/ip.h (struct ip_options): Define
	__data member only for gcc.  Reported by ak@muc.de.

	* misc/mntent.h: Undo last patch.
	* sysdeps/unix/sysv/linux/fstatvfs.c (fstatvfs): Undo last patch.
	* misc/tst/mntent.c: Adjust code for this change.

	* io/fts.c: Updated from a slightly more recent BSD version.
	* io/fts.h: Likewise.

	* libc.map: Add __libc_stack_end.

	* db2/Makefile (routines): Add lock_region.
	* db2/config.h: Update from db-2.4.14.
	* db2/db.h: Likewise.
	* db2/db_185.h: Likewise.
	* db2/db_int.h: Likewise.
	* db2/bt_close.c: Likewise.
	* db2/bt_compare.c: Likewise.
	* db2/bt_conv.c: Likewise.
	* db2/bt_cursor.c: Likewise.
	* db2/bt_delete.c: Likewise.
	* db2/bt_open.c: Likewise.
	* db2/bt_page.c: Likewise.
	* db2/bt_put.c: Likewise.
	* db2/bt_rec.c: Likewise.
	* db2/bt_recno.c: Likewise.
	* db2/bt_rsearch.c: Likewise.
	* db2/bt_search.c: Likewise.
	* db2/bt_split.c: Likewise.
	* db2/bt_stat.c: Likewise.
	* db2/btree.src: Likewise.
	* db2/btree_auto.c: Likewise.
	* db2/getlong.c: Likewise.
	* db2/db_appinit.c: Likewise.
	* db2/db_apprec.c: Likewise.
	* db2/db_byteorder.c: Likewise.
	* db2/db_err.c: Likewise.
	* db2/db_log2.c: Likewise.
	* db2/db_region.c: Likewise.
	* db2/db_salloc.c: Likewise.
	* db2/db_shash.c: Likewise.
	* db2/db.c: Likewise.
	* db2/db.src: Likewise.
	* db2/db_auto.c: Likewise.
	* db2/db_conv.c: Likewise.
	* db2/db_dispatch.c: Likewise.
	* db2/db_dup.c: Likewise.
	* db2/db_overflow.c: Likewise.
	* db2/db_pr.c: Likewise.
	* db2/db_rec.c: Likewise.
	* db2/db_ret.c: Likewise.
	* db2/db_thread.c: Likewise.
	* db2/db185.c: Likewise.
	* db2/db185_int.h: Likewise.
	* db2/dbm.c: Likewise.
	* db2/hash.c: Likewise.
	* db2/hash.src: Likewise.
	* db2/hash_auto.c: Likewise.
	* db2/hash_conv.c: Likewise.
	* db2/hash_debug.c: Likewise.
	* db2/hash_dup.c: Likewise.
	* db2/hash_func.c: Likewise.
	* db2/hash_page.c: Likewise.
	* db2/hash_rec.c: Likewise.
	* db2/hash_stat.c: Likewise.
	* db2/btree.h: Likewise.
	* db2/btree_ext.h: Likewise.
	* db2/clib_ext.h: Likewise.
	* db2/common_ext.h: Likewise.
	* db2/cxx_int.h: Likewise.
	* db2/db.h.src: Likewise.
	* db2/db_185.h.src: Likewise.
	* db2/db_am.h: Likewise.
	* db2/db_auto.h: Likewise.
	* db2/db_cxx.h: Likewise.
	* db2/db_dispatch.h: Likewise.
	* db2/db_ext.h: Likewise.
	* db2/db_int.h.src: Likewise.
	* db2/db_page.h: Likewise.
	* db2/db_shash.h: Likewise.
	* db2/db_swap.h: Likewise.
	* db2/hash.h: Likewise.
	* db2/hash_ext.h: Likewise.
	* db2/lock.h: Likewise.
	* db2/lock_ext.h: Likewise.
	* db2/log.h: Likewise.
	* db2/log_ext.h: Likewise.
	* db2/mp.h: Likewise.
	* db2/mp_ext.h: Likewise.
	* db2/mutex_ext.h: Likewise.
	* db2/os_ext.h: Likewise.
	* db2/os_func.h: Likewise.
	* db2/queue.h: Likewise.
	* db2/shqueue.h: Likewise.
	* db2/txn.h: Likewise.
	* db2/lock.c: Likewise.
	* db2/lock_conflict.c: Likewise.
	* db2/lock_deadlock.c: Likewise.
	* db2/lock_region.c: Likewise.
	* db2/lock_util.c: Likewise.
	* db2/log.c: Likewise.
	* db2/log.src: Likewise.
	* db2/log_archive.c: Likewise.
	* db2/log_auto.c: Likewise.
	* db2/log_compare.c: Likewise.
	* db2/log_findckp.c: Likewise.
	* db2/log_get.c: Likewise.
	* db2/log_put.c: Likewise.
	* db2/log_rec.c: Likewise.
	* db2/log_register.c: Likewise.
	* db2/mp_bh.c: Likewise.
	* db2/mp_fget.c: Likewise.
	* db2/mp_fopen.c: Likewise.
	* db2/mp_fput.c: Likewise.
	* db2/mp_fset.c: Likewise.
	* db2/mp_open.c: Likewise.
	* db2/mp_pr.c: Likewise.
	* db2/mp_region.c: Likewise.
	* db2/mp_sync.c: Likewise.
	* db2/68020.gcc: Likewise.
	* db2/mutex.c: Likewise.
	* db2/parisc.gcc: Likewise.
	* db2/parisc.hp: Likewise.
	* db2/sco.cc: Likewise.
	* db2/os_abs.c: Likewise.
	* db2/os_alloc.c: Likewise.
	* db2/os_config.c: Likewise.
	* db2/os_dir.c: Likewise.
	* db2/os_fid.c: Likewise.
	* db2/os_fsync.c: Likewise.
	* db2/os_map.c: Likewise.
	* db2/os_oflags.c: Likewise.
	* db2/os_open.c: Likewise.
	* db2/os_rpath.c: Likewise.
	* db2/os_rw.c: Likewise.
	* db2/os_seek.c: Likewise.
	* db2/os_sleep.c: Likewise.
	* db2/os_spin.c: Likewise.
	* db2/os_stat.c: Likewise.
	* db2/os_unlink.c: Likewise.
	* db2/db_archive.c: Likewise.
	* db2/db_checkpoint.c: Likewise.
	* db2/db_deadlock.c: Likewise.
	* db2/db_dump.c: Likewise.
	* db2/db_dump185.c: Likewise.
	* db2/db_load.c: Likewise.
	* db2/db_printlog.c: Likewise.
	* db2/db_recover.c: Likewise.
	* db2/db_stat.c: Likewise.
	* db2/txn.c: Likewise.
	* db2/txn.src: Likewise.
	* db2/txn_auto.c: Likewise.
	* db2/txn_rec.c: Likewise.

	* elf/rtld.c: Move definition of __libc_stack_end to ...
	* sysdeps/generic/dl-sysdep.h: ...here.

	* sysdeps/unix/sysv/linux/fstatvfs.c: Handle nodiratime option.
	* sysdeps/unix/sysv/linux/bits/statvfs.h: Define ST_NODIRATIME.
	* sysdeps/unix/sysv/linux/sys/mount.h: Define MS_NODIRATIME.

1998-06-08 21:44  Ulrich Drepper  <drepper@cygnus.com>

	* sysdeps/unix/sysv/linux/fstatvfs.c: Handle constant option string
	from mntent correctly.

1998-06-06  Andreas Jaeger  <aj@arthur.rhein-neckar.de>

	* sunrpc/Makefile (generated): Correct typo.

1998-06-04  Philip Blundell  <philb@gnu.org>

	* elf/elf.h (EM_ARM, et al.): New definitions.
	* sysdeps/arm/dl-machine.h: Update for new draft ARM ELF ABI.
Diffstat (limited to 'db2/mp')
-rw-r--r-- db2/mp/mp_bh.c 79
-rw-r--r-- db2/mp/mp_fget.c 359
-rw-r--r-- db2/mp/mp_fopen.c 128
-rw-r--r-- db2/mp/mp_fput.c 64
-rw-r--r-- db2/mp/mp_fset.c 8
-rw-r--r-- db2/mp/mp_open.c 41
-rw-r--r-- db2/mp/mp_pr.c 294
-rw-r--r-- db2/mp/mp_region.c 229
-rw-r--r-- db2/mp/mp_sync.c 74
9 files changed, 640 insertions, 636 deletions
diff --git a/db2/mp/mp_bh.c b/db2/mp/mp_bh.c
index c23abdda24..d89f9c2ded 100644
--- a/db2/mp/mp_bh.c
+++ b/db2/mp/mp_bh.c
@@ -1,13 +1,13 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)mp_bh.c	10.28 (Sleepycat) 1/8/98";
+static const char sccsid[] = "@(#)mp_bh.c	10.38 (Sleepycat) 5/20/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -59,8 +59,10 @@ __memp_bhwrite(dbmp, mfp, bhp, restartp, wrotep)
 	    dbmfp != NULL; dbmfp = TAILQ_NEXT(dbmfp, q))
 		if (dbmfp->mfp == mfp) {
 			if (F_ISSET(dbmfp, MP_READONLY) &&
-			    __memp_upgrade(dbmp, dbmfp, mfp))
+			    __memp_upgrade(dbmp, dbmfp, mfp)) {
+				UNLOCKHANDLE(dbmp, dbmp->mutexp);
 				return (0);
+			}
 			break;
 		}
 	UNLOCKHANDLE(dbmp, dbmp->mutexp);
@@ -111,8 +113,8 @@ __memp_bhwrite(dbmp, mfp, bhp, restartp, wrotep)
 	if (F_ISSET(mfp, MP_TEMP))
 		return (0);
 
-	if (__memp_fopen(dbmp, mfp, R_ADDR(dbmp, mfp->path_off), mfp->ftype,
-	    0, 0, mfp->stat.st_pagesize, 0, NULL, NULL, 0, &dbmfp) != 0)
+	if (__memp_fopen(dbmp, mfp, R_ADDR(dbmp, mfp->path_off),
+	    0, 0, mfp->stat.st_pagesize, 0, NULL, &dbmfp) != 0)
 		return (0);
 
 found:	return (__memp_pgwrite(dbmfp, bhp, restartp, wrotep));
@@ -152,7 +154,7 @@ __memp_pgread(dbmfp, bhp, can_create)
 	ret = 0;
 	LOCKHANDLE(dbmp, dbmfp->mutexp);
 	if (dbmfp->fd == -1 || (ret =
-	    __db_seek(dbmfp->fd, pagesize, bhp->pgno, 0, SEEK_SET)) != 0) {
+	    __db_seek(dbmfp->fd, pagesize, bhp->pgno, 0, 0, SEEK_SET)) != 0) {
 		if (!can_create) {
 			if (dbmfp->fd == -1)
 				ret = EINVAL;
@@ -164,8 +166,17 @@ __memp_pgread(dbmfp, bhp, can_create)
 		}
 		UNLOCKHANDLE(dbmp, dbmfp->mutexp);
 
-		/* Clear any uninitialized data. */
-		memset(bhp->buf, 0, pagesize);
+		/* Clear the created page. */
+		if (mfp->clear_len == 0)
+			memset(bhp->buf, 0, pagesize);
+		else {
+			memset(bhp->buf, 0, mfp->clear_len);
+#ifdef DIAGNOSTIC
+			memset(bhp->buf + mfp->clear_len,
+			    0xff, pagesize - mfp->clear_len);
+#endif
+		}
+
 		goto pgin;
 	}
 
@@ -186,8 +197,16 @@ __memp_pgread(dbmfp, bhp, can_create)
 			goto err;
 		}
 
-		/* Clear any uninitialized data. */
-		memset(bhp->buf + nr, 0, pagesize - nr);
+		/*
+		 * If we didn't fail until we tried the read, don't clear the
+		 * whole page, it wouldn't be insane for a filesystem to just
+		 * always behave that way.  Else, clear any uninitialized data.
+		 */
+		if (nr == 0)
+			memset(bhp->buf, 0,
+			    mfp->clear_len == 0 ? pagesize : mfp->clear_len);
+		else
+			memset(bhp->buf + nr, 0, pagesize - nr);
 	}
 
 	/* Call any pgin function. */
@@ -308,31 +327,31 @@ __memp_pgwrite(dbmfp, bhp, restartp, wrotep)
 
 	/* Temporary files may not yet have been created. */
 	LOCKHANDLE(dbmp, dbmfp->mutexp);
-	if (dbmfp->fd == -1)
-		if ((ret = __db_appname(dbenv, DB_APP_TMP,
-		    NULL, NULL, &dbmfp->fd, NULL)) != 0 || dbmfp->fd == -1) {
-			UNLOCKHANDLE(dbmp, dbmfp->mutexp);
-			__db_err(dbenv,
-			    "unable to create temporary backing file");
-			goto err;
-		}
+	if (dbmfp->fd == -1 &&
+	    ((ret = __db_appname(dbenv, DB_APP_TMP, NULL, NULL,
+	    DB_CREATE | DB_EXCL | DB_TEMPORARY, &dbmfp->fd, NULL)) != 0 ||
+	    dbmfp->fd == -1)) {
+		UNLOCKHANDLE(dbmp, dbmfp->mutexp);
+		__db_err(dbenv, "unable to create temporary backing file");
+		goto err;
+	}
 
-	/* Write the page out. */
-	if ((ret = __db_seek(dbmfp->fd, pagesize, bhp->pgno, 0, SEEK_SET)) != 0)
+	/*
+	 * Write the page out.
+	 *
+	 * XXX
+	 * Shut the compiler up; it doesn't understand the correlation between
+	 * the failing clauses to __db_lseek and __db_write and this ret != 0.
+	 */
+	COMPQUIET(fail, NULL);
+	if ((ret =
+	    __db_seek(dbmfp->fd, pagesize, bhp->pgno, 0, 0, SEEK_SET)) != 0)
 		fail = "seek";
 	else if ((ret = __db_write(dbmfp->fd, bhp->buf, pagesize, &nw)) != 0)
 		fail = "write";
 	UNLOCKHANDLE(dbmp, dbmfp->mutexp);
-	if (ret != 0) {
-		/*
-		 * XXX
-		 * Shut the compiler up; it doesn't understand the correlation
-		 * between the failing clauses to __db_lseek and __db_write and
-		 * this ret != 0.
-		 */
-		COMPQUIET(fail, NULL);
+	if (ret != 0)
 		goto syserr;
-	}
 
 	if (nw != (ssize_t)pagesize) {
 		ret = EIO;
@@ -548,7 +567,7 @@ __memp_upgrade(dbmp, dbmfp, mfp)
 	 * way we could have gotten a file descriptor of any kind.
 	 */
 	if ((ret = __db_appname(dbmp->dbenv, DB_APP_DATA,
-	    NULL, R_ADDR(dbmp, mfp->path_off), NULL, &rpath)) != 0)
+	    NULL, R_ADDR(dbmp, mfp->path_off), 0, NULL, &rpath)) != 0)
 		return (ret);
 	if (__db_open(rpath, 0, 0, 0, &fd) != 0) {
 		F_SET(dbmfp, MP_UPGRADE_FAIL);
diff --git a/db2/mp/mp_fget.c b/db2/mp/mp_fget.c
index f5955c4c6f..c8ae2e9d98 100644
--- a/db2/mp/mp_fget.c
+++ b/db2/mp/mp_fget.c
@@ -1,21 +1,19 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)mp_fget.c	10.33 (Sleepycat) 12/2/97";
+static const char sccsid[] = "@(#)mp_fget.c	10.48 (Sleepycat) 6/2/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
 #include <sys/types.h>
-#include <sys/stat.h>
 
 #include <errno.h>
-#include <stdlib.h>
 #include <string.h>
 #endif
 
@@ -25,8 +23,6 @@ static const char sccsid[] = "@(#)mp_fget.c	10.33 (Sleepycat) 12/2/97";
 #include "mp.h"
 #include "common_ext.h"
 
-int __sleep_on_every_page_get;		/* XXX: thread debugging option. */
-
 /*
  * memp_fget --
  *	Get a page from the file.
@@ -35,7 +31,7 @@ int
 memp_fget(dbmfp, pgnoaddr, flags, addrp)
 	DB_MPOOLFILE *dbmfp;
 	db_pgno_t *pgnoaddr;
-	int flags;
+	u_int32_t flags;
 	void *addrp;
 {
 	BH *bhp;
@@ -43,11 +39,12 @@ memp_fget(dbmfp, pgnoaddr, flags, addrp)
 	MPOOL *mp;
 	MPOOLFILE *mfp;
 	size_t bucket, mf_offset;
-	u_long cnt;
-	int b_incr, b_inserted, readonly_alloc, ret;
-	void *addr;
+	u_int32_t st_hsearch;
+	int b_incr, first, ret;
 
 	dbmp = dbmfp->dbmp;
+	mp = dbmp->mp;
+	mfp = dbmfp->mfp;
 
 	/*
 	 * Validate arguments.
@@ -79,32 +76,62 @@ memp_fget(dbmfp, pgnoaddr, flags, addrp)
 		}
 	}
 
-#ifdef DEBUG
+#ifdef DIAGNOSTIC
 	/*
 	 * XXX
 	 * We want to switch threads as often as possible.  Sleep every time
 	 * we get a new page to make it more likely.
 	 */
-	if (__sleep_on_every_page_get &&
+	if (DB_GLOBAL(db_pageyield) &&
 	    (__db_yield == NULL || __db_yield() != 0))
 		__db_sleep(0, 1);
 #endif
 
-	mp = dbmp->mp;
-	mfp = dbmfp->mfp;
+	/* Initialize remaining local variables. */
 	mf_offset = R_OFFSET(dbmp, mfp);
-	addr = NULL;
 	bhp = NULL;
-	b_incr = b_inserted = ret = 0;
+	st_hsearch = 0;
+	b_incr = ret = 0;
+
+	/* Determine the hash bucket where this page will live. */
+	bucket = BUCKET(mp, mf_offset, *pgnoaddr);
 
 	LOCKREGION(dbmp);
 
 	/*
-	 * If mmap'ing the file, just return a pointer.  However, if another
-	 * process has opened the file for writing since we mmap'd it, start
-	 * playing the game by their rules, i.e. everything goes through the
-	 * cache.  All pages previously returned should be safe, as long as
-	 * a locking protocol was observed.
+	 * Check for the last or last + 1 page requests.
+	 *
+	 * Examine and update the file's last_pgno value.  We don't care if
+	 * the last_pgno value immediately changes due to another thread --
+	 * at this instant in time, the value is correct.  We do increment the
+	 * current last_pgno value if the thread is asking for a new page,
+	 * however, to ensure that two threads creating pages don't get the
+	 * same one.
+	 */
+	if (LF_ISSET(DB_MPOOL_LAST | DB_MPOOL_NEW)) {
+		if (LF_ISSET(DB_MPOOL_NEW))
+			++mfp->last_pgno;
+		*pgnoaddr = mfp->last_pgno;
+		bucket = BUCKET(mp, mf_offset, mfp->last_pgno);
+
+		if (LF_ISSET(DB_MPOOL_NEW))
+			goto alloc;
+	}
+
+	/*
+	 * If mmap'ing the file and the page is not past the end of the file,
+	 * just return a pointer.
+	 *
+	 * The page may be past the end of the file, so check the page number
+	 * argument against the original length of the file.  If we previously
+	 * returned pages past the original end of the file, last_pgno will
+	 * have been updated to match the "new" end of the file, and checking
+	 * against it would return pointers past the end of the mmap'd region.
+	 *
+	 * If another process has opened the file for writing since we mmap'd
+	 * it, we will start playing the game by their rules, i.e. everything
+	 * goes through the cache.  All pages previously returned will be safe,
+	 * as long as the correct locking protocol was observed.
 	 *
 	 * XXX
 	 * We don't discard the map because we don't know when all of the
@@ -112,203 +139,180 @@ memp_fget(dbmfp, pgnoaddr, flags, addrp)
 	 * It would be possible to do so by reference counting the open
 	 * pages from the mmap, but it's unclear to me that it's worth it.
 	 */
-	if (dbmfp->addr != NULL && F_ISSET(dbmfp->mfp, MP_CAN_MMAP)) {
-		readonly_alloc = 0;
-		if (LF_ISSET(DB_MPOOL_LAST))
-			*pgnoaddr = mfp->last_pgno;
-		else {
+	if (dbmfp->addr != NULL && F_ISSET(mfp, MP_CAN_MMAP))
+		if (*pgnoaddr > mfp->orig_last_pgno) {
 			/*
 			 * !!!
-			 * Allocate a page that can never really exist.  See
-			 * the comment above about non-existent pages and the
-			 * hash access method.
+			 * See the comment above about non-existent pages and
+			 * the hash access method.
 			 */
-			if (LF_ISSET(DB_MPOOL_CREATE | DB_MPOOL_NEW))
-				readonly_alloc = 1;
-			else if (*pgnoaddr > mfp->last_pgno) {
+			if (!LF_ISSET(DB_MPOOL_CREATE)) {
 				__db_err(dbmp->dbenv,
 				    "%s: page %lu doesn't exist",
 				    __memp_fn(dbmfp), (u_long)*pgnoaddr);
 				ret = EINVAL;
 				goto err;
 			}
-		}
-		if (!readonly_alloc) {
-			addr = R_ADDR(dbmfp, *pgnoaddr * mfp->stat.st_pagesize);
-
+		} else {
+			*(void **)addrp =
+			    R_ADDR(dbmfp, *pgnoaddr * mfp->stat.st_pagesize);
 			++mp->stat.st_map;
 			++mfp->stat.st_map;
+			goto done;
+		}
 
-			goto mapret;
+	/* Search the hash chain for the page. */
+	for (bhp = SH_TAILQ_FIRST(&dbmp->htab[bucket], __bh);
+	    bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, hq, __bh)) {
+		++st_hsearch;
+		if (bhp->pgno != *pgnoaddr || bhp->mf_offset != mf_offset)
+			continue;
+
+		/* Increment the reference count. */
+		if (bhp->ref == UINT16_T_MAX) {
+			__db_err(dbmp->dbenv,
+			    "%s: page %lu: reference count overflow",
+			    __memp_fn(dbmfp), (u_long)bhp->pgno);
+			ret = EINVAL;
+			goto err;
 		}
-	}
 
-	/* Check if requesting the last page or a new page. */
-	if (LF_ISSET(DB_MPOOL_LAST))
-		*pgnoaddr = mfp->last_pgno;
+		/*
+		 * Increment the reference count.  We may discard the region
+		 * lock as we evaluate and/or read the buffer, so we need to
+		 * ensure that it doesn't move and that its contents remain
+		 * unchanged.
+		 */
+		++bhp->ref;
+		b_incr = 1;
 
-	if (LF_ISSET(DB_MPOOL_NEW)) {
-		*pgnoaddr = mfp->last_pgno + 1;
-		goto alloc;
-	}
+		/*
+	 	 * Any buffer we find might be trouble.
+		 *
+		 * BH_LOCKED --
+		 * I/O is in progress.  Because we've incremented the buffer
+		 * reference count, we know the buffer can't move.  Unlock
+		 * the region lock, wait for the I/O to complete, and reacquire
+		 * the region.
+		 */
+		for (first = 1; F_ISSET(bhp, BH_LOCKED); first = 0) {
+			UNLOCKREGION(dbmp);
 
-	/* Check the BH hash bucket queue. */
-	bucket = BUCKET(mp, mf_offset, *pgnoaddr);
-	for (cnt = 0,
-	    bhp = SH_TAILQ_FIRST(&dbmp->htab[bucket], __bh);
-	    bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, hq, __bh)) {
-		++cnt;
-		if (bhp->pgno == *pgnoaddr && bhp->mf_offset == mf_offset) {
-			addr = bhp->buf;
-			++mp->stat.st_hash_searches;
-			if (cnt > mp->stat.st_hash_longest)
-				mp->stat.st_hash_longest = cnt;
-			mp->stat.st_hash_examined += cnt;
-			goto found;
+			/*
+			 * Explicitly yield the processor if it's not the first
+			 * pass through this loop -- if we don't, we might end
+			 * up running to the end of our CPU quantum as we will
+			 * simply be swapping between the two locks.
+			 */
+			if (!first && (__db_yield == NULL || __db_yield() != 0))
+				__db_sleep(0, 1);
+
+			LOCKBUFFER(dbmp, bhp);
+			/* Wait for I/O to finish... */
+			UNLOCKBUFFER(dbmp, bhp);
+			LOCKREGION(dbmp);
 		}
-	}
-	if (cnt != 0) {
-		++mp->stat.st_hash_searches;
-		if (cnt > mp->stat.st_hash_longest)
-			mp->stat.st_hash_longest = cnt;
-		mp->stat.st_hash_examined += cnt;
+
+		/*
+		 * BH_TRASH --
+		 * The contents of the buffer are garbage.  Shouldn't happen,
+		 * and this read is likely to fail, but might as well try.
+		 */
+		if (F_ISSET(bhp, BH_TRASH))
+			goto reread;
+
+		/*
+		 * BH_CALLPGIN --
+		 * The buffer was converted so it could be written, and the
+		 * contents need to be converted again.
+		 */
+		if (F_ISSET(bhp, BH_CALLPGIN)) {
+			if ((ret = __memp_pg(dbmfp, bhp, 1)) != 0)
+				goto err;
+			F_CLR(bhp, BH_CALLPGIN);
+		}
+
+		++mp->stat.st_cache_hit;
+		++mfp->stat.st_cache_hit;
+		*(void **)addrp = bhp->buf;
+		goto done;
 	}
 
-alloc:	/*
-	 * Allocate a new buffer header and data space, and mark the contents
-	 * as useless.
-	 */
+alloc:	/* Allocate new buffer header and data space. */
 	if ((ret = __memp_ralloc(dbmp, sizeof(BH) -
 	    sizeof(u_int8_t) + mfp->stat.st_pagesize, NULL, &bhp)) != 0)
 		goto err;
-	addr = bhp->buf;
-#ifdef DEBUG
-	if ((ALIGNTYPE)addr & (sizeof(size_t) - 1)) {
+
+#ifdef DIAGNOSTIC
+	if ((ALIGNTYPE)bhp->buf & (sizeof(size_t) - 1)) {
 		__db_err(dbmp->dbenv,
 		    "Internal error: BH data NOT size_t aligned.");
-		abort();
+		ret = EINVAL;
+		goto err;
 	}
 #endif
+	/* Initialize the BH fields. */
 	memset(bhp, 0, sizeof(BH));
 	LOCKINIT(dbmp, &bhp->mutex);
+	bhp->ref = 1;
+	bhp->pgno = *pgnoaddr;
+	bhp->mf_offset = mf_offset;
 
 	/*
 	 * Prepend the bucket header to the head of the appropriate MPOOL
 	 * bucket hash list.  Append the bucket header to the tail of the
 	 * MPOOL LRU chain.
-	 *
-	 * We have to do this before we read in the page so we can discard
-	 * our region lock without screwing up the world.
 	 */
-	bucket = BUCKET(mp, mf_offset, *pgnoaddr);
 	SH_TAILQ_INSERT_HEAD(&dbmp->htab[bucket], bhp, hq, __bh);
 	SH_TAILQ_INSERT_TAIL(&mp->bhq, bhp, q);
-	++mp->stat.st_page_clean;
-	b_inserted = 1;
-
-	/* Set the page number, and associated MPOOLFILE. */
-	bhp->mf_offset = mf_offset;
-	bhp->pgno = *pgnoaddr;
 
 	/*
-	 * If we know we created the page, zero it out and continue.
+	 * If we created the page, zero it out and continue.
 	 *
 	 * !!!
-	 * Note: DB_MPOOL_NEW deliberately doesn't call the pgin function.
+	 * Note: DB_MPOOL_NEW specifically doesn't call the pgin function.
 	 * If DB_MPOOL_CREATE is used, then the application's pgin function
 	 * has to be able to handle pages of 0's -- if it uses DB_MPOOL_NEW,
 	 * it can detect all of its page creates, and not bother.
 	 *
 	 * Otherwise, read the page into memory, optionally creating it if
 	 * DB_MPOOL_CREATE is set.
-	 *
-	 * Increment the reference count for created buffers, but importantly,
-	 * increment the reference count for buffers we're about to read so
-	 * that the buffer can't move.
 	 */
-	++bhp->ref;
-	b_incr = 1;
+	if (LF_ISSET(DB_MPOOL_NEW)) {
+		if (mfp->clear_len == 0)
+			memset(bhp->buf, 0, mfp->stat.st_pagesize);
+		else {
+			memset(bhp->buf, 0, mfp->clear_len);
+#ifdef DIAGNOSTIC
+			memset(bhp->buf + mfp->clear_len, 0xff,
+			    mfp->stat.st_pagesize - mfp->clear_len);
+#endif
+		}
 
-	if (LF_ISSET(DB_MPOOL_NEW))
-		memset(addr, 0, mfp->stat.st_pagesize);
-	else {
+		++mp->stat.st_page_create;
+		++mfp->stat.st_page_create;
+	} else {
 		/*
 		 * It's possible for the read function to fail, which means
-		 * that we fail as well.
+		 * that we fail as well.  Note, the __memp_pgread() function
+		 * discards the region lock, so the buffer must be pinned
+		 * down so that it cannot move and its contents are unchanged.
 		 */
 reread:		if ((ret = __memp_pgread(dbmfp,
-		    bhp, LF_ISSET(DB_MPOOL_CREATE | DB_MPOOL_NEW))) != 0)
-			goto err;
-
-		/*
-		 * !!!
-		 * The __memp_pgread call discarded and reacquired the region
-		 * lock.  Because the buffer reference count was incremented
-		 * before the region lock was discarded the buffer can't move
-		 * and its contents can't change.
-		 */
-		++mp->stat.st_cache_miss;
-		++mfp->stat.st_cache_miss;
-	}
-
-	if (0) {
-found:		/* Increment the reference count. */
-		if (bhp->ref == UINT16_T_MAX) {
-			__db_err(dbmp->dbenv,
-			    "%s: too many references to page %lu",
-			    __memp_fn(dbmfp), bhp->pgno);
-			ret = EINVAL;
-			goto err;
-		}
-		++bhp->ref;
-		b_incr = 1;
-
-		/*
-	 	 * Any found buffer might be trouble.
-		 *
-		 * BH_LOCKED --
-		 * I/O in progress, wait for it to finish.  Because the buffer
-		 * reference count was incremented before the region lock was
-		 * discarded we know the buffer can't move and its contents
-		 * can't change.
-		 */
-		for (cnt = 0; F_ISSET(bhp, BH_LOCKED); ++cnt) {
-			UNLOCKREGION(dbmp);
-
+		    bhp, LF_ISSET(DB_MPOOL_CREATE))) != 0) {
 			/*
-			 * Sleep so that we don't simply spin, switching locks.
-			 * (See the comment in include/mp.h.)
+			 * !!!
+			 * Discard the buffer unless another thread is waiting
+			 * on our I/O to complete.  Regardless, the header has
+			 * the BH_TRASH flag set.
 			 */
-			if (cnt != 0 &&
-			    (__db_yield == NULL || __db_yield() != 0))
-				__db_sleep(0, 1);
-
-			LOCKBUFFER(dbmp, bhp);
-			/* Waiting for I/O to finish... */
-			UNLOCKBUFFER(dbmp, bhp);
-			LOCKREGION(dbmp);
-		}
-
-		/*
-		 * BH_TRASH --
-		 * The buffer is garbage.
-		 */
-		if (F_ISSET(bhp, BH_TRASH))
-			goto reread;
-
-		/*
-		 * BH_CALLPGIN --
-		 * The buffer was written, and the contents need to be
-		 * converted again.
-		 */
-		if (F_ISSET(bhp, BH_CALLPGIN)) {
-			if ((ret = __memp_pg(dbmfp, bhp, 1)) != 0)
-				goto err;
-			F_CLR(bhp, BH_CALLPGIN);
+			if (bhp->ref == 1)
+				__memp_bhfree(dbmp, mfp, bhp, 1);
+			goto err;
 		}
 
-		++mp->stat.st_cache_hit;
-		++mfp->stat.st_cache_hit;
+		++mp->stat.st_cache_miss;
+		++mfp->stat.st_cache_miss;
 	}
 
 	/*
@@ -319,23 +323,30 @@ found:		/* Increment the reference count. */
 	if (bhp->pgno > mfp->last_pgno)
 		mfp->last_pgno = bhp->pgno;
 
-mapret:	LOCKHANDLE(dbmp, dbmfp->mutexp);
+	++mp->stat.st_page_clean;
+	*(void **)addrp = bhp->buf;
+
+done:	/* Update the chain search statistics. */
+	if (st_hsearch) {
+		++mp->stat.st_hash_searches;
+		if (st_hsearch > mp->stat.st_hash_longest)
+			mp->stat.st_hash_longest = st_hsearch;
+		mp->stat.st_hash_examined += st_hsearch;
+	}
+
+	UNLOCKREGION(dbmp);
+
+	LOCKHANDLE(dbmp, dbmfp->mutexp);
 	++dbmfp->pinref;
 	UNLOCKHANDLE(dbmp, dbmfp->mutexp);
 
-	if (0) {
-err:		/*
-		 * If no other process is already waiting on a created buffer,
-		 * go ahead and discard it, it's not useful.
-		 */
-		if (b_incr)
-			--bhp->ref;
-		if (b_inserted && bhp->ref == 0)
-			__memp_bhfree(dbmp, mfp, bhp, 1);
-	}
+	return (0);
 
+err:	/* Discard our reference. */
+	if (b_incr)
+		--bhp->ref;
 	UNLOCKREGION(dbmp);
 
-	*(void **)addrp = addr;
+	*(void **)addrp = NULL;
 	return (ret);
 }
diff --git a/db2/mp/mp_fopen.c b/db2/mp/mp_fopen.c
index 0f41122373..a4cbac8d4e 100644
--- a/db2/mp/mp_fopen.c
+++ b/db2/mp/mp_fopen.c
@@ -1,24 +1,20 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)mp_fopen.c	10.37 (Sleepycat) 1/18/98";
+static const char sccsid[] = "@(#)mp_fopen.c	10.47 (Sleepycat) 5/4/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
 #include <sys/types.h>
-#include <sys/mman.h>
-#include <sys/stat.h>
 
 #include <errno.h>
-#include <stdlib.h>
 #include <string.h>
-#include <unistd.h>
 #endif
 
 #include "db_int.h"
@@ -28,22 +24,21 @@ static const char sccsid[] = "@(#)mp_fopen.c	10.37 (Sleepycat) 1/18/98";
 #include "common_ext.h"
 
 static int __memp_mf_close __P((DB_MPOOL *, DB_MPOOLFILE *));
-static int __memp_mf_open __P((DB_MPOOL *, const char *,
-    int, size_t, db_pgno_t, int, DBT *, u_int8_t *, MPOOLFILE **));
+static int __memp_mf_open __P((DB_MPOOL *,
+    const char *, size_t, db_pgno_t, DB_MPOOL_FINFO *, MPOOLFILE **));
 
 /*
  * memp_fopen --
  *	Open a backing file for the memory pool.
  */
 int
-memp_fopen(dbmp, path, ftype,
-    flags, mode, pagesize, lsn_offset, pgcookie, fileid, retp)
+memp_fopen(dbmp, path, flags, mode, pagesize, finfop, retp)
 	DB_MPOOL *dbmp;
 	const char *path;
-	int ftype, flags, mode, lsn_offset;
+	u_int32_t flags;
+	int mode;
 	size_t pagesize;
-	DBT *pgcookie;
-	u_int8_t *fileid;
+	DB_MPOOL_FINFO *finfop;
 	DB_MPOOLFILE **retp;
 {
 	int ret;
@@ -59,31 +54,31 @@ memp_fopen(dbmp, path, ftype,
 		return (EINVAL);
 	}
 
-	return (__memp_fopen(dbmp, NULL, path, ftype,
-	    flags, mode, pagesize, lsn_offset, pgcookie, fileid, 1, retp));
+	return (__memp_fopen(dbmp,
+	    NULL, path, flags, mode, pagesize, 1, finfop, retp));
 }
 
 /*
  * __memp_fopen --
  *	Open a backing file for the memory pool; internal version.
  *
- * PUBLIC: int __memp_fopen __P((DB_MPOOL *, MPOOLFILE *, const char *, int,
- * PUBLIC:    int, int, size_t, int, DBT *, u_int8_t *, int, DB_MPOOLFILE **));
+ * PUBLIC: int __memp_fopen __P((DB_MPOOL *, MPOOLFILE *, const char *,
+ * PUBLIC:    u_int32_t, int, size_t, int, DB_MPOOL_FINFO *, DB_MPOOLFILE **));
  */
 int
-__memp_fopen(dbmp, mfp, path,
-    ftype, flags, mode, pagesize, lsn_offset, pgcookie, fileid, needlock, retp)
+__memp_fopen(dbmp, mfp, path, flags, mode, pagesize, needlock, finfop, retp)
 	DB_MPOOL *dbmp;
 	MPOOLFILE *mfp;
 	const char *path;
-	int ftype, flags, mode, lsn_offset, needlock;
+	u_int32_t flags;
+	int mode, needlock;
 	size_t pagesize;
-	DBT *pgcookie;
-	u_int8_t *fileid;
+	DB_MPOOL_FINFO *finfop;
 	DB_MPOOLFILE **retp;
 {
 	DB_ENV *dbenv;
 	DB_MPOOLFILE *dbmfp;
+	DB_MPOOL_FINFO finfo;
 	db_pgno_t last_pgno;
 	size_t size;
 	u_int32_t mbytes, bytes;
@@ -91,18 +86,34 @@ __memp_fopen(dbmp, mfp, path,
 	u_int8_t idbuf[DB_FILE_ID_LEN];
 	char *rpath;
 
-	/*
-	 * XXX
-	 * If mfp is provided, the following arguments do NOT need to be
-	 * specified:
-	 *      lsn_offset
-	 *      pgcookie
-	 *      fileid
-	 */
 	dbenv = dbmp->dbenv;
 	ret = 0;
 	rpath = NULL;
 
+	/*
+	 * If mfp is provided, we take the DB_MPOOL_FINFO information from
+	 * the mfp.  We don't bother initializing everything, because some
+	 * of them are expensive to acquire.  If no mfp is provided and the
+	 * finfop argument is NULL, we default the values.
+	 */
+	if (finfop == NULL) {
+		memset(&finfo, 0, sizeof(finfo));
+		if (mfp != NULL) {
+			finfo.ftype = mfp->ftype;
+			finfo.pgcookie = NULL;
+			finfo.fileid = NULL;
+			finfo.lsn_offset = mfp->lsn_off;
+			finfo.clear_len = mfp->clear_len;
+		} else {
+			finfo.ftype = 0;
+			finfo.pgcookie = NULL;
+			finfo.fileid = NULL;
+			finfo.lsn_offset = -1;
+			finfo.clear_len = 0;
+		}
+		finfop = &finfo;
+	}
+
 	/* Allocate and initialize the per-process structure. */
 	if ((dbmfp =
 	    (DB_MPOOLFILE *)__db_calloc(1, sizeof(DB_MPOOLFILE))) == NULL) {
@@ -126,11 +137,11 @@ __memp_fopen(dbmp, mfp, path,
 	} else {
 		/* Get the real name for this file and open it. */
 		if ((ret = __db_appname(dbenv,
-		    DB_APP_DATA, NULL, path, NULL, &rpath)) != 0)
+		    DB_APP_DATA, NULL, path, 0, NULL, &rpath)) != 0)
 			goto err;
 		if ((ret = __db_open(rpath,
-		    LF_ISSET(DB_CREATE | DB_RDONLY), DB_CREATE | DB_RDONLY,
-		    mode, &dbmfp->fd)) != 0) {
+		   LF_ISSET(DB_CREATE | DB_RDONLY),
+		   DB_CREATE | DB_RDONLY, mode, &dbmfp->fd)) != 0) {
 			__db_err(dbenv, "%s: %s", rpath, strerror(ret));
 			goto err;
 		}
@@ -156,12 +167,11 @@ __memp_fopen(dbmp, mfp, path,
 		 * don't use timestamps, otherwise there'd be no chance of any
 		 * other process joining the party.
 		 */
-		if (mfp == NULL && fileid == NULL) {
+		if (finfop->fileid == NULL) {
 			if ((ret = __db_fileid(dbenv, rpath, 0, idbuf)) != 0)
 				goto err;
-			fileid = idbuf;
+			finfop->fileid = idbuf;
 		}
-		FREES(rpath);
 	}
 
 	/*
@@ -173,8 +183,8 @@ __memp_fopen(dbmp, mfp, path,
 		LOCKREGION(dbmp);
 
 	if (mfp == NULL)
-		ret = __memp_mf_open(dbmp, path, ftype,
-		    pagesize, last_pgno, lsn_offset, pgcookie, fileid, &mfp);
+		ret = __memp_mf_open(dbmp,
+		    path, pagesize, last_pgno, finfop, &mfp);
 	else {
 		++mfp->ref;
 		ret = 0;
@@ -218,7 +228,7 @@ __memp_fopen(dbmp, mfp, path,
 			F_CLR(mfp, MP_CAN_MMAP);
 		if (path == NULL)
 			F_CLR(mfp, MP_CAN_MMAP);
-		if (ftype != 0)
+		if (finfop->ftype != 0)
 			F_CLR(mfp, MP_CAN_MMAP);
 		if (LF_ISSET(DB_NOMMAP))
 			F_CLR(mfp, MP_CAN_MMAP);
@@ -229,11 +239,14 @@ __memp_fopen(dbmp, mfp, path,
 	dbmfp->addr = NULL;
 	if (F_ISSET(mfp, MP_CAN_MMAP)) {
 		dbmfp->len = size;
-		if (__db_map(dbmfp->fd, dbmfp->len, 1, 1, &dbmfp->addr) != 0) {
+		if (__db_mapfile(rpath,
+		    dbmfp->fd, dbmfp->len, 1, &dbmfp->addr) != 0) {
 			dbmfp->addr = NULL;
 			F_CLR(mfp, MP_CAN_MMAP);
 		}
 	}
+	if (rpath != NULL)
+		FREES(rpath);
 
 	LOCKHANDLE(dbmp, dbmp->mutexp);
 	TAILQ_INSERT_TAIL(&dbmp->dbmfq, dbmfp, q);
@@ -260,15 +273,12 @@ err:	/*
  *	Open an MPOOLFILE.
  */
 static int
-__memp_mf_open(dbmp, path,
-    ftype, pagesize, last_pgno, lsn_offset, pgcookie, fileid, retp)
+__memp_mf_open(dbmp, path, pagesize, last_pgno, finfop, retp)
 	DB_MPOOL *dbmp;
 	const char *path;
-	int ftype, lsn_offset;
 	size_t pagesize;
 	db_pgno_t last_pgno;
-	DBT *pgcookie;
-	u_int8_t *fileid;
+	DB_MPOOL_FINFO *finfop;
 	MPOOLFILE **retp;
 {
 	MPOOLFILE *mfp;
@@ -286,12 +296,13 @@ __memp_mf_open(dbmp, path,
 		    mfp != NULL; mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile)) {
 			if (F_ISSET(mfp, MP_TEMP))
 				continue;
-			if (!memcmp(fileid,
+			if (!memcmp(finfop->fileid,
 			    R_ADDR(dbmp, mfp->fileid_off), DB_FILE_ID_LEN)) {
-				if (ftype != mfp->ftype ||
+				if (finfop->clear_len != mfp->clear_len ||
+				    finfop->ftype != mfp->ftype ||
 				    pagesize != mfp->stat.st_pagesize) {
 					__db_err(dbmp->dbenv,
-					    "%s: ftype or pagesize changed",
+			    "%s: ftype, clear length or pagesize changed",
 					    path);
 					return (EINVAL);
 				}
@@ -311,8 +322,9 @@ __memp_mf_open(dbmp, path,
 	/* Initialize the structure. */
 	memset(mfp, 0, sizeof(MPOOLFILE));
 	mfp->ref = 1;
-	mfp->ftype = ftype;
-	mfp->lsn_off = lsn_offset;
+	mfp->ftype = finfop->ftype;
+	mfp->lsn_off = finfop->lsn_offset;
+	mfp->clear_len = finfop->clear_len;
 
 	/*
 	 * If the user specifies DB_MPOOL_LAST or DB_MPOOL_NEW on a memp_fget,
@@ -320,7 +332,7 @@ __memp_mf_open(dbmp, path,
 	 * it away.
 	 */
 	mfp->stat.st_pagesize = pagesize;
-	mfp->last_pgno = last_pgno;
+	mfp->orig_last_pgno = mfp->last_pgno = last_pgno;
 
 	F_SET(mfp, MP_CAN_MMAP);
 	if (ISTEMPORARY)
@@ -336,19 +348,19 @@ __memp_mf_open(dbmp, path,
 		if ((ret = __memp_ralloc(dbmp,
 		    DB_FILE_ID_LEN, &mfp->fileid_off, &p)) != 0)
 			goto err;
-		memcpy(p, fileid, DB_FILE_ID_LEN);
+		memcpy(p, finfop->fileid, DB_FILE_ID_LEN);
 	}
 
 	/* Copy the page cookie into shared memory. */
-	if (pgcookie == NULL || pgcookie->size == 0) {
+	if (finfop->pgcookie == NULL || finfop->pgcookie->size == 0) {
 		mfp->pgcookie_len = 0;
 		mfp->pgcookie_off = 0;
 	} else {
 		if ((ret = __memp_ralloc(dbmp,
-		    pgcookie->size, &mfp->pgcookie_off, &p)) != 0)
+		    finfop->pgcookie->size, &mfp->pgcookie_off, &p)) != 0)
 			goto err;
-		memcpy(p, pgcookie->data, pgcookie->size);
-		mfp->pgcookie_len = pgcookie->size;
+		memcpy(p, finfop->pgcookie->data, finfop->pgcookie->size);
+		mfp->pgcookie_len = finfop->pgcookie->size;
 	}
 
 	/* Prepend the MPOOLFILE to the list of MPOOLFILE's. */
@@ -397,7 +409,7 @@ memp_fclose(dbmfp)
 
 	/* Discard any mmap information. */
 	if (dbmfp->addr != NULL &&
-	    (ret = __db_unmap(dbmfp->addr, dbmfp->len)) != 0)
+	    (ret = __db_unmapfile(dbmfp->addr, dbmfp->len)) != 0)
 		__db_err(dbmp->dbenv,
 		    "%s: %s", __memp_fn(dbmfp), strerror(ret));
 
@@ -480,13 +492,13 @@ __memp_mf_close(dbmp, dbmfp)
 	SH_TAILQ_REMOVE(&mp->mpfq, mfp, q, __mpoolfile);
 
 	/* Free the space. */
-	__db_shalloc_free(dbmp->addr, mfp);
 	if (mfp->path_off != 0)
 		__db_shalloc_free(dbmp->addr, R_ADDR(dbmp, mfp->path_off));
 	if (mfp->fileid_off != 0)
 		__db_shalloc_free(dbmp->addr, R_ADDR(dbmp, mfp->fileid_off));
 	if (mfp->pgcookie_off != 0)
 		__db_shalloc_free(dbmp->addr, R_ADDR(dbmp, mfp->pgcookie_off));
+	__db_shalloc_free(dbmp->addr, mfp);
 
 ret1:	UNLOCKREGION(dbmp);
 	return (0);
diff --git a/db2/mp/mp_fput.c b/db2/mp/mp_fput.c
index 335ee9ff16..5675493137 100644
--- a/db2/mp/mp_fput.c
+++ b/db2/mp/mp_fput.c
@@ -1,20 +1,19 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)mp_fput.c	10.17 (Sleepycat) 12/20/97";
+static const char sccsid[] = "@(#)mp_fput.c	10.22 (Sleepycat) 4/26/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
 #include <sys/types.h>
 
 #include <errno.h>
-#include <stdlib.h>
 #endif
 
 #include "db_int.h"
@@ -31,12 +30,11 @@ int
 memp_fput(dbmfp, pgaddr, flags)
 	DB_MPOOLFILE *dbmfp;
 	void *pgaddr;
-	int flags;
+	u_int32_t flags;
 {
 	BH *bhp;
 	DB_MPOOL *dbmp;
 	MPOOL *mp;
-	MPOOLFILE *mfp;
 	int wrote, ret;
 
 	dbmp = dbmfp->dbmp;
@@ -71,8 +69,9 @@ memp_fput(dbmfp, pgaddr, flags)
 
 	/*
 	 * If we're mapping the file, there's nothing to do.  Because we can
-	 * quit mapping at any time, we have to check on each buffer to see
-	 * if it's in the map region.
+	 * stop mapping the file at any time, we have to check on each buffer
+	 * to see if the address we gave the application was part of the map
+	 * region.
 	 */
 	if (dbmfp->addr != NULL && pgaddr >= dbmfp->addr &&
 	    (u_int8_t *)pgaddr <= (u_int8_t *)dbmfp->addr + dbmfp->len)
@@ -98,36 +97,33 @@ memp_fput(dbmfp, pgaddr, flags)
 		F_SET(bhp, BH_DISCARD);
 
 	/*
-	 * If more than one reference to the page, we're done.  Ignore discard
-	 * flags (for now) and leave it at its position in the LRU chain.  The
-	 * rest gets done at last reference close.
+	 * Check for a reference count that would go negative.  This can
+	 * happen if the application returns a page twice.
 	 */
-#ifdef DEBUG
 	if (bhp->ref == 0) {
-		__db_err(dbmp->dbenv,
-    "Unpinned page returned: reference count on page %lu went negative.",
-		    (u_long)bhp->pgno);
-		abort();
+		__db_err(dbmp->dbenv, "%s: page %lu: unpinned page returned",
+		    __memp_fn(dbmfp), (u_long)bhp->pgno);
+		UNLOCKREGION(dbmp);
+		return (EINVAL);
 	}
-#endif
+
+	/*
+	 * If more than one reference to the page, we're done.  Ignore the
+	 * discard flags (for now) and leave it at its position in the LRU
+	 * chain.  The rest gets done at last reference close.
+	 */
 	if (--bhp->ref > 0) {
 		UNLOCKREGION(dbmp);
 		return (0);
 	}
 
-	/* Move the buffer to the head/tail of the LRU chain. */
-	SH_TAILQ_REMOVE(&mp->bhq, bhp, q, __bh);
-	if (F_ISSET(bhp, BH_DISCARD))
-		SH_TAILQ_INSERT_HEAD(&mp->bhq, bhp, q, __bh);
-	else
-		SH_TAILQ_INSERT_TAIL(&mp->bhq, bhp, q);
-
 	/*
-	 * If this buffer is scheduled for writing because of a checkpoint,
-	 * write it now.  If we can't write it, set a flag so that the next
-	 * time the memp_sync function is called we try writing it there,
-	 * as the checkpoint application better be able to write all of the
-	 * files.
+	 * If this buffer is scheduled for writing because of a checkpoint, we
+	 * need to write it (if we marked it dirty), or update the checkpoint
+	 * counters (if we didn't mark it dirty).  If we try to write it and
+	 * can't, that's not necessarily an error, but set a flag so that the
+	 * next time the memp_sync function runs we try writing it there, as
+	 * the checkpoint application better be able to write all of the files.
 	 */
 	if (F_ISSET(bhp, BH_WRITE))
 		if (F_ISSET(bhp, BH_DIRTY)) {
@@ -137,12 +133,18 @@ memp_fput(dbmfp, pgaddr, flags)
 		} else {
 			F_CLR(bhp, BH_WRITE);
 
-			mfp = R_ADDR(dbmp, bhp->mf_offset);
-			--mfp->lsn_cnt;
-
+			--dbmfp->mfp->lsn_cnt;
 			--mp->lsn_cnt;
 		}
 
+	/* Move the buffer to the head/tail of the LRU chain. */
+	SH_TAILQ_REMOVE(&mp->bhq, bhp, q, __bh);
+	if (F_ISSET(bhp, BH_DISCARD))
+		SH_TAILQ_INSERT_HEAD(&mp->bhq, bhp, q, __bh);
+	else
+		SH_TAILQ_INSERT_TAIL(&mp->bhq, bhp, q);
+
+
 	UNLOCKREGION(dbmp);
 	return (0);
 }
diff --git a/db2/mp/mp_fset.c b/db2/mp/mp_fset.c
index 2eff7dd74c..3b352aa553 100644
--- a/db2/mp/mp_fset.c
+++ b/db2/mp/mp_fset.c
@@ -1,13 +1,13 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)mp_fset.c	10.12 (Sleepycat) 11/26/97";
+static const char sccsid[] = "@(#)mp_fset.c	10.15 (Sleepycat) 4/26/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -30,16 +30,14 @@ int
 memp_fset(dbmfp, pgaddr, flags)
 	DB_MPOOLFILE *dbmfp;
 	void *pgaddr;
-	int flags;
+	u_int32_t flags;
 {
 	BH *bhp;
 	DB_MPOOL *dbmp;
 	MPOOL *mp;
-	MPOOLFILE *mfp;
 	int ret;
 
 	dbmp = dbmfp->dbmp;
-	mfp = dbmfp->mfp;
 	mp = dbmp->mp;
 
 	/* Validate arguments. */
diff --git a/db2/mp/mp_open.c b/db2/mp/mp_open.c
index ca81f8d6d6..fc985bc521 100644
--- a/db2/mp/mp_open.c
+++ b/db2/mp/mp_open.c
@@ -1,23 +1,20 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)mp_open.c	10.16 (Sleepycat) 11/28/97";
+static const char sccsid[] = "@(#)mp_open.c	10.23 (Sleepycat) 5/3/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
 #include <sys/types.h>
 
 #include <errno.h>
-#include <fcntl.h>
-#include <stdlib.h>
 #include <string.h>
-#include <unistd.h>
 #endif
 
 #include "db_int.h"
@@ -33,13 +30,14 @@ static const char sccsid[] = "@(#)mp_open.c	10.16 (Sleepycat) 11/28/97";
 int
 memp_open(path, flags, mode, dbenv, retp)
 	const char *path;
-	int flags, mode;
+	u_int32_t flags;
+	int mode;
 	DB_ENV *dbenv;
 	DB_MPOOL **retp;
 {
 	DB_MPOOL *dbmp;
 	size_t cachesize;
-	int ret;
+	int is_private, ret;
 
 	/* Validate arguments. */
 #ifdef HAVE_SPINLOCKS
@@ -62,15 +60,16 @@ memp_open(path, flags, mode, dbenv, retp)
 	dbmp->dbenv = dbenv;
 
 	/* Decide if it's possible for anyone else to access the pool. */
-	if ((dbenv == NULL && path == NULL) || LF_ISSET(DB_MPOOL_PRIVATE))
-		F_SET(dbmp, MP_ISPRIVATE);
+	is_private =
+	    (dbenv == NULL && path == NULL) || LF_ISSET(DB_MPOOL_PRIVATE);
 
 	/*
 	 * Map in the region.  We do locking regardless, as portions of it are
 	 * implemented in common code (if we put the region in a file, that is).
 	 */
 	F_SET(dbmp, MP_LOCKREGION);
-	if ((ret = __memp_ropen(dbmp, path, cachesize, mode, flags)) != 0)
+	if ((ret = __memp_ropen(dbmp,
+	    path, cachesize, mode, is_private, LF_ISSET(DB_CREATE))) != 0)
 		goto err;
 	F_CLR(dbmp, MP_LOCKREGION);
 
@@ -79,7 +78,7 @@ memp_open(path, flags, mode, dbenv, retp)
 	 * If it's threaded, then we have to lock both the handles and the
 	 * region, and we need to allocate a mutex for that purpose.
 	 */
-	if (!F_ISSET(dbmp, MP_ISPRIVATE))
+	if (!is_private)
 		F_SET(dbmp, MP_LOCKREGION);
 	if (LF_ISSET(DB_THREAD)) {
 		F_SET(dbmp, MP_LOCKHANDLE | MP_LOCKREGION);
@@ -135,10 +134,11 @@ memp_close(dbmp)
 	}
 
 	/* Close the region. */
-	if ((t_ret = __memp_rclose(dbmp)) && ret == 0)
+	if ((t_ret = __db_rdetach(&dbmp->reginfo)) != 0 && ret == 0)
 		ret = t_ret;
 
-	/* Discard the structure. */
+	if (dbmp->reginfo.path != NULL)
+		FREES(dbmp->reginfo.path);
 	FREE(dbmp, sizeof(DB_MPOOL));
 
 	return (ret);
@@ -154,8 +154,19 @@ memp_unlink(path, force, dbenv)
 	int force;
 	DB_ENV *dbenv;
 {
-	return (__db_runlink(dbenv,
-	    DB_APP_NONE, path, DB_DEFAULT_MPOOL_FILE, force));
+	REGINFO reginfo;
+	int ret;
+
+	memset(&reginfo, 0, sizeof(reginfo));
+	reginfo.dbenv = dbenv;
+	reginfo.appname = DB_APP_NONE;
+	if (path != NULL && (reginfo.path = __db_strdup(path)) == NULL)
+		return (ENOMEM);
+	reginfo.file = DB_DEFAULT_MPOOL_FILE;
+	ret = __db_runlink(&reginfo, force);
+	if (reginfo.path != NULL)
+		FREES(reginfo.path);
+	return (ret);
 }
 
 /*
diff --git a/db2/mp/mp_pr.c b/db2/mp/mp_pr.c
index 13a6c62d35..e83e0f44fa 100644
--- a/db2/mp/mp_pr.c
+++ b/db2/mp/mp_pr.c
@@ -1,13 +1,13 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)mp_pr.c	10.21 (Sleepycat) 1/6/98";
+static const char sccsid[] = "@(#)mp_pr.c	10.26 (Sleepycat) 5/23/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -15,20 +15,20 @@ static const char sccsid[] = "@(#)mp_pr.c	10.21 (Sleepycat) 1/6/98";
 
 #include <errno.h>
 #include <stdio.h>
-#include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
 #endif
 
 #include "db_int.h"
+#include "db_page.h"
 #include "shqueue.h"
 #include "db_shash.h"
 #include "mp.h"
+#include "db_auto.h"
+#include "db_ext.h"
+#include "common_ext.h"
 
-static void __memp_pbh __P((FILE *, DB_MPOOL *, BH *, int));
-static void __memp_pdbmf __P((FILE *, DB_MPOOLFILE *, int));
-static void __memp_pmf __P((FILE *, MPOOLFILE *, int));
-static void __memp_pmp __P((FILE *, DB_MPOOL *, MPOOL *, int));
+static void __memp_pbh __P((DB_MPOOL *, BH *, size_t *, FILE *));
 
 /*
  * memp_stat --
@@ -64,6 +64,8 @@ memp_stat(dbmp, gspp, fspp, db_malloc)
 		    dbmp->mp->rlayout.lock.mutex_set_wait;
 		(*gspp)->st_region_nowait =
 		    dbmp->mp->rlayout.lock.mutex_set_nowait;
+		(*gspp)->st_refcnt = dbmp->mp->rlayout.refcnt;
+		(*gspp)->st_regsize = dbmp->mp->rlayout.size;
 
 		UNLOCKREGION(dbmp);
 	}
@@ -77,7 +79,8 @@ memp_stat(dbmp, gspp, fspp, db_malloc)
 		for (len = 0,
 		    mfp = SH_TAILQ_FIRST(&dbmp->mp->mpfq, __mpoolfile);
 		    mfp != NULL;
-		    ++len, mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile));
+		    ++len, mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile))
+			;
 
 		UNLOCKREGION(dbmp);
 
@@ -148,174 +151,118 @@ __memp_fns(dbmp, mfp)
 	return ((char *)R_ADDR(dbmp, mfp->path_off));
 }
 
+#define	FMAP_ENTRIES	200			/* Files we map. */
+
+#define	MPOOL_DUMP_HASH	0x01			/* Debug hash chains. */
+#define	MPOOL_DUMP_LRU	0x02			/* Debug LRU chains. */
+#define	MPOOL_DUMP_MEM	0x04			/* Debug region memory. */
+#define	MPOOL_DUMP_ALL	0x07			/* Debug all. */
+
+
 /*
- * __memp_debug --
+ * __memp_dump_region --
  *	Display MPOOL structures.
  *
- * PUBLIC: void __memp_debug __P((DB_MPOOL *, FILE *, int));
+ * PUBLIC: void __memp_dump_region __P((DB_MPOOL *, char *, FILE *));
  */
 void
-__memp_debug(dbmp, fp, data)
+__memp_dump_region(dbmp, area, fp)
 	DB_MPOOL *dbmp;
+	char *area;
 	FILE *fp;
-	int data;
 {
+	BH *bhp;
+	DB_HASHTAB *htabp;
 	DB_MPOOLFILE *dbmfp;
-	u_long cnt;
+	MPOOL *mp;
+	MPOOLFILE *mfp;
+	size_t bucket, fmap[FMAP_ENTRIES + 1];
+	u_int32_t flags;
+	int cnt;
 
 	/* Make it easy to call from the debugger. */
 	if (fp == NULL)
 		fp = stderr;
 
-	/* Welcome message. */
-	(void)fprintf(fp, "%s\nMpool per-process (%lu) statistics\n",
-	    DB_LINE, (u_long)getpid());
-
-	if (data)
-		(void)fprintf(fp, "    fd: %d; addr %lx; maddr %lx\n",
-		    dbmp->fd, (u_long)dbmp->addr, (u_long)dbmp->maddr);
-
-	/* Display the DB_MPOOLFILE structures. */
-	for (cnt = 0, dbmfp = TAILQ_FIRST(&dbmp->dbmfq);
-	    dbmfp != NULL; ++cnt, dbmfp = TAILQ_NEXT(dbmfp, q));
-	(void)fprintf(fp, "%lu process-local files\n", cnt);
-	for (dbmfp = TAILQ_FIRST(&dbmp->dbmfq);
-	    dbmfp != NULL; dbmfp = TAILQ_NEXT(dbmfp, q)) {
-		(void)fprintf(fp, "%s\n", __memp_fn(dbmfp));
-		__memp_pdbmf(fp, dbmfp, data);
-	}
+	for (flags = 0; *area != '\0'; ++area)
+		switch (*area) {
+		case 'A':
+			LF_SET(MPOOL_DUMP_ALL);
+			break;
+		case 'h':
+			LF_SET(MPOOL_DUMP_HASH);
+			break;
+		case 'l':
+			LF_SET(MPOOL_DUMP_LRU);
+			break;
+		case 'm':
+			LF_SET(MPOOL_DUMP_MEM);
+			break;
+		}
 
-	/* Switch to global statistics. */
-	(void)fprintf(fp, "\n%s\nMpool statistics\n", DB_LINE);
+	LOCKREGION(dbmp);
 
-	/* Display the MPOOL structure. */
-	__memp_pmp(fp, dbmp, dbmp->mp, data);
+	mp = dbmp->mp;
 
-	/* Flush in case we're debugging. */
-	(void)fflush(fp);
-}
-
-/*
- * __memp_pdbmf --
- *	Display a DB_MPOOLFILE structure.
- */
-static void
-__memp_pdbmf(fp, dbmfp, data)
-	FILE *fp;
-	DB_MPOOLFILE *dbmfp;
-	int data;
-{
-	if (!data)
-		return;
-
-	(void)fprintf(fp, "    fd: %d; %s\n",
-	    dbmfp->fd, F_ISSET(dbmfp, MP_READONLY) ? "readonly" : "read/write");
-}
-
-/*
- * __memp_pmp --
- *	Display the MPOOL structure.
- */
-static void
-__memp_pmp(fp, dbmp, mp, data)
-	FILE *fp;
-	DB_MPOOL *dbmp;
-	MPOOL *mp;
-	int data;
-{
-	BH *bhp;
-	MPOOLFILE *mfp;
-	DB_HASHTAB *htabp;
-	size_t bucket;
-	int cnt;
-	const char *sep;
-
-	(void)fprintf(fp, "references: %lu; cachesize: %lu\n",
-	    (u_long)mp->rlayout.refcnt, (u_long)mp->stat.st_cachesize);
-	(void)fprintf(fp,
-	    "    %lu pages created\n", (u_long)mp->stat.st_page_create);
-	(void)fprintf(fp,
-	    "    %lu mmap pages returned\n", (u_long)mp->stat.st_map);
-	(void)fprintf(fp, "    %lu I/O's (%lu read, %lu written)\n",
-	    (u_long)mp->stat.st_page_in + mp->stat.st_page_out,
-	    (u_long)mp->stat.st_page_in, (u_long)mp->stat.st_page_out);
-	if (mp->stat.st_cache_hit + mp->stat.st_cache_miss != 0)
-		(void)fprintf(fp,
-		    "    %.0f%% cache hit rate (%lu hit, %lu miss)\n",
-		    ((double)mp->stat.st_cache_hit /
-	    (mp->stat.st_cache_hit + mp->stat.st_cache_miss)) * 100,
-		    (u_long)mp->stat.st_cache_hit,
-		    (u_long)mp->stat.st_cache_miss);
+	/* Display MPOOL structures. */
+	(void)fprintf(fp, "%s\nPool (region addr 0x%lx, alloc addr 0x%lx)\n",
+	    DB_LINE, (u_long)dbmp->reginfo.addr, (u_long)dbmp->addr);
 
 	/* Display the MPOOLFILE structures. */
-	for (cnt = 0, mfp = SH_TAILQ_FIRST(&dbmp->mp->mpfq, __mpoolfile);
-	    mfp != NULL; ++cnt, mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile));
-	(void)fprintf(fp, "%d total files\n", cnt);
-	for (cnt = 1, mfp = SH_TAILQ_FIRST(&dbmp->mp->mpfq, __mpoolfile);
-	    mfp != NULL; ++cnt, mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile)) {
-		(void)fprintf(fp, "file %d\n", cnt);
-		__memp_pmf(fp, mfp, data);
+	cnt = 0;
+	for (mfp = SH_TAILQ_FIRST(&dbmp->mp->mpfq, __mpoolfile);
+	    mfp != NULL; mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile), ++cnt) {
+		(void)fprintf(fp, "file #%d: %s: %lu references: %s\n",
+		    cnt + 1, __memp_fns(dbmp, mfp), (u_long)mfp->ref,
+		    F_ISSET(mfp, MP_CAN_MMAP) ? "mmap" : "read/write");
+		    if (cnt < FMAP_ENTRIES)
+			fmap[cnt] = R_OFFSET(dbmp, mfp);
 	}
 
-	if (!data)
-		return;
+	/* Per-process files; mfp is NULL here, so map via dbmfp->mfp. */
+	for (dbmfp = TAILQ_FIRST(&dbmp->dbmfq);
+	    dbmfp != NULL; dbmfp = TAILQ_NEXT(dbmfp, q), ++cnt) {
+		(void)fprintf(fp, "file #%d: %s: fd: %d: per-process, %s\n",
+		    cnt + 1, __memp_fn(dbmfp), dbmfp->fd,
+		    F_ISSET(dbmfp, MP_READONLY) ? "readonly" : "read/write");
+		if (cnt < FMAP_ENTRIES)
+			fmap[cnt] = R_OFFSET(dbmp, dbmfp->mfp);
+	}
+
+	/* Terminate the map; slot FMAP_ENTRIES is reserved for this. */
+	fmap[cnt < FMAP_ENTRIES ? cnt : FMAP_ENTRIES] = INVALID;
 
 	/* Display the hash table list of BH's. */
-	(void)fprintf(fp, "%s\nHASH table of BH's (%lu buckets):\n",
-	    DB_LINE, (u_long)mp->htab_buckets);
-	(void)fprintf(fp,
-	    "longest chain searched %lu\n", (u_long)mp->stat.st_hash_longest);
-	(void)fprintf(fp, "average chain searched %lu (total/calls: %lu/%lu)\n",
-	    (u_long)mp->stat.st_hash_examined /
-	    (mp->stat.st_hash_searches ? mp->stat.st_hash_searches : 1),
-	    (u_long)mp->stat.st_hash_examined,
-	    (u_long)mp->stat.st_hash_searches);
-	for (htabp = dbmp->htab,
-	    bucket = 0; bucket < mp->htab_buckets; ++htabp, ++bucket) {
-		if (SH_TAILQ_FIRST(&dbmp->htab[bucket], __bh) != NULL)
-			(void)fprintf(fp, "%lu:\n", (u_long)bucket);
-		for (bhp = SH_TAILQ_FIRST(&dbmp->htab[bucket], __bh);
-		    bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, hq, __bh))
-			__memp_pbh(fp, dbmp, bhp, data);
+	if (LF_ISSET(MPOOL_DUMP_HASH)) {
+		(void)fprintf(fp,
+	    "%s\nBH hash table (%lu hash slots)\npageno, file, ref, address\n",
+		    DB_LINE, (u_long)mp->htab_buckets);
+		for (htabp = dbmp->htab,
+		    bucket = 0; bucket < mp->htab_buckets; ++htabp, ++bucket) {
+			if (SH_TAILQ_FIRST(&dbmp->htab[bucket], __bh) != NULL)
+				(void)fprintf(fp, "%lu:\n", (u_long)bucket);
+			for (bhp = SH_TAILQ_FIRST(&dbmp->htab[bucket], __bh);
+			    bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, hq, __bh))
+				__memp_pbh(dbmp, bhp, fmap, fp);
+		}
 	}
 
 	/* Display the LRU list of BH's. */
-	(void)fprintf(fp, "LRU list of BH's (pgno/offset):");
-	for (sep = "\n    ", bhp = SH_TAILQ_FIRST(&dbmp->mp->bhq, __bh);
-	    bhp != NULL; sep = ", ", bhp = SH_TAILQ_NEXT(bhp, q, __bh))
-		(void)fprintf(fp, "%s%lu/%lu", sep,
-		    (u_long)bhp->pgno, (u_long)R_OFFSET(dbmp, bhp));
-	(void)fprintf(fp, "\n");
-}
+	if (LF_ISSET(MPOOL_DUMP_LRU)) {
+		(void)fprintf(fp, "%s\nBH LRU list\n", DB_LINE);
+		(void)fprintf(fp, "pageno, file, ref, address\n");
+		for (bhp = SH_TAILQ_FIRST(&dbmp->mp->bhq, __bh);
+		    bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, q, __bh))
+			__memp_pbh(dbmp, bhp, fmap, fp);
+	}
 
-/*
- * __memp_pmf --
- *	Display an MPOOLFILE structure.
- */
-static void
-__memp_pmf(fp, mfp, data)
-	FILE *fp;
-	MPOOLFILE *mfp;
-	int data;
-{
-	(void)fprintf(fp, "    %lu pages created\n",
-	    (u_long)mfp->stat.st_page_create);
-	(void)fprintf(fp, "    %lu I/O's (%lu read, %lu written)\n",
-	    (u_long)mfp->stat.st_page_in + mfp->stat.st_page_out,
-	    (u_long)mfp->stat.st_page_in, (u_long)mfp->stat.st_page_out);
-	if (mfp->stat.st_cache_hit + mfp->stat.st_cache_miss != 0)
-		(void)fprintf(fp,
-		    "    %.0f%% cache hit rate (%lu hit, %lu miss)\n",
-		    ((double)mfp->stat.st_cache_hit /
-		    (mfp->stat.st_cache_hit + mfp->stat.st_cache_miss)) * 100,
-		    (u_long)mfp->stat.st_cache_hit,
-		    (u_long)mfp->stat.st_cache_miss);
-	if (!data)
-		return;
-
-	(void)fprintf(fp, "    %d references; %s; pagesize: %lu\n", mfp->ref,
-	    F_ISSET(mfp, MP_CAN_MMAP) ? "mmap" : "read/write",
-	    (u_long)mfp->stat.st_pagesize);
+	if (LF_ISSET(MPOOL_DUMP_MEM))
+		__db_shalloc_dump(dbmp->addr, fp);
+
+	UNLOCKREGION(dbmp);
+
+	/* Flush in case we're debugging. */
+	(void)fflush(fp);
 }
 
 /*
@@ -323,28 +270,37 @@ __memp_pmf(fp, mfp, data)
  *	Display a BH structure.
  */
 static void
-__memp_pbh(fp, dbmp, bhp, data)
-	FILE *fp;
+__memp_pbh(dbmp, bhp, fmap, fp)
 	DB_MPOOL *dbmp;
 	BH *bhp;
-	int data;
+	size_t *fmap;
+	FILE *fp;
 {
-	const char *sep;
-
-	if (!data)
-		return;
-
-	(void)fprintf(fp, "    BH @ %lu (mf: %lu): page %lu; ref %lu",
-	    (u_long)R_OFFSET(dbmp, bhp),
-	    (u_long)bhp->mf_offset, (u_long)bhp->pgno, (u_long)bhp->ref);
-	sep = "; ";
-	if (F_ISSET(bhp, BH_DIRTY)) {
-		(void)fprintf(fp, "%sdirty", sep);
-		sep = ", ";
-	}
-	if (F_ISSET(bhp, BH_WRITE)) {
-		(void)fprintf(fp, "%schk_write", sep);
-		sep = ", ";
-	}
+	static const FN fn[] = {
+		{ BH_CALLPGIN,	"callpgin" },
+		{ BH_DIRTY,	"dirty" },
+		{ BH_DISCARD,	"discard" },
+		{ BH_LOCKED,	"locked" },
+		{ BH_TRASH,	"trash" },
+		{ BH_WRITE,	"write" },
+		{ 0 },
+	};
+	int i;
+
+	for (i = 0; i < FMAP_ENTRIES; ++i)
+		if (fmap[i] == INVALID || fmap[i] == bhp->mf_offset)
+			break;
+
+	if (fmap[i] == INVALID)
+		(void)fprintf(fp, "  %4lu, %lu, %2lu, %lu",
+		    (u_long)bhp->pgno, (u_long)bhp->mf_offset,
+		    (u_long)bhp->ref, (u_long)R_OFFSET(dbmp, bhp));
+	else
+		(void)fprintf(fp, "  %4lu,   #%d,  %2lu, %lu",
+		    (u_long)bhp->pgno, i + 1,
+		    (u_long)bhp->ref, (u_long)R_OFFSET(dbmp, bhp));
+
+	__db_prflags(bhp->flags, fn, fp);
+
 	(void)fprintf(fp, "\n");
 }
diff --git a/db2/mp/mp_region.c b/db2/mp/mp_region.c
index c20e669749..6b92fbdad4 100644
--- a/db2/mp/mp_region.c
+++ b/db2/mp/mp_region.c
@@ -1,24 +1,20 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)mp_region.c	10.18 (Sleepycat) 11/29/97";
+static const char sccsid[] = "@(#)mp_region.c	10.30 (Sleepycat) 5/31/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
 #include <sys/types.h>
-#include <sys/stat.h>
 
 #include <errno.h>
-#include <fcntl.h>
-#include <stdlib.h>
 #include <string.h>
-#include <unistd.h>
 #endif
 
 #include "db_int.h"
@@ -86,7 +82,7 @@ alloc:	if ((ret = __db_shalloc(dbmp->addr, len, MUTEX_ALIGNMENT, &p)) == 0) {
 
 		/*
 		 * Retry as soon as we've freed up sufficient space.  If we
-		 * have to coalesce of memory to satisfy the request, don't
+		 * will have to coalesce memory to satisfy the request, don't
 		 * try until it's likely (possible?) that we'll succeed.
 		 */
 		total += fsize = __db_shsizeof(bhp);
@@ -179,18 +175,19 @@ retry:	/* Find a buffer we can flush; pure LRU. */
  *	Attach to, and optionally create, the mpool region.
  *
  * PUBLIC: int __memp_ropen
- * PUBLIC:    __P((DB_MPOOL *, const char *, size_t, int, int));
+ * PUBLIC:    __P((DB_MPOOL *, const char *, size_t, int, int, u_int32_t));
  */
 int
-__memp_ropen(dbmp, path, cachesize, mode, flags)
+__memp_ropen(dbmp, path, cachesize, mode, is_private, flags)
 	DB_MPOOL *dbmp;
 	const char *path;
 	size_t cachesize;
-	int mode, flags;
+	int mode, is_private;
+	u_int32_t flags;
 {
 	MPOOL *mp;
 	size_t rlen;
-	int fd, newregion, ret, retry_cnt;
+	int defcache, ret;
 
 	/*
 	 * Unlike other DB subsystems, mpool can't simply grow the region
@@ -204,155 +201,107 @@ __memp_ropen(dbmp, path, cachesize, mode, flags)
 	 *
 	 * Up the user's cachesize by 25% to account for our overhead.
 	 */
+	defcache = 0;
 	if (cachesize < DB_CACHESIZE_MIN)
-		if (cachesize == 0)
+		if (cachesize == 0) {
+			defcache = 1;
 			cachesize = DB_CACHESIZE_DEF;
-		else
+		} else
 			cachesize = DB_CACHESIZE_MIN;
 	rlen = cachesize + cachesize / 4;
 
-	/* Map in the region. */
-	retry_cnt = newregion = 0;
-retry:	if (LF_ISSET(DB_CREATE)) {
-		/*
-		 * If it's a private mpool, use malloc, it's a lot faster than
-		 * instantiating a region.
-		 *
-		 * XXX
-		 * If we're doing locking and don't have spinlocks for this
-		 * architecture, we'd have to instantiate the file, we need
-		 * the file descriptor for locking.  However, it should not
-		 * be possible for DB_THREAD to be set if HAVE_SPINLOCKS aren't
-		 * defined.
-		 *
-		 * XXX
-		 * HP-UX won't permit mutexes to live in anything but shared
-		 * memory.  So, instantiate the shared mpool region file on
-		 * that architecture, regardless.  If this turns out to be a
-		 * performance problem, we could use anonymous memory instead.
-		 */
-#if !defined(__hppa)
-		if (F_ISSET(dbmp, MP_ISPRIVATE))
-			if ((dbmp->maddr = __db_malloc(rlen)) == NULL)
-				ret = ENOMEM;
-			else {
-				F_SET(dbmp, MP_MALLOC);
-				ret = __db_rinit(dbmp->dbenv,
-				    dbmp->maddr, 0, rlen, 0);
-			}
-		else
-#endif
-			ret = __db_rcreate(dbmp->dbenv, DB_APP_NONE, path,
-			    DB_DEFAULT_MPOOL_FILE, mode, rlen,
-			    F_ISSET(dbmp, MP_ISPRIVATE) ? DB_TEMPORARY : 0,
-			    &fd, &dbmp->maddr);
-		if (ret == 0) {
-			/* Put the MPOOL structure first in the region. */
-			mp = dbmp->maddr;
-
-			SH_TAILQ_INIT(&mp->bhq);
-			SH_TAILQ_INIT(&mp->bhfq);
-			SH_TAILQ_INIT(&mp->mpfq);
-
-			/* Initialize the rest of the region as free space. */
-			dbmp->addr = (u_int8_t *)dbmp->maddr + sizeof(MPOOL);
-			__db_shalloc_init(dbmp->addr, rlen - sizeof(MPOOL));
-
-			/*
-			 *
-			 * Pretend that the cache will be broken up into 4K
-			 * pages, and that we want to keep it under, say, 10
-			 * pages on each chain.  This means a 256MB cache will
-			 * allocate ~6500 offset pairs.
-			 */
-			mp->htab_buckets =
-			    __db_tablesize((cachesize / (4 * 1024)) / 10);
+	/*
+	 * Map in the region.
+	 *
+	 * If it's a private mpool, use malloc, it's a lot faster than
+	 * instantiating a region.
+	 */
+	dbmp->reginfo.dbenv = dbmp->dbenv;
+	dbmp->reginfo.appname = DB_APP_NONE;
+	if (path == NULL)
+		dbmp->reginfo.path = NULL;
+	else
+		if ((dbmp->reginfo.path = __db_strdup(path)) == NULL)
+			return (ENOMEM);
+	dbmp->reginfo.file = DB_DEFAULT_MPOOL_FILE;
+	dbmp->reginfo.mode = mode;
+	dbmp->reginfo.size = rlen;
+	dbmp->reginfo.dbflags = flags;
+	dbmp->reginfo.flags = 0;
+	if (defcache)
+		F_SET(&dbmp->reginfo, REGION_SIZEDEF);
 
-			/* Allocate hash table space and initialize it. */
-			if ((ret = __db_shalloc(dbmp->addr,
-			    mp->htab_buckets * sizeof(DB_HASHTAB),
-			    0, &dbmp->htab)) != 0)
-				goto err;
-			__db_hashinit(dbmp->htab, mp->htab_buckets);
-			mp->htab = R_OFFSET(dbmp, dbmp->htab);
+	/*
+	 * If we're creating a temporary region, don't use any standard
+	 * naming.
+	 */
+	if (is_private) {
+		dbmp->reginfo.appname = DB_APP_TMP;
+		dbmp->reginfo.file = NULL;
+		F_SET(&dbmp->reginfo, REGION_PRIVATE);
+	}
 
-			ZERO_LSN(mp->lsn);
-			mp->lsn_cnt = 0;
+	if ((ret = __db_rattach(&dbmp->reginfo)) != 0) {
+		if (dbmp->reginfo.path != NULL)
+			FREES(dbmp->reginfo.path);
+		return (ret);
+	}
 
-			memset(&mp->stat, 0, sizeof(mp->stat));
-			mp->stat.st_cachesize = cachesize;
+	/*
+	 * The MPOOL structure is first in the region, the rest of the region
+	 * is free space.
+	 */
+	dbmp->mp = dbmp->reginfo.addr;
+	dbmp->addr = (u_int8_t *)dbmp->mp + sizeof(MPOOL);
 
-			mp->flags = 0;
+	/* Initialize a created region. */
+	if (F_ISSET(&dbmp->reginfo, REGION_CREATED)) {
+		mp = dbmp->mp;
+		SH_TAILQ_INIT(&mp->bhq);
+		SH_TAILQ_INIT(&mp->bhfq);
+		SH_TAILQ_INIT(&mp->mpfq);
 
-			newregion = 1;
-		} else if (ret != EEXIST)
-			return (ret);
-	}
+		__db_shalloc_init(dbmp->addr, rlen - sizeof(MPOOL));
 
-	/* If we didn't or couldn't create the region, try and join it. */
-	if (!newregion &&
-	    (ret = __db_ropen(dbmp->dbenv, DB_APP_NONE,
-	    path, DB_DEFAULT_MPOOL_FILE, 0, &fd, &dbmp->maddr)) != 0) {
 		/*
-		 * If we failed because the file wasn't available, wait a
-		 * second and try again.
+		 * Assume we want to keep the hash chains with under 10 pages
+		 * on each chain.  We don't know the pagesize in advance, and
+		 * it may differ for different files.  Use a pagesize of 1K for
+		 * the calculation -- we walk these chains a lot, they should
+		 * be short.
 		 */
-		if (ret == EAGAIN && ++retry_cnt < 3) {
-			(void)__db_sleep(1, 0);
-			goto retry;
-		}
-		return (ret);
-	}
+		mp->htab_buckets =
+		    __db_tablesize((cachesize / (1 * 1024)) / 10);
 
-	/* Set up the common pointers. */
-	dbmp->mp = dbmp->maddr;
-	dbmp->addr = (u_int8_t *)dbmp->maddr + sizeof(MPOOL);
+		/* Allocate hash table space and initialize it. */
+		if ((ret = __db_shalloc(dbmp->addr,
+		    mp->htab_buckets * sizeof(DB_HASHTAB),
+		    0, &dbmp->htab)) != 0)
+			goto err;
+		__db_hashinit(dbmp->htab, mp->htab_buckets);
+		mp->htab = R_OFFSET(dbmp, dbmp->htab);
 
-	/*
-	 * If not already locked, lock the region -- if it's a new region,
-	 * then either __db_rcreate() locked it for us or we malloc'd it
-	 * instead of creating a region, neither of which requires locking
-	 * here.
-	 */
-	if (!newregion)
-		LOCKREGION(dbmp);
+		ZERO_LSN(mp->lsn);
+		mp->lsn_cnt = 0;
 
-	/*
-	 * Get the hash table address; it's on the shared page, so we have
-	 * to lock first.
-	 */
-	dbmp->htab = R_ADDR(dbmp, dbmp->mp->htab);
+		memset(&mp->stat, 0, sizeof(mp->stat));
+		mp->stat.st_cachesize = cachesize;
 
-	dbmp->fd = fd;
+		mp->flags = 0;
+	}
 
-	/* If we locked the region, release it now. */
-	if (!F_ISSET(dbmp, MP_MALLOC))
-		UNLOCKREGION(dbmp);
-	return (0);
+	/* Get the local hash table address. */
+	dbmp->htab = R_ADDR(dbmp, dbmp->mp->htab);
 
-err:	if (fd != -1) {
-		dbmp->fd = fd;
-		(void)__memp_rclose(dbmp);
-	}
+	UNLOCKREGION(dbmp);
+	return (0);
 
-	if (newregion)
+err:	UNLOCKREGION(dbmp);
+	(void)__db_rdetach(&dbmp->reginfo);
+	if (F_ISSET(&dbmp->reginfo, REGION_CREATED))
 		(void)memp_unlink(path, 1, dbmp->dbenv);
-	return (ret);
-}
 
-/*
- * __memp_rclose --
- *	Close the mpool region.
- *
- * PUBLIC: int __memp_rclose __P((DB_MPOOL *));
- */
-int
-__memp_rclose(dbmp)
-	DB_MPOOL *dbmp;
-{
-	if (F_ISSET(dbmp, MP_MALLOC)) {
-		__db_free(dbmp->maddr);
-		return (0);
-	}
-	return (__db_rclose(dbmp->dbenv, dbmp->fd, dbmp->maddr));
+	if (dbmp->reginfo.path != NULL)
+		FREES(dbmp->reginfo.path);
+	return (ret);
 }
diff --git a/db2/mp/mp_sync.c b/db2/mp/mp_sync.c
index 6d16cf3cd4..33218eef1a 100644
--- a/db2/mp/mp_sync.c
+++ b/db2/mp/mp_sync.c
@@ -1,13 +1,13 @@
 /*-
  * See the file LICENSE for redistribution information.
  *
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  */
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)mp_sync.c	10.19 (Sleepycat) 12/3/97";
+static const char sccsid[] = "@(#)mp_sync.c	10.25 (Sleepycat) 4/26/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -15,7 +15,6 @@ static const char sccsid[] = "@(#)mp_sync.c	10.19 (Sleepycat) 12/3/97";
 
 #include <errno.h>
 #include <stdlib.h>
-#include <string.h>
 #endif
 
 #include "db_int.h"
@@ -25,6 +24,7 @@ static const char sccsid[] = "@(#)mp_sync.c	10.19 (Sleepycat) 12/3/97";
 #include "common_ext.h"
 
 static int __bhcmp __P((const void *, const void *));
+static int __memp_fsync __P((DB_MPOOLFILE *));
 
 /*
  * memp_sync --
@@ -145,7 +145,8 @@ memp_sync(dbmp, lsnp)
 				bharray[ar_cnt++] = bhp;
 			}
 		} else
-			F_CLR(bhp, BH_WRITE);
+			if (F_ISSET(bhp, BH_WRITE))
+				F_CLR(bhp, BH_WRITE);
 
 	/* If there no buffers we can write immediately, we're done. */
 	if (ar_cnt == 0) {
@@ -235,10 +236,8 @@ int
 memp_fsync(dbmfp)
 	DB_MPOOLFILE *dbmfp;
 {
-	BH *bhp, **bharray;
 	DB_MPOOL *dbmp;
-	size_t mf_offset;
-	int ar_cnt, cnt, nalloc, next, pincnt, ret, wrote;
+	int is_tmp;
 
 	dbmp = dbmfp->dbmp;
 
@@ -250,14 +249,62 @@ memp_fsync(dbmfp)
 	if (F_ISSET(dbmfp, MP_READONLY))
 		return (0);
 
-	ret = 0;
 	LOCKREGION(dbmp);
-	if (F_ISSET(dbmfp->mfp, MP_TEMP))
-		ret = 1;
+	is_tmp = F_ISSET(dbmfp->mfp, MP_TEMP);
 	UNLOCKREGION(dbmp);
-	if (ret)
+	if (is_tmp)
 		return (0);
 
+	return (__memp_fsync(dbmfp));
+}
+
+/*
+ * __mp_xxx_fd --
+ *	Return a file descriptor for DB 1.85 compatibility locking.
+ *
+ * PUBLIC: int __mp_xxx_fd __P((DB_MPOOLFILE *, int *));
+ */
+int
+__mp_xxx_fd(dbmfp, fdp)
+	DB_MPOOLFILE *dbmfp;
+	int *fdp;
+{
+	int ret;
+
+	/*
+	 * This is a truly spectacular layering violation, intended ONLY to
+	 * support compatibility for the DB 1.85 DB->fd call.
+	 *
+	 * With no descriptor yet, sync the file to disk, creating it as needed.
+	 *
+	 * We skip the MP_READONLY and MP_TEMP tests done by memp_fsync(3).
+	 * The MP_READONLY test isn't interesting because we will either
+	 * already have a file descriptor (we opened the database file for
+	 * reading) or we aren't readonly (we created the database which
+	 * requires write privileges).  The MP_TEMP test isn't interesting
+	 * because we want to write to the backing file regardless so that
+	 * we get a file descriptor to return.
+	 */
+	ret = dbmfp->fd == -1 ? __memp_fsync(dbmfp) : 0;
+	/* Hand back the descriptor; ENOENT if there is still none, else ret. */
+	return ((*fdp = dbmfp->fd) == -1 ? ENOENT : ret);
+}
+
+/*
+ * __memp_fsync --
+ *	Mpool file internal sync function.
+ */
+static int
+__memp_fsync(dbmfp)
+	DB_MPOOLFILE *dbmfp;
+{
+	BH *bhp, **bharray;
+	DB_MPOOL *dbmp;
+	size_t mf_offset;
+	int ar_cnt, cnt, nalloc, next, pincnt, ret, wrote;
+
+	ret = 0;
+	dbmp = dbmfp->dbmp;
 	mf_offset = R_OFFSET(dbmp, dbmfp->mfp);
 
 	/*
@@ -359,7 +406,6 @@ err:	UNLOCKREGION(dbmp);
 	if (ret == 0)
 		return (pincnt == 0 ? __db_fsync(dbmfp->fd) : DB_INCOMPLETE);
 	return (ret);
-
 }
 
 /*
@@ -453,8 +499,8 @@ __bhcmp(p1, p2)
 {
 	BH *bhp1, *bhp2;
 
-	bhp1 = *(BH **)p1;
-	bhp2 = *(BH **)p2;
+	bhp1 = *(BH * const *)p1;
+	bhp2 = *(BH * const *)p2;
 
 	/* Sort by file (shared memory pool offset). */
 	if (bhp1->mf_offset < bhp2->mf_offset)