summary refs log tree commit diff
path: root/db2/mp
diff options
context:
space:
mode:
Diffstat (limited to 'db2/mp')
-rw-r--r-- db2/mp/mp_bh.c 592
-rw-r--r-- db2/mp/mp_fget.c 352
-rw-r--r-- db2/mp/mp_fopen.c 560
-rw-r--r-- db2/mp/mp_fput.c 153
-rw-r--r-- db2/mp/mp_fset.c 83
-rw-r--r-- db2/mp/mp_open.c 221
-rw-r--r-- db2/mp/mp_pr.c 304
-rw-r--r-- db2/mp/mp_region.c 330
-rw-r--r-- db2/mp/mp_sync.c 549
9 files changed, 0 insertions, 3144 deletions
diff --git a/db2/mp/mp_bh.c b/db2/mp/mp_bh.c
deleted file mode 100644
index 12c53417d9..0000000000
--- a/db2/mp/mp_bh.c
+++ /dev/null
@@ -1,592 +0,0 @@
-/*-
- * See the file LICENSE for redistribution information.
- *
- * Copyright (c) 1996, 1997, 1998
- *	Sleepycat Software.  All rights reserved.
- */
-#include "config.h"
-
-#ifndef lint
-static const char sccsid[] = "@(#)mp_bh.c	10.45 (Sleepycat) 11/25/98";
-#endif /* not lint */
-
-#ifndef NO_SYSTEM_INCLUDES
-#include <sys/types.h>
-
-#include <errno.h>
-#include <string.h>
-#include <unistd.h>
-#endif
-
-#include "db_int.h"
-#include "shqueue.h"
-#include "db_shash.h"
-#include "mp.h"
-#include "common_ext.h"
-
-static int __memp_upgrade __P((DB_MPOOL *, DB_MPOOLFILE *, MPOOLFILE *));
-
-/*
- * __memp_bhwrite --
- *	Flush the page owned by a buffer header, first locating (or opening)
- *	a handle in this process for the file the page belongs to.
- *
- *	restartp and wrotep are optional; each is zeroed on entry and then
- *	handed on to __memp_pgwrite.  Returns 0 when no usable handle could
- *	be obtained (the page is simply not written), otherwise whatever
- *	__memp_pgwrite returns.
- *
- * PUBLIC: int __memp_bhwrite
- * PUBLIC:     __P((DB_MPOOL *, MPOOLFILE *, BH *, int *, int *));
- */
-int
-__memp_bhwrite(dbmp, mfp, bhp, restartp, wrotep)
-	DB_MPOOL *dbmp;
-	MPOOLFILE *mfp;
-	BH *bhp;
-	int *restartp, *wrotep;
-{
-	DB_MPOOLFILE *dbmfp;
-	DB_MPREG *mpreg;
-	int pinned, ret;
-
-	pinned = 0;
-	if (wrotep != NULL)
-		*wrotep = 0;
-	if (restartp != NULL)
-		*restartp = 0;
-
-	/*
-	 * Look through this process' DB_MPOOLFILE list for a handle on the
-	 * file.  A handle that was opened read-only has to be upgraded to
-	 * read/write before it is of any use; if the upgrade fails there is
-	 * nothing more we can do.
-	 */
-	LOCKHANDLE(dbmp, dbmp->mutexp);
-	dbmfp = TAILQ_FIRST(&dbmp->dbmfq);
-	while (dbmfp != NULL && dbmfp->mfp != mfp)
-		dbmfp = TAILQ_NEXT(dbmfp, q);
-	if (dbmfp != NULL) {
-		if (F_ISSET(dbmfp, MP_READONLY) &&
-		    __memp_upgrade(dbmp, dbmfp, mfp)) {
-			UNLOCKHANDLE(dbmp, dbmp->mutexp);
-			return (0);
-		}
-
-		/*
-		 * Pin the handle by bumping its reference count -- see the
-		 * comment in memp_fclose().
-		 */
-		++dbmfp->ref;
-		pinned = 1;
-	}
-	UNLOCKHANDLE(dbmp, dbmp->mutexp);
-
-	if (dbmfp == NULL) {
-		/*
-		 * The page is from a file this process never opened.  When
-		 * the file needs input/output processing, the process must
-		 * have registered functions for that file type, otherwise
-		 * we cannot write it.
-		 */
-		if (mfp->ftype != 0) {
-			LOCKHANDLE(dbmp, dbmp->mutexp);
-			mpreg = LIST_FIRST(&dbmp->dbregq);
-			while (mpreg != NULL && mpreg->ftype != mfp->ftype)
-				mpreg = LIST_NEXT(mpreg, q);
-			UNLOCKHANDLE(dbmp, dbmp->mutexp);
-			if (mpreg == NULL)
-				return (0);
-		}
-
-		/*
-		 * XXX
-		 * Never attach to temporary files.  If our privileges differ
-		 * from those of the owning process we could create a backing
-		 * file the owner cannot read or write (e.g., memp_trickle()
-		 * running as root creating a root-owned, mode 600 file).  And
-		 * if the file already exists we cannot learn its real name,
-		 * which has been unlinked anyway so that nothing is left
-		 * behind if the process dies horribly.  The cost: should a
-		 * temporary file fill the whole buffer cache while its owner
-		 * never flushes, memp_trickle() cannot relieve the resulting
-		 * starvation.  That's a pretty unlikely scenario, though.
-		 */
-		if (F_ISSET(mfp, MP_TEMP))
-			return (0);
-
-		/*
-		 * Open the file, attaching to the underlying shared area.
-		 *
-		 * XXX
-		 * There's no negative cache, so a file we already failed to
-		 * open may be tried again and again.
-		 *
-		 * Any error is ignored and assumed to be a permissions
-		 * problem.
-		 */
-		if (__memp_fopen(dbmp, mfp, R_ADDR(dbmp, mfp->path_off),
-		    0, 0, mfp->stat.st_pagesize, 0, NULL, &dbmfp) != 0)
-			return (0);
-	}
-
-	ret = __memp_pgwrite(dbmfp, bhp, restartp, wrotep);
-
-	/* Drop the pin taken on a handle found in the list. */
-	if (pinned) {
-		LOCKHANDLE(dbmp, dbmp->mutexp);
-		--dbmfp->ref;
-		UNLOCKHANDLE(dbmp, dbmp->mutexp);
-	}
-
-	return (ret);
-}
-
-/*
- * __memp_pgread --
- *	Read a page from a file.
- *
- *	dbmfp:      per-process file handle; supplies the descriptor, the
- *	            pool handle (dbmfp->dbmp) and the shared file (dbmfp->mfp).
- *	bhp:        buffer header whose buf receives page bhp->pgno.
- *	can_create: if non-zero, a short or failed read is treated as a page
- *	            that does not exist yet rather than as an error.
- *
- *	The region lock is dropped (UNLOCKREGION) while the I/O runs and is
- *	retaken (LOCKREGION) before returning; meanwhile the buffer is flagged
- *	BH_LOCKED and its buffer lock is held.
- *
- *	Returns 0 on success.  For a short read without can_create it returns
- *	the __os_io error, or EIO if __os_io reported none.  Otherwise it
- *	returns whatever __memp_pg returns from the pgin call.
- *
- * PUBLIC: int __memp_pgread __P((DB_MPOOLFILE *, BH *, int));
- */
-int
-__memp_pgread(dbmfp, bhp, can_create)
-	DB_MPOOLFILE *dbmfp;
-	BH *bhp;
-	int can_create;
-{
-	DB_IO db_io;
-	DB_MPOOL *dbmp;
-	MPOOLFILE *mfp;
-	size_t len, pagesize;
-	ssize_t nr;
-	int created, ret;
-
-	dbmp = dbmfp->dbmp;
-	mfp = dbmfp->mfp;
-	pagesize = mfp->stat.st_pagesize;
-
-	F_SET(bhp, BH_LOCKED | BH_TRASH);
-	LOCKBUFFER(dbmp, bhp);
-	UNLOCKREGION(dbmp);
-
-	/*
-	 * Temporary files may not yet have been created.  We don't create
-	 * them now, we create them when the pages have to be flushed.
-	 * With no descriptor nothing is read: nr stays 0, so the page is
-	 * handled below exactly like a short read.
-	 */
-	nr = 0;
-	if (dbmfp->fd == -1)
-		ret = 0;
-	else {
-		/*
-		 * Ignore read errors if we have permission to create the page.
-		 * Assume that the page doesn't exist, and that we'll create it
-		 * when we write it out.
-		 */
-		db_io.fd_io = dbmfp->fd;
-		db_io.fd_lock = dbmp->reginfo.fd;
-		db_io.mutexp =
-		    F_ISSET(dbmp, MP_LOCKHANDLE) ? dbmfp->mutexp : NULL;
-		db_io.pagesize = db_io.bytes = pagesize;
-		db_io.pgno = bhp->pgno;
-		db_io.buf = bhp->buf;
-
-		ret = __os_io(&db_io, DB_IO_READ, &nr);
-	}
-
-	created = 0;
-	if (nr < (ssize_t)pagesize) {
-		if (can_create)
-			created = 1;
-		else {
-			/* If we had a short read, ret may be 0. */
-			if (ret == 0)
-				ret = EIO;
-			__db_err(dbmp->dbenv,
-			    "%s: page %lu doesn't exist, create flag not set",
-			    __memp_fn(dbmfp), (u_long)bhp->pgno);
-			goto err;
-		}
-	}
-
-	/*
-	 * Clear any bytes we didn't read that need to be cleared.  If we're
-	 * running in diagnostic mode, smash any bytes on the page that are
-	 * unknown quantities for the caller.
-	 *
-	 * A clear_len of 0 means the whole page is cleared; otherwise only
-	 * the first clear_len bytes are.
-	 */
-	if (nr != (ssize_t)pagesize) {
-		len = mfp->clear_len == 0 ? pagesize : mfp->clear_len;
-		if (nr < (ssize_t)len)
-			memset(bhp->buf + nr, 0, len - nr);
-#ifdef DIAGNOSTIC
-		if (nr > (ssize_t)len)
-			len = nr;
-		if (len < pagesize)
-			memset(bhp->buf + len, 0xdb, pagesize - len);
-#endif
-	}
-
-	/*
-	 * Call any pgin function.  This assignment overwrites ret, so a read
-	 * error that was tolerated because of can_create is dropped here.
-	 */
-	ret = mfp->ftype == 0 ? 0 : __memp_pg(dbmfp, bhp, 1);
-
-	/* Unlock the buffer and reacquire the region lock. */
-err:	UNLOCKBUFFER(dbmp, bhp);
-	LOCKREGION(dbmp);
-
-	/*
-	 * If no errors occurred, the data is now valid, clear the BH_TRASH
-	 * flag; regardless, clear the lock bit and let other threads proceed.
-	 */
-	F_CLR(bhp, BH_LOCKED);
-	if (ret == 0) {
-		F_CLR(bhp, BH_TRASH);
-
-		/* Update the statistics. */
-		if (created) {
-			++dbmp->mp->stat.st_page_create;
-			++mfp->stat.st_page_create;
-		} else {
-			++dbmp->mp->stat.st_page_in;
-			++mfp->stat.st_page_in;
-		}
-	}
-
-	return (ret);
-}
-
-/*
- * __memp_pgwrite --
- *	Write a page to a file.
- *
- *	dbmfp:    per-process file handle for the file being written.
- *	bhp:      buffer header of the page to write.
- *	restartp: if non-NULL, set to 1 once the region lock has been dropped
- *	          (UNLOCKREGION), and left at 0 otherwise.
- *	wrotep:   if non-NULL, set to 1 when the page was written or was found
- *	          to be clean already, and left at 0 otherwise.
- *
- *	Returns 0 when the buffer was already clean or the write succeeded;
- *	otherwise the error from log_flush, __memp_pg, __db_appname or
- *	__os_io, or EIO for a short write.
- *
- * PUBLIC: int __memp_pgwrite __P((DB_MPOOLFILE *, BH *, int *, int *));
- */
-int
-__memp_pgwrite(dbmfp, bhp, restartp, wrotep)
-	DB_MPOOLFILE *dbmfp;
-	BH *bhp;
-	int *restartp, *wrotep;
-{
-	DB_ENV *dbenv;
-	DB_IO db_io;
-	DB_LOG *lg_info;
-	DB_LSN lsn;
-	DB_MPOOL *dbmp;
-	MPOOL *mp;
-	MPOOLFILE *mfp;
-	ssize_t nw;
-	int callpgin, ret, syncfail;
-	const char *fail;
-
-	dbmp = dbmfp->dbmp;
-	dbenv = dbmp->dbenv;
-	mp = dbmp->mp;
-	mfp = dbmfp->mfp;
-
-	if (restartp != NULL)
-		*restartp = 0;
-	if (wrotep != NULL)
-		*wrotep = 0;
-	callpgin = 0;
-
-	/*
-	 * Check the dirty bit -- this buffer may have been written since we
-	 * decided to write it.
-	 */
-	if (!F_ISSET(bhp, BH_DIRTY)) {
-		if (wrotep != NULL)
-			*wrotep = 1;
-		return (0);
-	}
-
-	LOCKBUFFER(dbmp, bhp);
-
-	/*
-	 * If there were two writers, we may have just been waiting while the
-	 * other writer completed I/O on this buffer.  Check the dirty bit one
-	 * more time.
-	 */
-	if (!F_ISSET(bhp, BH_DIRTY)) {
-		UNLOCKBUFFER(dbmp, bhp);
-
-		if (wrotep != NULL)
-			*wrotep = 1;
-		return (0);
-	}
-
-	F_SET(bhp, BH_LOCKED);
-	UNLOCKREGION(dbmp);
-
-	if (restartp != NULL)
-		*restartp = 1;
-
-	/* Copy the LSN off the page if we're going to need it. */
-	lg_info = dbenv->lg_info;
-	if (lg_info != NULL || F_ISSET(bhp, BH_WRITE))
-		memcpy(&lsn, bhp->buf + mfp->lsn_off, sizeof(DB_LSN));
-
-	/* Ensure the appropriate log records are on disk. */
-	if (lg_info != NULL && (ret = log_flush(lg_info, &lsn)) != 0)
-		goto err;
-
-	/*
-	 * Call any pgout function.  We set the callpgin flag so that we flag
-	 * that the contents of the buffer will need to be passed through pgin
-	 * before they are reused.
-	 */
-	if (mfp->ftype == 0)
-		ret = 0;
-	else {
-		callpgin = 1;
-		if ((ret = __memp_pg(dbmfp, bhp, 0)) != 0)
-			goto err;
-	}
-
-	/*
-	 * Temporary files may not yet have been created.  The descriptor is
-	 * tested again under the handle lock in case another thread created
-	 * the file in the meantime.
-	 *
-	 * NOTE(review): if __db_appname returns 0 but leaves the descriptor
-	 * at -1, control reaches err with ret == 0, so the function returns 0
-	 * without having written the page (only *wrotep == 0 shows it) --
-	 * confirm that callers check wrotep.
-	 */
-	if (dbmfp->fd == -1) {
-		LOCKHANDLE(dbmp, dbmfp->mutexp);
-		if (dbmfp->fd == -1 && ((ret = __db_appname(dbenv,
-		    DB_APP_TMP, NULL, NULL, DB_CREATE | DB_EXCL | DB_TEMPORARY,
-		    &dbmfp->fd, NULL)) != 0 || dbmfp->fd == -1)) {
-			UNLOCKHANDLE(dbmp, dbmfp->mutexp);
-			__db_err(dbenv,
-			    "unable to create temporary backing file");
-			goto err;
-		}
-		UNLOCKHANDLE(dbmp, dbmfp->mutexp);
-	}
-
-	/*
-	 * Write the page.  An error return from __os_io is passed to
-	 * __db_panic before it is reported; a short write is reported as EIO
-	 * without the panic call.
-	 */
-	db_io.fd_io = dbmfp->fd;
-	db_io.fd_lock = dbmp->reginfo.fd;
-	db_io.mutexp = F_ISSET(dbmp, MP_LOCKHANDLE) ? dbmfp->mutexp : NULL;
-	db_io.pagesize = db_io.bytes = mfp->stat.st_pagesize;
-	db_io.pgno = bhp->pgno;
-	db_io.buf = bhp->buf;
-	if ((ret = __os_io(&db_io, DB_IO_WRITE, &nw)) != 0) {
-		__db_panic(dbenv, ret);
-		fail = "write";
-		goto syserr;
-	}
-	if (nw != (ssize_t)mfp->stat.st_pagesize) {
-		ret = EIO;
-		fail = "write";
-		goto syserr;
-	}
-
-	if (wrotep != NULL)
-		*wrotep = 1;
-
-	/* Unlock the buffer and reacquire the region lock. */
-	UNLOCKBUFFER(dbmp, bhp);
-	LOCKREGION(dbmp);
-
-	/*
-	 * Clean up the flags based on a successful write.
-	 *
-	 * If we rewrote the page, it will need processing by the pgin
-	 * routine before reuse.
-	 */
-	if (callpgin)
-		F_SET(bhp, BH_CALLPGIN);
-	F_CLR(bhp, BH_DIRTY | BH_LOCKED);
-
-	/*
-	 * If we write a buffer for which a checkpoint is waiting, update
-	 * the count of pending buffers (both in the mpool as a whole and
-	 * for this file).  If the count for this file goes to zero, flush
-	 * the writes.
-	 *
-	 * XXX:
-	 * Don't lock the region around the sync, fsync(2) has no atomicity
-	 * issues.
-	 *
-	 * XXX:
-	 * We ignore errors from the sync -- it makes no sense to return an
-	 * error to the calling process, so set a flag causing the checkpoint
-	 * to be retried later.
-	 */
-	if (F_ISSET(bhp, BH_WRITE)) {
-		if (mfp->lsn_cnt == 1) {
-			UNLOCKREGION(dbmp);
-			syncfail = __os_fsync(dbmfp->fd) != 0;
-			LOCKREGION(dbmp);
-			if (syncfail)
-				F_SET(mp, MP_LSN_RETRY);
-
-		}
-
-		F_CLR(bhp, BH_WRITE);
-
-		/*
-		 * If the buffer just written has a larger LSN than the current
-		 * max LSN written for this checkpoint, update the saved value.
-		 */
-		if (log_compare(&lsn, &mp->lsn) > 0)
-			mp->lsn = lsn;
-
-		--mp->lsn_cnt;
-		--mfp->lsn_cnt;
-	}
-
-	/* Update the page clean/dirty statistics. */
-	++mp->stat.st_page_clean;
-	--mp->stat.st_page_dirty;
-
-	/* Update I/O statistics. */
-	++mp->stat.st_page_out;
-	++mfp->stat.st_page_out;
-
-	return (0);
-
-syserr:	__db_err(dbenv, "%s: %s failed for page %lu",
-	    __memp_fn(dbmfp), fail, (u_long)bhp->pgno);
-
-err:	/* Unlock the buffer and reacquire the region lock. */
-	UNLOCKBUFFER(dbmp, bhp);
-	LOCKREGION(dbmp);
-
-	/*
-	 * Clean up the flags based on a failure.
-	 *
-	 * The page remains dirty but we remove our lock.  If we rewrote the
-	 * page, it will need processing by the pgin routine before reuse.
-	 */
-	if (callpgin)
-		F_SET(bhp, BH_CALLPGIN);
-	F_CLR(bhp, BH_LOCKED);
-
-	return (ret);
-}
-
-/*
- * __memp_pg --
- *	Call the pgin/pgout routine.
- *
- *	Searches the process' registration list (dbmp->dbregq) for the entry
- *	whose ftype matches the file's ftype and calls its pgin function
- *	(is_pgin != 0) or its pgout function (is_pgin == 0) on the buffer,
- *	passing the file's pgcookie as a DBT, or NULL if the cookie is empty.
- *
- *	Returns 0 if no matching entry or function is registered or if the
- *	function succeeds; otherwise logs the failure and returns the
- *	function's non-zero result.
- *
- * PUBLIC: int __memp_pg __P((DB_MPOOLFILE *, BH *, int));
- */
-int
-__memp_pg(dbmfp, bhp, is_pgin)
-	DB_MPOOLFILE *dbmfp;
-	BH *bhp;
-	int is_pgin;
-{
-	DBT dbt, *dbtp;
-	DB_MPOOL *dbmp;
-	DB_MPREG *mpreg;
-	MPOOLFILE *mfp;
-	int ftype, ret;
-
-	dbmp = dbmfp->dbmp;
-	mfp = dbmfp->mfp;
-
-	LOCKHANDLE(dbmp, dbmp->mutexp);
-
-	ftype = mfp->ftype;
-	for (mpreg = LIST_FIRST(&dbmp->dbregq);
-	    mpreg != NULL; mpreg = LIST_NEXT(mpreg, q)) {
-		if (ftype != mpreg->ftype)
-			continue;
-		if (mfp->pgcookie_len == 0)
-			dbtp = NULL;
-		else {
-			dbt.size = mfp->pgcookie_len;
-			dbt.data = R_ADDR(dbmp, mfp->pgcookie_off);
-			dbtp = &dbt;
-		}
-
-		/* The handle lock is not held while the callback runs. */
-		UNLOCKHANDLE(dbmp, dbmp->mutexp);
-
-		if (is_pgin) {
-			if (mpreg->pgin != NULL && (ret =
-			    mpreg->pgin(bhp->pgno, bhp->buf, dbtp)) != 0)
-				goto err;
-		} else
-			if (mpreg->pgout != NULL && (ret =
-			    mpreg->pgout(bhp->pgno, bhp->buf, dbtp)) != 0)
-				goto err;
-		break;
-	}
-
-	/* No matching registration: the lock taken above is still held. */
-	if (mpreg == NULL)
-		UNLOCKHANDLE(dbmp, dbmp->mutexp);
-
-	return (0);
-
-	/*
-	 * Every jump to err happens after the handle lock was released inside
-	 * the loop, so it must not be released a second time here.
-	 */
-err:	__db_err(dbmp->dbenv, "%s: %s failed for page %lu",
-	    __memp_fn(dbmfp), is_pgin ? "pgin" : "pgout", (u_long)bhp->pgno);
-	return (ret);
-}
-
-/*
- * __memp_bhfree --
- *	Unlink a bucket header from the pool's queues and, when asked to,
- *	release the memory of the header and the data it references.
- *
- * PUBLIC: void __memp_bhfree __P((DB_MPOOL *, MPOOLFILE *, BH *, int));
- */
-void
-__memp_bhfree(dbmp, mfp, bhp, free_mem)
-	DB_MPOOL *dbmp;
-	MPOOLFILE *mfp;
-	BH *bhp;
-	int free_mem;
-{
-	size_t bucket;
-
-	/* Take the header off its hash bucket queue, then off the LRU queue. */
-	bucket = BUCKET(dbmp->mp, R_OFFSET(dbmp, mfp), bhp->pgno);
-	SH_TAILQ_REMOVE(&dbmp->htab[bucket], bhp, hq, __bh);
-	SH_TAILQ_REMOVE(&dbmp->mp->bhq, bhp, q, __bh);
-
-	/* The caller is about to reuse the header: leave the memory alone. */
-	if (!free_mem)
-		return;
-
-	/* Really free the header and data, and account for the lost page. */
-	__db_shalloc_free(dbmp->addr, bhp);
-	--dbmp->mp->stat.st_page_clean;
-}
-
-/*
- * __memp_upgrade --
- *	Replace a read-only file descriptor with a read/write one.
- *
- *	Returns 0 if the handle is (or already was) upgraded, and non-zero
- *	if the upgrade failed now or on an earlier attempt.
- */
-static int
-__memp_upgrade(dbmp, dbmfp, mfp)
-	DB_MPOOL *dbmp;
-	DB_MPOOLFILE *dbmfp;
-	MPOOLFILE *mfp;
-{
-	int newfd, ret;
-	char *fullpath;
-
-	/*
-	 * !!!
-	 * The caller is expected to hold the handle lock already.
-	 */
-
-	/* An earlier call settled the question one way or the other. */
-	if (F_ISSET(dbmfp, MP_UPGRADE))
-		return (0);
-	if (F_ISSET(dbmfp, MP_UPGRADE_FAIL))
-		return (1);
-
-	/*
-	 * Build the real name of the file.  The pathname has to be valid,
-	 * since having one is the only way a descriptor of any kind could
-	 * have been obtained for the file.
-	 */
-	ret = __db_appname(dbmp->dbenv, DB_APP_DATA,
-	    NULL, R_ADDR(dbmp, mfp->path_off), 0, NULL, &fullpath);
-	if (ret != 0)
-		return (ret);
-
-	/* Try to open it read/write and remember how that went. */
-	if (__db_open(fullpath, 0, 0, 0, &newfd) == 0) {
-		/* Swap in the new descriptor and discard the old one. */
-		(void)__os_close(dbmfp->fd);
-		dbmfp->fd = newfd;
-		F_SET(dbmfp, MP_UPGRADE);
-		ret = 0;
-	} else {
-		F_SET(dbmfp, MP_UPGRADE_FAIL);
-		ret = 1;
-	}
-
-	__os_freestr(fullpath);
-	return (ret);
-}
diff --git a/db2/mp/mp_fget.c b/db2/mp/mp_fget.c
deleted file mode 100644
index f159dc2d3e..0000000000
--- a/db2/mp/mp_fget.c
+++ /dev/null
@@ -1,352 +0,0 @@
-/*-
- * See the file LICENSE for redistribution information.
- *
- * Copyright (c) 1996, 1997, 1998
- *	Sleepycat Software.  All rights reserved.
- */
-#include "config.h"
-
-#ifndef lint
-static const char sccsid[] = "@(#)mp_fget.c	10.53 (Sleepycat) 11/16/98";
-#endif /* not lint */
-
-#ifndef NO_SYSTEM_INCLUDES
-#include <sys/types.h>
-
-#include <errno.h>
-#include <string.h>
-#endif
-
-#include "db_int.h"
-#include "shqueue.h"
-#include "db_shash.h"
-#include "mp.h"
-#include "common_ext.h"
-
-/*
- * memp_fget --
- *	Get a page from the file.
- *
- *	dbmfp:    handle of the file the page belongs to.
- *	pgnoaddr: page number wanted; overwritten with the page number
- *	          actually returned when DB_MPOOL_LAST or DB_MPOOL_NEW is set.
- *	flags:    0, or exactly one of DB_MPOOL_CREATE, DB_MPOOL_LAST and
- *	          DB_MPOOL_NEW.
- *	addrp:    address of a pointer that receives the page's address; it
- *	          is set to NULL on any failure after argument validation.
- *
- *	Returns 0 on success and a non-zero error value otherwise.  On
- *	success the handle's pinref count is incremented.
- */
-int
-memp_fget(dbmfp, pgnoaddr, flags, addrp)
-	DB_MPOOLFILE *dbmfp;
-	db_pgno_t *pgnoaddr;
-	u_int32_t flags;
-	void *addrp;
-{
-	BH *bhp;
-	DB_MPOOL *dbmp;
-	MPOOL *mp;
-	MPOOLFILE *mfp;
-	size_t bucket, mf_offset;
-	u_int32_t st_hsearch;
-	int b_incr, first, ret;
-
-	dbmp = dbmfp->dbmp;
-	mp = dbmp->mp;
-	mfp = dbmfp->mfp;
-
-	MP_PANIC_CHECK(dbmp);
-
-	/*
-	 * Validate arguments.
-	 *
-	 * !!!
-	 * Don't test for DB_MPOOL_CREATE and DB_MPOOL_NEW flags for readonly
-	 * files here, and create non-existent pages in readonly files if the
-	 * flags are set, later.  The reason is that the hash access method
-	 * wants to get empty pages that don't really exist in readonly files.
-	 * The only alternative is for hash to write the last "bucket" all the
-	 * time, which we don't want to do because one of our big goals in life
-	 * is to keep database files small.  It's sleazy as hell, but we catch
-	 * any attempt to actually write the file in memp_fput().
-	 */
-#define	OKFLAGS	(DB_MPOOL_CREATE | DB_MPOOL_LAST | DB_MPOOL_NEW)
-	if (flags != 0) {
-		if ((ret =
-		    __db_fchk(dbmp->dbenv, "memp_fget", flags, OKFLAGS)) != 0)
-			return (ret);
-
-		/* The flags are mutually exclusive. */
-		switch (flags) {
-		case DB_MPOOL_CREATE:
-		case DB_MPOOL_LAST:
-		case DB_MPOOL_NEW:
-		case 0:
-			break;
-		default:
-			return (__db_ferr(dbmp->dbenv, "memp_fget", 1));
-		}
-	}
-
-#ifdef DIAGNOSTIC
-	/*
-	 * XXX
-	 * We want to switch threads as often as possible.  Yield every time
-	 * we get a new page to ensure contention.
-	 */
-	if (DB_GLOBAL(db_pageyield))
-		__os_yield(1);
-#endif
-
-	/* Initialize remaining local variables. */
-	mf_offset = R_OFFSET(dbmp, mfp);
-	bhp = NULL;
-	st_hsearch = 0;
-	b_incr = ret = 0;
-
-	/* Determine the hash bucket where this page will live. */
-	bucket = BUCKET(mp, mf_offset, *pgnoaddr);
-
-	LOCKREGION(dbmp);
-
-	/*
-	 * Check for the last or last + 1 page requests.
-	 *
-	 * Examine and update the file's last_pgno value.  We don't care if
-	 * the last_pgno value immediately changes due to another thread --
-	 * at this instant in time, the value is correct.  We do increment the
-	 * current last_pgno value if the thread is asking for a new page,
-	 * however, to ensure that two threads creating pages don't get the
-	 * same one.
-	 */
-	if (LF_ISSET(DB_MPOOL_LAST | DB_MPOOL_NEW)) {
-		if (LF_ISSET(DB_MPOOL_NEW))
-			++mfp->last_pgno;
-		*pgnoaddr = mfp->last_pgno;
-		bucket = BUCKET(mp, mf_offset, mfp->last_pgno);
-
-		if (LF_ISSET(DB_MPOOL_NEW))
-			goto alloc;
-	}
-
-	/*
-	 * If mmap'ing the file and the page is not past the end of the file,
-	 * just return a pointer.
-	 *
-	 * The page may be past the end of the file, so check the page number
-	 * argument against the original length of the file.  If we previously
-	 * returned pages past the original end of the file, last_pgno will
-	 * have been updated to match the "new" end of the file, and checking
-	 * against it would return pointers past the end of the mmap'd region.
-	 *
-	 * If another process has opened the file for writing since we mmap'd
-	 * it, we will start playing the game by their rules, i.e. everything
-	 * goes through the cache.  All pages previously returned will be safe,
-	 * as long as the correct locking protocol was observed.
-	 *
-	 * XXX
-	 * We don't discard the map because we don't know when all of the
-	 * pages will have been discarded from the process' address space.
-	 * It would be possible to do so by reference counting the open
-	 * pages from the mmap, but it's unclear to me that it's worth it.
-	 */
-	if (dbmfp->addr != NULL && F_ISSET(mfp, MP_CAN_MMAP)) {
-		if (*pgnoaddr > mfp->orig_last_pgno) {
-			/*
-			 * !!!
-			 * See the comment above about non-existent pages and
-			 * the hash access method.
-			 */
-			if (!LF_ISSET(DB_MPOOL_CREATE)) {
-				__db_err(dbmp->dbenv,
-				    "%s: page %lu doesn't exist",
-				    __memp_fn(dbmfp), (u_long)*pgnoaddr);
-				ret = EINVAL;
-				goto err;
-			}
-		} else {
-			*(void **)addrp =
-			    R_ADDR(dbmfp, *pgnoaddr * mfp->stat.st_pagesize);
-			++mp->stat.st_map;
-			++mfp->stat.st_map;
-			goto done;
-		}
-	}
-
-	/* Search the hash chain for the page. */
-	for (bhp = SH_TAILQ_FIRST(&dbmp->htab[bucket], __bh);
-	    bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, hq, __bh)) {
-		++st_hsearch;
-		if (bhp->pgno != *pgnoaddr || bhp->mf_offset != mf_offset)
-			continue;
-
-		/* Refuse to overflow the reference count. */
-		if (bhp->ref == UINT16_T_MAX) {
-			__db_err(dbmp->dbenv,
-			    "%s: page %lu: reference count overflow",
-			    __memp_fn(dbmfp), (u_long)bhp->pgno);
-			ret = EINVAL;
-			goto err;
-		}
-
-		/*
-		 * Increment the reference count.  We may discard the region
-		 * lock as we evaluate and/or read the buffer, so we need to
-		 * ensure that it doesn't move and that its contents remain
-		 * unchanged.
-		 */
-		++bhp->ref;
-		b_incr = 1;
-
-		/*
-		 * Any buffer we find might be trouble.
-		 *
-		 * BH_LOCKED --
-		 * I/O is in progress.  Because we've incremented the buffer
-		 * reference count, we know the buffer can't move.  Unlock
-		 * the region lock, wait for the I/O to complete, and reacquire
-		 * the region.
-		 */
-		for (first = 1; F_ISSET(bhp, BH_LOCKED); first = 0) {
-			UNLOCKREGION(dbmp);
-
-			/*
-			 * Explicitly yield the processor if it's not the first
-			 * pass through this loop -- if we don't, we might end
-			 * up running to the end of our CPU quantum as we will
-			 * simply be swapping between the two locks.
-			 */
-			if (!first)
-				__os_yield(1);
-
-			LOCKBUFFER(dbmp, bhp);
-			/* Wait for I/O to finish... */
-			UNLOCKBUFFER(dbmp, bhp);
-			LOCKREGION(dbmp);
-		}
-
-		/*
-		 * BH_TRASH --
-		 * The contents of the buffer are garbage.  Shouldn't happen,
-		 * and this read is likely to fail, but might as well try.
-		 */
-		if (F_ISSET(bhp, BH_TRASH))
-			goto reread;
-
-		/*
-		 * BH_CALLPGIN --
-		 * The buffer was converted so it could be written, and the
-		 * contents need to be converted again.
-		 */
-		if (F_ISSET(bhp, BH_CALLPGIN)) {
-			if ((ret = __memp_pg(dbmfp, bhp, 1)) != 0)
-				goto err;
-			F_CLR(bhp, BH_CALLPGIN);
-		}
-
-		++mp->stat.st_cache_hit;
-		++mfp->stat.st_cache_hit;
-		*(void **)addrp = bhp->buf;
-		goto done;
-	}
-
-alloc:	/* Allocate new buffer header and data space. */
-	if ((ret = __memp_alloc(dbmp, sizeof(BH) -
-	    sizeof(u_int8_t) + mfp->stat.st_pagesize, NULL, &bhp)) != 0)
-		goto err;
-
-#ifdef DIAGNOSTIC
-	if ((ALIGNTYPE)bhp->buf & (sizeof(size_t) - 1)) {
-		__db_err(dbmp->dbenv,
-		    "Internal error: BH data NOT size_t aligned.");
-		ret = EINVAL;
-		goto err;
-	}
-#endif
-	/* Initialize the BH fields. */
-	memset(bhp, 0, sizeof(BH));
-	LOCKINIT(dbmp, &bhp->mutex);
-	bhp->ref = 1;
-	bhp->pgno = *pgnoaddr;
-	bhp->mf_offset = mf_offset;
-
-	/*
-	 * Prepend the bucket header to the head of the appropriate MPOOL
-	 * bucket hash list.  Append the bucket header to the tail of the
-	 * MPOOL LRU chain.
-	 */
-	SH_TAILQ_INSERT_HEAD(&dbmp->htab[bucket], bhp, hq, __bh);
-	SH_TAILQ_INSERT_TAIL(&mp->bhq, bhp, q);
-
-	/*
-	 * If we created the page, zero it out and continue.
-	 *
-	 * !!!
-	 * Note: DB_MPOOL_NEW specifically doesn't call the pgin function.
-	 * If DB_MPOOL_CREATE is used, then the application's pgin function
-	 * has to be able to handle pages of 0's -- if it uses DB_MPOOL_NEW,
-	 * it can detect all of its page creates, and not bother.
-	 *
-	 * Otherwise, read the page into memory, optionally creating it if
-	 * DB_MPOOL_CREATE is set.
-	 */
-	if (LF_ISSET(DB_MPOOL_NEW)) {
-		if (mfp->clear_len == 0)
-			memset(bhp->buf, 0, mfp->stat.st_pagesize);
-		else {
-			memset(bhp->buf, 0, mfp->clear_len);
-#ifdef DIAGNOSTIC
-			memset(bhp->buf + mfp->clear_len, 0xdb,
-			    mfp->stat.st_pagesize - mfp->clear_len);
-#endif
-		}
-
-		++mp->stat.st_page_create;
-		++mfp->stat.st_page_create;
-	} else {
-		/*
-		 * It's possible for the read function to fail, which means
-		 * that we fail as well.  Note, the __memp_pgread() function
-		 * discards the region lock, so the buffer must be pinned
-		 * down so that it cannot move and its contents are unchanged.
-		 */
-reread:		if ((ret = __memp_pgread(dbmfp,
-		    bhp, LF_ISSET(DB_MPOOL_CREATE))) != 0) {
-			/*
-			 * !!!
-			 * Discard the buffer unless another thread is waiting
-			 * on our I/O to complete.  Regardless, the header has
-			 * the BH_TRASH flag set.
-			 *
-			 * Once the buffer has been freed the error path must
-			 * not touch it again, so forget the reference we took
-			 * if we arrived here from the hash chain search.
-			 */
-			if (bhp->ref == 1) {
-				__memp_bhfree(dbmp, mfp, bhp, 1);
-				b_incr = 0;
-			}
-			goto err;
-		}
-
-		++mp->stat.st_cache_miss;
-		++mfp->stat.st_cache_miss;
-	}
-
-	/*
-	 * If we're returning a page after our current notion of the last-page,
-	 * update our information.  Note, there's no way to un-instantiate this
-	 * page, it's going to exist whether it's returned to us dirty or not.
-	 */
-	if (bhp->pgno > mfp->last_pgno)
-		mfp->last_pgno = bhp->pgno;
-
-	++mp->stat.st_page_clean;
-	*(void **)addrp = bhp->buf;
-
-done:	/* Update the chain search statistics. */
-	if (st_hsearch) {
-		++mp->stat.st_hash_searches;
-		if (st_hsearch > mp->stat.st_hash_longest)
-			mp->stat.st_hash_longest = st_hsearch;
-		mp->stat.st_hash_examined += st_hsearch;
-	}
-
-	++dbmfp->pinref;
-
-	UNLOCKREGION(dbmp);
-
-	return (0);
-
-err:	/* Discard our reference, if the buffer it was taken on still exists. */
-	if (b_incr)
-		--bhp->ref;
-	UNLOCKREGION(dbmp);
-
-	*(void **)addrp = NULL;
-	return (ret);
-}
diff --git a/db2/mp/mp_fopen.c b/db2/mp/mp_fopen.c
deleted file mode 100644
index dd02662fd8..0000000000
--- a/db2/mp/mp_fopen.c
+++ /dev/null
@@ -1,560 +0,0 @@
-/*-
- * See the file LICENSE for redistribution information.
- *
- * Copyright (c) 1996, 1997, 1998
- *	Sleepycat Software.  All rights reserved.
- */
-#include "config.h"
-
-#ifndef lint
-static const char sccsid[] = "@(#)mp_fopen.c	10.60 (Sleepycat) 1/1/99";
-#endif /* not lint */
-
-#ifndef NO_SYSTEM_INCLUDES
-#include <sys/types.h>
-
-#include <errno.h>
-#include <string.h>
-#endif
-
-#include "db_int.h"
-#include "shqueue.h"
-#include "db_shash.h"
-#include "mp.h"
-#include "common_ext.h"
-
-static int __memp_mf_close __P((DB_MPOOL *, DB_MPOOLFILE *));
-static int __memp_mf_open __P((DB_MPOOL *,
-    const char *, size_t, db_pgno_t, DB_MPOOL_FINFO *, MPOOLFILE **));
-
-/*
- * memp_fopen --
- *	Public entry point for opening a backing file in the memory pool.
- *	Checks the caller's arguments and then defers to __memp_fopen().
- */
-int
-memp_fopen(dbmp, path, flags, mode, pagesize, finfop, retp)
-	DB_MPOOL *dbmp;
-	const char *path;
-	u_int32_t flags;
-	int mode;
-	size_t pagesize;
-	DB_MPOOL_FINFO *finfop;
-	DB_MPOOLFILE **retp;
-{
-	int rc;
-
-	MP_PANIC_CHECK(dbmp);
-
-	/* Only DB_CREATE, DB_NOMMAP and DB_RDONLY are accepted here. */
-	rc = __db_fchk(dbmp->dbenv,
-	    "memp_fopen", flags, DB_CREATE | DB_NOMMAP | DB_RDONLY);
-	if (rc != 0)
-		return (rc);
-
-	/* A page size is mandatory. */
-	if (pagesize == 0) {
-		__db_err(dbmp->dbenv, "memp_fopen: pagesize not specified");
-		return (EINVAL);
-	}
-
-	/* A clear length, if supplied, may not exceed the page size. */
-	if (finfop != NULL && finfop->clear_len > pagesize)
-		return (EINVAL);
-
-	/* No MPOOLFILE to join (NULL); ask for region locking (1). */
-	rc = __memp_fopen(dbmp,
-	    NULL, path, flags, mode, pagesize, 1, finfop, retp);
-	return (rc);
-}
-
-/*
- * __memp_fopen --
- *	Open a backing file for the memory pool; internal version.
- *
- *	dbmp	  per-process pool handle.
- *	mfp	  shared MPOOLFILE to join, or NULL to find/create one.
- *	path	  file name, or NULL for a temporary file.
- *	flags	  DB_CREATE, DB_NOMMAP and/or DB_RDONLY.
- *	mode	  file mode handed to __db_open().
- *	pagesize  page size of the file.
- *	needlock  non-zero if this function must take the region lock itself.
- *	finfop	  optional file information; defaulted when NULL.
- *	retp	  set to the new DB_MPOOLFILE handle on success.
- *
- *	Returns 0 on success or an error number.  On failure the per-process
- *	handle, its file descriptor and the resolved path name are released.
- *
- * PUBLIC: int __memp_fopen __P((DB_MPOOL *, MPOOLFILE *, const char *,
- * PUBLIC:    u_int32_t, int, size_t, int, DB_MPOOL_FINFO *, DB_MPOOLFILE **));
- */
-int
-__memp_fopen(dbmp, mfp, path, flags, mode, pagesize, needlock, finfop, retp)
-	DB_MPOOL *dbmp;
-	MPOOLFILE *mfp;
-	const char *path;
-	u_int32_t flags;
-	int mode, needlock;
-	size_t pagesize;
-	DB_MPOOL_FINFO *finfop;
-	DB_MPOOLFILE **retp;
-{
-	DB_ENV *dbenv;
-	DB_MPOOLFILE *dbmfp;
-	DB_MPOOL_FINFO finfo;
-	db_pgno_t last_pgno;
-	size_t maxmap;
-	u_int32_t mbytes, bytes;
-	int ret;
-	u_int8_t idbuf[DB_FILE_ID_LEN];
-	char *rpath;
-
-	dbenv = dbmp->dbenv;
-	ret = 0;
-	rpath = NULL;
-
-	/*
-	 * The file size is only looked up for real files.  Start from zero
-	 * so that the mmap size test below never reads indeterminate values
-	 * when we were handed a temporary file.
-	 */
-	mbytes = bytes = 0;
-
-	/*
-	 * If mfp is provided, we take the DB_MPOOL_FINFO information from
-	 * the mfp.  We don't bother initializing everything, because some
-	 * of them are expensive to acquire.  If no mfp is provided and the
-	 * finfop argument is NULL, we default the values.
-	 */
-	if (finfop == NULL) {
-		memset(&finfo, 0, sizeof(finfo));
-		if (mfp != NULL) {
-			finfo.ftype = mfp->ftype;
-			finfo.pgcookie = NULL;
-			finfo.fileid = NULL;
-			finfo.lsn_offset = mfp->lsn_off;
-			finfo.clear_len = mfp->clear_len;
-		} else {
-			finfo.ftype = 0;
-			finfo.pgcookie = NULL;
-			finfo.fileid = NULL;
-			finfo.lsn_offset = -1;
-			finfo.clear_len = 0;
-		}
-		finfop = &finfo;
-	}
-
-	/* Allocate and initialize the per-process structure. */
-	if ((ret = __os_calloc(1, sizeof(DB_MPOOLFILE), &dbmfp)) != 0)
-		return (ret);
-	dbmfp->dbmp = dbmp;
-	dbmfp->fd = -1;
-	dbmfp->ref = 1;
-	if (LF_ISSET(DB_RDONLY))
-		F_SET(dbmfp, MP_READONLY);
-
-	if (path == NULL) {
-		if (LF_ISSET(DB_RDONLY)) {
-			__db_err(dbenv,
-			    "memp_fopen: temporary files can't be readonly");
-			ret = EINVAL;
-			goto err;
-		}
-		last_pgno = 0;
-	} else {
-		/* Get the real name for this file and open it. */
-		if ((ret = __db_appname(dbenv,
-		    DB_APP_DATA, NULL, path, 0, NULL, &rpath)) != 0)
-			goto err;
-		if ((ret = __db_open(rpath,
-		   LF_ISSET(DB_CREATE | DB_RDONLY),
-		   DB_CREATE | DB_RDONLY, mode, &dbmfp->fd)) != 0) {
-			__db_err(dbenv, "%s: %s", rpath, strerror(ret));
-			goto err;
-		}
-
-		/*
-		 * Don't permit files that aren't a multiple of the pagesize,
-		 * and find the number of the last page in the file, all the
-		 * time being careful not to overflow 32 bits.
-		 *
-		 * !!!
-		 * We can't use off_t's here, or in any code in the mainline
-		 * library for that matter.  (We have to use them in the os
-		 * stubs, of course, as there are system calls that take them
-		 * as arguments.)  The reason is that some customers build in
-		 * environments where an off_t is 32-bits, but still run where
-		 * offsets are 64-bits, and they pay us a lot of money.
-		 */
-		if ((ret = __os_ioinfo(rpath,
-		    dbmfp->fd, &mbytes, &bytes, NULL)) != 0) {
-			__db_err(dbenv, "%s: %s", rpath, strerror(ret));
-			goto err;
-		}
-
-		/* Page sizes have to be a power-of-two, ignore mbytes. */
-		if (bytes % pagesize != 0) {
-			__db_err(dbenv,
-			    "%s: file size not a multiple of the pagesize",
-			    rpath);
-			ret = EINVAL;
-			goto err;
-		}
-
-		last_pgno = mbytes * (MEGABYTE / pagesize);
-		last_pgno += bytes / pagesize;
-
-		/* Correction: page numbers are zero-based, not 1-based. */
-		if (last_pgno != 0)
-			--last_pgno;
-
-		/*
-		 * Get the file id if we weren't given one.  Generated file id's
-		 * don't use timestamps, otherwise there'd be no chance of any
-		 * other process joining the party.
-		 */
-		if (finfop->fileid == NULL) {
-			if ((ret = __os_fileid(dbenv, rpath, 0, idbuf)) != 0)
-				goto err;
-			finfop->fileid = idbuf;
-		}
-	}
-
-	/*
-	 * If we weren't provided an underlying shared object to join with,
-	 * find/allocate the shared file objects.  Also allocate space for
-	 * the per-process thread lock.
-	 *
-	 * NOTE(review): if the mutex allocation fails after the MPOOLFILE
-	 * reference was taken, that reference does not appear to be dropped
-	 * on the error path -- confirm against __memp_mf_close().
-	 */
-	if (needlock)
-		LOCKREGION(dbmp);
-
-	if (mfp == NULL)
-		ret = __memp_mf_open(dbmp,
-		    path, pagesize, last_pgno, finfop, &mfp);
-	else {
-		++mfp->ref;
-		ret = 0;
-	}
-	if (ret == 0 &&
-	    F_ISSET(dbmp, MP_LOCKHANDLE) && (ret =
-	    __memp_alloc(dbmp, sizeof(db_mutex_t), NULL, &dbmfp->mutexp)) == 0)
-		LOCKINIT(dbmp, dbmfp->mutexp);
-
-	if (needlock)
-		UNLOCKREGION(dbmp);
-	if (ret != 0)
-		goto err;
-
-	dbmfp->mfp = mfp;
-
-	/*
-	 * If a file:
-	 *	+ is read-only
-	 *	+ isn't temporary
-	 *	+ doesn't require any pgin/pgout support
-	 *	+ the DB_NOMMAP flag wasn't set
-	 *	+ and is less than mp_mmapsize bytes in size
-	 *
-	 * we can mmap it instead of reading/writing buffers.  Don't do error
-	 * checking based on the mmap call failure.  We want to do normal I/O
-	 * on the file if the reason we failed was because the file was on an
-	 * NFS mounted partition, and we can fail in buffer I/O just as easily
-	 * as here.
-	 *
-	 * XXX
-	 * We'd like to test to see if the file is too big to mmap.  Since we
-	 * don't know what size or type off_t's or size_t's are, or the largest
-	 * unsigned integral type is, or what random insanity the local C
-	 * compiler will perpetrate, doing the comparison in a portable way is
-	 * flatly impossible.  Hope that mmap fails if the file is too large.
-	 */
-#define	DB_MAXMMAPSIZE	(10 * 1024 * 1024)	/* 10 Mb. */
-	if (F_ISSET(mfp, MP_CAN_MMAP)) {
-		if (!F_ISSET(dbmfp, MP_READONLY))
-			F_CLR(mfp, MP_CAN_MMAP);
-		if (path == NULL)
-			F_CLR(mfp, MP_CAN_MMAP);
-		if (finfop->ftype != 0)
-			F_CLR(mfp, MP_CAN_MMAP);
-		if (LF_ISSET(DB_NOMMAP))
-			F_CLR(mfp, MP_CAN_MMAP);
-		maxmap = dbenv == NULL || dbenv->mp_mmapsize == 0 ?
-		    DB_MAXMMAPSIZE : dbenv->mp_mmapsize;
-		if (mbytes > maxmap / MEGABYTE ||
-		    (mbytes == maxmap / MEGABYTE && bytes >= maxmap % MEGABYTE))
-			F_CLR(mfp, MP_CAN_MMAP);
-	}
-	dbmfp->addr = NULL;
-	if (F_ISSET(mfp, MP_CAN_MMAP)) {
-		dbmfp->len = (size_t)mbytes * MEGABYTE + bytes;
-		if (__db_mapfile(rpath,
-		    dbmfp->fd, dbmfp->len, 1, &dbmfp->addr) != 0) {
-			dbmfp->addr = NULL;
-			F_CLR(mfp, MP_CAN_MMAP);
-		}
-	}
-	if (rpath != NULL)
-		__os_freestr(rpath);
-
-	/* Publish the handle on the per-process list. */
-	LOCKHANDLE(dbmp, dbmp->mutexp);
-	TAILQ_INSERT_TAIL(&dbmp->dbmfq, dbmfp, q);
-	UNLOCKHANDLE(dbmp, dbmp->mutexp);
-
-	*retp = dbmfp;
-	return (0);
-
-err:	/*
-	 * Note that we do not have to free the thread mutex, because we
-	 * never get to here after we have successfully allocated it.
-	 *
-	 * dbmfp is always valid here: the only failure that precedes its
-	 * allocation returns directly, so no NULL check is needed.
-	 */
-	if (rpath != NULL)
-		__os_freestr(rpath);
-	if (dbmfp->fd != -1)
-		(void)__os_close(dbmfp->fd);
-	__os_free(dbmfp, sizeof(DB_MPOOLFILE));
-	return (ret);
-}
-
-/*
- * __memp_mf_open --
- *	Open an MPOOLFILE: join an existing shared entry whose file id
- *	matches, or allocate and initialize a new one in the region.
- *
- *	On success returns 0 and stores the MPOOLFILE in *retp.  On failure
- *	returns an error number; if a partially built MPOOLFILE had to be
- *	torn down, *retp is set to NULL so the caller never sees a pointer
- *	to freed region memory.
- */
-static int
-__memp_mf_open(dbmp, path, pagesize, last_pgno, finfop, retp)
-	DB_MPOOL *dbmp;
-	const char *path;
-	size_t pagesize;
-	db_pgno_t last_pgno;
-	DB_MPOOL_FINFO *finfop;
-	MPOOLFILE **retp;
-{
-	MPOOLFILE *mfp;
-	int ret;
-	void *p;
-
-#define	ISTEMPORARY	(path == NULL)
-
-	/*
-	 * Walk the list of MPOOLFILE's, looking for a matching file.
-	 * Temporary files can't match previous files.
-	 */
-	if (!ISTEMPORARY)
-		for (mfp = SH_TAILQ_FIRST(&dbmp->mp->mpfq, __mpoolfile);
-		    mfp != NULL; mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile)) {
-			if (F_ISSET(mfp, MP_TEMP))
-				continue;
-			if (!memcmp(finfop->fileid,
-			    R_ADDR(dbmp, mfp->fileid_off), DB_FILE_ID_LEN)) {
-				if (finfop->clear_len != mfp->clear_len ||
-				    finfop->ftype != mfp->ftype ||
-				    pagesize != mfp->stat.st_pagesize) {
-					__db_err(dbmp->dbenv,
-			    "%s: ftype, clear length or pagesize changed",
-					    path);
-					return (EINVAL);
-				}
-
-				/* Found it: increment the reference count. */
-				++mfp->ref;
-				*retp = mfp;
-				return (0);
-			}
-		}
-
-	/* Allocate a new MPOOLFILE. */
-	if ((ret = __memp_alloc(dbmp, sizeof(MPOOLFILE), NULL, &mfp)) != 0)
-		return (ret);
-
-	/*
-	 * Initialize the structure.  Zeroing it also leaves the *_off
-	 * fields at 0, which the error path relies on to tell which of the
-	 * secondary allocations actually happened.
-	 */
-	memset(mfp, 0, sizeof(MPOOLFILE));
-	mfp->ref = 1;
-	mfp->ftype = finfop->ftype;
-	mfp->lsn_off = finfop->lsn_offset;
-	mfp->clear_len = finfop->clear_len;
-
-	/*
-	 * If the user specifies DB_MPOOL_LAST or DB_MPOOL_NEW on a memp_fget,
-	 * we have to know the last page in the file.  Figure it out and save
-	 * it away.
-	 */
-	mfp->stat.st_pagesize = pagesize;
-	mfp->orig_last_pgno = mfp->last_pgno = last_pgno;
-
-	if (ISTEMPORARY)
-		F_SET(mfp, MP_TEMP);
-	else {
-		/* Copy the file path into shared memory. */
-		if ((ret = __memp_alloc(dbmp,
-		    strlen(path) + 1, &mfp->path_off, &p)) != 0)
-			goto err;
-		memcpy(p, path, strlen(path) + 1);
-
-		/* Copy the file identification string into shared memory. */
-		if ((ret = __memp_alloc(dbmp,
-		    DB_FILE_ID_LEN, &mfp->fileid_off, &p)) != 0)
-			goto err;
-		memcpy(p, finfop->fileid, DB_FILE_ID_LEN);
-
-		F_SET(mfp, MP_CAN_MMAP);
-	}
-
-	/* Copy the page cookie into shared memory. */
-	if (finfop->pgcookie == NULL || finfop->pgcookie->size == 0) {
-		mfp->pgcookie_len = 0;
-		mfp->pgcookie_off = 0;
-	} else {
-		if ((ret = __memp_alloc(dbmp,
-		    finfop->pgcookie->size, &mfp->pgcookie_off, &p)) != 0)
-			goto err;
-		memcpy(p, finfop->pgcookie->data, finfop->pgcookie->size);
-		mfp->pgcookie_len = finfop->pgcookie->size;
-	}
-
-	/* Prepend the MPOOLFILE to the list of MPOOLFILE's. */
-	SH_TAILQ_INSERT_HEAD(&dbmp->mp->mpfq, mfp, q, __mpoolfile);
-
-	/* Hand the fully built entry back only once nothing can fail. */
-	*retp = mfp;
-	return (0);
-
-err:	/*
-	 * Release whatever was allocated and report the failure.  The page
-	 * cookie is the last allocation attempted, so it never needs to be
-	 * freed here.
-	 */
-	if (mfp->path_off != 0)
-		__db_shalloc_free(dbmp->addr, R_ADDR(dbmp, mfp->path_off));
-	if (mfp->fileid_off != 0)
-		__db_shalloc_free(dbmp->addr, R_ADDR(dbmp, mfp->fileid_off));
-	__db_shalloc_free(dbmp->addr, mfp);
-	*retp = NULL;
-	return (ret);
-}
-
-/*
- * memp_fclose --
- *	Close a backing file for the memory pool.
- *
- *	Waits until the caller holds the only reference to the handle,
- *	unlinks it from the per-process list, drops the reference on the
- *	shared MPOOLFILE, unmaps and closes the file, and frees the handle.
- *	Returns 0, or the first error met while unmapping or closing.
- */
-int
-memp_fclose(dbmfp)
-	DB_MPOOLFILE *dbmfp;
-{
-	DB_MPOOL *dbmp;
-	int ret, t_ret;
-
-	dbmp = dbmfp->dbmp;
-	ret = 0;
-
-	MP_PANIC_CHECK(dbmp);
-
-	for (;;) {
-		LOCKHANDLE(dbmp, dbmp->mutexp);
-
-		/*
-		 * We have to reference count DB_MPOOLFILE structures as other
-		 * threads may be using them.  The problem only happens if the
-		 * application makes a bad design choice.  Here's the path:
-		 *
-		 * Thread A opens a database.
-		 * Thread B uses thread A's DB_MPOOLFILE to write a buffer
-		 *    in order to free up memory in the mpool cache.
-		 * Thread A closes the database while thread B is using the
-		 *    DB_MPOOLFILE structure.
-		 *
-		 * By opening all databases before creating the threads, and
-		 * closing them after the threads have exited, applications
-		 * get better performance and avoid the problem path entirely.
-		 *
-		 * Regardless, holding the DB_MPOOLFILE to flush a dirty buffer
-		 * is a short-term lock, even in worst case, since we better be
-		 * the only thread of control using the DB_MPOOLFILE structure
-		 * to read pages *into* the cache.  Wait until we're the only
-		 * reference holder and remove the DB_MPOOLFILE structure from
-		 * the list, so nobody else can even find it.
-		 */
-		if (dbmfp->ref == 1) {
-			TAILQ_REMOVE(&dbmp->dbmfq, dbmfp, q);
-			break;
-		}
-		UNLOCKHANDLE(dbmp, dbmp->mutexp);
-
-		(void)__os_sleep(1, 0);
-	}
-	/* The loop exits with the handle lock still held. */
-	UNLOCKHANDLE(dbmp, dbmp->mutexp);
-
-	/* Complain if pinned blocks never returned. */
-	if (dbmfp->pinref != 0)
-		__db_err(dbmp->dbenv, "%s: close: %lu blocks left pinned",
-		    __memp_fn(dbmfp), (u_long)dbmfp->pinref);
-
-	/*
-	 * Capture the file name for error messages now, while the shared
-	 * MPOOLFILE is still guaranteed to exist; nothing below relies on
-	 * it after the MPOOLFILE reference has been dropped other than the
-	 * diagnostics that already did so.
-	 */
-
-	/* Close the underlying MPOOLFILE. */
-	(void)__memp_mf_close(dbmp, dbmfp);
-
-	/* Discard any mmap information. */
-	if (dbmfp->addr != NULL &&
-	    (ret = __db_unmapfile(dbmfp->addr, dbmfp->len)) != 0)
-		__db_err(dbmp->dbenv,
-		    "%s: %s", __memp_fn(dbmfp), strerror(ret));
-
-	/*
-	 * Close the file; temporary files may not yet have been created.
-	 * Report the close error unless an earlier error is already being
-	 * returned (the first failure wins).
-	 */
-	if (dbmfp->fd != -1 && (t_ret = __os_close(dbmfp->fd)) != 0) {
-		__db_err(dbmp->dbenv,
-		    "%s: %s", __memp_fn(dbmfp), strerror(t_ret));
-		if (ret == 0)
-			ret = t_ret;
-	}
-
-	/* Free memory. */
-	if (dbmfp->mutexp != NULL) {
-		LOCKREGION(dbmp);
-		__db_shalloc_free(dbmp->addr, dbmfp->mutexp);
-		UNLOCKREGION(dbmp);
-	}
-
-	/* Discard the DB_MPOOLFILE structure. */
-	__os_free(dbmfp, sizeof(DB_MPOOLFILE));
-
-	return (ret);
-}
-
-/*
- * __memp_mf_close --
- *	Close down an MPOOLFILE.
- *
- *	Drops one reference on the shared MPOOLFILE behind dbmfp.  When the
- *	last reference goes away, the file's buffers are moved to the free
- *	list, the MPOOLFILE is unlinked and its region memory is released.
- *	Runs under the region lock, which it acquires itself.  Always
- *	returns 0.
- */
-static int
-__memp_mf_close(dbmp, dbmfp)
-	DB_MPOOL *dbmp;
-	DB_MPOOLFILE *dbmfp;
-{
-	BH *bhp, *nbhp;
-	MPOOL *mp;
-	MPOOLFILE *mfp;
-	size_t mf_offset;
-
-	mp = dbmp->mp;
-	mfp = dbmfp->mfp;
-
-	LOCKREGION(dbmp);
-
-	/* If more than a single reference, simply decrement. */
-	if (mfp->ref > 1) {
-		--mfp->ref;
-		goto ret1;
-	}
-
-	/*
-	 * Move any BH's held by the file to the free list.  We don't free the
-	 * memory itself because we may be discarding the memory pool, and it's
-	 * fairly expensive to reintegrate the buffers back into the region for
-	 * no purpose.
-	 *
-	 * The successor is fetched before the body runs because the body may
-	 * move bhp to another queue.
-	 */
-	mf_offset = R_OFFSET(dbmp, mfp);
-	for (bhp = SH_TAILQ_FIRST(&mp->bhq, __bh); bhp != NULL; bhp = nbhp) {
-		nbhp = SH_TAILQ_NEXT(bhp, q, __bh);
-
-#ifdef DEBUG_NO_DIRTY
-		/*
-		 * Complain if we find any blocks that were left dirty.  Note
-		 * this check runs for every buffer on the queue, not only
-		 * those belonging to the file being closed.
-		 */
-		if (F_ISSET(bhp, BH_DIRTY))
-			__db_err(dbmp->dbenv,
-			    "%s: close: pgno %lu left dirty; ref %lu",
-			    __memp_fn(dbmfp),
-			    (u_long)bhp->pgno, (u_long)bhp->ref);
-#endif
-
-		/* Only buffers that belong to this file are discarded. */
-		if (bhp->mf_offset == mf_offset) {
-			/* A discarded dirty page no longer counts as dirty. */
-			if (F_ISSET(bhp, BH_DIRTY)) {
-				++mp->stat.st_page_clean;
-				--mp->stat.st_page_dirty;
-			}
-			__memp_bhfree(dbmp, mfp, bhp, 0);
-			SH_TAILQ_INSERT_HEAD(&mp->bhfq, bhp, q, __bh);
-		}
-	}
-
-	/* Delete from the list of MPOOLFILEs. */
-	SH_TAILQ_REMOVE(&mp->mpfq, mfp, q, __mpoolfile);
-
-	/* Free the space; an offset of 0 means the item was never allocated. */
-	if (mfp->path_off != 0)
-		__db_shalloc_free(dbmp->addr, R_ADDR(dbmp, mfp->path_off));
-	if (mfp->fileid_off != 0)
-		__db_shalloc_free(dbmp->addr, R_ADDR(dbmp, mfp->fileid_off));
-	if (mfp->pgcookie_off != 0)
-		__db_shalloc_free(dbmp->addr, R_ADDR(dbmp, mfp->pgcookie_off));
-	__db_shalloc_free(dbmp->addr, mfp);
-
-ret1:	UNLOCKREGION(dbmp);
-	return (0);
-}
diff --git a/db2/mp/mp_fput.c b/db2/mp/mp_fput.c
deleted file mode 100644
index c551f97380..0000000000
--- a/db2/mp/mp_fput.c
+++ /dev/null
@@ -1,153 +0,0 @@
-/*-
- * See the file LICENSE for redistribution information.
- *
- * Copyright (c) 1996, 1997, 1998
- *	Sleepycat Software.  All rights reserved.
- */
-#include "config.h"
-
-#ifndef lint
-static const char sccsid[] = "@(#)mp_fput.c	10.24 (Sleepycat) 9/27/98";
-#endif /* not lint */
-
-#ifndef NO_SYSTEM_INCLUDES
-#include <sys/types.h>
-
-#include <errno.h>
-#endif
-
-#include "db_int.h"
-#include "shqueue.h"
-#include "db_shash.h"
-#include "mp.h"
-#include "common_ext.h"
-
-/*
- * memp_fput --
- *	Mpool file put function: return a page obtained from memp_fget().
- *
- *	dbmfp	handle the page was retrieved through.
- *	pgaddr	page address previously handed to the application.
- *	flags	0, or DB_MPOOL_CLEAN / DB_MPOOL_DIRTY (mutually exclusive)
- *		and/or DB_MPOOL_DISCARD.
- *
- *	Returns 0 on success, EACCES when a page of a read-only file is
- *	marked dirty, EINVAL when the page was not pinned, or the error
- *	from flag validation.
- */
-int
-memp_fput(dbmfp, pgaddr, flags)
-	DB_MPOOLFILE *dbmfp;
-	void *pgaddr;
-	u_int32_t flags;
-{
-	BH *bhp;
-	DB_MPOOL *dbmp;
-	MPOOL *mp;
-	int wrote, ret;
-
-	dbmp = dbmfp->dbmp;
-	mp = dbmp->mp;
-
-	MP_PANIC_CHECK(dbmp);
-
-	/* Validate arguments. */
-	if (flags) {
-		if ((ret = __db_fchk(dbmp->dbenv, "memp_fput", flags,
-		    DB_MPOOL_CLEAN | DB_MPOOL_DIRTY | DB_MPOOL_DISCARD)) != 0)
-			return (ret);
-		if ((ret = __db_fcchk(dbmp->dbenv, "memp_fput",
-		    flags, DB_MPOOL_CLEAN, DB_MPOOL_DIRTY)) != 0)
-			return (ret);
-
-		if (LF_ISSET(DB_MPOOL_DIRTY) && F_ISSET(dbmfp, MP_READONLY)) {
-			__db_err(dbmp->dbenv,
-			    "%s: dirty flag set for readonly file page",
-			    __memp_fn(dbmfp));
-			return (EACCES);
-		}
-	}
-
-	LOCKREGION(dbmp);
-
-	/* Decrement the pinned reference count. */
-	if (dbmfp->pinref == 0)
-		__db_err(dbmp->dbenv,
-		    "%s: put: more blocks returned than retrieved",
-		    __memp_fn(dbmfp));
-	else
-		--dbmfp->pinref;
-
-	/*
-	 * If we're mapping the file, there's nothing to do.  Because we can
-	 * stop mapping the file at any time, we have to check on each buffer
-	 * to see if the address we gave the application was part of the map
-	 * region.
-	 *
-	 * The mapping covers [addr, addr + len): the one-past-the-end address
-	 * is not part of it, so the upper bound is exclusive.
-	 */
-	if (dbmfp->addr != NULL &&
-	    (u_int8_t *)pgaddr >= (u_int8_t *)dbmfp->addr &&
-	    (u_int8_t *)pgaddr < (u_int8_t *)dbmfp->addr + dbmfp->len) {
-		UNLOCKREGION(dbmp);
-		return (0);
-	}
-
-	/* Convert the page address to a buffer header. */
-	bhp = (BH *)((u_int8_t *)pgaddr - SSZA(BH, buf));
-
-	/* Set/clear the page bits, keeping the clean/dirty counts in step. */
-	if (LF_ISSET(DB_MPOOL_CLEAN) && F_ISSET(bhp, BH_DIRTY)) {
-		++mp->stat.st_page_clean;
-		--mp->stat.st_page_dirty;
-		F_CLR(bhp, BH_DIRTY);
-	}
-	if (LF_ISSET(DB_MPOOL_DIRTY) && !F_ISSET(bhp, BH_DIRTY)) {
-		--mp->stat.st_page_clean;
-		++mp->stat.st_page_dirty;
-		F_SET(bhp, BH_DIRTY);
-	}
-	if (LF_ISSET(DB_MPOOL_DISCARD))
-		F_SET(bhp, BH_DISCARD);
-
-	/*
-	 * Check for a reference count going to zero.  This can happen if the
-	 * application returns a page twice.
-	 */
-	if (bhp->ref == 0) {
-		__db_err(dbmp->dbenv, "%s: page %lu: unpinned page returned",
-		    __memp_fn(dbmfp), (u_long)bhp->pgno);
-		UNLOCKREGION(dbmp);
-		return (EINVAL);
-	}
-
-	/*
-	 * If more than one reference to the page, we're done.  Ignore the
-	 * discard flags (for now) and leave it at its position in the LRU
-	 * chain.  The rest gets done at last reference close.
-	 */
-	if (--bhp->ref > 0) {
-		UNLOCKREGION(dbmp);
-		return (0);
-	}
-
-	/*
-	 * If this buffer is scheduled for writing because of a checkpoint, we
-	 * need to write it (if we marked it dirty), or update the checkpoint
-	 * counters (if we didn't mark it dirty).  If we try to write it and
-	 * can't, that's not necessarily an error, but set a flag so that the
-	 * next time the memp_sync function runs we try writing it there, as
-	 * the checkpoint application better be able to write all of the files.
-	 */
-	if (F_ISSET(bhp, BH_WRITE)) {
-		if (F_ISSET(bhp, BH_DIRTY)) {
-			if (__memp_bhwrite(dbmp,
-			    dbmfp->mfp, bhp, NULL, &wrote) != 0 || !wrote)
-				F_SET(mp, MP_LSN_RETRY);
-		} else {
-			F_CLR(bhp, BH_WRITE);
-
-			--dbmfp->mfp->lsn_cnt;
-			--mp->lsn_cnt;
-		}
-	}
-
-	/*
-	 * Move the buffer to the head/tail of the LRU chain: discarded
-	 * buffers go to the head, everything else to the tail.
-	 */
-	SH_TAILQ_REMOVE(&mp->bhq, bhp, q, __bh);
-	if (F_ISSET(bhp, BH_DISCARD))
-		SH_TAILQ_INSERT_HEAD(&mp->bhq, bhp, q, __bh);
-	else
-		SH_TAILQ_INSERT_TAIL(&mp->bhq, bhp, q);
-
-	UNLOCKREGION(dbmp);
-	return (0);
-}
diff --git a/db2/mp/mp_fset.c b/db2/mp/mp_fset.c
deleted file mode 100644
index 1940d3b198..0000000000
--- a/db2/mp/mp_fset.c
+++ /dev/null
@@ -1,83 +0,0 @@
-/*-
- * See the file LICENSE for redistribution information.
- *
- * Copyright (c) 1996, 1997, 1998
- *	Sleepycat Software.  All rights reserved.
- */
-#include "config.h"
-
-#ifndef lint
-static const char sccsid[] = "@(#)mp_fset.c	10.16 (Sleepycat) 9/27/98";
-#endif /* not lint */
-
-#ifndef NO_SYSTEM_INCLUDES
-#include <sys/types.h>
-
-#include <errno.h>
-#endif
-
-#include "db_int.h"
-#include "shqueue.h"
-#include "db_shash.h"
-#include "mp.h"
-#include "common_ext.h"
-
-/*
- * memp_fset --
- *	Mpool page set-flag routine: mark a pinned page clean or dirty
- *	and/or flag it for discard, without returning it to the pool.
- */
-int
-memp_fset(dbmfp, pgaddr, flags)
-	DB_MPOOLFILE *dbmfp;
-	void *pgaddr;
-	u_int32_t flags;
-{
-	BH *hdr;
-	DB_MPOOL *dbmp;
-	MPOOL *mp;
-	int ret;
-
-	dbmp = dbmfp->dbmp;
-	mp = dbmp->mp;
-
-	MP_PANIC_CHECK(dbmp);
-
-	/* Calling with no flags at all is an error. */
-	if (flags == 0)
-		return (__db_ferr(dbmp->dbenv, "memp_fset", 1));
-
-	/* Reject unknown flags. */
-	ret = __db_fchk(dbmp->dbenv, "memp_fset", flags,
-	    DB_MPOOL_DIRTY | DB_MPOOL_CLEAN | DB_MPOOL_DISCARD);
-	if (ret != 0)
-		return (ret);
-
-	/* Clean and dirty may not be requested together. */
-	ret = __db_fcchk(dbmp->dbenv, "memp_fset",
-	    flags, DB_MPOOL_CLEAN, DB_MPOOL_DIRTY);
-	if (ret != 0)
-		return (ret);
-
-	/* Pages of a read-only file can't be dirtied. */
-	if (LF_ISSET(DB_MPOOL_DIRTY) && F_ISSET(dbmfp, MP_READONLY)) {
-		__db_err(dbmp->dbenv,
-		    "%s: dirty flag set for readonly file page",
-		    __memp_fn(dbmfp));
-		return (EACCES);
-	}
-
-	/* The buffer header sits immediately in front of the page. */
-	hdr = (BH *)((u_int8_t *)pgaddr - SSZA(BH, buf));
-
-	LOCKREGION(dbmp);
-
-	/* Dirty -> clean transition. */
-	if (LF_ISSET(DB_MPOOL_CLEAN)) {
-		if (F_ISSET(hdr, BH_DIRTY)) {
-			++mp->stat.st_page_clean;
-			--mp->stat.st_page_dirty;
-			F_CLR(hdr, BH_DIRTY);
-		}
-	}
-
-	/* Clean -> dirty transition. */
-	if (LF_ISSET(DB_MPOOL_DIRTY)) {
-		if (!F_ISSET(hdr, BH_DIRTY)) {
-			--mp->stat.st_page_clean;
-			++mp->stat.st_page_dirty;
-			F_SET(hdr, BH_DIRTY);
-		}
-	}
-
-	if (LF_ISSET(DB_MPOOL_DISCARD))
-		F_SET(hdr, BH_DISCARD);
-
-	UNLOCKREGION(dbmp);
-	return (0);
-}
diff --git a/db2/mp/mp_open.c b/db2/mp/mp_open.c
deleted file mode 100644
index 4c90fc438f..0000000000
--- a/db2/mp/mp_open.c
+++ /dev/null
@@ -1,221 +0,0 @@
-/*-
- * See the file LICENSE for redistribution information.
- *
- * Copyright (c) 1996, 1997, 1998
- *	Sleepycat Software.  All rights reserved.
- */
-#include "config.h"
-
-#ifndef lint
-static const char sccsid[] = "@(#)mp_open.c	10.27 (Sleepycat) 10/1/98";
-#endif /* not lint */
-
-#ifndef NO_SYSTEM_INCLUDES
-#include <sys/types.h>
-
-#include <errno.h>
-#include <string.h>
-#endif
-
-#include "db_int.h"
-#include "shqueue.h"
-#include "db_shash.h"
-#include "mp.h"
-#include "common_ext.h"
-
-/*
- * memp_open --
- *	Initialize and/or join a memory pool.
- *
- *	path	directory for the pool region, or NULL.
- *	flags	DB_CREATE, DB_MPOOL_PRIVATE, DB_NOMMAP and, when spinlocks
- *		are available, DB_THREAD.
- *	mode	file mode for a newly created region.
- *	dbenv	environment, or NULL.
- *	retp	set to the new DB_MPOOL handle on success.
- *
- *	Returns 0 on success or an error number; on failure no handle is
- *	returned and everything allocated here has been released.
- */
-int
-memp_open(path, flags, mode, dbenv, retp)
-	const char *path;
-	u_int32_t flags;
-	int mode;
-	DB_ENV *dbenv;
-	DB_MPOOL **retp;
-{
-	DB_MPOOL *dbmp;
-	size_t cachesize;
-	int is_private, ret;
-
-	/* Validate arguments. */
-#ifdef HAVE_SPINLOCKS
-#define	OKFLAGS	(DB_CREATE | DB_MPOOL_PRIVATE | DB_NOMMAP | DB_THREAD)
-#else
-#define	OKFLAGS	(DB_CREATE | DB_MPOOL_PRIVATE | DB_NOMMAP)
-#endif
-	if ((ret = __db_fchk(dbenv, "memp_open", flags, OKFLAGS)) != 0)
-		return (ret);
-
-	/* Extract fields from DB_ENV structure. */
-	cachesize = dbenv == NULL ? 0 : dbenv->mp_size;
-
-	/* Create and initialize the DB_MPOOL structure. */
-	if ((ret = __os_calloc(1, sizeof(DB_MPOOL), &dbmp)) != 0)
-		return (ret);
-	LIST_INIT(&dbmp->dbregq);
-	TAILQ_INIT(&dbmp->dbmfq);
-
-	dbmp->dbenv = dbenv;
-
-	/* Decide if it's possible for anyone else to access the pool. */
-	is_private =
-	    (dbenv == NULL && path == NULL) || LF_ISSET(DB_MPOOL_PRIVATE);
-
-	/*
-	 * Map in the region.  We do locking regardless, as portions of it are
-	 * implemented in common code (if we put the region in a file, that is).
-	 */
-	F_SET(dbmp, MP_LOCKREGION);
-	if ((ret = __memp_ropen(dbmp,
-	    path, cachesize, mode, is_private, LF_ISSET(DB_CREATE))) != 0)
-		goto err;
-	F_CLR(dbmp, MP_LOCKREGION);
-
-	/*
-	 * If there's concurrent access, then we have to lock the region.
-	 * If it's threaded, then we have to lock both the handles and the
-	 * region, and we need to allocate a mutex for that purpose.
-	 */
-	if (!is_private)
-		F_SET(dbmp, MP_LOCKREGION);
-	if (LF_ISSET(DB_THREAD)) {
-		F_SET(dbmp, MP_LOCKHANDLE | MP_LOCKREGION);
-		LOCKREGION(dbmp);
-		ret = __memp_alloc(dbmp,
-		    sizeof(db_mutex_t), NULL, &dbmp->mutexp);
-		UNLOCKREGION(dbmp);
-		if (ret != 0) {
-			/*
-			 * The handle mutex was never allocated, so clear
-			 * MP_LOCKHANDLE to keep memp_close() from freeing
-			 * it.  memp_close() detaches the region and frees
-			 * dbmp itself, so return directly: going through
-			 * the err label would free dbmp a second time.
-			 */
-			F_CLR(dbmp, MP_LOCKHANDLE);
-			(void)memp_close(dbmp);
-			return (ret);
-		}
-		LOCKINIT(dbmp, dbmp->mutexp);
-	}
-
-	*retp = dbmp;
-	return (0);
-
-err:	/* Only reached before the region was attached: just free the handle. */
-	if (dbmp != NULL)
-		__os_free(dbmp, sizeof(DB_MPOOL));
-	return (ret);
-}
-
-/*
- * memp_close --
- *	Close a memory pool: release the registered pgin/pgout entries,
- *	close every file still open on the handle, free the handle mutex,
- *	detach from the region and free the handle itself.  Returns the
- *	first error encountered, or 0.
- */
-int
-memp_close(dbmp)
-	DB_MPOOL *dbmp;
-{
-	DB_MPOOLFILE *file;
-	DB_MPREG *reg;
-	int first_err, rc;
-
-	first_err = 0;
-
-	MP_PANIC_CHECK(dbmp);
-
-	/* Discard DB_MPREGs. */
-	for (;;) {
-		reg = LIST_FIRST(&dbmp->dbregq);
-		if (reg == NULL)
-			break;
-		LIST_REMOVE(reg, q);
-		__os_free(reg, sizeof(DB_MPREG));
-	}
-
-	/* Discard DB_MPOOLFILEs; each close unlinks the handle it is given. */
-	for (;;) {
-		file = TAILQ_FIRST(&dbmp->dbmfq);
-		if (file == NULL)
-			break;
-		rc = memp_fclose(file);
-		if (rc != 0 && first_err == 0)
-			first_err = rc;
-	}
-
-	/* Discard thread mutex. */
-	if (F_ISSET(dbmp, MP_LOCKHANDLE)) {
-		LOCKREGION(dbmp);
-		__db_shalloc_free(dbmp->addr, dbmp->mutexp);
-		UNLOCKREGION(dbmp);
-	}
-
-	/* Close the region. */
-	rc = __db_rdetach(&dbmp->reginfo);
-	if (rc != 0 && first_err == 0)
-		first_err = rc;
-
-	/* Release the region path name, then the handle. */
-	if (dbmp->reginfo.path != NULL)
-		__os_freestr(dbmp->reginfo.path);
-	__os_free(dbmp, sizeof(DB_MPOOL));
-
-	return (first_err);
-}
-
-/*
- * __memp_panic --
- *	Panic a memory pool: set the panic flag in the pool region's
- *	layout, if the environment has a pool attached.
- *
- * PUBLIC: void __memp_panic __P((DB_ENV *));
- */
-void
-__memp_panic(dbenv)
-	DB_ENV *dbenv;
-{
-	/* Nothing to mark if no memory pool is attached. */
-	if (dbenv->mp_info == NULL)
-		return;
-
-	dbenv->mp_info->mp->rlayout.panic = 1;
-}
-
-/*
- * memp_unlink --
- *	Remove a memory pool region via __db_runlink().
- */
-int
-memp_unlink(path, force, dbenv)
-	const char *path;
-	int force;
-	DB_ENV *dbenv;
-{
-	REGINFO info;
-	int rc;
-
-	/* Describe the default mpool region file for the unlink call. */
-	memset(&info, 0, sizeof(info));
-	info.dbenv = dbenv;
-	info.appname = DB_APP_NONE;
-
-	/* The region description carries its own copy of the path. */
-	if (path != NULL) {
-		rc = __os_strdup(path, &info.path);
-		if (rc != 0)
-			return (rc);
-	}
-	info.file = DB_DEFAULT_MPOOL_FILE;
-
-	rc = __db_runlink(&info, force);
-
-	if (info.path != NULL)
-		__os_freestr(info.path);
-	return (rc);
-}
-
-/*
- * memp_register --
- *	Register a file type's pgin, pgout routines on this pool handle.
- */
-int
-memp_register(dbmp, ftype, pgin, pgout)
-	DB_MPOOL *dbmp;
-	int ftype;
-	int (*pgin) __P((db_pgno_t, void *, DBT *));
-	int (*pgout) __P((db_pgno_t, void *, DBT *));
-{
-	DB_MPREG *entry;
-	int rc;
-
-	MP_PANIC_CHECK(dbmp);
-
-	/* Build the registry entry before taking the handle lock. */
-	rc = __os_malloc(sizeof(DB_MPREG), NULL, &entry);
-	if (rc != 0)
-		return (rc);
-
-	entry->pgout = pgout;
-	entry->pgin = pgin;
-	entry->ftype = ftype;
-
-	/*
-	 * New entries go at the head of the list.  Lookups walk the list
-	 * linearly, so the most recent registration for a file type is the
-	 * one found and duplicates need no special handling.
-	 */
-	LOCKHANDLE(dbmp, dbmp->mutexp);
-	LIST_INSERT_HEAD(&dbmp->dbregq, entry, q);
-	UNLOCKHANDLE(dbmp, dbmp->mutexp);
-
-	return (0);
-}
diff --git a/db2/mp/mp_pr.c b/db2/mp/mp_pr.c
deleted file mode 100644
index 84c782e781..0000000000
--- a/db2/mp/mp_pr.c
+++ /dev/null
@@ -1,304 +0,0 @@
-/*-
- * See the file LICENSE for redistribution information.
- *
- * Copyright (c) 1996, 1997, 1998
- *	Sleepycat Software.  All rights reserved.
- */
-#include "config.h"
-
-#ifndef lint
-static const char sccsid[] = "@(#)mp_pr.c	10.30 (Sleepycat) 10/1/98";
-#endif /* not lint */
-
-#ifndef NO_SYSTEM_INCLUDES
-#include <sys/types.h>
-
-#include <errno.h>
-#include <stdio.h>
-#include <string.h>
-#include <unistd.h>
-#endif
-
-#include "db_int.h"
-#include "db_page.h"
-#include "shqueue.h"
-#include "db_shash.h"
-#include "mp.h"
-#include "db_auto.h"
-#include "db_ext.h"
-#include "common_ext.h"
-
-static void __memp_pbh __P((DB_MPOOL *, BH *, size_t *, FILE *));
-
-/*
- * memp_stat --
- *	Return MPOOL statistics.
- *
- *	dbmp	   pool handle.
- *	gspp	   if non-NULL, receives an allocated copy of the global
- *		   statistics.
- *	fspp	   if non-NULL, receives an allocated, NULL-terminated array
- *		   of per-file statistics (left NULL if there are no files).
- *	db_malloc  allocator handed to __os_malloc() for returned memory.
- *
- *	Returns 0 on success or an allocation error.  If a per-file entry
- *	can't be allocated, the array in *fspp is still NULL-terminated
- *	after the entries that were built.
- */
-int
-memp_stat(dbmp, gspp, fspp, db_malloc)
-	DB_MPOOL *dbmp;
-	DB_MPOOL_STAT **gspp;
-	DB_MPOOL_FSTAT ***fspp;
-	void *(*db_malloc) __P((size_t));
-{
-	DB_MPOOL_FSTAT **tfsp;
-	MPOOLFILE *mfp;
-	size_t len, nfiles, nlen;
-	int ret;
-	char *name;
-
-	MP_PANIC_CHECK(dbmp);
-
-	/* Allocate space for the global statistics. */
-	if (gspp != NULL) {
-		*gspp = NULL;
-
-		if ((ret = __os_malloc(sizeof(**gspp), db_malloc, gspp)) != 0)
-			return (ret);
-
-		LOCKREGION(dbmp);
-
-		/* Copy out the global statistics. */
-		**gspp = dbmp->mp->stat;
-		(*gspp)->st_hash_buckets = dbmp->mp->htab_buckets;
-		(*gspp)->st_region_wait =
-		    dbmp->mp->rlayout.lock.mutex_set_wait;
-		(*gspp)->st_region_nowait =
-		    dbmp->mp->rlayout.lock.mutex_set_nowait;
-		(*gspp)->st_refcnt = dbmp->mp->rlayout.refcnt;
-		(*gspp)->st_regsize = dbmp->mp->rlayout.size;
-
-		UNLOCKREGION(dbmp);
-	}
-
-	if (fspp != NULL) {
-		*fspp = NULL;
-
-		LOCKREGION(dbmp);
-
-		/* Count the MPOOLFILE structures. */
-		for (nfiles = 0,
-		    mfp = SH_TAILQ_FIRST(&dbmp->mp->mpfq, __mpoolfile);
-		    mfp != NULL;
-		    ++nfiles, mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile))
-			;
-
-		UNLOCKREGION(dbmp);
-
-		if (nfiles == 0)
-			return (0);
-
-		/* Allocate space for the pointers, plus a NULL terminator. */
-		len = (nfiles + 1) * sizeof(DB_MPOOL_FSTAT *);
-		if ((ret = __os_malloc(len, db_malloc, fspp)) != 0)
-			return (ret);
-
-		LOCKREGION(dbmp);
-
-		/*
-		 * Build each individual entry.  The region lock was dropped
-		 * while the pointer array was allocated, so files may have
-		 * been added since the count was taken: never fill in more
-		 * entries than the array has room for.
-		 */
-		for (tfsp = *fspp,
-		    mfp = SH_TAILQ_FIRST(&dbmp->mp->mpfq, __mpoolfile);
-		    mfp != NULL && nfiles > 0;
-		    ++tfsp, --nfiles,
-		    mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile)) {
-			name = __memp_fns(dbmp, mfp);
-			nlen = strlen(name);
-			len = sizeof(DB_MPOOL_FSTAT) + nlen + 1;
-			if ((ret = __os_malloc(len, db_malloc, tfsp)) != 0) {
-				/*
-				 * Terminate the array at the failed slot and
-				 * release the region lock before returning.
-				 */
-				*tfsp = NULL;
-				UNLOCKREGION(dbmp);
-				return (ret);
-			}
-			**tfsp = mfp->stat;
-
-			/* The name is stored right after the structure. */
-			(*tfsp)->file_name = (char *)
-			    (u_int8_t *)*tfsp + sizeof(DB_MPOOL_FSTAT);
-			memcpy((*tfsp)->file_name, name, nlen + 1);
-		}
-		*tfsp = NULL;
-
-		UNLOCKREGION(dbmp);
-	}
-	return (0);
-}
-
-/*
- * __memp_fn --
- *	Return a printable name for the file behind a per-process handle;
- *	used when reporting errors.
- *
- * PUBLIC: char * __memp_fn __P((DB_MPOOLFILE *));
- */
-char *
-__memp_fn(dbmfp)
-	DB_MPOOLFILE *dbmfp;
-{
-	DB_MPOOL *pool;
-	MPOOLFILE *shared;
-
-	/* Resolve the handle to its pool and shared file, then delegate. */
-	pool = dbmfp->dbmp;
-	shared = dbmfp->mfp;
-
-	return (__memp_fns(pool, shared));
-}
-
-/*
- * __memp_fns --
- *	Return a printable name for a shared MPOOLFILE: its path stored in
- *	the region, or "temporary" when no path was recorded.
- *
- * PUBLIC: char * __memp_fns __P((DB_MPOOL *, MPOOLFILE *));
- *
- */
-char *
-__memp_fns(dbmp, mfp)
-	DB_MPOOL *dbmp;
-	MPOOLFILE *mfp;
-{
-	/* A zero path offset means no path was ever stored for the file. */
-	return (mfp->path_off == 0 ?
-	    (char *)"temporary" : (char *)R_ADDR(dbmp, mfp->path_off));
-}
-
-#define	FMAP_ENTRIES	200			/* Files we map. */
-
-#define	MPOOL_DUMP_HASH	0x01			/* Debug hash chains. */
-#define	MPOOL_DUMP_LRU	0x02			/* Debug LRU chains. */
-#define	MPOOL_DUMP_MEM	0x04			/* Debug region memory. */
-#define	MPOOL_DUMP_ALL	0x07			/* Debug all. */
-
-
-/*
- * __memp_dump_region --
- *	Display MPOOL structures.
- *
- *	dbmp	pool handle.
- *	area	string selecting what to dump: 'A' everything, 'h' the
- *		buffer hash chains, 'l' the LRU chain, 'm' region memory.
- *		Other characters are ignored.
- *	fp	output stream; stderr is used if NULL.
- *
- * PUBLIC: void __memp_dump_region __P((DB_MPOOL *, char *, FILE *));
- */
-void
-__memp_dump_region(dbmp, area, fp)
-	DB_MPOOL *dbmp;
-	char *area;
-	FILE *fp;
-{
-	BH *bhp;
-	DB_HASHTAB *htabp;
-	DB_MPOOLFILE *dbmfp;
-	MPOOL *mp;
-	MPOOLFILE *mfp;
-	size_t bucket, fmap[FMAP_ENTRIES + 1];
-	u_int32_t flags;
-	int cnt;
-
-	/* Make it easy to call from the debugger. */
-	if (fp == NULL)
-		fp = stderr;
-
-	/* Translate the area letters into dump flags. */
-	for (flags = 0; *area != '\0'; ++area)
-		switch (*area) {
-		case 'A':
-			LF_SET(MPOOL_DUMP_ALL);
-			break;
-		case 'h':
-			LF_SET(MPOOL_DUMP_HASH);
-			break;
-		case 'l':
-			LF_SET(MPOOL_DUMP_LRU);
-			break;
-		case 'm':
-			LF_SET(MPOOL_DUMP_MEM);
-			break;
-		}
-
-	LOCKREGION(dbmp);
-
-	mp = dbmp->mp;
-
-	/* Display MPOOL structures. */
-	(void)fprintf(fp, "%s\nPool (region addr 0x%lx, alloc addr 0x%lx)\n",
-	    DB_LINE, (u_long)dbmp->reginfo.addr, (u_long)dbmp->addr);
-
-	/*
-	 * Display the MPOOLFILE structures, recording each one's region
-	 * offset in fmap so buffers can be printed with a file number.
-	 */
-	cnt = 0;
-	for (mfp = SH_TAILQ_FIRST(&dbmp->mp->mpfq, __mpoolfile);
-	    mfp != NULL; mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile), ++cnt) {
-		(void)fprintf(fp, "file #%d: %s: refs %lu, type %ld, %s\n",
-		    cnt + 1, __memp_fns(dbmp, mfp), (u_long)mfp->ref,
-		    (long)mfp->ftype,
-		    F_ISSET(mfp, MP_CAN_MMAP) ? "mmap" : "read/write");
-		if (cnt < FMAP_ENTRIES)
-			fmap[cnt] = R_OFFSET(dbmp, mfp);
-	}
-
-	/*
-	 * Display the per-process handles.  Each handle maps to the shared
-	 * MPOOLFILE it references; mfp is NULL once the loop above has
-	 * finished, so the offset must come from the handle itself.
-	 */
-	for (dbmfp = TAILQ_FIRST(&dbmp->dbmfq);
-	    dbmfp != NULL; dbmfp = TAILQ_NEXT(dbmfp, q), ++cnt) {
-		(void)fprintf(fp, "file #%d: %s: fd: %d: per-process, %s\n",
-		    cnt + 1, __memp_fn(dbmfp), dbmfp->fd,
-		    F_ISSET(dbmfp, MP_READONLY) ? "readonly" : "read/write");
-		if (cnt < FMAP_ENTRIES)
-			fmap[cnt] = R_OFFSET(dbmp, dbmfp->mfp);
-	}
-
-	/* Terminate the map; the extra slot holds the mark when it's full. */
-	if (cnt < FMAP_ENTRIES)
-		fmap[cnt] = INVALID;
-	else
-		fmap[FMAP_ENTRIES] = INVALID;
-
-	/* Display the hash table list of BH's. */
-	if (LF_ISSET(MPOOL_DUMP_HASH)) {
-		(void)fprintf(fp,
-	    "%s\nBH hash table (%lu hash slots)\npageno, file, ref, address\n",
-		    DB_LINE, (u_long)mp->htab_buckets);
-		for (htabp = dbmp->htab,
-		    bucket = 0; bucket < mp->htab_buckets; ++htabp, ++bucket) {
-			if (SH_TAILQ_FIRST(&dbmp->htab[bucket], __bh) != NULL)
-				(void)fprintf(fp, "%lu:\n", (u_long)bucket);
-			for (bhp = SH_TAILQ_FIRST(&dbmp->htab[bucket], __bh);
-			    bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, hq, __bh))
-				__memp_pbh(dbmp, bhp, fmap, fp);
-		}
-	}
-
-	/* Display the LRU list of BH's. */
-	if (LF_ISSET(MPOOL_DUMP_LRU)) {
-		(void)fprintf(fp, "%s\nBH LRU list\n", DB_LINE);
-		(void)fprintf(fp, "pageno, file, ref, address\n");
-		for (bhp = SH_TAILQ_FIRST(&dbmp->mp->bhq, __bh);
-		    bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, q, __bh))
-			__memp_pbh(dbmp, bhp, fmap, fp);
-	}
-
-	if (LF_ISSET(MPOOL_DUMP_MEM))
-		__db_shalloc_dump(dbmp->addr, fp);
-
-	UNLOCKREGION(dbmp);
-
-	/* Flush in case we're debugging. */
-	(void)fflush(fp);
-}
-
-/*
- * __memp_pbh --
- *	Display a BH structure: page number, owning file (as a file number
- *	when it appears in fmap, otherwise as a raw region offset),
- *	reference count, region offset and flags.
- */
-static void
-__memp_pbh(dbmp, bhp, fmap, fp)
-	DB_MPOOL *dbmp;
-	BH *bhp;
-	size_t *fmap;
-	FILE *fp;
-{
-	static const FN fn[] = {
-		{ BH_CALLPGIN,	"callpgin" },
-		{ BH_DIRTY,	"dirty" },
-		{ BH_DISCARD,	"discard" },
-		{ BH_LOCKED,	"locked" },
-		{ BH_TRASH,	"trash" },
-		{ BH_WRITE,	"write" },
-		{ 0 },
-	};
-	int slot;
-
-	/*
-	 * Scan the file map for this buffer's file, stopping at the
-	 * terminating INVALID entry or after FMAP_ENTRIES slots.
-	 */
-	slot = 0;
-	while (slot < FMAP_ENTRIES &&
-	    fmap[slot] != INVALID && fmap[slot] != bhp->mf_offset)
-		++slot;
-
-	if (fmap[slot] != INVALID)
-		/* Known file: print its 1-based number. */
-		(void)fprintf(fp, "  %4lu,   #%d,  %2lu, %lu",
-		    (u_long)bhp->pgno, slot + 1,
-		    (u_long)bhp->ref, (u_long)R_OFFSET(dbmp, bhp));
-	else
-		/* Unknown file: fall back to the MPOOLFILE offset. */
-		(void)fprintf(fp, "  %4lu, %lu, %2lu, %lu",
-		    (u_long)bhp->pgno, (u_long)bhp->mf_offset,
-		    (u_long)bhp->ref, (u_long)R_OFFSET(dbmp, bhp));
-
-	__db_prflags(bhp->flags, fn, fp);
-
-	(void)fprintf(fp, "\n");
-}
diff --git a/db2/mp/mp_region.c b/db2/mp/mp_region.c
deleted file mode 100644
index b9c92f2e13..0000000000
--- a/db2/mp/mp_region.c
+++ /dev/null
@@ -1,330 +0,0 @@
-/*-
- * See the file LICENSE for redistribution information.
- *
- * Copyright (c) 1996, 1997, 1998
- *	Sleepycat Software.  All rights reserved.
- */
-#include "config.h"
-
-#ifndef lint
-static const char sccsid[] = "@(#)mp_region.c	10.35 (Sleepycat) 12/11/98";
-#endif /* not lint */
-
-#ifndef NO_SYSTEM_INCLUDES
-#include <sys/types.h>
-
-#include <errno.h>
-#include <string.h>
-#endif
-
-#include "db_int.h"
-#include "shqueue.h"
-#include "db_shash.h"
-#include "mp.h"
-#include "common_ext.h"
-
-/*
- * __memp_reg_alloc --
- *	Lock the region, call __memp_alloc, unlock, and return its result.
- *
- * PUBLIC: int __memp_reg_alloc __P((DB_MPOOL *, size_t, size_t *, void *));
- */
-int
-__memp_reg_alloc(dbmp, len, offsetp, retp)
-	DB_MPOOL *dbmp;
-	size_t len, *offsetp;
-	void *retp;
-{
-	int ret;
-
-	LOCKREGION(dbmp);
-	ret = __memp_alloc(dbmp, len, offsetp, retp);
-	UNLOCKREGION(dbmp);
-	return (ret);
-}
-
-/*
- * __memp_alloc --
- *	Allocate space in the mpool region, evicting buffers if necessary.
- *
- * PUBLIC: int __memp_alloc __P((DB_MPOOL *, size_t, size_t *, void *));
- */
-int
-__memp_alloc(dbmp, len, offsetp, retp)
-	DB_MPOOL *dbmp;
-	size_t len, *offsetp;
-	void *retp;
-{
-	BH *bhp, *nbhp;
-	MPOOL *mp;
-	MPOOLFILE *mfp;
-	size_t fsize, total;
-	int nomore, restart, ret, wrote;
-	void *p;
-
-	mp = dbmp->mp;
-
-	nomore = 0;
-alloc:	if ((ret = __db_shalloc(dbmp->addr, len, MUTEX_ALIGNMENT, &p)) == 0) {
-		if (offsetp != NULL)
-			*offsetp = R_OFFSET(dbmp, p);
-		*(void **)retp = p;
-		return (0);
-	}
-	if (nomore) {
-		__db_err(dbmp->dbenv,
-	    "Unable to allocate %lu bytes from mpool shared region: %s\n",
-		    (u_long)len, strerror(ret));
-		return (ret);
-	}
-
-	/* Look for a buffer on the free list that's the right size. */
-	for (bhp =
-	    SH_TAILQ_FIRST(&mp->bhfq, __bh); bhp != NULL; bhp = nbhp) {
-		nbhp = SH_TAILQ_NEXT(bhp, q, __bh);
-
-		if (__db_shsizeof(bhp) == len) {
-			SH_TAILQ_REMOVE(&mp->bhfq, bhp, q, __bh);
-			if (offsetp != NULL)
-				*offsetp = R_OFFSET(dbmp, bhp);
-			*(void **)retp = bhp;
-			return (0);
-		}
-	}
-
-	/* Discard from the free list until we've freed enough memory. */
-	total = 0;
-	for (bhp =
-	    SH_TAILQ_FIRST(&mp->bhfq, __bh); bhp != NULL; bhp = nbhp) {
-		nbhp = SH_TAILQ_NEXT(bhp, q, __bh);
-
-		SH_TAILQ_REMOVE(&mp->bhfq, bhp, q, __bh);
-		__db_shalloc_free(dbmp->addr, bhp);
-		--mp->stat.st_page_clean;
-
-		/*
-		 * Retry as soon as we've freed up sufficient space, or enough
-		 * that coalescing is likely to succeed.  NOTE(review): fsize
-		 * is read after bhp was freed -- confirm that this is safe.
-		 */
-		total += fsize = __db_shsizeof(bhp);
-		if (fsize >= len || total >= 3 * len)
-			goto alloc;
-	}
-
-retry:	/* Find a buffer we can flush; pure LRU. */
-	restart = total = 0;
-	for (bhp =
-	    SH_TAILQ_FIRST(&mp->bhq, __bh); bhp != NULL; bhp = nbhp) {
-		nbhp = SH_TAILQ_NEXT(bhp, q, __bh);
-
-		/* Ignore pinned or locked (I/O in progress) buffers. */
-		if (bhp->ref != 0 || F_ISSET(bhp, BH_LOCKED))
-			continue;
-
-		/* Find the associated MPOOLFILE. */
-		mfp = R_ADDR(dbmp, bhp->mf_offset);
-
-		/*
-		 * Write the page if it's dirty.
-		 *
-		 * If we wrote the page, fall through and free the buffer.  We
-		 * don't have to rewalk the list to acquire the buffer because
-		 * it was never available for any other process to modify it.
-		 * If we didn't write the page, but we discarded and reacquired
-		 * the region lock, restart the buffer list walk.  If we neither
-		 * wrote the buffer nor discarded the region lock, continue down
-		 * the buffer list.
-		 */
-		if (F_ISSET(bhp, BH_DIRTY)) {
-			if ((ret = __memp_bhwrite(dbmp,
-			    mfp, bhp, &restart, &wrote)) != 0)
-				return (ret);
-
-			/*
-			 * It's possible that another process wants this buffer
-			 * and incremented the ref count while we were writing
-			 * it.
-			 */
-			if (bhp->ref != 0)
-				goto retry;
-
-			if (wrote)
-				++mp->stat.st_rw_evict;
-			else {
-				if (restart)
-					goto retry;
-				continue;
-			}
-		} else
-			++mp->stat.st_ro_evict;
-
-		/*
-		 * Check to see if the buffer is the size we're looking for.
-		 * If it is, simply reuse it.
-		 */
-		total += fsize = __db_shsizeof(bhp);
-		if (fsize == len) {
-			__memp_bhfree(dbmp, mfp, bhp, 0);
-
-			if (offsetp != NULL)
-				*offsetp = R_OFFSET(dbmp, bhp);
-			*(void **)retp = bhp;
-			return (0);
-		}
-
-		/* Free the buffer. */
-		__memp_bhfree(dbmp, mfp, bhp, 1);
-
-		/*
-		 * Retry as soon as we've freed up sufficient space.  If we
-		 * will have to coalesce memory to satisfy the request, don't
-		 * try until it's likely (possible?) that we'll succeed.
-		 */
-		if (fsize >= len || total >= 3 * len)
-			goto alloc;
-
-		/* Restart the walk if we discarded the region lock. */
-		if (restart)
-			goto retry;
-	}
-	nomore = 1;
-	goto alloc;
-}
-
-/*
- * __memp_ropen --
- *	Attach to, and optionally create, the mpool region.
- *
- * PUBLIC: int __memp_ropen
- * PUBLIC:    __P((DB_MPOOL *, const char *, size_t, int, int, u_int32_t));
- */
-int
-__memp_ropen(dbmp, path, cachesize, mode, is_private, flags)
-	DB_MPOOL *dbmp;
-	const char *path;
-	size_t cachesize;
-	int mode, is_private;
-	u_int32_t flags;
-{
-	MPOOL *mp;
-	size_t rlen;
-	int defcache, ret;
-
-	/*
-	 * Unlike other DB subsystems, mpool can't simply grow the region
-	 * because it returns pointers into the region to its clients.  To
-	 * "grow" the region, we'd have to allocate a new region and then
-	 * store a region number in the structures that reference regional
-	 * objects.  It's reasonable that we fail regardless, as clients
-	 * shouldn't have every page in the region pinned, so the only
-	 * "failure" mode should be a performance penalty because we don't
-	 * find a page in the cache that we'd like to have found.
-	 *
-	 * Clamp cachesize (0 selects the default), then add 25% for overhead.
-	 */
-	defcache = 0;
-	if (cachesize < DB_CACHESIZE_MIN) {
-		if (cachesize == 0) {
-			defcache = 1;
-			cachesize = DB_CACHESIZE_DEF;
-		} else
-			cachesize = DB_CACHESIZE_MIN;
-	}
-	rlen = cachesize + cachesize / 4;
-
-	/*
-	 * Map in the region.
-	 *
-	 * If it's a private mpool, use malloc, it's a lot faster than
-	 * instantiating a region.
-	 */
-	dbmp->reginfo.dbenv = dbmp->dbenv;
-	dbmp->reginfo.appname = DB_APP_NONE;
-	if (path == NULL)
-		dbmp->reginfo.path = NULL;
-	else
-		if ((ret = __os_strdup(path, &dbmp->reginfo.path)) != 0)
-			return (ret);
-	dbmp->reginfo.file = DB_DEFAULT_MPOOL_FILE;
-	dbmp->reginfo.mode = mode;
-	dbmp->reginfo.size = rlen;
-	dbmp->reginfo.dbflags = flags;
-	dbmp->reginfo.flags = 0;
-	if (defcache)
-		F_SET(&dbmp->reginfo, REGION_SIZEDEF);
-
-	/*
-	 * If we're creating a temporary region, don't use any standard
-	 * naming.
-	 */
-	if (is_private) {
-		dbmp->reginfo.appname = DB_APP_TMP;
-		dbmp->reginfo.file = NULL;
-		F_SET(&dbmp->reginfo, REGION_PRIVATE);
-	}
-
-	if ((ret = __db_rattach(&dbmp->reginfo)) != 0) {
-		if (dbmp->reginfo.path != NULL)
-			__os_freestr(dbmp->reginfo.path);
-		return (ret);
-	}
-
-	/*
-	 * The MPOOL structure is first in the region, the rest of the region
-	 * is free space.
-	 */
-	dbmp->mp = dbmp->reginfo.addr;
-	dbmp->addr = (u_int8_t *)dbmp->mp + sizeof(MPOOL);
-
-	/* Initialize a created region. */
-	if (F_ISSET(&dbmp->reginfo, REGION_CREATED)) {
-		mp = dbmp->mp;
-		SH_TAILQ_INIT(&mp->bhq);
-		SH_TAILQ_INIT(&mp->bhfq);
-		SH_TAILQ_INIT(&mp->mpfq);
-
-		__db_shalloc_init(dbmp->addr, rlen - sizeof(MPOOL));
-
-		/*
-		 * Assume we want to keep the hash chains with under 10 pages
-		 * on each chain.  We don't know the pagesize in advance, and
-		 * it may differ for different files.  Use a pagesize of 1K for
-		 * the calculation -- we walk these chains a lot, they should
-		 * be short.
-		 */
-		mp->htab_buckets =
-		    __db_tablesize((cachesize / (1 * 1024)) / 10);
-
-		/* Allocate hash table space and initialize it. */
-		if ((ret = __db_shalloc(dbmp->addr,
-		    mp->htab_buckets * sizeof(DB_HASHTAB),
-		    0, &dbmp->htab)) != 0)
-			goto err;
-		__db_hashinit(dbmp->htab, mp->htab_buckets);
-		mp->htab = R_OFFSET(dbmp, dbmp->htab);
-
-		ZERO_LSN(mp->lsn);
-		mp->lsn_cnt = 0;
-
-		memset(&mp->stat, 0, sizeof(mp->stat));
-		mp->stat.st_cachesize = cachesize;
-
-		mp->flags = 0;
-	}
-
-	/* Get the local hash table address. */
-	dbmp->htab = R_ADDR(dbmp, dbmp->mp->htab);
-
-	UNLOCKREGION(dbmp);
-	return (0);
-
-err:	UNLOCKREGION(dbmp);
-	(void)__db_rdetach(&dbmp->reginfo);
-	if (F_ISSET(&dbmp->reginfo, REGION_CREATED))
-		(void)memp_unlink(path, 1, dbmp->dbenv);
-
-	if (dbmp->reginfo.path != NULL)
-		__os_freestr(dbmp->reginfo.path);
-	return (ret);
-}
diff --git a/db2/mp/mp_sync.c b/db2/mp/mp_sync.c
deleted file mode 100644
index 535348517c..0000000000
--- a/db2/mp/mp_sync.c
+++ /dev/null
@@ -1,549 +0,0 @@
-/*-
- * See the file LICENSE for redistribution information.
- *
- * Copyright (c) 1996, 1997, 1998
- *	Sleepycat Software.  All rights reserved.
- */
-#include "config.h"
-
-#ifndef lint
-static const char sccsid[] = "@(#)mp_sync.c	10.31 (Sleepycat) 12/11/98";
-#endif /* not lint */
-
-#ifndef NO_SYSTEM_INCLUDES
-#include <sys/types.h>
-
-#include <errno.h>
-#include <stdlib.h>
-#endif
-
-#include "db_int.h"
-#include "shqueue.h"
-#include "db_shash.h"
-#include "mp.h"
-#include "common_ext.h"
-
-static int __bhcmp __P((const void *, const void *));
-static int __memp_fsync __P((DB_MPOOLFILE *));
-
-/*
- * memp_sync --
- *	Mpool sync function: returns 0, DB_INCOMPLETE or an error number.
- */
-int
-memp_sync(dbmp, lsnp)
-	DB_MPOOL *dbmp;
-	DB_LSN *lsnp;
-{
-	BH *bhp, **bharray;
-	DB_ENV *dbenv;
-	MPOOL *mp;
-	MPOOLFILE *mfp;
-	int ar_cnt, nalloc, next, maxpin, ret, wrote;
-
-	MP_PANIC_CHECK(dbmp);
-
-	dbenv = dbmp->dbenv;
-	mp = dbmp->mp;
-
-	if (dbenv->lg_info == NULL) {
-		__db_err(dbenv, "memp_sync: requires logging");
-		return (EINVAL);
-	}
-
-	/*
-	 * We try and write the buffers in page order: it should reduce seeks
-	 * by the underlying filesystem and possibly reduce the actual number
-	 * of writes.  We don't want to hold the region lock while we write
-	 * the buffers, so only hold the lock while we create a list.  Get a
-	 * good-size block of memory to hold buffer pointers, we don't want
-	 * to run out.
-	 */
-	LOCKREGION(dbmp);
-	nalloc = mp->stat.st_page_dirty + mp->stat.st_page_dirty / 2 + 10;
-	UNLOCKREGION(dbmp);
-
-	if ((ret = __os_malloc(nalloc * sizeof(BH *), NULL, &bharray)) != 0)
-		return (ret);
-
-	LOCKREGION(dbmp);
-
-	/*
-	 * If the application is asking about a previous call to memp_sync(),
-	 * and we haven't found any buffers that the application holding the
-	 * pin couldn't write, return yes or no based on the current count.
-	 * Note, if the application is asking about a LSN *smaller* than one
-	 * we've already handled or are currently handling, then we return a
-	 * result based on the count for the larger LSN.
-	 */
-	if (!F_ISSET(mp, MP_LSN_RETRY) && log_compare(lsnp, &mp->lsn) <= 0) {
-		if (mp->lsn_cnt == 0) {
-			*lsnp = mp->lsn;
-			ret = 0;
-		} else
-			ret = DB_INCOMPLETE;
-		goto done;
-	}
-
-	/* Else, it's a new checkpoint. */
-	F_CLR(mp, MP_LSN_RETRY);
-
-	/*
-	 * Save the LSN.  We know that it's a new LSN or larger than the one
-	 * for which we were already doing a checkpoint.  (BTW, I don't expect
-	 * to see multiple LSN's from the same or multiple processes, but You
-	 * Just Never Know.  Responding as if they all called with the largest
-	 * of the LSNs specified makes everything work.)
-	 *
-	 * We don't currently use the LSN we save.  We could potentially save
-	 * the last-written LSN in each buffer header and use it to determine
-	 * what buffers need to be written.  The problem with this is that it's
-	 * sizeof(LSN) more bytes of buffer header.  We currently write all the
-	 * dirty buffers instead.
-	 *
-	 * Walk the list of MPOOLFILEs, clearing each file's count of
-	 * buffers waiting to be written.
-	 */
-	mp->lsn = *lsnp;
-	mp->lsn_cnt = 0;
-	for (mfp = SH_TAILQ_FIRST(&dbmp->mp->mpfq, __mpoolfile);
-	    mfp != NULL; mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile))
-		mfp->lsn_cnt = 0;
-
-	/*
-	 * Walk the list of buffers and mark all dirty buffers to be written
-	 * and all pinned buffers to be potentially written (we can't know if
-	 * we'll need to write them until the holding process returns them to
-	 * the cache).  We do this in one pass while holding the region locked
-	 * so that processes can't make new buffers dirty, causing us to never
-	 * finish.  Since the application may have restarted the sync, clear
-	 * any BH_WRITE flags that appear to be left over from previous calls.
-	 *
-	 * We don't want to pin down the entire buffer cache, otherwise we'll
-	 * starve threads needing new pages.  Don't pin down more than 80% of
-	 * the cache.
-	 *
-	 * Keep a count of the total number of buffers we need to write in
-	 * MPOOL->lsn_cnt, and for each file, in MPOOLFILE->lsn_cnt.
-	 */
-	ar_cnt = 0;
-	maxpin = ((mp->stat.st_page_dirty + mp->stat.st_page_clean) * 8) / 10;
-	for (bhp = SH_TAILQ_FIRST(&mp->bhq, __bh);
-	    bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, q, __bh))
-		if (F_ISSET(bhp, BH_DIRTY) || bhp->ref != 0) {
-			F_SET(bhp, BH_WRITE);
-
-			++mp->lsn_cnt;
-
-			mfp = R_ADDR(dbmp, bhp->mf_offset);
-			++mfp->lsn_cnt;
-
-			/*
-			 * If the buffer isn't in use, we should be able to
-			 * write it immediately, so increment the reference
-			 * count to lock it and its contents down, and then
-			 * save a reference to it.
-			 *
-			 * If we've run out of space to store buffer
-			 * references, we can't continue.  We don't want to
-			 * realloc the array while holding a region lock, so
-			 * we set the flag to force the checkpoint to be done
-			 * again, from scratch, later.
-			 *
-			 * If we've pinned down too much of the cache stop, and
-			 * set a flag to force the checkpoint to be tried again
-			 * later.
-			 */
-			if (bhp->ref == 0) {
-				++bhp->ref;
-				bharray[ar_cnt] = bhp;
-				if (++ar_cnt >= nalloc || ar_cnt >= maxpin) {
-					F_SET(mp, MP_LSN_RETRY);
-					break;
-				}
-			}
-		} else
-			if (F_ISSET(bhp, BH_WRITE))
-				F_CLR(bhp, BH_WRITE);
-
-	/* If there are no buffers we can write immediately, we're done. */
-	if (ar_cnt == 0) {
-		ret = mp->lsn_cnt ? DB_INCOMPLETE : 0;
-		goto done;
-	}
-
-	UNLOCKREGION(dbmp);
-
-	/* Sort the buffers we're going to write. */
-	qsort(bharray, ar_cnt, sizeof(BH *), __bhcmp);
-
-	LOCKREGION(dbmp);
-
-	/* Walk the array, writing buffers. */
-	for (next = 0; next < ar_cnt; ++next) {
-		/*
-		 * It's possible for a thread to have gotten the buffer since
-		 * we listed it for writing.  If the reference count is still
-		 * 1, we're the only ones using the buffer, go ahead and write.
-		 * If it's >1, then skip the buffer and assume that it will be
-		 * written when it's returned to the cache.
-		 */
-		if (bharray[next]->ref > 1) {
-			--bharray[next]->ref;
-			continue;
-		}
-
-		/* Write the buffer. */
-		mfp = R_ADDR(dbmp, bharray[next]->mf_offset);
-		ret = __memp_bhwrite(dbmp, mfp, bharray[next], NULL, &wrote);
-
-		/* Release the buffer. */
-		--bharray[next]->ref;
-
-		/* If there's an error, release the rest of the buffers. */
-		if (ret != 0 || !wrote) {
-			/*
-			 * Any process syncing the shared memory buffer pool
-			 * had better be able to write to any underlying file.
-			 * Be understanding, but firm, on this point.
-			 */
-			if (ret == 0) {
-				__db_err(dbenv, "%s: unable to flush page: %lu",
-				    __memp_fns(dbmp, mfp),
-				    (u_long)bharray[next]->pgno);
-				ret = EPERM;
-			}
-
-			while (++next < ar_cnt)
-				--bharray[next]->ref;
-			goto err;
-		}
-	}
-	ret = mp->lsn_cnt != 0 ||
-	    F_ISSET(mp, MP_LSN_RETRY) ? DB_INCOMPLETE : 0;
-
-done:
-	if (0) {
-err:		/*
-		 * On error, clear:
-		 *	MPOOL->lsn_cnt (the total sync count)
-		 *	MPOOLFILE->lsn_cnt (the per-file sync count)
-		 *	BH_WRITE flag (the scheduled for writing flag)
-		 */
-		mp->lsn_cnt = 0;
-		for (mfp = SH_TAILQ_FIRST(&dbmp->mp->mpfq, __mpoolfile);
-		    mfp != NULL; mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile))
-			mfp->lsn_cnt = 0;
-		for (bhp = SH_TAILQ_FIRST(&mp->bhq, __bh);
-		    bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, q, __bh))
-			F_CLR(bhp, BH_WRITE);
-	}
-	UNLOCKREGION(dbmp);
-	__os_free(bharray, nalloc * sizeof(BH *));
-	return (ret);
-}
-
-/*
- * memp_fsync --
- *	Mpool file sync function; a no-op for read-only handles and temp files.
- */
-int
-memp_fsync(dbmfp)
-	DB_MPOOLFILE *dbmfp;
-{
-	DB_MPOOL *dbmp;
-	int is_tmp;
-
-	dbmp = dbmfp->dbmp;
-
-	MP_PANIC_CHECK(dbmp);
-
-	/*
-	 * If this handle doesn't have a file descriptor that's open for
-	 * writing, or if the file is a temporary, there's no reason to
-	 * proceed further.
-	 */
-	if (F_ISSET(dbmfp, MP_READONLY))
-		return (0);
-
-	LOCKREGION(dbmp);
-	is_tmp = F_ISSET(dbmfp->mfp, MP_TEMP);
-	UNLOCKREGION(dbmp);
-	if (is_tmp)
-		return (0);
-
-	return (__memp_fsync(dbmfp));
-}
-
-/*
- * __mp_xxx_fd --
- *	Return a file descriptor for DB 1.85 compatibility locking.
- *
- * PUBLIC: int __mp_xxx_fd __P((DB_MPOOLFILE *, int *));
- */
-int
-__mp_xxx_fd(dbmfp, fdp)
-	DB_MPOOLFILE *dbmfp;
-	int *fdp;
-{
-	int ret;
-
-	/*
-	 * This is a truly spectacular layering violation, intended ONLY to
-	 * support compatibility for the DB 1.85 DB->fd call.
-	 *
-	 * If there's no descriptor yet, sync the file to disk to create it.
-	 *
-	 * We skip the MP_READONLY and MP_TEMP tests done by memp_fsync(3).
-	 * The MP_READONLY test isn't interesting because we will either
-	 * already have a file descriptor (we opened the database file for
-	 * reading) or we aren't readonly (we created the database which
-	 * requires write privileges).  The MP_TEMP test isn't interesting
-	 * because we want to write to the backing file regardless so that
-	 * we get a file descriptor to return.
-	 */
-	ret = dbmfp->fd == -1 ? __memp_fsync(dbmfp) : 0;
-
-	return ((*fdp = dbmfp->fd) == -1 ? ENOENT : ret);
-}
-
-/*
- * __memp_fsync --
- *	Mpool file internal sync: write this file's dirty buffers, then fsync.
- */
-static int
-__memp_fsync(dbmfp)
-	DB_MPOOLFILE *dbmfp;
-{
-	BH *bhp, **bharray;
-	DB_MPOOL *dbmp;
-	MPOOL *mp;
-	size_t mf_offset;
-	int ar_cnt, incomplete, nalloc, next, ret, wrote;
-
-	ret = 0;
-	dbmp = dbmfp->dbmp;
-	mp = dbmp->mp;
-	mf_offset = R_OFFSET(dbmp, dbmfp->mfp);
-
-	/*
-	 * We try and write the buffers in page order: it should reduce seeks
-	 * by the underlying filesystem and possibly reduce the actual number
-	 * of writes.  We don't want to hold the region lock while we write
-	 * the buffers, so only hold the lock while we create a list.  Get a
-	 * good-size block of memory to hold buffer pointers, we don't want
-	 * to run out.
-	 */
-	LOCKREGION(dbmp);
-	nalloc = mp->stat.st_page_dirty + mp->stat.st_page_dirty / 2 + 10;
-	UNLOCKREGION(dbmp);
-
-	if ((ret = __os_malloc(nalloc * sizeof(BH *), NULL, &bharray)) != 0)
-		return (ret);
-
-	LOCKREGION(dbmp);
-
-	/*
-	 * Walk the LRU list of buffer headers, and get a list of buffers to
-	 * write for this MPOOLFILE.
-	 */
-	ar_cnt = incomplete = 0;
-	for (bhp = SH_TAILQ_FIRST(&mp->bhq, __bh);
-	    bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, q, __bh)) {
-		if (!F_ISSET(bhp, BH_DIRTY) || bhp->mf_offset != mf_offset)
-			continue;
-		if (bhp->ref != 0 || F_ISSET(bhp, BH_LOCKED)) {
-			incomplete = 1;
-			continue;
-		}
-
-		++bhp->ref;
-		bharray[ar_cnt] = bhp;
-
-		/*
-		 * If we've run out of space to store buffer references, we
-		 * must stop, as we don't want to realloc the array while
-		 * holding a region lock.  Set the incomplete flag -- the only
-		 * way we can get here is if the file is active in the buffer
-		 * cache, which is the same thing as finding pinned buffers.
-		 */
-		if (++ar_cnt >= nalloc) {
-			incomplete = 1;
-			break;
-		}
-	}
-
-	UNLOCKREGION(dbmp);
-
-	/* Sort the buffers we're going to write. */
-	if (ar_cnt != 0)
-		qsort(bharray, ar_cnt, sizeof(BH *), __bhcmp);
-
-	LOCKREGION(dbmp);
-
-	/* Walk the array, writing buffers. */
-	for (next = 0; next < ar_cnt; ++next) {
-		/*
-		 * It's possible for a thread to have gotten the buffer since
-		 * we listed it for writing.  If the reference count is still
-		 * 1, we're the only ones using the buffer, go ahead and write.
-		 * If it's >1, then skip the buffer.
-		 */
-		if (bharray[next]->ref > 1) {
-			incomplete = 1;
-
-			--bharray[next]->ref;
-			continue;
-		}
-
-		/* Write the buffer. */
-		ret = __memp_pgwrite(dbmfp, bharray[next], NULL, &wrote);
-
-		/* Release the buffer. */
-		--bharray[next]->ref;
-
-		/* If there's an error, release the rest of the buffers. */
-		if (ret != 0) {
-			while (++next < ar_cnt)
-				--bharray[next]->ref;
-			goto err;
-		}
-
-		/*
-		 * If we didn't write the buffer for some reason, don't return
-		 * success.
-		 */
-		if (!wrote)
-			incomplete = 1;
-	}
-
-err:	UNLOCKREGION(dbmp);
-
-	__os_free(bharray, nalloc * sizeof(BH *));
-
-	/*
-	 * Sync the underlying file as the last thing we do, so that the OS
-	 * has maximal opportunity to flush buffers before we request it.
-	 *
-	 * XXX:
-	 * Don't lock the region around the sync, fsync(2) has no atomicity
-	 * issues.
-	 */
-	if (ret == 0)
-		return (incomplete ? DB_INCOMPLETE : __os_fsync(dbmfp->fd));
-	return (ret);
-}
-
-/*
- * memp_trickle --
- *	Write dirty buffers until pct percent of the buffers are clean.
- */
-int
-memp_trickle(dbmp, pct, nwrotep)
-	DB_MPOOL *dbmp;
-	int pct, *nwrotep;
-{
-	BH *bhp;
-	MPOOL *mp;
-	MPOOLFILE *mfp;
-	u_long total;
-	int ret, wrote;
-
-	MP_PANIC_CHECK(dbmp);
-
-	mp = dbmp->mp;
-	if (nwrotep != NULL)
-		*nwrotep = 0;
-
-	if (pct < 1 || pct > 100)
-		return (EINVAL);
-
-	LOCKREGION(dbmp);
-
-	/*
-	 * If there are sufficient clean buffers, or no buffers or no dirty
-	 * buffers, we're done.
-	 *
-	 * XXX
-	 * Using st_page_clean and st_page_dirty is our only choice at the
-	 * moment, but it's not as correct as we might like in the presence
-	 * of pools with more than one buffer size, as a free 512-byte buffer
-	 * isn't the same as a free 8K buffer.
-	 */
-loop:	total = mp->stat.st_page_clean + mp->stat.st_page_dirty;
-	if (total == 0 || mp->stat.st_page_dirty == 0 ||
-	    (mp->stat.st_page_clean * 100) / total >= (u_long)pct) {
-		UNLOCKREGION(dbmp);
-		return (0);
-	}
-
-	/* Loop until we write a buffer. */
-	for (bhp = SH_TAILQ_FIRST(&mp->bhq, __bh);
-	    bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, q, __bh)) {
-		if (bhp->ref != 0 ||
-		    !F_ISSET(bhp, BH_DIRTY) || F_ISSET(bhp, BH_LOCKED))
-			continue;
-
-		mfp = R_ADDR(dbmp, bhp->mf_offset);
-
-		/*
-		 * We can't write to temporary files -- see the comment in
-		 * mp_bh.c:__memp_bhwrite().
-		 */
-		if (F_ISSET(mfp, MP_TEMP))
-			continue;
-
-		if ((ret = __memp_bhwrite(dbmp, mfp, bhp, NULL, &wrote)) != 0)
-			goto err;
-
-		/*
-		 * Any process syncing the shared memory buffer pool had better
-		 * be able to write to any underlying file.  Be understanding,
-		 * but firm, on this point.
-		 */
-		if (!wrote) {
-			__db_err(dbmp->dbenv, "%s: unable to flush page: %lu",
-			    __memp_fns(dbmp, mfp), (u_long)bhp->pgno);
-			ret = EPERM;
-			goto err;
-		}
-
-		++mp->stat.st_page_trickle;
-		if (nwrotep != NULL)
-			++*nwrotep;
-		goto loop;
-	}
-
-	/* No more buffers to write. */
-	ret = 0;
-
-err:	UNLOCKREGION(dbmp);
-	return (ret);
-}
-
-static int
-__bhcmp(p1, p2)
-	const void *p1, *p2;
-{
-	BH *bhp1, *bhp2;
-
-	bhp1 = *(BH * const *)p1;
-	bhp2 = *(BH * const *)p2;
-
-	/* Sort by file (shared memory pool offset). */
-	if (bhp1->mf_offset < bhp2->mf_offset)
-		return (-1);
-	if (bhp1->mf_offset > bhp2->mf_offset)
-		return (1);
-
-	/*
-	 * Then sort by page number within a file.  !!! Equal keys return 0:
-	 * this defends against badly written quicksort code calling us with
-	 * two identical pointers (e.g., WATCOM C++ (Power++)).
-	 */
-	if (bhp1->pgno < bhp2->pgno)
-		return (-1);
-	if (bhp1->pgno > bhp2->pgno)
-		return (1);
-	return (0);
-}