summary refs log tree commit diff
path: root/db2/common
diff options
context:
space:
mode:
Diffstat (limited to 'db2/common')
-rw-r--r--db2/common/db_appinit.c787
-rw-r--r--db2/common/db_apprec.c143
-rw-r--r--db2/common/db_byteorder.c56
-rw-r--r--db2/common/db_err.c548
-rw-r--r--db2/common/db_log2.c68
-rw-r--r--db2/common/db_region.c565
-rw-r--r--db2/common/db_salloc.c290
-rw-r--r--db2/common/db_shash.c90
8 files changed, 2547 insertions, 0 deletions
diff --git a/db2/common/db_appinit.c b/db2/common/db_appinit.c
new file mode 100644
index 0000000000..01891c66a7
--- /dev/null
+++ b/db2/common/db_appinit.c
@@ -0,0 +1,787 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *	Sleepycat Software.  All rights reserved.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)db_appinit.c	10.27 (Sleepycat) 8/23/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/param.h>
+#include <sys/stat.h>
+
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#endif
+
+#include "db_int.h"
+#include "shqueue.h"
+#include "db_page.h"
+#include "btree.h"
+#include "hash.h"
+#include "log.h"
+#include "txn.h"
+#include "clib_ext.h"
+#include "common_ext.h"
+
+static int __db_home __P((DB_ENV *, const char *, int));
+static int __db_parse __P((DB_ENV *, char *));
+static int __db_tmp_dir __P((DB_ENV *, int));
+static int __db_tmp_open __P((DB_ENV *, char *, int *));
+
+/*
+ * db_version --
+ *	Return version information: store the major, minor and patch numbers
+ *	through whichever of the three pointers are non-NULL, and return the
+ *	version string.
+ */
+const char *
+db_version(majverp, minverp, patchp)
+	int *majverp, *minverp, *patchp;
+{
+	/* Every out-parameter is optional. */
+	if (majverp)
+		*majverp = DB_VERSION_MAJOR;
+	if (minverp)
+		*minverp = DB_VERSION_MINOR;
+	if (patchp)
+		*patchp = DB_VERSION_PATCH;
+
+	return (DB_VERSION_STRING);
+}
+
+/*
+ * db_appinit --
+ *	Initialize the application environment.
+ *
+ *	db_home		optional home directory (may be overridden by the
+ *			DB_HOME environment variable, see __db_home)
+ *	db_config	optional NULL-terminated array of "NAME VALUE" strings
+ *	dbenv		environment to fill in; must not be NULL
+ *	flags		DB_INIT_*, DB_RECOVER*, DB_CREATE, ... (see OKFLAGS)
+ *
+ *	Returns 0 on success or an errno value.  On failure everything that
+ *	was set up here is torn down again via db_appexit().
+ */
+int
+db_appinit(db_home, db_config, dbenv, flags)
+	const char *db_home;
+	char * const *db_config;
+	DB_ENV *dbenv;
+	int flags;
+{
+	FILE *fp;
+	int i_lock, i_log, i_mpool, i_txn, ret;
+	char *lp, **p, buf[MAXPATHLEN * 2];
+
+	/* Validate arguments. */
+	if (dbenv == NULL)
+		return (EINVAL);
+#ifdef HAVE_SPINLOCKS
+#define	OKFLAGS								\
+   (DB_CREATE | DB_NOMMAP | DB_THREAD | DB_INIT_LOCK | DB_INIT_LOG |	\
+    DB_INIT_MPOOL | DB_INIT_TXN | DB_MPOOL_PRIVATE | DB_RECOVER |	\
+    DB_RECOVER_FATAL | DB_TXN_NOSYNC | DB_USE_ENVIRON | DB_USE_ENVIRON_ROOT)
+#else
+#define	OKFLAGS								\
+   (DB_CREATE | DB_NOMMAP | DB_INIT_LOCK | DB_INIT_LOG |		\
+    DB_INIT_MPOOL | DB_INIT_TXN | DB_MPOOL_PRIVATE | DB_RECOVER |	\
+    DB_RECOVER_FATAL | DB_TXN_NOSYNC | DB_USE_ENVIRON | DB_USE_ENVIRON_ROOT)
+#endif
+	if ((ret = __db_fchk(dbenv, "db_appinit", flags, OKFLAGS)) != 0)
+		return (ret);
+
+	/* Recovery needs DB_CREATE, DB_INIT_TXN and DB_INIT_LOG together. */
+#define	RECOVERY_FLAGS (DB_CREATE | DB_INIT_TXN | DB_INIT_LOG)
+	if (LF_ISSET(DB_RECOVER | DB_RECOVER_FATAL) &&
+	    LF_ISSET(RECOVERY_FLAGS) != RECOVERY_FLAGS)
+		return (__db_ferr(dbenv, "db_appinit", 1));
+
+	fp = NULL;
+	i_lock = i_log = i_mpool = i_txn = 0;
+
+	/* Set the database home. */
+	if ((ret = __db_home(dbenv, db_home, flags)) != 0)
+		goto err;
+
+	/* Parse the config array. */
+	for (p = (char **)db_config; p != NULL && *p != NULL; ++p)
+		if ((ret = __db_parse(dbenv, *p)) != 0)
+			goto err;
+
+	/* Parse the config file. */
+	if (dbenv->db_home != NULL) {
+		(void)snprintf(buf,
+		    sizeof(buf), "%s/DB_CONFIG", dbenv->db_home);
+		if ((fp = fopen(buf, "r")) != NULL) {
+			while (fgets(buf, sizeof(buf), fp) != NULL) {
+				if ((lp = strchr(buf, '\n')) != NULL)
+					*lp = '\0';
+				if ((ret = __db_parse(dbenv, buf)) != 0)
+					goto err;
+			}
+			(void)fclose(fp);
+			/*
+			 * The error path closes fp if it is non-NULL; clear
+			 * it so a later failure can't close the stream twice.
+			 */
+			fp = NULL;
+		}
+	}
+
+	/* Set up the tmp directory path. */
+	if (dbenv->db_tmp_dir == NULL &&
+	    (ret = __db_tmp_dir(dbenv, flags)) != 0)
+		goto err;
+
+	/* Indicate that the path names have been set. */
+	F_SET(dbenv, DB_APP_INIT);
+
+	/*
+	 * If we are doing recovery, remove all the regions.
+	 */
+	if (LF_ISSET(DB_RECOVER | DB_RECOVER_FATAL)) {
+		/* Remove all the old shared memory regions.  */
+		if ((ret = log_unlink(NULL, 1 /* force */, dbenv)) != 0)
+			goto err;
+		if ((ret = memp_unlink(NULL, 1 /* force */, dbenv)) != 0)
+			goto err;
+		if ((ret = lock_unlink(NULL, 1 /* force */, dbenv)) != 0)
+			goto err;
+		if ((ret = txn_unlink(NULL, 1 /* force */, dbenv)) != 0)
+			goto err;
+	}
+
+	/* Transactions imply logging. */
+	if (LF_ISSET(DB_INIT_TXN))
+		LF_SET(DB_INIT_LOG);
+
+	/* Default permissions are 0660. */
+#undef	DB_DEFPERM
+#define	DB_DEFPERM	(S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP)
+
+	/* Initialize the subsystems. */
+	if (LF_ISSET(DB_INIT_LOCK)) {
+		if ((ret = lock_open(NULL,
+		    LF_ISSET(DB_CREATE | DB_THREAD),
+		    DB_DEFPERM, dbenv, &dbenv->lk_info)) != 0)
+			goto err;
+		i_lock = 1;
+	}
+	if (LF_ISSET(DB_INIT_LOG)) {
+		if ((ret = log_open(NULL,
+		    LF_ISSET(DB_CREATE | DB_THREAD),
+		    DB_DEFPERM, dbenv, &dbenv->lg_info)) != 0)
+			goto err;
+		i_log = 1;
+	}
+	if (LF_ISSET(DB_INIT_MPOOL)) {
+		if ((ret = memp_open(NULL,
+	    LF_ISSET(DB_CREATE | DB_MPOOL_PRIVATE | DB_NOMMAP | DB_THREAD),
+		    DB_DEFPERM, dbenv, &dbenv->mp_info)) != 0)
+			goto err;
+		i_mpool = 1;
+	}
+	if (LF_ISSET(DB_INIT_TXN)) {
+		if ((ret = txn_open(NULL,
+		    LF_ISSET(DB_CREATE | DB_THREAD | DB_TXN_NOSYNC),
+		    DB_DEFPERM, dbenv, &dbenv->tx_info)) != 0)
+			goto err;
+		i_txn = 1;
+	}
+
+	/* Initialize recovery. */
+	if (LF_ISSET(DB_INIT_TXN)) {
+		if ((ret = __bam_init_recover(dbenv)) != 0)
+			goto err;
+		if ((ret = __db_init_recover(dbenv)) != 0)
+			goto err;
+		if ((ret = __ham_init_recover(dbenv)) != 0)
+			goto err;
+		if ((ret = __log_init_recover(dbenv)) != 0)
+			goto err;
+		if ((ret = __txn_init_recover(dbenv)) != 0)
+			goto err;
+	}
+
+	/* Now run recovery if necessary. */
+	if (LF_ISSET(DB_RECOVER | DB_RECOVER_FATAL) && (ret =
+	    __db_apprec(dbenv, LF_ISSET(DB_RECOVER | DB_RECOVER_FATAL))) != 0)
+		goto err;
+
+	return (ret);
+
+err:	if (fp != NULL)
+		(void)fclose(fp);
+
+	/*
+	 * Close the subsystems we opened and clear each handle afterwards:
+	 * db_appexit() closes every non-NULL handle in dbenv, so leaving
+	 * them set would close each one a second time.
+	 */
+	if (i_lock) {
+		(void)lock_close(dbenv->lk_info);
+		dbenv->lk_info = NULL;
+	}
+	if (i_log) {
+		(void)log_close(dbenv->lg_info);
+		dbenv->lg_info = NULL;
+	}
+	if (i_mpool) {
+		(void)memp_close(dbenv->mp_info);
+		dbenv->mp_info = NULL;
+	}
+	if (i_txn) {
+		(void)txn_close(dbenv->tx_info);
+		dbenv->tx_info = NULL;
+	}
+
+	(void)db_appexit(dbenv);
+	return (ret);
+}
+
+/*
+ * db_appexit --
+ *	Close down the default application environment.
+ *
+ *	Closes every subsystem handle that is set in dbenv and releases the
+ *	path strings stored in it.  Returns 0, or the first non-zero value
+ *	returned by a subsystem close routine.
+ *
+ *	Each handle and pointer is cleared once it has been released, so
+ *	calling this again on the same dbenv (db_appinit already calls it
+ *	on failure) does not close or free anything twice.
+ */
+int
+db_appexit(dbenv)
+	DB_ENV *dbenv;
+{
+	int ret, t_ret;
+	char **p;
+
+	ret = 0;
+
+	/* Close subsystems; remember only the first failure. */
+	if (dbenv->tx_info) {
+		if ((t_ret = txn_close(dbenv->tx_info)) != 0 && ret == 0)
+			ret = t_ret;
+		dbenv->tx_info = NULL;
+	}
+	if (dbenv->mp_info) {
+		if ((t_ret = memp_close(dbenv->mp_info)) != 0 && ret == 0)
+			ret = t_ret;
+		dbenv->mp_info = NULL;
+	}
+	if (dbenv->lg_info) {
+		if ((t_ret = log_close(dbenv->lg_info)) != 0 && ret == 0)
+			ret = t_ret;
+		dbenv->lg_info = NULL;
+	}
+	if (dbenv->lk_info) {
+		if ((t_ret = lock_close(dbenv->lk_info)) != 0 && ret == 0)
+			ret = t_ret;
+		dbenv->lk_info = NULL;
+	}
+
+	/* Free allocated memory. */
+	if (dbenv->db_home != NULL) {
+		FREES(dbenv->db_home);
+		dbenv->db_home = NULL;
+	}
+	if ((p = dbenv->db_data_dir) != NULL) {
+		/* The array is NULL-terminated. */
+		for (; *p != NULL; ++p)
+			FREES(*p);
+		FREE(dbenv->db_data_dir, dbenv->data_cnt * sizeof(char **));
+		dbenv->db_data_dir = NULL;
+		/* __db_parse indexes the array with these; start over. */
+		dbenv->data_cnt = 0;
+		dbenv->data_next = 0;
+	}
+	if (dbenv->db_log_dir != NULL) {
+		FREES(dbenv->db_log_dir);
+		dbenv->db_log_dir = NULL;
+	}
+	if (dbenv->db_tmp_dir != NULL) {
+		FREES(dbenv->db_tmp_dir);
+		dbenv->db_tmp_dir = NULL;
+	}
+
+	return (ret);
+}
+
+/*
+ * DB_ADDSTR --
+ *	Append the path component str, if it is non-NULL, to the path being
+ *	built.  The macro works on the locals of the function that uses it:
+ *	start (base of the buffer), p (write cursor), len (scratch) and slash
+ *	(non-zero when a separator has to be written before the next
+ *	component).  An absolute component discards what was built so far.
+ *
+ *	NOTE(review): nothing here checks the buffer size; the user of the
+ *	macro has to allocate enough room.  With an empty str while p is
+ *	still at start, p[-1] reads the byte before the buffer -- confirm
+ *	that no component can be "".
+ */
+#define	DB_ADDSTR(str) {						\
+	if ((str) != NULL) {						\
+		/* If leading slash, start over. */			\
+		if (__db_abspath(str)) {				\
+			p = start;					\
+			slash = 0;					\
+		}							\
+		/* Append to the current string. */			\
+		len = strlen(str);					\
+		if (slash)						\
+			*p++ = PATH_SEPARATOR[0];			\
+		memcpy(p, str, len);					\
+		p += len;						\
+		slash = strchr(PATH_SEPARATOR, p[-1]) == NULL;		\
+	}								\
+}
+
+/*
+ * __db_appname --
+ *	Given an optional DB environment, directory and file name and type
+ *	of call, build a path based on the db_appinit(3) rules, and return
+ *	it in allocated space.
+ *
+ *	dbenv	environment, or NULL
+ *	appname	kind of file being named (DB_APP_NONE/DATA/LOG/TMP)
+ *	dir	optional directory component
+ *	file	optional file name
+ *	fdp	if non-NULL, set to -1, or to the descriptor of the temporary
+ *		file when one is created
+ *	namep	if non-NULL, set to NULL, or to the allocated path when a
+ *		name (rather than a temporary file) is produced
+ *
+ *	Returns 0 on success or an errno value.
+ *
+ * PUBLIC: int __db_appname __P((DB_ENV *,
+ * PUBLIC:    APPNAME, const char *, const char *, int *, char **));
+ */
+int
+__db_appname(dbenv, appname, dir, file, fdp, namep)
+	DB_ENV *dbenv;
+	APPNAME appname;
+	const char *dir, *file;
+	int *fdp;
+	char **namep;
+{
+	DB_ENV etmp;
+	size_t len;
+	int ret, slash, tmp_create, tmp_free;
+	const char *a, *b, *c;
+	int data_entry;
+	char *p, *start;
+
+	a = b = c = NULL;
+	data_entry = -1;
+	tmp_create = tmp_free = 0;
+
+	/*
+	 * We don't return a name when creating temporary files, just an fd.
+	 * Default to error now.
+	 */
+	if (fdp != NULL)
+		*fdp = -1;
+	if (namep != NULL)
+		*namep = NULL;
+
+	/*
+	 * Absolute path names are never modified.  If the file is an absolute
+	 * path, we're done.  If the directory is, simply append the file and
+	 * return.
+	 */
+	if (file != NULL && __db_abspath(file))
+		return ((*namep = (char *)strdup(file)) == NULL ? ENOMEM : 0);
+	if (dir != NULL && __db_abspath(dir)) {
+		a = dir;
+		goto done;
+	}
+
+	/*
+	 * DB_ENV  DIR	   APPNAME	   RESULT
+	 * -------------------------------------------
+	 * null	   null	   none		   <tmp>/file
+	 * null	   set	   none		   DIR/file
+	 * set	   null	   none		   DB_HOME/file
+	 * set	   set	   none		   DB_HOME/DIR/file
+	 *
+	 * DB_ENV  FILE	   APPNAME	   RESULT
+	 * -------------------------------------------
+	 * null	   null	   DB_APP_DATA	   <tmp>/<create>
+	 * null	   set	   DB_APP_DATA	   ./file
+	 * set	   null	   DB_APP_DATA	   <tmp>/<create>
+	 * set	   set	   DB_APP_DATA	   DB_HOME/DB_DATA_DIR/file
+	 *
+	 * DB_ENV  DIR	   APPNAME	   RESULT
+	 * -------------------------------------------
+	 * null	   null	   DB_APP_LOG	   <tmp>/file
+	 * null	   set	   DB_APP_LOG	   DIR/file
+	 * set	   null	   DB_APP_LOG	   DB_HOME/DB_LOG_DIR/file
+	 * set	   set	   DB_APP_LOG	   DB_HOME/DB_LOG_DIR/DIR/file
+	 *
+	 * DB_ENV	   APPNAME	   RESULT
+	 * -------------------------------------------
+	 * null		   DB_APP_TMP	   <tmp>/<create>
+	 * set		   DB_APP_TMP	   DB_HOME/DB_TMP_DIR/<create>
+	 */
+retry:	switch (appname) {
+	case DB_APP_NONE:
+		if (dbenv == NULL || !F_ISSET(dbenv, DB_APP_INIT)) {
+			if (dir == NULL)
+				goto tmp;
+			a = dir;
+		} else {
+			a = dbenv->db_home;
+			b = dir;
+		}
+		break;
+	case DB_APP_DATA:
+		if (dir != NULL) {
+			__db_err(dbenv,
+			    "DB_APP_DATA: illegal directory specification");
+			return (EINVAL);
+		}
+
+		if (file == NULL) {
+			tmp_create = 1;
+			goto tmp;
+		}
+		if (dbenv == NULL || !F_ISSET(dbenv, DB_APP_INIT))
+			a = PATH_DOT;
+		else {
+			a = dbenv->db_home;
+			/*
+			 * Step to the next data directory on each pass.  Once
+			 * the list is exhausted fall back to the first entry
+			 * and reset data_entry so the existence test below is
+			 * skipped and the loop ends.
+			 */
+			if (dbenv->db_data_dir != NULL &&
+			    (b = dbenv->db_data_dir[++data_entry]) == NULL) {
+				data_entry = -1;
+				b = dbenv->db_data_dir[0];
+			}
+		}
+		break;
+	case DB_APP_LOG:
+		if (dbenv == NULL || !F_ISSET(dbenv, DB_APP_INIT)) {
+			if (dir == NULL)
+				goto tmp;
+			a = dir;
+		} else {
+			a = dbenv->db_home;
+			b = dbenv->db_log_dir;
+			c = dir;
+		}
+		break;
+	case DB_APP_TMP:
+		if (dir != NULL || file != NULL) {
+			__db_err(dbenv,
+		    "DB_APP_TMP: illegal directory or file specification");
+			return (EINVAL);
+		}
+
+		tmp_create = 1;
+		if (dbenv == NULL || !F_ISSET(dbenv, DB_APP_INIT))
+			goto tmp;
+		else {
+			a = dbenv->db_home;
+			b = dbenv->db_tmp_dir;
+		}
+		break;
+	}
+
+	/* Reference a file from the appropriate temporary directory. */
+	if (0) {
+tmp:		if (dbenv == NULL || !F_ISSET(dbenv, DB_APP_INIT)) {
+			memset(&etmp, 0, sizeof(etmp));
+			if ((ret = __db_tmp_dir(&etmp, DB_USE_ENVIRON)) != 0)
+				return (ret);
+			tmp_free = 1;
+			a = etmp.db_tmp_dir;
+		} else
+			a = dbenv->db_tmp_dir;
+	}
+
+	/* One extra byte per component: a separator or the trailing nul. */
+done:	len =
+	    (a == NULL ? 0 : strlen(a) + 1) +
+	    (b == NULL ? 0 : strlen(b) + 1) +
+	    (c == NULL ? 0 : strlen(c) + 1) +
+	    (file == NULL ? 0 : strlen(file) + 1);
+
+	/*
+	 * Every component is optional, so len can be 0; always leave room
+	 * for the terminating nul that is stored below.
+	 */
+	if ((start = (char *)malloc(len + 1)) == NULL) {
+		__db_err(dbenv, "%s", strerror(ENOMEM));
+		if (tmp_free)
+			FREES(etmp.db_tmp_dir);
+		return (ENOMEM);
+	}
+
+	/*
+	 * Assemble the path.  The third component (c) was accounted for in
+	 * len and is part of the DB_APP_LOG rule in the table above, so it
+	 * has to be appended as well.
+	 */
+	slash = 0;
+	p = start;
+	DB_ADDSTR(a);
+	DB_ADDSTR(b);
+	DB_ADDSTR(c);
+	DB_ADDSTR(file);
+	*p = '\0';
+
+	/*
+	 * If we're opening a data file, see if it exists.  If it does,
+	 * return it, otherwise, try and find another one to open.
+	 */
+	if (data_entry != -1 && __db_exists(start, NULL) != 0) {
+		FREES(start);
+		a = b = c = NULL;
+		goto retry;
+	}
+
+	/* Discard any space allocated to find the temp directory. */
+	if (tmp_free)
+		FREES(etmp.db_tmp_dir);
+
+	/* Create the file if so requested. */
+	if (tmp_create) {
+		ret = __db_tmp_open(dbenv, start, fdp);
+		FREES(start);
+	} else {
+		*namep = start;
+		ret = 0;
+	}
+	return (ret);
+}
+
+/*
+ * __db_home --
+ *	Find the database home and store a copy of it in dbenv->db_home.
+ *
+ *	The db_home argument is used unless the flags permit consulting the
+ *	environment and DB_HOME is set there.  An empty DB_HOME is an error.
+ *	If no home is found at all, dbenv is left untouched and 0 returned.
+ */
+static int
+__db_home(dbenv, db_home, flags)
+	DB_ENV *dbenv;
+	const char *db_home;
+	int flags;
+{
+	const char *env_home, *home;
+	int use_env;
+
+	/* Use the environment if it's permitted. */
+	use_env = LF_ISSET(DB_USE_ENVIRON) ? 1 : 0;
+#ifdef HAVE_GETUID
+	if (!use_env && LF_ISSET(DB_USE_ENVIRON_ROOT) && getuid() == 0)
+		use_env = 1;
+#endif
+
+	/* A DB_HOME setting takes precedence over the argument. */
+	home = db_home;
+	if (use_env && (env_home = getenv("DB_HOME")) != NULL) {
+		if (env_home[0] == '\0') {
+			__db_err(dbenv,
+			    "illegal DB_HOME environment variable");
+			return (EINVAL);
+		}
+		home = env_home;
+	}
+
+	/* Nothing to record. */
+	if (home == NULL)
+		return (0);
+
+	dbenv->db_home = (char *)strdup(home);
+	if (dbenv->db_home == NULL) {
+		__db_err(dbenv, "%s", strerror(ENOMEM));
+		return (ENOMEM);
+	}
+	return (0);
+}
+
+/*
+ * __db_parse --
+ *	Parse a single NAME VALUE pair.
+ *
+ *	Recognized names are DB_DATA_DIR (may be given repeatedly; the values
+ *	are collected in the NULL-terminated dbenv->db_data_dir array),
+ *	DB_LOG_DIR and DB_TMP_DIR (a later value replaces an earlier one).
+ *	Any other name is ignored.  Returns 0, EINVAL for a string that is
+ *	not a name-value pair, or ENOMEM.
+ */
+static int
+__db_parse(dbenv, s)
+	DB_ENV *dbenv;
+	char *s;
+{
+	int ret;
+	char *local_s, *name, *value, **new_dir, **p, *tp;
+
+	ret = 0;
+
+	/*
+	 * We need to strdup the argument in case the caller passed us
+	 * static data.
+	 */
+	if ((local_s = (char *)strdup(s)) == NULL)
+		return (ENOMEM);
+
+	/* Skip the empty fields strsep returns for runs of white space. */
+	tp = local_s;
+	while ((name = strsep(&tp, " \t")) != NULL && *name == '\0');
+	if (name == NULL)
+		goto illegal;
+	while ((value = strsep(&tp, " \t")) != NULL && *value == '\0');
+	if (value == NULL) {
+illegal:	ret = EINVAL;
+		__db_err(dbenv, "illegal name-value pair: %s", s);
+		goto err;
+	}
+
+#define	DATA_INIT_CNT	20			/* Start with 20 data slots. */
+	if (!strcmp(name, "DB_DATA_DIR")) {
+		if (dbenv->db_data_dir == NULL) {
+			if ((dbenv->db_data_dir = (char **)calloc(DATA_INIT_CNT,
+			    sizeof(char *))) == NULL)
+				goto nomem;
+			dbenv->data_cnt = DATA_INIT_CNT;
+		} else if (dbenv->data_next == dbenv->data_cnt - 1) {
+			/*
+			 * Double the array, always keeping the last slot free
+			 * for the NULL terminator.  Go through a temporary so
+			 * the old array isn't lost if realloc fails, and only
+			 * update data_cnt once the new array is in place.
+			 */
+			if ((new_dir = (char **)realloc(dbenv->db_data_dir,
+			    dbenv->data_cnt * 2 * sizeof(char *))) == NULL)
+				goto nomem;
+			/*
+			 * realloc doesn't clear the added slots, and readers
+			 * of the array stop at the first NULL entry.
+			 */
+			memset(new_dir + dbenv->data_cnt,
+			    0, dbenv->data_cnt * sizeof(char *));
+			dbenv->db_data_dir = new_dir;
+			dbenv->data_cnt *= 2;
+		}
+		p = &dbenv->db_data_dir[dbenv->data_next++];
+	} else if (!strcmp(name, "DB_LOG_DIR")) {
+		if (dbenv->db_log_dir != NULL)
+			FREES(dbenv->db_log_dir);
+		p = &dbenv->db_log_dir;
+	} else if (!strcmp(name, "DB_TMP_DIR")) {
+		if (dbenv->db_tmp_dir != NULL)
+			FREES(dbenv->db_tmp_dir);
+		p = &dbenv->db_tmp_dir;
+	} else
+		goto err;		/* Unknown names are ignored. */
+
+	if ((*p = (char *)strdup(value)) == NULL) {
+nomem:		ret = ENOMEM;
+		__db_err(dbenv, "%s", strerror(ENOMEM));
+	}
+
+err:	FREES(local_s);
+	return (ret);
+}
+
+#ifdef macintosh
+#include <TFileSpec.h>
+
+static char *sTempFolder;
+#endif
+
+/*
+ * __db_tmp_dir --
+ *	Set the temporary directory path.
+ *
+ *	Picks the directory from the environment (TMPDIR, TEMP, TMP,
+ *	TempFolder, in that order) when the flags permit it, otherwise from
+ *	a built-in list, and stores a copy in dbenv->db_tmp_dir.  Returns 0,
+ *	EINVAL for an empty environment variable, or ENOMEM.
+ */
+static int
+__db_tmp_dir(dbenv, flags)
+	DB_ENV *dbenv;
+	int flags;
+{
+	static const char * list[] = {	/* Ordered: see db_appinit(3). */
+		"/var/tmp",
+		"/usr/tmp",
+		"/temp",		/* WIN32. */
+		"/tmp",
+		"C:/temp",		/* WIN32. */
+		"C:/tmp",		/* WIN32. */
+		NULL
+	};
+	const char **lp, *p;
+
+	/*
+	 * Use the environment if it's permitted and initialized.  The guard
+	 * names the function that is actually called (getuid), as __db_home
+	 * does.
+	 */
+	p = NULL;
+#ifdef HAVE_GETUID
+	if (LF_ISSET(DB_USE_ENVIRON) ||
+	    (LF_ISSET(DB_USE_ENVIRON_ROOT) && getuid() == 0)) {
+#else
+	if (LF_ISSET(DB_USE_ENVIRON)) {
+#endif
+		if ((p = getenv("TMPDIR")) != NULL && p[0] == '\0') {
+			__db_err(dbenv, "illegal TMPDIR environment variable");
+			return (EINVAL);
+		}
+		/* WIN32 */
+		if (p == NULL && (p = getenv("TEMP")) != NULL && p[0] == '\0') {
+			__db_err(dbenv, "illegal TEMP environment variable");
+			return (EINVAL);
+		}
+		/* WIN32 */
+		if (p == NULL && (p = getenv("TMP")) != NULL && p[0] == '\0') {
+			__db_err(dbenv, "illegal TMP environment variable");
+			return (EINVAL);
+		}
+		/* Macintosh */
+		if (p == NULL &&
+		    (p = getenv("TempFolder")) != NULL && p[0] == '\0') {
+			__db_err(dbenv,
+			    "illegal TempFolder environment variable");
+			return (EINVAL);
+		}
+	}
+
+#ifdef macintosh
+	/* Get the path to the temporary folder. */
+	if (p == NULL) {
+		FSSpec spec;
+
+		if (!Special2FSSpec(kTemporaryFolderType,
+		    kOnSystemDisk, 0, &spec)) {
+			p = FSp2FullPath(&spec);
+			/* Don't copy into a failed allocation. */
+			if ((sTempFolder = malloc(strlen(p) + 1)) == NULL) {
+				__db_err(dbenv, "%s", strerror(ENOMEM));
+				return (ENOMEM);
+			}
+			strcpy(sTempFolder, p);
+			p = sTempFolder;
+		}
+	}
+#endif
+
+	/*
+	 * Step through the list looking for a possibility.
+	 *
+	 * NOTE(review): p is assigned on every iteration, so if none of the
+	 * directories exists it is left pointing at the last list entry and
+	 * the NULL test below does not trigger.  Left as is because callers
+	 * may rely on db_tmp_dir being set -- confirm the intent.
+	 */
+	if (p == NULL)
+		for (lp = list; *lp != NULL; ++lp)
+			if (__db_exists(p = *lp, NULL) == 0)
+				break;
+
+	if (p == NULL)
+		return (0);
+
+	if ((dbenv->db_tmp_dir = (char *)strdup(p)) == NULL) {
+		__db_err(dbenv, "%s", strerror(ENOMEM));
+		return (ENOMEM);
+	}
+	return (0);
+}
+
+/*
+ * __db_tmp_open --
+ *	Create a temporary file.
+ *
+ *	dir must name an existing directory.  The file name is built from
+ *	the process ID; on a name collision (EEXIST) the name is varied and
+ *	the open retried.  On success the descriptor is stored through fdp
+ *	and 0 returned, otherwise an errno value is returned.
+ */
+static int
+__db_tmp_open(dbenv, dir, fdp)
+	DB_ENV *dbenv;
+	char *dir;
+	int *fdp;
+{
+#ifdef HAVE_SIGFILLSET
+	sigset_t set, oset;
+#endif
+	u_long pid;
+	size_t len;
+	int isdir, ret;
+	char *trv, buf[MAXPATHLEN];
+
+	/*
+	 * Check the target directory; if you have six X's and it doesn't
+	 * exist, this runs for a *very* long time.
+	 */
+	if ((ret = __db_exists(dir, &isdir)) != 0) {
+		__db_err(dbenv, "%s: %s", dir, strerror(ret));
+		return (ret);
+	}
+	if (!isdir) {
+		__db_err(dbenv, "%s: %s", dir, strerror(EINVAL));
+		return (EINVAL);
+	}
+
+	/*
+	 * Build the path.  buf hasn't been filled in yet when the length
+	 * check fails, so the message has to name dir.
+	 */
+#define	DB_TRAIL	"/XXXXXX"
+	if ((len = strlen(dir)) + sizeof(DB_TRAIL) > sizeof(buf)) {
+		__db_err(dbenv,
+		    "tmp_open: %s: %s", dir, strerror(ENAMETOOLONG));
+		return (ENAMETOOLONG);
+	}
+	(void)strcpy(buf, dir);
+	(void)strcpy(buf + len, DB_TRAIL);
+	buf[len] = PATH_SEPARATOR[0];			/* WIN32 */
+
+	/*
+	 * Replace the X's with the process ID.  Pid should be a pid_t,
+	 * but we use unsigned long for portability.
+	 */
+	for (pid = getpid(),
+	    trv = buf + len + sizeof(DB_TRAIL) - 1; *--trv == 'X'; pid /= 10)
+		switch (pid % 10) {
+		case 0: *trv = '0'; break;
+		case 1: *trv = '1'; break;
+		case 2: *trv = '2'; break;
+		case 3: *trv = '3'; break;
+		case 4: *trv = '4'; break;
+		case 5: *trv = '5'; break;
+		case 6: *trv = '6'; break;
+		case 7: *trv = '7'; break;
+		case 8: *trv = '8'; break;
+		case 9: *trv = '9'; break;
+		}
+	/* trv stopped on the separator; step to the first name character. */
+	++trv;
+
+	/*
+	 * Try and open a file.  We block every signal we can get our hands
+	 * on so that, if we're interrupted at the wrong time, the temporary
+	 * file isn't left around -- of course, if we drop core in-between
+	 * the calls we'll hang forever, but that's probably okay.  ;-}
+	 */
+#ifdef HAVE_SIGFILLSET
+	(void)sigfillset(&set);
+#endif
+	for (;;) {
+#ifdef HAVE_SIGFILLSET
+		(void)sigprocmask(SIG_BLOCK, &set, &oset);
+#endif
+#define	DB_TEMPOPEN	DB_CREATE | DB_EXCL | DB_TEMPORARY
+		if ((ret = __db_fdopen(buf,
+		    DB_TEMPOPEN, DB_TEMPOPEN, S_IRUSR | S_IWUSR, fdp)) == 0) {
+#ifdef HAVE_SIGFILLSET
+			(void)sigprocmask(SIG_SETMASK, &oset, NULL);
+#endif
+			return (0);
+		}
+#ifdef HAVE_SIGFILLSET
+		(void)sigprocmask(SIG_SETMASK, &oset, NULL);
+#endif
+		/*
+		 * XXX:
+		 * If we don't get an EEXIST error, then there's something
+		 * seriously wrong.  Unfortunately, if the implementation
+		 * doesn't return EEXIST for O_CREAT and O_EXCL regardless
+		 * of other possible errors, we've lost.
+		 */
+		if (ret != EEXIST) {
+			__db_err(dbenv,
+			    "tmp_open: %s: %s", buf, strerror(ret));
+			return (ret);
+		}
+
+		/*
+		 * Tricky little algorithm for backward compatibility.
+		 * Assumes the ASCII ordering of lower-case characters.
+		 *
+		 * The name is treated as a counter, least significant
+		 * character first: a 'z' wraps to 'a' and carries into the
+		 * next character, a digit becomes 'a', anything else is
+		 * incremented.  Running off the end means every name has
+		 * been tried.
+		 */
+		for (;;) {
+			if (*trv == '\0')
+				return (EINVAL);
+			if (*trv == 'z')
+				*trv++ = 'a';
+			else {
+				/* <ctype.h> wants an unsigned char value. */
+				if (isdigit((unsigned char)*trv))
+					*trv = 'a';
+				else
+					++*trv;
+				break;
+			}
+		}
+	}
+	/* NOTREACHED */
+}
diff --git a/db2/common/db_apprec.c b/db2/common/db_apprec.c
new file mode 100644
index 0000000000..b22b0c5e9a
--- /dev/null
+++ b/db2/common/db_apprec.c
@@ -0,0 +1,143 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *	Sleepycat Software.  All rights reserved.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char copyright[] =
+"@(#) Copyright (c) 1997\n\
+	Sleepycat Software Inc.  All rights reserved.\n";
+static const char sccsid[] = "@(#)db_apprec.c	10.15 (Sleepycat) 7/27/97";
+#endif
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <time.h>
+#include <string.h>
+#endif
+
+#include "db_int.h"
+#include "shqueue.h"
+#include "db_page.h"
+#include "db_dispatch.h"
+#include "db_am.h"
+#include "log.h"
+#include "txn.h"
+#include "common_ext.h"
+
+/*
+ * __db_apprec --
+ *	Perform recovery.
+ *
+ *	Makes three passes over the log of dbenv->lg_info, handing every
+ *	record to __db_dispatch: forward with TXN_OPENFILES, backward with
+ *	TXN_BACKWARD_ROLL, and forward again with TXN_FORWARD_ROLL.  It then
+ *	updates the transaction region and takes a checkpoint.
+ *
+ *	flags is tested for DB_RECOVER_FATAL only.  Returns 0 on success;
+ *	otherwise the failing log_get or __db_dispatch value is returned.
+ *
+ *	NOTE(review): nothing in this function releases txninfo; presumably
+ *	a matching release routine exists -- confirm.
+ *
+ * PUBLIC: int __db_apprec __P((DB_ENV *, int));
+ */
+int
+__db_apprec(dbenv, flags)
+	DB_ENV *dbenv;
+	int flags;
+{
+	DBT data;
+	DB_LOG *lp;
+	DB_LSN ckp_lsn, first_lsn, lsn, tmp_lsn;
+	time_t now;
+	int first_flag, ret, tret;
+	void *txninfo;
+
+	ZERO_LSN(ckp_lsn);
+
+	/* Initialize the transaction list. */
+	if ((ret = __db_txnlist_init(&txninfo)) != 0)
+		return (ret);
+
+	/*
+	 * Read forward through the log opening the appropriate files
+	 * so that we can call recovery routines.  In general, we start
+	 * at the last checkpoint prior to the last checkpointed LSN.
+	 * For catastrophic recovery, we begin at the first LSN that
+	 * appears in any log file (log figures this out for us when
+	 * we pass it the DB_FIRST flag).
+	 */
+	lp = dbenv->lg_info;
+	if (LF_ISSET(DB_RECOVER_FATAL))
+		first_flag = DB_FIRST;
+	else
+		/* No checkpoint found: start from the first record too. */
+		first_flag = __log_findckp(lp, &lsn) != 0 ? DB_FIRST : DB_SET;
+
+	memset(&data, 0, sizeof(data));
+	if ((ret = log_get(lp, &lsn, &data, first_flag)) != 0) {
+		__db_err(dbenv, "Failure: unable to get log record");
+		if (first_flag == DB_SET)
+			__db_err(dbenv, "Retrieving LSN %lu %lu",
+			    (u_long)lsn.file, (u_long)lsn.offset);
+		else
+			__db_err(dbenv, "Retrieving first LSN");
+		goto err;
+	}
+
+	/*
+	 * Pass 1: forward from the starting record, TXN_OPENFILES.  The
+	 * loop ends when log_get returns non-zero; that value is not
+	 * examined further.
+	 */
+	first_lsn = lsn;
+	for (; ret == 0;
+	    ret = log_get(dbenv->lg_info, &lsn, &data, DB_NEXT))
+		if ((tret = __db_dispatch(lp,
+		    &data, &lsn, TXN_OPENFILES, txninfo)) < 0) {
+			ret = tret;
+			goto msgerr;
+		}
+
+	/*
+	 * Pass 2: backward from the last record while the LSN is greater
+	 * than first_lsn, TXN_BACKWARD_ROLL.  The LSN of the first record
+	 * for which the dispatch returns a positive value is remembered as
+	 * the checkpoint LSN.
+	 */
+	for (ret = log_get(lp, &lsn, &data, DB_LAST);
+	    ret == 0 && log_compare(&lsn, &first_lsn) > 0;
+	    ret = log_get(lp,&lsn, &data, DB_PREV)) {
+		tmp_lsn = lsn;
+		tret =
+		    __db_dispatch(lp, &data, &lsn, TXN_BACKWARD_ROLL, txninfo);
+		if (IS_ZERO_LSN(ckp_lsn) && tret > 0)
+			ckp_lsn = tmp_lsn;
+		if (tret < 0) {
+			ret = tret;
+			goto msgerr;
+		}
+	}
+
+	/*
+	 * Pass 3: forward again with DB_NEXT, TXN_FORWARD_ROLL, until
+	 * log_get returns non-zero.
+	 */
+	for (ret = log_get(lp, &lsn, &data, DB_NEXT);
+	    ret == 0; ret = log_get(lp, &lsn, &data, DB_NEXT))
+		if ((tret = __db_dispatch(lp,
+		    &data, &lsn, TXN_FORWARD_ROLL, txninfo)) < 0) {
+			ret = tret;
+			goto msgerr;
+		}
+
+	/* Now close all the db files that are open. */
+	__log_close_files(lp);
+
+	/*
+	 * Now set the maximum transaction id, set the last checkpoint lsn,
+	 * and the current time.  Then take a checkpoint.
+	 */
+	(void)time(&now);
+
+	dbenv->tx_info->region->last_txnid = ((__db_txnhead *)txninfo)->maxid;
+	dbenv->tx_info->region->last_ckp = ckp_lsn;
+	dbenv->tx_info->region->time_ckp = (u_int32_t) now;
+	/* NOTE(review): the checkpoint's return value is not checked. */
+	txn_checkpoint(dbenv->tx_info, 0, 0);
+
+	if (dbenv->db_verbose) {
+		__db_err(lp->dbenv, "Recovery complete at %s", ctime(&now));
+		__db_err(lp->dbenv, "%s %lu %s [%lu][%lu]",
+		    "Maximum transaction id",
+		    (u_long)dbenv->tx_info->region->last_txnid,
+		    "Recovery checkpoint",
+		    (u_long)dbenv->tx_info->region->last_ckp.file,
+		    (u_long)dbenv->tx_info->region->last_ckp.offset);
+	}
+
+	return (0);
+
+	/* A dispatch failed: report the LSN, then return the failure. */
+msgerr:	__db_err(dbenv, "Recovery function for LSN %lu %lu failed",
+	    (u_long)lsn.file, (u_long)lsn.offset);
+
+err:	return (ret);
+}
diff --git a/db2/common/db_byteorder.c b/db2/common/db_byteorder.c
new file mode 100644
index 0000000000..d49883e093
--- /dev/null
+++ b/db2/common/db_byteorder.c
@@ -0,0 +1,56 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *	Sleepycat Software.  All rights reserved.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)db_byteorder.c	10.3 (Sleepycat) 6/21/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <errno.h>
+#endif
+
+#include "db_int.h"
+#include "common_ext.h"
+
+/*
+ * __db_byteorder --
+ *	Decide whether byte swapping is needed for the requested byte
+ *	order.  Returns 0 when lorder is 0 or matches the order this build
+ *	was configured for, DB_SWAPBYTES when it is the opposite order, and
+ *	EINVAL (after reporting the error) for any other value.
+ *
+ * PUBLIC: int __db_byteorder __P((DB_ENV *, int));
+ */
+int
+__db_byteorder(dbenv, lorder)
+	DB_ENV *dbenv;
+	int lorder;
+{
+	int host_order;
+
+	/* The byte order selected at configuration time. */
+#if defined(WORDS_BIGENDIAN)
+	host_order = 4321;
+#else
+	host_order = 1234;
+#endif
+
+	/* Zero means "no preference". */
+	if (lorder == 0)
+		return (0);
+
+	if (lorder != 1234 && lorder != 4321) {
+		__db_err(dbenv,
+		    "illegal byte order, only big and little-endian supported");
+		return (EINVAL);
+	}
+
+	return (lorder == host_order ? 0 : DB_SWAPBYTES);
+}
diff --git a/db2/common/db_err.c b/db2/common/db_err.c
new file mode 100644
index 0000000000..3dc4ca007d
--- /dev/null
+++ b/db2/common/db_err.c
@@ -0,0 +1,548 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *	Sleepycat Software.  All rights reserved.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)db_err.c	10.16 (Sleepycat) 8/24/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <errno.h>
+
+#ifdef __STDC__
+#include <stdarg.h>
+#else
+#include <varargs.h>
+#endif
+#endif
+
+#include "db_int.h"
+#include "common_ext.h"
+
+static int __db_rdonly __P((const DB_ENV *, const char *));
+
+/*
+ * __db_err --
+ *	Standard DB error routine.
+ *
+ *	Formats the message and hands it to the error callback and/or
+ *	writes it, preceded by the error prefix if one is set, to the error
+ *	file of dbenv.  Does nothing if dbenv is NULL.
+ *
+ * PUBLIC: #ifdef __STDC__
+ * PUBLIC: void __db_err __P((const DB_ENV *dbenv, const char *fmt, ...));
+ * PUBLIC: #else
+ * PUBLIC: void __db_err();
+ * PUBLIC: #endif
+ */
+void
+#ifdef __STDC__
+__db_err(const DB_ENV *dbenv, const char *fmt, ...)
+#else
+__db_err(dbenv, fmt, va_alist)
+	const DB_ENV *dbenv;
+	const char *fmt;
+	va_dcl
+#endif
+{
+	va_list ap;
+	char errbuf[2048];	/* XXX: END OF THE STACK DON'T TRUST SPRINTF. */
+
+	if (dbenv == NULL)
+		return;
+
+	/*
+	 * The argument list may be needed twice.  A va_list is indeterminate
+	 * after it has been passed to a v*printf function, so it is started
+	 * and ended separately around each use.
+	 */
+	if (dbenv->db_errcall != NULL) {
+#ifdef __STDC__
+		va_start(ap, fmt);
+#else
+		va_start(ap);
+#endif
+		(void)vsnprintf(errbuf, sizeof(errbuf), fmt, ap);
+		va_end(ap);
+		dbenv->db_errcall(dbenv->db_errpfx, errbuf);
+	}
+	if (dbenv->db_errfile != NULL) {
+		if (dbenv->db_errpfx != NULL)
+			(void)fprintf(dbenv->db_errfile, "%s: ",
+			    dbenv->db_errpfx);
+#ifdef __STDC__
+		va_start(ap, fmt);
+#else
+		va_start(ap);
+#endif
+		(void)vfprintf(dbenv->db_errfile, fmt, ap);
+		va_end(ap);
+		(void)fprintf(dbenv->db_errfile, "\n");
+		(void)fflush(dbenv->db_errfile);
+	}
+}
+
+/*
+ * XXX
+ * Provide ANSI C prototypes for the panic functions.  Some compilers, (e.g.,
+ * MS VC 4.2) get upset if they aren't here, even though the K&R declaration
+ * appears before the assignment in the __db_panic() call.
+ */
+static int __db_ecursor __P((DB *, DB_TXN *, DBC **));
+static int __db_edel __P((DB *, DB_TXN *, DBT *, int));
+static int __db_efd __P((DB *, int *));
+static int __db_egp __P((DB *, DB_TXN *, DBT *, DBT *, int));
+static int __db_estat __P((DB *, void *, void *(*)(size_t), int));
+static int __db_esync __P((DB *, int));
+
+/*
+ * __db_ecursor --
+ *	After-panic cursor routine: ignores its arguments and fails with
+ *	EPERM.
+ */
+static int
+__db_ecursor(a, b, c)
+	DB *a;
+	DB_TXN *b;
+	DBC **c;
+{
+	/* XXX: Shut the compiler up about the unused arguments. */
+	(void)a;
+	(void)b;
+	(void)c;
+
+	return (EPERM);
+}
+
+/*
+ * __db_edel --
+ *	After-panic delete routine: ignores its arguments and fails with
+ *	EPERM.
+ */
+static int
+__db_edel(a, b, c, d)
+	DB *a;
+	DB_TXN *b;
+	DBT *c;
+	int d;
+{
+	/* XXX: Shut the compiler up about the unused arguments. */
+	(void)a;
+	(void)b;
+	(void)c;
+	(void)d;
+
+	return (EPERM);
+}
+
+/*
+ * __db_efd --
+ *	After-panic fd routine: ignores its arguments and fails with EPERM.
+ */
+static int
+__db_efd(a, b)
+	DB *a;
+	int *b;
+{
+	/* XXX: Shut the compiler up about the unused arguments. */
+	(void)a;
+	(void)b;
+
+	return (EPERM);
+}
+
+/*
+ * __db_egp --
+ *	After-panic get/put routine: ignores its arguments and fails with
+ *	EPERM.
+ */
+static int
+__db_egp(a, b, c, d, e)
+	DB *a;
+	DB_TXN *b;
+	DBT *c, *d;
+	int e;
+{
+	/* XXX: Shut the compiler up about the unused arguments. */
+	(void)a;
+	(void)b;
+	(void)c;
+	(void)d;
+	(void)e;
+
+	return (EPERM);
+}
+
+/*
+ * __db_estat --
+ *	After-panic stat routine: ignores its arguments and fails with
+ *	EPERM.
+ */
+static int
+__db_estat(a, b, c, d)
+	DB *a;
+	void *b;
+	void *(*c) __P((size_t));
+	int d;
+{
+	/* XXX: Shut the compiler up about the unused arguments. */
+	(void)a;
+	(void)b;
+	(void)c;
+	(void)d;
+
+	return (EPERM);
+}
+
+/*
+ * __db_esync --
+ *	After-panic sync routine: ignores its arguments and fails with
+ *	EPERM.
+ */
+static int
+__db_esync(a, b)
+	DB *a;
+	int b;
+{
+	/* XXX: Shut the compiler up about the unused arguments. */
+	(void)a;
+	(void)b;
+
+	return (EPERM);
+}
+
+/*
+ * __db_panic --
+ *	Lock out the tree due to unrecoverable error: every method of the
+ *	handle is replaced by a routine that only returns EPERM.  Always
+ *	returns EPERM itself.
+ *
+ * PUBLIC: int __db_panic __P((DB *));
+ */
+int
+__db_panic(dbp)
+	DB *dbp;
+{
+	/*
+	 * XXX
+	 * We should shut down all of the process's cursors, too.
+	 *
+	 * We should call mpool and have it shut down the file, so we get
+	 * other processes sharing this file as well.
+	 */
+
+	/* Data access: get and put share one routine. */
+	dbp->get = __db_egp;
+	dbp->put = __db_egp;
+	dbp->del = __db_edel;
+	dbp->cursor = __db_ecursor;
+
+	/* Everything else. */
+	dbp->fd = __db_efd;
+	dbp->stat = __db_estat;
+	dbp->sync = __db_esync;
+
+	return (EPERM);
+}
+
+/*
+ * Check for invalid flags.
+ *
+ * Expands to an if statement that makes the *enclosing function* return the
+ * value of __db_ferr(dbenv, name, 0) when flags has a bit that is not in
+ * ok_flags.  The expansion already ends in a semicolon.
+ */
+#undef	DB_CHECK_FLAGS
+#define	DB_CHECK_FLAGS(dbenv, name, flags, ok_flags)			\
+	if ((flags) & ~(ok_flags))					\
+		return (__db_ferr(dbenv, name, 0));
+/*
+ * Check for invalid flag combinations.
+ *
+ * Same shape as DB_CHECK_FLAGS: the enclosing function returns the value of
+ * __db_ferr(dbenv, name, 1) when flags has bits of both flag1 and flag2.
+ */
+#undef	DB_CHECK_FCOMBO
+#define	DB_CHECK_FCOMBO(dbenv, name, flags, flag1, flag2)		\
+	if ((flags) & (flag1) && (flags) & (flag2))			\
+		return (__db_ferr(dbenv, name, 1));
+
+/*
+ * __db_fchk --
+ *	General flags checking routine.  Returns the value of __db_ferr()
+ *	if flags has any bit that is not in ok_flags, otherwise 0.
+ *
+ *	NOTE(review): the PUBLIC prototype below declares name as char *,
+ *	the definition as const char * -- confirm which one is intended.
+ *
+ * PUBLIC: int __db_fchk __P((DB_ENV *, char *, int, int));
+ */
+int
+__db_fchk(dbenv, name, flags, ok_flags)
+	DB_ENV *dbenv;
+	const char *name;
+	int flags, ok_flags;
+{
+	return ((flags & ~ok_flags) ? __db_ferr(dbenv, name, 0) : 0);
+}
+
+/*
+ * __db_fcchk --
+ *	General combination flags checking routine.  Returns the value of
+ *	__db_ferr() if flags has bits of both flag1 and flag2, otherwise 0.
+ *
+ *	NOTE(review): the PUBLIC prototype below declares name as char *,
+ *	the definition as const char * -- confirm which one is intended.
+ *
+ * PUBLIC: int __db_fcchk __P((DB_ENV *, char *, int, int, int));
+ */
+int
+__db_fcchk(dbenv, name, flags, flag1, flag2)
+	DB_ENV *dbenv;
+	const char *name;
+	int flags, flag1, flag2;
+{
+	if ((flags & flag1) && (flags & flag2))
+		return (__db_ferr(dbenv, name, 1));
+
+	return (0);
+}
+
+/*
+ * __db_cdelchk --
+ *	Common cursor delete argument checking routine.  Returns 0 if the
+ *	call is acceptable, otherwise an error value.
+ *
+ * PUBLIC: int __db_cdelchk __P((const DB *, int, int, int));
+ */
+int
+__db_cdelchk(dbp, flags, isrdonly, isvalid)
+	const DB *dbp;
+	int flags, isrdonly, isvalid;
+{
+	/* Check for changes to a read-only tree. */
+	if (isrdonly)
+		return (__db_rdonly(dbp->dbenv, "c_del"));
+
+	/* dbc->c_del() doesn't accept any flags. */
+	if (flags != 0)
+		return (__db_ferr(dbp->dbenv, "c_del", 0));
+
+	/* The cursor must be initialized: EINVAL for an invalid cursor. */
+	if (!isvalid)
+		return (EINVAL);
+
+	return (0);
+}
+
+/*
+ * __db_cgetchk --
+ *	Common cursor get argument checking routine.  Returns 0 if the call
+ *	is acceptable, otherwise an error value.
+ *
+ * PUBLIC: int __db_cgetchk __P((const DB *, DBT *, DBT *, int, int));
+ */
+int
+__db_cgetchk(dbp, key, data, flags, isvalid)
+	const DB *dbp;
+	DBT *key, *data;
+	int flags, isvalid;
+{
+	int threaded_key_chk;
+
+	/*
+	 * Check for invalid dbc->c_get() function flags, and note for which
+	 * operations the key DBT is examined when the handle is threaded.
+	 */
+	switch (flags) {
+	case DB_SET:
+		threaded_key_chk = 0;
+		break;
+	case DB_SET_RECNO:
+	case DB_GET_RECNO:
+		/* Only legal if record numbers are maintained. */
+		if (!F_ISSET(dbp, DB_BT_RECNUM))
+			return (__db_ferr(dbp->dbenv, "c_get", 0));
+		/* FALLTHROUGH */
+	case DB_CURRENT:
+	case DB_FIRST:
+	case DB_LAST:
+	case DB_NEXT:
+	case DB_PREV:
+	case DB_SET_RANGE:
+		threaded_key_chk = 1;
+		break;
+	default:
+		return (__db_ferr(dbp->dbenv, "c_get", 0));
+	}
+
+	/* Check for invalid key/data flags. */
+	if (key->flags & ~(DB_DBT_MALLOC | DB_DBT_USERMEM | DB_DBT_PARTIAL))
+		return (__db_ferr(dbp->dbenv, "key", 0));
+	if (data->flags & ~(DB_DBT_MALLOC | DB_DBT_USERMEM | DB_DBT_PARTIAL))
+		return (__db_ferr(dbp->dbenv, "data", 0));
+
+	/* Check dbt's for valid flags when multi-threaded. */
+	if (F_ISSET(dbp, DB_AM_THREAD)) {
+		if (!F_ISSET(data, DB_DBT_USERMEM | DB_DBT_MALLOC))
+			return (__db_ferr(dbp->dbenv, "threaded data", 1));
+		if (threaded_key_chk &&
+		    !F_ISSET(key, DB_DBT_USERMEM | DB_DBT_MALLOC))
+			return (__db_ferr(dbp->dbenv, "threaded key", 1));
+	}
+
+	/*
+	 * The cursor must be initialized for DB_CURRENT: EINVAL for an
+	 * invalid cursor, otherwise 0.
+	 */
+	if (flags == DB_CURRENT && !isvalid)
+		return (EINVAL);
+
+	return (0);
+}
+
+/*
+ * __db_cputchk --
+ *	Common cursor put argument checking routine.
+ *
+ *	Returns the result of __db_rdonly() if isrdonly is non-zero, the
+ *	result of __db_ferr() for a flags value that is unknown or not
+ *	permitted for this access method, EINVAL if the operation needs a
+ *	positioned cursor and isvalid is zero, and 0 otherwise.
+ *
+ *	NOTE(review): DB_CHECK_FLAGS is defined outside this file; it
+ *	presumably returns an error from this function when a DBT carries a
+ *	flag outside the allowed mask -- confirm.
+ *
+ * PUBLIC: int __db_cputchk __P((const DB *,
+ * PUBLIC:    const DBT *, DBT *, int, int, int));
+ */
+int
+__db_cputchk(dbp, key, data, flags, isrdonly, isvalid)
+	const DB *dbp;
+	const DBT *key;
+	DBT *data;
+	int flags, isrdonly, isvalid;
+{
+	int check_key;
+
+	/* Check for changes to a read-only tree. */
+	if (isrdonly)
+		return (__db_rdonly(dbp->dbenv, "c_put"));
+
+	/* Check for invalid dbc->c_put() function flags. */
+	check_key = 0;
+	switch (flags) {
+	case DB_AFTER:
+	case DB_BEFORE:
+		/*
+		 * Positional inserts need renumbering record numbers in a
+		 * recno database and duplicates in any other access method.
+		 */
+		if (dbp->type == DB_RECNO && !F_ISSET(dbp, DB_RE_RENUMBER))
+			goto err;
+		if (dbp->type != DB_RECNO && !F_ISSET(dbp, DB_AM_DUP))
+			goto err;
+		break;
+	case DB_CURRENT:
+		break;
+	case DB_KEYFIRST:
+	case DB_KEYLAST:
+		/* Keyed inserts are rejected for recno databases. */
+		if (dbp->type == DB_RECNO)
+			goto err;
+		check_key = 1;
+		break;
+	default:
+err:		return (__db_ferr(dbp->dbenv, "c_put", 0));
+	}
+
+	/* Check for invalid key/data flags. */
+	if (check_key)
+		DB_CHECK_FLAGS(dbp->dbenv, "key", key->flags,
+		    DB_DBT_MALLOC | DB_DBT_USERMEM | DB_DBT_PARTIAL);
+	DB_CHECK_FLAGS(dbp->dbenv, "data", data->flags,
+	    DB_DBT_MALLOC | DB_DBT_USERMEM | DB_DBT_PARTIAL);
+
+	/*
+	 * The cursor must be initialized for anything other than DB_KEYFIRST
+	 * and DB_KEYLAST, return EINVAL for an invalid cursor, otherwise 0.
+	 *
+	 * The test used to be "flags != DB_KEYFIRST && flags != DB_KEYLAST",
+	 * which had the sense reversed: it rejected the two keyed operations
+	 * on an uninitialized cursor and let DB_AFTER, DB_BEFORE and
+	 * DB_CURRENT through.
+	 */
+	return (isvalid ||
+	    flags == DB_KEYFIRST || flags == DB_KEYLAST ? 0 : EINVAL);
+}
+
+/*
+ * __db_delchk --
+ *	Common delete argument checking routine.
+ *
+ *	Returns the result of __db_rdonly() if isrdonly is non-zero and 0 if
+ *	control falls through the flag check.
+ *
+ *	NOTE(review): DB_CHECK_FLAGS is defined outside this file; it
+ *	presumably returns an error from this function when flags has any
+ *	bit set (no flags are allowed here) -- confirm.
+ *
+ * PUBLIC: int __db_delchk __P((const DB *, int, int));
+ */
+int
+__db_delchk(dbp, flags, isrdonly)
+	const DB *dbp;
+	int flags, isrdonly;
+{
+	/* Check for changes to a read-only tree. */
+	if (isrdonly)
+		return (__db_rdonly(dbp->dbenv, "delete"));
+
+	/* Check for invalid db->del() function flags. */
+	DB_CHECK_FLAGS(dbp->dbenv, "delete", flags, 0);
+
+	return (0);
+}
+
+/*
+ * __db_getchk --
+ *	Common get argument checking routine.
+ *
+ *	Returns the result of __db_ferr() when a threaded handle
+ *	(DB_AM_THREAD) is given a data DBT lacking both DB_DBT_MALLOC and
+ *	DB_DBT_USERMEM, and 0 if every check falls through.
+ *
+ *	NOTE(review): DB_CHECK_FLAGS and DB_CHECK_FCOMBO are defined outside
+ *	this file; they presumably return an error from this function on an
+ *	illegal flag or an illegal flag pair respectively -- confirm.
+ *
+ * PUBLIC: int __db_getchk __P((const DB *, const DBT *, DBT *, int));
+ */
+int
+__db_getchk(dbp, key, data, flags)
+	const DB *dbp;
+	const DBT *key;
+	DBT *data;
+	int flags;
+{
+	/*
+	 * Check for invalid db->get() function flags.  DB_SET_RECNO is the
+	 * only flag accepted, and only when the handle has DB_BT_RECNUM set.
+	 */
+	DB_CHECK_FLAGS(dbp->dbenv,
+	    "get", flags, F_ISSET(dbp, DB_BT_RECNUM) ? DB_SET_RECNO : 0);
+
+	/* Check for invalid key/data flags. */
+	DB_CHECK_FLAGS(dbp->dbenv, "key", key->flags, 0);
+	DB_CHECK_FLAGS(dbp->dbenv, "data", data->flags,
+	    DB_DBT_MALLOC | DB_DBT_USERMEM | DB_DBT_PARTIAL);
+	DB_CHECK_FCOMBO(dbp->dbenv,
+	    "data", data->flags, DB_DBT_MALLOC, DB_DBT_USERMEM);
+
+	/* A threaded handle requires one of the memory flags on the data. */
+	if (F_ISSET(dbp, DB_AM_THREAD) &&
+	    !F_ISSET(data, DB_DBT_MALLOC | DB_DBT_USERMEM))
+		return (__db_ferr(dbp->dbenv, "threaded data", 1));
+
+	return (0);
+}
+
+/*
+ * __db_putchk --
+ *	Common put argument checking routine.
+ *
+ *	Returns the result of __db_rdonly() if isrdonly is non-zero, EINVAL
+ *	(after logging a message) if isdup is non-zero and the data DBT has
+ *	DB_DBT_PARTIAL set, and 0 if every check falls through.
+ *
+ *	NOTE(review): DB_CHECK_FLAGS and DB_CHECK_FCOMBO are defined outside
+ *	this file; they presumably return an error from this function on an
+ *	illegal flag or an illegal flag pair respectively -- confirm.
+ *
+ * PUBLIC: int __db_putchk __P((const DB *, DBT *, const DBT *, int, int, int));
+ */
+int
+__db_putchk(dbp, key, data, flags, isrdonly, isdup)
+	const DB *dbp;
+	DBT *key;
+	const DBT *data;
+	int flags, isrdonly, isdup;
+{
+	/* Check for changes to a read-only tree. */
+	if (isrdonly)
+		return (__db_rdonly(dbp->dbenv, "put"));
+
+	/*
+	 * Check for invalid db->put() function flags.  DB_APPEND is added to
+	 * the allowed set only for recno databases.
+	 */
+	DB_CHECK_FLAGS(dbp->dbenv, "put", flags,
+	    DB_NOOVERWRITE | (dbp->type == DB_RECNO ? DB_APPEND : 0));
+
+	/* Check for invalid key/data flags. */
+	DB_CHECK_FLAGS(dbp->dbenv, "key", key->flags, 0);
+	DB_CHECK_FLAGS(dbp->dbenv, "data", data->flags,
+	    DB_DBT_MALLOC | DB_DBT_USERMEM | DB_DBT_PARTIAL);
+	DB_CHECK_FCOMBO(dbp->dbenv,
+	    "data", data->flags, DB_DBT_MALLOC, DB_DBT_USERMEM);
+
+	/* Check for partial puts in the presence of duplicates. */
+	if (isdup && F_ISSET(data, DB_DBT_PARTIAL)) {
+		__db_err(dbp->dbenv,
+"a partial put in the presence of duplicates requires a cursor operation");
+		return (EINVAL);
+	}
+	return (0);
+}
+
+/*
+ * __db_statchk --
+ *	Common stat argument checking routine.
+ *
+ *	Returns the result of __db_ferr() when DB_RECORDCOUNT is requested on
+ *	a btree handle that does not have DB_BT_RECNUM set, and 0 if every
+ *	check falls through.
+ *
+ *	NOTE(review): DB_CHECK_FLAGS is defined outside this file; it
+ *	presumably returns an error from this function when flags contains
+ *	anything other than DB_RECORDCOUNT -- confirm.
+ *
+ * PUBLIC: int __db_statchk __P((const DB *, int));
+ */
+int
+__db_statchk(dbp, flags)
+	const DB *dbp;
+	int flags;
+{
+	/* Check for invalid db->stat() function flags. */
+	DB_CHECK_FLAGS(dbp->dbenv, "stat", flags, DB_RECORDCOUNT);
+
+	/* A btree can only report a record count if DB_BT_RECNUM is set. */
+	if (LF_ISSET(DB_RECORDCOUNT) &&
+	    dbp->type == DB_BTREE && !F_ISSET(dbp, DB_BT_RECNUM))
+		return (__db_ferr(dbp->dbenv, "stat", 0));
+
+	return (0);
+}
+
+/*
+ * __db_syncchk --
+ *	Common sync argument checking routine.
+ *
+ *	Returns 0 if control falls through the flag check.
+ *
+ *	NOTE(review): DB_CHECK_FLAGS is defined outside this file; it
+ *	presumably returns an error from this function when flags has any
+ *	bit set (no flags are allowed here) -- confirm.
+ *
+ * PUBLIC: int __db_syncchk __P((const DB *, int));
+ */
+int
+__db_syncchk(dbp, flags)
+	const DB *dbp;
+	int flags;
+{
+	/* Check for invalid db->sync() function flags. */
+	DB_CHECK_FLAGS(dbp->dbenv, "sync", flags, 0);
+
+	return (0);
+}
+
+/*
+ * __db_ferr --
+ *	Report an illegal flag, or an illegal flag combination, through
+ *	__db_err().  The message names the interface passed in name; a
+ *	non-zero combo selects the "combination" wording.  Always returns
+ *	EINVAL.
+ *
+ * PUBLIC: int __db_ferr __P((const DB_ENV *, char *, int));
+ */
+int
+__db_ferr(dbenv, name, combo)
+	const DB_ENV *dbenv;
+	const char *name;
+	int combo;
+{
+	const char *kind;
+
+	/* The qualifier is empty unless a combination is being reported. */
+	kind = "";
+	if (combo)
+		kind = "combination ";
+
+	__db_err(dbenv, "illegal flag %sspecified to %s", kind, name);
+	return (EINVAL);
+}
+
+/*
+ * __db_rdonly --
+ *	Common readonly message.
+ *
+ *	Reports through __db_err() that the interface given in name tried to
+ *	modify a read-only tree.  Always returns EACCES.
+ */
+static int
+__db_rdonly(dbenv, name)
+	const DB_ENV *dbenv;
+	const char *name;
+{
+	__db_err(dbenv, "%s: attempt to modify a read-only tree", name);
+	return (EACCES);
+}
diff --git a/db2/common/db_log2.c b/db2/common/db_log2.c
new file mode 100644
index 0000000000..9af01116f6
--- /dev/null
+++ b/db2/common/db_log2.c
@@ -0,0 +1,68 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *	Sleepycat Software.  All rights reserved.
+ */
+/*
+ * Copyright (c) 1995, 1996
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Margo Seltzer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)db_log2.c	10.3 (Sleepycat) 6/21/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+#endif
+
+#include "db_int.h"
+#include "common_ext.h"
+
+/*
+ * __db_log2 --
+ *	Return the base-2 logarithm of num, rounded up: the smallest i for
+ *	which (1 << i) >= num.  Returns 0 when num is 0 or 1, and 32 when
+ *	num is larger than 2^31.
+ *
+ * PUBLIC: u_int32_t __db_log2 __P((u_int32_t));
+ */
+u_int32_t
+__db_log2(num)
+	u_int32_t num;
+{
+	u_int32_t i, limit;
+
+	/*
+	 * Limit wraps to 0 once it is shifted past the top bit, which only
+	 * happens when num is larger than 2^31.  Stop there: without the
+	 * "limit != 0" test, "limit < num" would stay true forever.
+	 */
+	for (i = 0, limit = 1; limit != 0 && limit < num; limit <<= 1, i++)
+		;
+	return (i);
+}
diff --git a/db2/common/db_region.c b/db2/common/db_region.c
new file mode 100644
index 0000000000..51f8f4465c
--- /dev/null
+++ b/db2/common/db_region.c
@@ -0,0 +1,565 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *	Sleepycat Software.  All rights reserved.
+ */
+/*
+ * Copyright (c) 1995, 1996
+ *	The President and Fellows of Harvard University.  All rights reserved.
+ *
+ * This code is derived from software contributed to Harvard by
+ * Margo Seltzer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)db_region.c	10.12 (Sleepycat) 7/26/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#endif
+
+#include "db_int.h"
+#include "common_ext.h"
+
+static int __db_rmap __P((DB_ENV *, int, size_t, void *));
+
+/*
+ * __db_rcreate --
+ *
+ * Common interface for creating a shared region.  Handles synchronization
+ * across multiple processes.
+ *
+ * The dbenv contains the environment for this process, including naming
+ * information.  The path argument represents the parameters passed to
+ * the open routines and may be either a file or a directory.  If it is
+ * a directory, it must exist.  If it is a file, then the file parameter
+ * must be NULL, otherwise, file is the name to be created inside the
+ * directory path.
+ *
+ * On success the function returns 0, stores the descriptor of the backing
+ * file through fdp and stores a pointer to the mapped shared region through
+ * retp.  On failure it returns a non-zero error value and leaves *retp
+ * untouched.
+ *
+ * The region lock is acquired here and is not released before returning
+ * successfully.
+ *
+ * Note that *fdp is assigned as soon as the file is open, so after a later
+ * failure it holds a descriptor that the error path has already closed.
+ *
+ * PUBLIC: int __db_rcreate __P((DB_ENV *, APPNAME,
+ * PUBLIC:    const char *, const char *, int, size_t, int *, void *));
+ */
+int
+__db_rcreate(dbenv, appname, path, file, mode, size, fdp, retp)
+	DB_ENV *dbenv;
+	APPNAME appname;
+	const char *path, *file;
+	int mode, *fdp;
+	size_t size;
+	void *retp;
+{
+	RLAYOUT *rp;
+	int fd, ret;
+	char *name;
+
+	fd = -1;
+	rp = NULL;
+
+	/*
+	 * Get the filename -- note, if it's a temporary file, it will
+	 * be created by the underlying temporary file creation code,
+	 * so we have to check the file descriptor to be sure it's an
+	 * error.
+	 */
+	if ((ret = __db_appname(dbenv, appname, path, file, &fd, &name)) != 0)
+		return (ret);
+
+	/*
+	 * Now open the file. We need to make sure that multiple processes
+	 * that attempt to create the region at the same time are properly
+	 * ordered, so we open it O_EXCL and O_CREAT so two simultaneous
+	 * attempts to create the region will return failure in one of the
+	 * attempts.
+	 *
+	 * EEXIST is passed back to the caller without logging a message.
+	 */
+	if (fd == -1 && (ret = __db_fdopen(name,
+	    DB_CREATE | DB_EXCL, DB_CREATE | DB_EXCL, mode, &fd)) != 0) {
+		if (ret != EEXIST)
+			__db_err(dbenv,
+			    "region create: %s: %s", name, strerror(ret));
+		goto err;
+	}
+	*fdp = fd;
+
+	/* Grow the region to the correct size. */
+	if ((ret = __db_rgrow(dbenv, fd, size)) != 0)
+		goto err;
+
+	/* Map the region in. */
+	if ((ret = __db_rmap(dbenv, fd, size, &rp)) != 0)
+		goto err;
+
+	/*
+	 * Initialize the common information.
+	 *
+	 * !!!
+	 * We have to order the region creates so that two processes don't try
+	 * to simultaneously create the region and so that processes that are
+	 * joining the region never see inconsistent data.  We'd like to play
+	 * file permissions games, but we can't because WNT filesystems won't
+	 * open a file mode 0.
+	 *
+	 * So, the process that's creating the region always acquires the lock
+	 * before setting the version number.  Any process joining always
+	 * checks the version number before attempting to acquire the lock.
+	 *
+	 * We have to check the version number first, because if the version
+	 * number has not been written, it's possible that the mutex has not
+	 * been initialized in which case an attempt to get it could lead to
+	 * random behavior.  If the version number isn't there (the file size
+	 * is too small) or it's 0, we know that the region is being created.
+	 */
+	(void)__db_mutex_init(&rp->lock, MUTEX_LOCK_OFFSET(rp, &rp->lock));
+	(void)__db_mutex_lock(&rp->lock,
+	    fd, dbenv == NULL ? NULL : dbenv->db_yield);
+
+	rp->refcnt = 1;
+	rp->size = size;
+	rp->flags = 0;
+	db_version(&rp->majver, &rp->minver, &rp->patch);
+
+	if (name != NULL)
+		FREES(name);
+
+	*(void **)retp = rp;
+	return (0);
+
+	/*
+	 * Error exit: undo whatever was set up.  Nothing is unlinked or
+	 * closed unless a descriptor was obtained.
+	 */
+err:	if (fd != -1) {
+		if (rp != NULL)
+			(void)__db_munmap(rp, rp->size);
+		(void)__db_unlink(name);
+		(void)__db_close(fd);
+	}
+	if (name != NULL)
+		FREES(name);
+	return (ret);
+}
+
+/*
+ * __db_ropen --
+ *	Construct the name of a file, open it and map it in.
+ *
+ *	On success returns 0, stores the descriptor through fdp, stores the
+ *	mapped region through retp and increments the region's reference
+ *	count.  On failure returns a non-zero error value; EAGAIN is returned
+ *	when the file is smaller than an RLAYOUT, when the major version is
+ *	still 0, when the file size changed between the two size checks, or
+ *	when the region is flagged DB_R_DELETED.
+ *
+ *	The region lock is taken and released around the final checks unless
+ *	DB_MUTEXDEBUG is set in flags.
+ *
+ *	Note that *fdp is assigned as soon as the file is open, so after a
+ *	later failure it holds a descriptor that the error path has already
+ *	closed.
+ *
+ * PUBLIC: int __db_ropen __P((DB_ENV *,
+ * PUBLIC:    APPNAME, const char *, const char *, int, int *, void *));
+ */
+int
+__db_ropen(dbenv, appname, path, file, flags, fdp, retp)
+	DB_ENV *dbenv;
+	APPNAME appname;
+	const char *path, *file;
+	int flags, *fdp;
+	void *retp;
+{
+	RLAYOUT *rp;
+	off_t size1, size2;
+	int fd, ret;
+	char *name;
+
+	fd = -1;
+	rp = NULL;
+
+	/* Get the filename. */
+	if ((ret = __db_appname(dbenv, appname, path, file, NULL, &name)) != 0)
+		return (ret);
+
+	/* Open the file. */
+	if ((ret = __db_fdopen(name, flags, DB_MUTEXDEBUG, 0, &fd)) != 0) {
+		__db_err(dbenv, "region open: %s: %s", name, strerror(ret));
+		goto err2;
+	}
+
+	*fdp = fd;
+
+	/*
+	 * Map the file in.  We have to do things in a strange order so that
+	 * we don't get into a situation where the file was just created and
+	 * isn't yet initialized.  See the comment in __db_rcreate() above.
+	 *
+	 * XXX
+	 * We'd like to test to see if the file is too big to mmap.  Since we
+	 * don't know what size or type off_t's or size_t's are, or the largest
+	 * unsigned integral type is, or what random insanity the local C
+	 * compiler will perpetrate, doing the comparison in a portable way is
+	 * flatly impossible.  Hope that mmap fails if the file is too large.
+	 *
+	 */
+	if ((ret = __db_stat(dbenv, name, fd, &size1, NULL)) != 0)
+		goto err2;
+
+	/* Check to make sure the first block has been written. */
+	if ((size_t) size1 < sizeof(RLAYOUT)) {
+		ret = EAGAIN;
+		goto err2;
+	}
+
+	/* Map in whatever is there. */
+	if ((ret = __db_rmap(dbenv, fd, size1, &rp)) != 0)
+		goto err2;
+
+	/*
+	 * Check to make sure the region has been initialized.  We can't just
+	 * grab the lock because the lock may not have been initialized yet.
+	 */
+	if (rp->majver == 0) {
+		ret = EAGAIN;
+		goto err2;
+	}
+
+	/* Get the region lock. */
+	if (!LF_ISSET(DB_MUTEXDEBUG))
+		(void)__db_mutex_lock(&rp->lock,
+		    fd, dbenv == NULL ? NULL : dbenv->db_yield);
+
+	/*
+	 * The file may have been half-written if we were descheduled between
+	 * getting the size of the file and checking the major version.  Check
+	 * to make sure we got the entire file.
+	 */
+	if ((ret = __db_stat(dbenv, name, fd, &size2, NULL)) != 0)
+		goto err1;
+	if (size1 != size2) {
+		ret = EAGAIN;
+		goto err1;
+	}
+
+	/* The file may have just been deleted. */
+	if (F_ISSET(rp, DB_R_DELETED)) {
+		ret = EAGAIN;
+		goto err1;
+	}
+
+	/* Increment the reference count. */
+	++rp->refcnt;
+
+	/* Release the lock. */
+	if (!LF_ISSET(DB_MUTEXDEBUG))
+		(void)__db_mutex_unlock(&rp->lock, fd);
+
+	FREES(name);
+
+	*(void **)retp = rp;
+	return (0);
+
+	/*
+	 * Error exits: err1 is used once the lock may be held, err2 before
+	 * that.  Both unmap the region and close the descriptor if they were
+	 * set up.
+	 */
+err1:	if (!LF_ISSET(DB_MUTEXDEBUG))
+		(void)__db_mutex_unlock(&rp->lock, fd);
+err2:	if (rp != NULL)
+		(void)__db_munmap(rp, rp->size);
+	if (fd != -1)
+		(void)__db_close(fd);
+	FREES(name);
+	return (ret);
+}
+
+/*
+ * __db_rclose --
+ *	Detach from a shared memory region: drop one reference under the
+ *	region lock, unmap the region and close its descriptor.
+ *
+ *	Returns 0 when every step succeeds.  If the lock cannot be obtained
+ *	nothing else is attempted.  Otherwise all remaining steps are run and
+ *	the first failure is the one logged and returned.
+ *
+ * PUBLIC: int __db_rclose __P((DB_ENV *, int, void *));
+ */
+int
+__db_rclose(dbenv, fd, ptr)
+	DB_ENV *dbenv;
+	int fd;
+	void *ptr;
+{
+	RLAYOUT *region;
+	const char *what;
+	int ret, t_ret;
+
+	region = ptr;
+
+	ret = __db_mutex_lock(&region->lock,
+	    fd, dbenv == NULL ? NULL : dbenv->db_yield);
+	if (ret != 0)
+		what = "lock get";
+	else {
+		what = NULL;
+
+		/* One fewer user of the region. */
+		--region->refcnt;
+
+		t_ret = __db_mutex_unlock(&region->lock, fd);
+		if (t_ret != 0) {
+			ret = t_ret;
+			what = "lock release";
+		}
+
+		/* Keep going after a failure, but remember only the first. */
+		t_ret = __db_munmap(ptr, region->size);
+		if (t_ret != 0 && what == NULL) {
+			ret = t_ret;
+			what = "munmap";
+		}
+
+		t_ret = __db_close(fd);
+		if (t_ret != 0 && what == NULL) {
+			ret = t_ret;
+			what = "close";
+		}
+
+		if (what == NULL)
+			return (0);
+	}
+
+	__db_err(dbenv, "region detach: %s: %s", what, strerror(ret));
+	return (ret);
+}
+
+/*
+ * __db_runlink --
+ *	Remove a shared memory region.
+ *
+ *	Returns 0 if the backing file does not exist or was removed.  With
+ *	force set the file is simply unlinked.  Without it the region is
+ *	opened, and the call fails with ENOENT if the region is already
+ *	flagged DB_R_DELETED or with EBUSY if anyone else holds a reference;
+ *	otherwise the region is flagged DB_R_DELETED, closed, and the file is
+ *	unlinked, retrying a few times.  Any failure is logged through
+ *	__db_err() and returned.
+ *
+ * PUBLIC: int __db_runlink __P((DB_ENV *,
+ * PUBLIC:    APPNAME, const char *, const char *, int));
+ */
+int
+__db_runlink(dbenv, appname, path, file, force)
+	DB_ENV *dbenv;
+	APPNAME appname;
+	const char *path, *file;
+	int force;
+{
+	RLAYOUT *rp;
+	int cnt, fd, ret, t_ret;
+	char *name;
+
+	rp = NULL;
+
+	/* Get the filename. */
+	if ((ret = __db_appname(dbenv, appname, path, file, NULL, &name)) != 0)
+		return (ret);
+
+	/* If the file doesn't exist, we're done; don't leak the name. */
+	if (__db_exists(name, NULL)) {
+		FREES(name);
+		return (0);		/* XXX: ENOENT? */
+	}
+
+	/*
+	 * If we're called with a force flag, try and unlink the file.  This
+	 * may not succeed if the file is currently open, but there's nothing
+	 * we can do about that.  There is a race condition between the check
+	 * for existence above and the actual unlink.  If someone else snuck
+	 * in and removed it before we do the remove, then we might get an
+	 * ENOENT error.  If we get the ENOENT, we treat it as success, just
+	 * as we do above.
+	 */
+	if (force) {
+		if ((ret = __db_unlink(name)) != 0 && ret != ENOENT)
+			goto err1;
+		FREES(name);
+		return (0);
+	}
+
+	/* Open and lock the region. */
+	if ((ret = __db_ropen(dbenv, appname, path, file, 0, &fd, &rp)) != 0)
+		goto err1;
+	(void)__db_mutex_lock(&rp->lock,
+	    fd, dbenv == NULL ? NULL : dbenv->db_yield);
+
+	/* If the region is currently being deleted, fail. */
+	if (F_ISSET(rp, DB_R_DELETED)) {
+		ret = ENOENT;		/* XXX: ENOENT? */
+		goto err2;
+	}
+
+	/* If the region is currently in use by someone else, fail. */
+	if (rp->refcnt > 1) {
+		ret = EBUSY;
+		goto err2;
+	}
+
+	/* Set the delete flag. */
+	F_SET(rp, DB_R_DELETED);
+
+	/*
+	 * Release the lock and close the region.  The close status goes
+	 * straight into ret so that a failure is reported and returned as an
+	 * error rather than as 0.
+	 */
+	(void)__db_mutex_unlock(&rp->lock, fd);
+	if ((ret = __db_rclose(dbenv, fd, rp)) != 0)
+		goto err1;
+
+	/*
+	 * Unlink the region.  There's a race here -- other threads or
+	 * processes might be opening the region while we're trying to
+	 * remove it.  They'll fail, because we've set the DELETED flag,
+	 * but they could still stop us from succeeding in the unlink.
+	 */
+	for (cnt = 5; cnt > 0; --cnt) {
+		if ((ret = __db_unlink(name)) == 0)
+			break;
+		(void)__db_sleep(0, 250000);
+	}
+	if (ret == 0) {
+		FREES(name);
+		return (0);
+	}
+
+	/*
+	 * Not a clue.  Try to clear the DB_R_DELETED flag.
+	 *
+	 * Ret keeps the unlink error from here on, so that it is what gets
+	 * reported and returned; the status of the reopen goes into t_ret.
+	 * __db_ropen() returns EAGAIN for a region flagged DB_R_DELETED, so
+	 * this reopen may well fail, in which case the flag stays set.
+	 */
+	if ((t_ret = __db_ropen(dbenv, appname, path, file, 0, &fd, &rp)) != 0)
+		goto err1;
+	(void)__db_mutex_lock(&rp->lock,
+	    fd, dbenv == NULL ? NULL : dbenv->db_yield);
+	F_CLR(rp, DB_R_DELETED);
+	/* FALLTHROUGH */
+
+err2:	(void)__db_mutex_unlock(&rp->lock, fd);
+	(void)__db_rclose(dbenv, fd, rp);
+err1:	__db_err(dbenv, "region unlink: %s: %s", name, strerror(ret));
+	FREES(name);
+	return (ret);
+}
+
+/*
+ * DB creates all regions on 4K boundaries so that we don't make the
+ * underlying VM unhappy.
+ */
+#define	__DB_VMPAGESIZE	(4 * 1024)
+
+/*
+ * __db_rgrow --
+ *	Extend a region by a specified amount.
+ *
+ *	Appends incr bytes, rounded up to a multiple of __DB_VMPAGESIZE, to
+ *	the file open on fd.  When MMAP_INIT_NEEDED is defined every new page
+ *	is written with nuls; otherwise only the last new page is written.
+ *	An incr of 0 is a no-op.
+ *
+ *	Returns 0 on success.  On failure the error is logged through
+ *	__db_err() and returned; a short write is reported as EIO.
+ *
+ * PUBLIC: int __db_rgrow __P((DB_ENV *, int, size_t));
+ */
+int
+__db_rgrow(dbenv, fd, incr)
+	DB_ENV *dbenv;
+	int fd;
+	size_t incr;
+{
+#ifdef MMAP_INIT_NEEDED
+	size_t i;
+#endif
+	ssize_t nw;
+	int ret;
+	char buf[__DB_VMPAGESIZE];
+
+	/*
+	 * Nothing to add.  Without this check the last-page path below would
+	 * compute "incr - __DB_VMPAGESIZE" with incr equal to 0, which wraps
+	 * around because incr is unsigned.
+	 */
+	if (incr == 0)
+		return (0);
+
+	/* Seek to the end of the region. */
+	if ((ret = __db_lseek(fd, 0, 0, 0, SEEK_END)) != 0)
+		goto err;
+
+	/* Write nuls to the new bytes. */
+	memset(buf, 0, sizeof(buf));
+
+	/*
+	 * Historically, some systems required that all of the bytes of the
+	 * region be written before you could mmap it and access it randomly.
+	 */
+#ifdef MMAP_INIT_NEEDED
+	/* Extend the region by writing each new page. */
+	for (i = 0; i < incr; i += __DB_VMPAGESIZE) {
+		if ((ret = __db_write(fd, buf, sizeof(buf), &nw)) != 0)
+			goto err;
+		if (nw != sizeof(buf))
+			goto eio;
+	}
+#else
+	/*
+	 * Extend the region by writing the last page.
+	 *
+	 * Round off the increment to the next page boundary.
+	 */
+	incr += __DB_VMPAGESIZE - 1;
+	incr -= incr % __DB_VMPAGESIZE;
+
+	/* Write the last page, not the page after the last. */
+	if ((ret = __db_lseek(fd, 0, 0, incr - __DB_VMPAGESIZE, SEEK_CUR)) != 0)
+		goto err;
+	if ((ret = __db_write(fd, buf, sizeof(buf), &nw)) != 0)
+		goto err;
+	if (nw != sizeof(buf))
+		goto eio;
+#endif
+	return (0);
+
+	/* A short write has no error code of its own; report it as EIO. */
+eio:	ret = EIO;
+err:	__db_err(dbenv, "region grow: %s", strerror(ret));
+	return (ret);
+}
+
+/*
+ * __db_rremap --
+ *	Unmap the old region and map in a new region of a new size.
+ *
+ *	If the unmap fails, the error is logged and returned and no new
+ *	mapping is attempted.  Otherwise the result of __db_rmap() is
+ *	returned, which stores the address of the new region through retp
+ *	on success.
+ *
+ * PUBLIC: int __db_rremap __P((DB_ENV *, void *, size_t, size_t, int, void *));
+ */
+int
+__db_rremap(dbenv, ptr, oldsize, newsize, fd, retp)
+	DB_ENV *dbenv;
+	void *ptr, *retp;
+	size_t oldsize, newsize;
+	int fd;
+{
+	int ret;
+
+	ret = __db_munmap(ptr, oldsize);
+	if (ret == 0)
+		return (__db_rmap(dbenv, fd, newsize, retp));
+
+	__db_err(dbenv, "region remap: munmap: %s", strerror(ret));
+	return (ret);
+}
+
+/*
+ * __db_rmap --
+ *	Attach to a shared memory region.
+ *
+ *	Maps size bytes of the file open on fd and stores the address
+ *	through retp.  If the size recorded in the region is smaller than
+ *	the size just mapped, the recorded size is raised to match.  Returns
+ *	0 on success; a mapping failure is logged and returned.
+ */
+static int
+__db_rmap(dbenv, fd, size, retp)
+	DB_ENV *dbenv;
+	int fd;
+	size_t size;
+	void *retp;
+{
+	RLAYOUT *region;
+	int ret;
+
+	ret = __db_mmap(fd, size, 0, 0, &region);
+	if (ret != 0) {
+		__db_err(dbenv, "region map: mmap %s", strerror(ret));
+		return (ret);
+	}
+
+	/* Never leave the recorded size below what was actually mapped. */
+	if (size > region->size)
+		region->size = size;
+
+	*(void **)retp = region;
+	return (0);
+}
diff --git a/db2/common/db_salloc.c b/db2/common/db_salloc.c
new file mode 100644
index 0000000000..f0202ddb90
--- /dev/null
+++ b/db2/common/db_salloc.c
@@ -0,0 +1,290 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *	Sleepycat Software.  All rights reserved.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)db_salloc.c	10.6 (Sleepycat) 7/5/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <errno.h>
+#include <stdio.h>
+#endif
+
+#include "db_int.h"
+#include "shqueue.h"
+#include "common_ext.h"
+
+/*
+ * Implement shared memory region allocation, using simple first-fit algorithm.
+ * The model is that we take a "chunk" of shared memory store and begin carving
+ * it up into areas, similarly to how malloc works.  We do coalescing on free.
+ *
+ * The "len" field in the __data struct contains the length of the free region
+ * (less the size_t bytes that holds the length).  We use the address provided
+ * by the caller to find this length, which allows us to free a chunk without
+ * requiring that the caller pass in the length of the chunk they're freeing.
+ *
+ * The list head (struct __head) sits at the very start of the area, and the
+ * first chunk immediately follows it.
+ */
+SH_LIST_HEAD(__head);
+struct __data {
+	size_t len;		/* Bytes in the chunk, not counting len. */
+	SH_LIST_ENTRY links;	/* Free list linkage. */
+};
+
+/*
+ * __db_shalloc_init --
+ *	Set up the area as a free list holding one chunk that covers
+ *	everything after the list head.
+ *
+ * PUBLIC: void __db_shalloc_init __P((void *, size_t));
+ */
+void
+__db_shalloc_init(area, size)
+	void *area;
+	size_t size;
+{
+	struct __head *free_list;
+	struct __data *chunk;
+
+	/* The list head lives at the very start of the area. */
+	free_list = area;
+	SH_LIST_INIT(free_list);
+
+	/*
+	 * The single chunk starts right behind the head; its length leaves
+	 * out both the head and the chunk's own length field.
+	 */
+	chunk = (struct __data *)((u_int8_t *)area + sizeof(struct __head));
+	chunk->len = size - (sizeof(struct __head) + sizeof(size_t));
+	SH_LIST_INSERT_HEAD(free_list, chunk, links, __data);
+}
+
+/*
+ * __db_shalloc --
+ *	Allocate some space from the shared region.
+ *
+ *	P is the start of the area set up by __db_shalloc_init(), len the
+ *	number of bytes wanted and align the requested alignment.  The space
+ *	is carved from the tail end of the first free chunk that can hold it.
+ *	On success the address is stored through retp and 0 is returned;
+ *	ENOMEM is returned if no chunk on the free list is large enough.
+ *
+ *	NOTE(review): the "& ~(align - 1)" mask below only rounds down
+ *	correctly when align is a power of two; rounding align up to a
+ *	multiple of sizeof(size_t) does not by itself guarantee that --
+ *	confirm what callers pass.
+ *
+ * PUBLIC: int __db_shalloc __P((void *, size_t, size_t, void *));
+ */
+int
+__db_shalloc(p, len, align, retp)
+	void *p, *retp;
+	size_t len, align;
+{
+	struct __data *elp;
+	size_t *sp;
+	void *rp;
+
+	/*
+	 * We never allocate less than the size of a struct __data, align
+	 * to less than a size_t boundary, or align to something that's not
+	 * a multiple of a size_t.
+	 */
+	if (len < sizeof(struct __data))
+		len = sizeof(struct __data);
+	align = align <= sizeof(size_t) ?
+	    sizeof(size_t) : ALIGN(align, sizeof(size_t));
+
+	/* Walk the list, looking for a slot. */
+	for (elp = SH_LIST_FIRST((struct __head *)p, __data);
+	    elp != NULL;
+	    elp = SH_LIST_NEXT(elp, links, __data)) {
+		/*
+		 * Calculate the value of the returned pointer if we were to
+		 * use this chunk.
+		 *	+ Find the end of the chunk.
+		 *	+ Subtract the memory the user wants.
+		 *	+ Find the closest previous correctly-aligned address.
+		 */
+		rp = (u_int8_t *)elp + sizeof(size_t) + elp->len;
+		rp = (u_int8_t *)rp - len;
+		rp = (u_int8_t *)((ALIGNTYPE)rp & ~(align - 1));
+
+		/*
+		 * Rp may now point before elp->links, in which case the chunk
+		 * was too small, and we have to try again.
+		 */
+		if ((u_int8_t *)rp < (u_int8_t *)&elp->links)
+			continue;
+
+		*(void **)retp = rp;
+
+		/*
+		 * If there are at least 32 bytes of additional memory, divide
+		 * the chunk into two chunks.
+		 *
+		 * The size_t just below rp becomes the length field of the
+		 * allocated piece, and the free chunk shrinks by that piece
+		 * plus its length field.  The free chunk stays on the list.
+		 */
+		if ((u_int8_t *)rp >= (u_int8_t *)&elp->links + 32) {
+			sp = rp;
+			*--sp = elp->len -
+			    ((u_int8_t *)rp - (u_int8_t *)&elp->links);
+			elp->len -= *sp + sizeof(size_t);
+			return (0);
+		}
+
+		/*
+		 * Otherwise, we return the entire chunk, wasting some amount
+		 * of space to keep the list compact.  However, because the
+		 * address we're returning to the user may not be the address
+		 * of the start of the region for alignment reasons, set the
+		 * size_t length fields back to the "real" length field to a
+		 * flag value, so that we can find the real length during free.
+		 *
+		 * The loop stops before elp->len itself, so the chunk's real
+		 * length is left in place.
+		 */
+#define	ILLEGAL_SIZE	1
+		SH_LIST_REMOVE(elp, links, __data);
+		for (sp = rp; (u_int8_t *)--sp >= (u_int8_t *)&elp->links;)
+			*sp = ILLEGAL_SIZE;
+		return (0);
+	}
+
+	/* Nothing found large enough; need to figure out how to grow region. */
+	return (ENOMEM);
+}
+
+/*
+ * __db_shalloc_free --
+ *	Free a shared memory allocation.
+ *
+ *	Regionp is the start of the area set up by __db_shalloc_init() and
+ *	ptr an address previously returned by __db_shalloc().  The chunk is
+ *	put back on the address-ordered free list and merged with the free
+ *	chunk directly after it and/or directly before it when they are
+ *	adjacent.
+ *
+ * PUBLIC: void __db_shalloc_free __P((void *, void *));
+ */
+void
+__db_shalloc_free(regionp, ptr)
+	void *regionp, *ptr;
+{
+	struct __data *elp, *lastp, *newp;
+	struct __head *hp;
+	size_t free_size, *sp;
+	int merged;
+
+	/*
+	 * Step back over flagged length fields to find the beginning of
+	 * the object and its real size.
+	 */
+	for (sp = (size_t *)ptr; sp[-1] == ILLEGAL_SIZE; --sp);
+	ptr = sp;
+
+	/* The length field sits in the size_t just below the object. */
+	newp = (struct __data *)((u_int8_t *)ptr - sizeof(size_t));
+	free_size = newp->len;
+
+	/*
+	 * Walk the list, looking for where this entry goes.
+	 *
+	 * We keep the free list sorted by address so that coalescing is
+	 * trivial.
+	 *
+	 * XXX
+	 * Probably worth profiling this to see how expensive it is.
+	 */
+	hp = (struct __head *)regionp;
+	for (elp = SH_LIST_FIRST(hp, __data), lastp = NULL;
+	    elp != NULL && (void *)elp < (void *)ptr;
+	    lastp = elp, elp = SH_LIST_NEXT(elp, links, __data));
+
+	/*
+	 * Elp is either NULL (we reached the end of the list), or the slot
+	 * after the one that's being returned.  Lastp is either NULL (we're
+	 * returning the first element of the list) or the element before the
+	 * one being returned.
+	 *
+	 * Check for coalescing with the next element.
+	 */
+	merged = 0;
+	if ((u_int8_t *)ptr + free_size == (u_int8_t *)elp) {
+		/* Absorb the next chunk and take its place on the list. */
+		newp->len += elp->len + sizeof(size_t);
+		SH_LIST_REMOVE(elp, links, __data);
+		if (lastp != NULL)
+			SH_LIST_INSERT_AFTER(lastp, newp, links, __data);
+		else
+			SH_LIST_INSERT_HEAD(hp, newp, links, __data);
+		merged = 1;
+	}
+
+	/* Check for coalescing with the previous element. */
+	if (lastp != NULL && (u_int8_t *)lastp +
+	    lastp->len + sizeof(size_t) == (u_int8_t *)newp) {
+		lastp->len += newp->len + sizeof(size_t);
+
+		/*
+		 * If we have already put the new element into the list take
+		 * it back off again because it's just been merged with the
+		 * previous element.
+		 */
+		if (merged)
+			SH_LIST_REMOVE(newp, links, __data);
+		merged = 1;
+	}
+
+	/*
+	 * No neighbor to merge with: link the chunk in by itself.  The else
+	 * below pairs with the inner "lastp == NULL" test.
+	 */
+	if (!merged)
+		if (lastp == NULL)
+			SH_LIST_INSERT_HEAD(hp, newp, links, __data);
+		else
+			SH_LIST_INSERT_AFTER(lastp, newp, links, __data);
+}
+
+/*
+ * __db_shalloc_count --
+ *	Add up the len fields of every chunk on the free list and return
+ *	the sum.
+ *
+ * PUBLIC: size_t __db_shalloc_count __P((void *));
+ */
+size_t
+__db_shalloc_count(addr)
+	void *addr;
+{
+	struct __data *chunk;
+	size_t total;
+
+	total = 0;
+	chunk = SH_LIST_FIRST((struct __head *)addr, __data);
+	while (chunk != NULL) {
+		total += chunk->len;
+		chunk = SH_LIST_NEXT(chunk, links, __data);
+	}
+
+	return (total);
+}
+
+/*
+ * __db_shsizeof --
+ *	Return the size of a shalloc'd piece of memory.
+ *
+ * PUBLIC: size_t __db_shsizeof __P((void *));
+ */
+size_t
+__db_shsizeof(ptr)
+	void *ptr;
+{
+	size_t *lenp;
+
+	/*
+	 * Start at the size_t just below the object and keep moving down
+	 * while it holds the flag value; the first slot that doesn't is the
+	 * chunk's real length field, which is also where the chunk starts.
+	 */
+	lenp = (size_t *)ptr - 1;
+	while (*lenp == ILLEGAL_SIZE)
+		--lenp;
+
+	return (((struct __data *)lenp)->len);
+}
+
+#ifdef DEBUG
+/*
+ * __db_shalloc_dump --
+ *	Print the address and length of every chunk on the free list on one
+ *	line, followed by a newline.  Output goes to fp, or to stderr when fp
+ *	is NULL.
+ *
+ * PUBLIC: void __db_shalloc_dump __P((FILE *, void *));
+ */
+void
+__db_shalloc_dump(fp, addr)
+	FILE *fp;
+	void *addr;
+{
+	struct __data *chunk;
+	FILE *out;
+
+	out = fp == NULL ? stderr : fp;
+
+	chunk = SH_LIST_FIRST((struct __head *)addr, __data);
+	while (chunk != NULL) {
+		fprintf(out,
+		    "%#lx: %lu\t", (u_long)chunk, (u_long)chunk->len);
+		chunk = SH_LIST_NEXT(chunk, links, __data);
+	}
+	fprintf(out, "\n");
+}
+#endif
diff --git a/db2/common/db_shash.c b/db2/common/db_shash.c
new file mode 100644
index 0000000000..988de8a994
--- /dev/null
+++ b/db2/common/db_shash.c
@@ -0,0 +1,90 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *	Sleepycat Software.  All rights reserved.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)db_shash.c	10.3 (Sleepycat) 6/21/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+#endif
+
+#include "db_int.h"
+#include "shqueue.h"
+#include "common_ext.h"
+
+/*
+ * Powers-of-2 and close-by prime number pairs.
+ *
+ * Entries are in increasing order of power and the table ends with a
+ * {0, 0} sentinel.  Each prime is slightly larger than its power, except
+ * for the last pair, where 8191 is just below 8192.
+ */
+static const struct {
+	int	power;
+	int	prime;
+} list[] = {
+	{  64,	  67},
+	{ 128,	 131},
+	{ 256,	 257},
+	{ 512,	 521},
+	{1024,	1031},
+	{2048,	2053},
+	{4096,	4099},
+	{8192,	8191},
+	{0,	   0}
+};
+
+/*
+ * __db_tablesize --
+ *	Choose a size for the hash table.
+ *
+ *	Returns the prime paired with the first power of two in the table
+ *	that is at least n_buckets.  Requests below 64 are treated as 64,
+ *	and requests above the largest power get the last prime in the
+ *	table.
+ *
+ * PUBLIC: int __db_tablesize __P((int));
+ */
+int
+__db_tablesize(n_buckets)
+	int n_buckets;
+{
+	int idx, want;
+
+	/*
+	 * We try to be clever about how big we make the hash tables.  Pick
+	 * a prime number close to the "suggested" number of elements that
+	 * will be in the hash table.  We shoot for minimum collisions (i.e.
+	 * one element in each bucket).  We use 64 as the minimum table size.
+	 *
+	 * Ref: Sedgewick, Algorithms in C, "Hash Functions"
+	 */
+	want = n_buckets < 64 ? 64 : n_buckets;
+
+	/* Stop at the first power that is big enough, or at the sentinel. */
+	idx = 0;
+	while (list[idx].power != 0 && list[idx].power < want)
+		++idx;
+
+	/* Ran off the end: fall back to the last real entry. */
+	if (list[idx].power == 0)
+		--idx;
+
+	return (list[idx].prime);
+}
+
+/*
+ * __db_hashinit --
+ *	Initialize a hash table that resides in shared memory: treat begin
+ *	as an array of nelements tail queue heads and initialize each one.
+ *
+ * PUBLIC: void __db_hashinit __P((void *, int));
+ */
+void
+__db_hashinit(begin, nelements)
+	void *begin;
+	int nelements;
+{
+	SH_TAILQ_HEAD(hash_head) *bucket;
+	int remaining;
+
+	bucket = (struct hash_head *)begin;
+
+	remaining = nelements;
+	while (remaining > 0) {
+		SH_TAILQ_INIT(bucket);
+		++bucket;
+		--remaining;
+	}
+}