about summary refs log tree commit diff
path: root/db2/common
diff options
context:
space:
mode:
Diffstat (limited to 'db2/common')
-rw-r--r-- db2/common/db_appinit.c 306
-rw-r--r-- db2/common/db_apprec.c 67
-rw-r--r-- db2/common/db_err.c 624
-rw-r--r-- db2/common/db_region.c 129
-rw-r--r-- db2/common/db_salloc.c 4
5 files changed, 355 insertions, 775 deletions
diff --git a/db2/common/db_appinit.c b/db2/common/db_appinit.c
index 6ec007be0a..e02b1a872d 100644
--- a/db2/common/db_appinit.c
+++ b/db2/common/db_appinit.c
@@ -8,7 +8,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)db_appinit.c	10.52 (Sleepycat) 6/2/98";
+static const char sccsid[] = "@(#)db_appinit.c	10.66 (Sleepycat) 12/7/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -16,7 +16,6 @@ static const char sccsid[] = "@(#)db_appinit.c	10.52 (Sleepycat) 6/2/98";
 
 #include <ctype.h>
 #include <errno.h>
-#include <signal.h>
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
@@ -34,10 +33,22 @@ static const char sccsid[] = "@(#)db_appinit.c	10.52 (Sleepycat) 6/2/98";
 
 static int __db_home __P((DB_ENV *, const char *, u_int32_t));
 static int __db_parse __P((DB_ENV *, char *));
-static int __db_tmp_dir __P((DB_ENV *, u_int32_t));
 static int __db_tmp_open __P((DB_ENV *, u_int32_t, char *, int *));
 
 /*
+ * This conflict array is used for concurrent db access (cdb).  It
+ * uses the same locks as the db_rw_conflict array, but adds an IW
+ * mode to be used for write cursors.
+ */
+static u_int8_t const db_cdb_conflicts[] = {
+	/*		N   R   W  IW */
+	/*    N */	0,  0,  0,  0,
+	/*    R */	0,  0,  1,  0,
+	/*    W */	0,  1,  1,  1,
+	/*   IW */	0,  0,  1,  1
+};
+
+/*
  * db_version --
  *	Return version information.
  */
@@ -70,21 +81,24 @@ db_appinit(db_home, db_config, dbenv, flags)
 	char * const *p;
 	char *lp, buf[MAXPATHLEN * 2];
 
+	fp = NULL;
+
 	/* Validate arguments. */
 	if (dbenv == NULL)
 		return (EINVAL);
 
-
 #ifdef HAVE_SPINLOCKS
 #define	OKFLAGS								\
-   (DB_CREATE | DB_NOMMAP | DB_THREAD | DB_INIT_LOCK | DB_INIT_LOG |	\
-    DB_INIT_MPOOL | DB_INIT_TXN | DB_MPOOL_PRIVATE | DB_RECOVER |	\
-    DB_RECOVER_FATAL | DB_TXN_NOSYNC | DB_USE_ENVIRON | DB_USE_ENVIRON_ROOT)
+    (DB_CREATE | DB_INIT_CDB | DB_INIT_LOCK | DB_INIT_LOG |		\
+    DB_INIT_MPOOL | DB_INIT_TXN | DB_MPOOL_PRIVATE | DB_NOMMAP |	\
+    DB_RECOVER | DB_RECOVER_FATAL | DB_THREAD | DB_TXN_NOSYNC |		\
+    DB_USE_ENVIRON | DB_USE_ENVIRON_ROOT)
 #else
 #define	OKFLAGS								\
-   (DB_CREATE | DB_NOMMAP | DB_INIT_LOCK | DB_INIT_LOG |		\
-    DB_INIT_MPOOL | DB_INIT_TXN | DB_MPOOL_PRIVATE | DB_RECOVER |	\
-    DB_RECOVER_FATAL | DB_TXN_NOSYNC | DB_USE_ENVIRON | DB_USE_ENVIRON_ROOT)
+    (DB_CREATE | DB_INIT_CDB | DB_INIT_LOCK | DB_INIT_LOG |		\
+    DB_INIT_MPOOL | DB_INIT_TXN | DB_MPOOL_PRIVATE | DB_NOMMAP |	\
+    DB_RECOVER | DB_RECOVER_FATAL | DB_TXN_NOSYNC |			\
+    DB_USE_ENVIRON | DB_USE_ENVIRON_ROOT)
 #endif
 	if ((ret = __db_fchk(dbenv, "db_appinit", flags, OKFLAGS)) != 0)
 		return (ret);
@@ -97,8 +111,6 @@ db_appinit(db_home, db_config, dbenv, flags)
 	if (LF_ISSET(DB_THREAD))
 		F_SET(dbenv, DB_ENV_THREAD);
 
-	fp = NULL;
-
 	/* Set the database home. */
 	if ((ret = __db_home(dbenv, db_home, flags)) != 0)
 		goto err;
@@ -127,8 +139,17 @@ db_appinit(db_home, db_config, dbenv, flags)
 		(void)strcat(buf, CONFIG_NAME);
 		if ((fp = fopen(buf, "r")) != NULL) {
 			while (fgets(buf, sizeof(buf), fp) != NULL) {
-				if ((lp = strchr(buf, '\n')) != NULL)
-					*lp = '\0';
+				if ((lp = strchr(buf, '\n')) == NULL) {
+					__db_err(dbenv,
+					    "%s: line too long", CONFIG_NAME);
+					ret = EINVAL;
+					goto err;
+				}
+				*lp = '\0';
+				if (buf[0] == '\0' ||
+				    buf[0] == '#' || isspace(buf[0]))
+					continue;
+
 				if ((ret = __db_parse(dbenv, buf)) != 0)
 					goto err;
 			}
@@ -138,11 +159,14 @@ db_appinit(db_home, db_config, dbenv, flags)
 	}
 
 	/* Set up the tmp directory path. */
-	if (dbenv->db_tmp_dir == NULL &&
-	    (ret = __db_tmp_dir(dbenv, flags)) != 0)
+	if (dbenv->db_tmp_dir == NULL && (ret = __os_tmpdir(dbenv, flags)) != 0)
 		goto err;
 
-	/* Indicate that the path names have been set. */
+	/*
+	 * Flag that the structure has been initialized by the application.
+	 * Note, this must be set before calling into the subsystems as it
+	 * is used when we're doing file naming.
+	 */
 	F_SET(dbenv, DB_ENV_APPINIT);
 
 	/*
@@ -166,6 +190,18 @@ db_appinit(db_home, db_config, dbenv, flags)
 	 * Default permissions are read-write for both owner and group.
 	 */
 	mode = __db_omode("rwrw--");
+	if (LF_ISSET(DB_INIT_CDB)) {
+		if (LF_ISSET(DB_INIT_LOCK | DB_INIT_LOG | DB_INIT_TXN)) {
+			ret = EINVAL;
+			goto err;
+		}
+		F_SET(dbenv, DB_ENV_CDB);
+		dbenv->lk_conflicts = db_cdb_conflicts;
+		dbenv->lk_modes = DB_LOCK_RW_N + 1;
+		if ((ret = lock_open(NULL, LF_ISSET(DB_CREATE | DB_THREAD),
+		    mode, dbenv, &dbenv->lk_info)) != 0)
+			goto err;
+	}
 	if (LF_ISSET(DB_INIT_LOCK) && (ret = lock_open(NULL,
 	    LF_ISSET(DB_CREATE | DB_THREAD),
 	    mode, dbenv, &dbenv->lk_info)) != 0)
@@ -232,28 +268,32 @@ db_appexit(dbenv)
 	if (dbenv->tx_info && (t_ret = txn_close(dbenv->tx_info)) != 0)
 		if (ret == 0)
 			ret = t_ret;
-	if (dbenv->mp_info && (t_ret = memp_close(dbenv->mp_info)) != 0)
+	if (dbenv->lg_info && (t_ret = log_close(dbenv->lg_info)) != 0)
 		if (ret == 0)
 			ret = t_ret;
-	if (dbenv->lg_info && (t_ret = log_close(dbenv->lg_info)) != 0)
+	if (dbenv->mp_info && (t_ret = memp_close(dbenv->mp_info)) != 0)
 		if (ret == 0)
 			ret = t_ret;
 	if (dbenv->lk_info && (t_ret = lock_close(dbenv->lk_info)) != 0)
 		if (ret == 0)
 			ret = t_ret;
 
+	/* Clear initialized flag (after subsystems, it affects naming). */
+	F_CLR(dbenv, DB_ENV_APPINIT);
+
 	/* Free allocated memory. */
 	if (dbenv->db_home != NULL)
-		FREES(dbenv->db_home);
+		__os_freestr(dbenv->db_home);
 	if ((p = dbenv->db_data_dir) != NULL) {
 		for (; *p != NULL; ++p)
-			FREES(*p);
-		FREE(dbenv->db_data_dir, dbenv->data_cnt * sizeof(char **));
+			__os_freestr(*p);
+		__os_free(dbenv->db_data_dir,
+		    dbenv->data_cnt * sizeof(char **));
 	}
 	if (dbenv->db_log_dir != NULL)
-		FREES(dbenv->db_log_dir);
+		__os_freestr(dbenv->db_log_dir);
 	if (dbenv->db_tmp_dir != NULL)
-		FREES(dbenv->db_tmp_dir);
+		__os_freestr(dbenv->db_tmp_dir);
 
 	return (ret);
 }
@@ -261,7 +301,7 @@ db_appexit(dbenv)
 #define	DB_ADDSTR(str) {						\
 	if ((str) != NULL) {						\
 		/* If leading slash, start over. */			\
-		if (__db_abspath(str)) {				\
+		if (__os_abspath(str)) {				\
 			p = start;					\
 			slash = 0;					\
 		}							\
@@ -317,10 +357,9 @@ __db_appname(dbenv, appname, dir, file, tmp_oflags, fdp, namep)
 	 * path, we're done.  If the directory is, simply append the file and
 	 * return.
 	 */
-	if (file != NULL && __db_abspath(file))
-		return ((*namep =
-		    (char *)__db_strdup(file)) == NULL ? ENOMEM : 0);
-	if (dir != NULL && __db_abspath(dir)) {
+	if (file != NULL && __os_abspath(file))
+		return (__os_strdup(file, namep));
+	if (dir != NULL && __os_abspath(dir)) {
 		a = dir;
 		goto done;
 	}
@@ -417,7 +456,7 @@ retry:	switch (appname) {
 	if (0) {
 tmp:		if (dbenv == NULL || !F_ISSET(dbenv, DB_ENV_APPINIT)) {
 			memset(&etmp, 0, sizeof(etmp));
-			if ((ret = __db_tmp_dir(&etmp, DB_USE_ENVIRON)) != 0)
+			if ((ret = __os_tmpdir(&etmp, DB_USE_ENVIRON)) != 0)
 				return (ret);
 			tmp_free = 1;
 			a = etmp.db_tmp_dir;
@@ -437,12 +476,11 @@ done:	len =
 	 * name.
 	 */
 #define	DB_TRAIL	"XXXXXX"
-	if ((start =
-	    (char *)__db_malloc(len + sizeof(DB_TRAIL) + 10)) == NULL) {
-		__db_err(dbenv, "%s", strerror(ENOMEM));
+	if ((ret =
+	    __os_malloc(len + sizeof(DB_TRAIL) + 10, NULL, &start)) != 0) {
 		if (tmp_free)
-			FREES(etmp.db_tmp_dir);
-		return (ENOMEM);
+			__os_freestr(etmp.db_tmp_dir);
+		return (ret);
 	}
 
 	slash = 0;
@@ -452,28 +490,32 @@ done:	len =
 	DB_ADDSTR(file);
 	*p = '\0';
 
+	/* Discard any space allocated to find the temp directory. */
+	if (tmp_free) {
+		__os_freestr(etmp.db_tmp_dir);
+		tmp_free = 0;
+	}
+
 	/*
 	 * If we're opening a data file, see if it exists.  If it does,
 	 * return it, otherwise, try and find another one to open.
 	 */
-	if (data_entry != -1 && __db_exists(start, NULL) != 0) {
-		FREES(start);
+	if (data_entry != -1 && __os_exists(start, NULL) != 0) {
+		__os_freestr(start);
 		a = b = c = NULL;
 		goto retry;
 	}
 
-	/* Discard any space allocated to find the temp directory. */
-	if (tmp_free)
-		FREES(etmp.db_tmp_dir);
-
 	/* Create the file if so requested. */
 	if (tmp_create &&
 	    (ret = __db_tmp_open(dbenv, tmp_oflags, start, fdp)) != 0) {
-		FREES(start);
+		__os_freestr(start);
 		return (ret);
 	}
 
-	if (namep != NULL)
+	if (namep == NULL)
+		__os_freestr(start);
+	else
 		*namep = start;
 	return (0);
 }
@@ -511,11 +553,7 @@ __db_home(dbenv, db_home, flags)
 	if (p == NULL)
 		return (0);
 
-	if ((dbenv->db_home = (char *)__db_strdup(p)) == NULL) {
-		__db_err(dbenv, "%s", strerror(ENOMEM));
-		return (ENOMEM);
-	}
-	return (0);
+	return (__os_strdup(p, &dbenv->db_home));
 }
 
 /*
@@ -530,152 +568,73 @@ __db_parse(dbenv, s)
 	int ret;
 	char *local_s, *name, *value, **p, *tp;
 
-	ret = 0;
-
 	/*
 	 * We need to strdup the argument in case the caller passed us
 	 * static data.
 	 */
-	if ((local_s = (char *)__db_strdup(s)) == NULL)
-		return (ENOMEM);
+	if ((ret = __os_strdup(s, &local_s)) != 0)
+		return (ret);
 
-	tp = local_s;
-	while ((name = strsep(&tp, " \t")) != NULL && *name == '\0')
+	/*
+	 * Name/value pairs are parsed as two white-space separated strings.
+	 * Leading and trailing white-space is trimmed from the value, but
+	 * it may contain embedded white-space.  Note: we use the isspace(3)
+	 * macro because it's more portable, but that means that you can use
+	 * characters like form-feed to separate the strings.
+	 */
+	name = local_s;
+	for (tp = name; *tp != '\0' && !isspace(*tp); ++tp)
+		;
+	if (*tp == '\0' || tp == name)
+		goto illegal;
+	*tp = '\0';
+	for (++tp; isspace(*tp); ++tp)
 		;
-	if (name == NULL)
+	if (*tp == '\0')
 		goto illegal;
-	while ((value = strsep(&tp, " \t")) != NULL && *value == '\0')
+	value = tp;
+	for (++tp; *tp != '\0'; ++tp)
+		;
+	for (--tp; isspace(*tp); --tp)
 		;
-	if (value == NULL) {
+	if (tp == value) {
 illegal:	ret = EINVAL;
 		__db_err(dbenv, "illegal name-value pair: %s", s);
 		goto err;
 	}
+	*++tp = '\0';
 
 #define	DATA_INIT_CNT	20			/* Start with 20 data slots. */
 	if (!strcmp(name, "DB_DATA_DIR")) {
 		if (dbenv->db_data_dir == NULL) {
-			if ((dbenv->db_data_dir =
-			    (char **)__db_calloc(DATA_INIT_CNT,
-			    sizeof(char **))) == NULL)
-				goto nomem;
+			if ((ret = __os_calloc(DATA_INIT_CNT,
+			    sizeof(char **), &dbenv->db_data_dir)) != 0)
+				goto err;
 			dbenv->data_cnt = DATA_INIT_CNT;
 		} else if (dbenv->data_next == dbenv->data_cnt - 1) {
 			dbenv->data_cnt *= 2;
-			if ((dbenv->db_data_dir =
-			    (char **)__db_realloc(dbenv->db_data_dir,
-			    dbenv->data_cnt * sizeof(char **))) == NULL)
-				goto nomem;
+			if ((ret = __os_realloc(&dbenv->db_data_dir,
+			    dbenv->data_cnt * sizeof(char **))) != 0)
+				goto err;
 		}
 		p = &dbenv->db_data_dir[dbenv->data_next++];
 	} else if (!strcmp(name, "DB_LOG_DIR")) {
 		if (dbenv->db_log_dir != NULL)
-			FREES(dbenv->db_log_dir);
+			__os_freestr(dbenv->db_log_dir);
 		p = &dbenv->db_log_dir;
 	} else if (!strcmp(name, "DB_TMP_DIR")) {
 		if (dbenv->db_tmp_dir != NULL)
-			FREES(dbenv->db_tmp_dir);
+			__os_freestr(dbenv->db_tmp_dir);
 		p = &dbenv->db_tmp_dir;
 	} else
 		goto err;
 
-	if ((*p = (char *)__db_strdup(value)) == NULL) {
-nomem:		ret = ENOMEM;
-		__db_err(dbenv, "%s", strerror(ENOMEM));
-	}
+	ret = __os_strdup(value, p);
 
-err:	FREES(local_s);
+err:	__os_freestr(local_s);
 	return (ret);
 }
 
-#ifdef macintosh
-#include <TFileSpec.h>
-
-static char *sTempFolder;
-#endif
-
-/*
- * tmp --
- *	Set the temporary directory path.
- */
-static int
-__db_tmp_dir(dbenv, flags)
-	DB_ENV *dbenv;
-	u_int32_t flags;
-{
-	static const char * list[] = {	/* Ordered: see db_appinit(3). */
-		"/var/tmp",
-		"/usr/tmp",
-		"/temp",		/* WIN32. */
-		"/tmp",
-		"C:/temp",		/* WIN32. */
-		"C:/tmp",		/* WIN32. */
-		NULL
-	};
-	const char **lp, *p;
-
-	/* Use the environment if it's permitted and initialized. */
-	p = NULL;
-#ifdef HAVE_GETEUID
-	if (LF_ISSET(DB_USE_ENVIRON) ||
-	    (LF_ISSET(DB_USE_ENVIRON_ROOT) && getuid() == 0)) {
-#else
-	if (LF_ISSET(DB_USE_ENVIRON)) {
-#endif
-		if ((p = getenv("TMPDIR")) != NULL && p[0] == '\0') {
-			__db_err(dbenv, "illegal TMPDIR environment variable");
-			return (EINVAL);
-		}
-		/* WIN32 */
-		if (p == NULL && (p = getenv("TEMP")) != NULL && p[0] == '\0') {
-			__db_err(dbenv, "illegal TEMP environment variable");
-			return (EINVAL);
-		}
-		/* WIN32 */
-		if (p == NULL && (p = getenv("TMP")) != NULL && p[0] == '\0') {
-			__db_err(dbenv, "illegal TMP environment variable");
-			return (EINVAL);
-		}
-		/* Macintosh */
-		if (p == NULL &&
-		    (p = getenv("TempFolder")) != NULL && p[0] == '\0') {
-			__db_err(dbenv,
-			    "illegal TempFolder environment variable");
-			return (EINVAL);
-		}
-	}
-
-#ifdef macintosh
-	/* Get the path to the temporary folder. */
-	if (p == NULL) {
-		FSSpec spec;
-
-		if (!Special2FSSpec(kTemporaryFolderType,
-		    kOnSystemDisk, 0, &spec)) {
-			p = FSp2FullPath(&spec);
-			sTempFolder = __db_malloc(strlen(p) + 1);
-			strcpy(sTempFolder, p);
-			p = sTempFolder;
-		}
-	}
-#endif
-
-	/* Step through the list looking for a possibility. */
-	if (p == NULL)
-		for (lp = list; *lp != NULL; ++lp)
-			if (__db_exists(p = *lp, NULL) == 0)
-				break;
-
-	if (p == NULL)
-		return (0);
-
-	if ((dbenv->db_tmp_dir = (char *)__db_strdup(p)) == NULL) {
-		__db_err(dbenv, "%s", strerror(ENOMEM));
-		return (ENOMEM);
-	}
-	return (0);
-}
-
 /*
  * __db_tmp_open --
  *	Create a temporary file.
@@ -687,9 +646,6 @@ __db_tmp_open(dbenv, flags, path, fdp)
 	char *path;
 	int *fdp;
 {
-#ifdef HAVE_SIGFILLSET
-	sigset_t set, oset;
-#endif
 	u_long pid;
 	int mode, isdir, ret;
 	const char *p;
@@ -699,7 +655,7 @@ __db_tmp_open(dbenv, flags, path, fdp)
 	 * Check the target directory; if you have six X's and it doesn't
 	 * exist, this runs for a *very* long time.
 	 */
-	if ((ret = __db_exists(path, &isdir)) != 0) {
+	if ((ret = __os_exists(path, &isdir)) != 0) {
 		__db_err(dbenv, "%s: %s", path, strerror(ret));
 		return (ret);
 	}
@@ -738,27 +694,9 @@ __db_tmp_open(dbenv, flags, path, fdp)
 	LF_SET(DB_CREATE | DB_EXCL);
 	mode = __db_omode("rw----");
 
-	/*
-	 * Try to open a file.  We block every signal we can get our hands
-	 * on so that, if we're interrupted at the wrong time, the temporary
-	 * file isn't left around -- of course, if we drop core in-between
-	 * the calls we'll hang forever, but that's probably okay.  ;-}
-	 */
-#ifdef HAVE_SIGFILLSET
-	if (LF_ISSET(DB_TEMPORARY))
-		(void)sigfillset(&set);
-#endif
+	/* Loop, trying to open a file. */
 	for (;;) {
-#ifdef HAVE_SIGFILLSET
-		if (LF_ISSET(DB_TEMPORARY))
-			(void)sigprocmask(SIG_BLOCK, &set, &oset);
-#endif
-		ret = __db_open(path, flags, flags, mode, fdp);
-#ifdef HAVE_SIGFILLSET
-		if (LF_ISSET(DB_TEMPORARY))
-			(void)sigprocmask(SIG_SETMASK, &oset, NULL);
-#endif
-		if (ret == 0)
+		if ((ret = __db_open(path, flags, flags, mode, fdp)) == 0)
 			return (0);
 
 		/*
diff --git a/db2/common/db_apprec.c b/db2/common/db_apprec.c
index 7f0cb3a212..5e8fec4659 100644
--- a/db2/common/db_apprec.c
+++ b/db2/common/db_apprec.c
@@ -11,7 +11,7 @@
 static const char copyright[] =
 "@(#) Copyright (c) 1996, 1997, 1998\n\
 	Sleepycat Software Inc.  All rights reserved.\n";
-static const char sccsid[] = "@(#)db_apprec.c	10.30 (Sleepycat) 5/3/98";
+static const char sccsid[] = "@(#)db_apprec.c	10.33 (Sleepycat) 10/5/98";
 #endif
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -44,7 +44,8 @@ __db_apprec(dbenv, flags)
 {
 	DBT data;
 	DB_LOG *lp;
-	DB_LSN ckp_lsn, first_lsn, lsn;
+	DB_LSN ckp_lsn, first_lsn, lsn, open_lsn;
+	__txn_ckp_args *ckp_args;
 	time_t now;
 	u_int32_t is_thread;
 	int ret;
@@ -65,10 +66,16 @@ __db_apprec(dbenv, flags)
 
 	/*
 	 * Recovery is done in three passes:
+	 * Pass #0:
+	 *	We need to find the position from which we will open files.
+	 *	We need to open files beginning with the next-to-last
+	 *	checkpoint because we might have crashed after writing the
+	 *	last checkpoint record, but before having written out all
+	 *	the open file information.
 	 * Pass #1:
-	 *	Read forward through the log from the last checkpoint to the
-	 *	end of the log, opening and closing files so that at the end
-	 *	of the log we have the "current" set of files open.
+	 *	Read forward through the log from the second to last checkpoint
+	 *	opening and closing files so that at the end of the log we have
+	 *	the "current" set of files open.
 	 * Pass #2:
 	 *	Read backward through the log undoing any uncompleted TXNs.
 	 *	If doing catastrophic recovery, we read to the beginning of
@@ -84,33 +91,50 @@ __db_apprec(dbenv, flags)
 	 */
 
 	/*
-	 * Find the last checkpoint in the log.  This is the point from which
-	 * we want to begin pass #1 (the TXN_OPENFILES pass).
+	 * Find the second to last checkpoint in the log.  This is the point
+	 * from which we want to begin pass #1 (the TXN_OPENFILES pass).
 	 */
 	memset(&data, 0, sizeof(data));
+	ckp_args = NULL;
+
 	if ((ret = log_get(lp, &ckp_lsn, &data, DB_CHECKPOINT)) != 0) {
 		/*
 		 * If we don't find a checkpoint, start from the beginning.
 		 * If that fails, we're done.  Note, we do not require that
 		 * there be log records if we're performing recovery.
 		 */
-		if ((ret = log_get(lp, &ckp_lsn, &data, DB_FIRST)) != 0) {
+first:		if ((ret = log_get(lp, &ckp_lsn, &data, DB_FIRST)) != 0) {
 			if (ret == DB_NOTFOUND)
 				ret = 0;
 			else
 				__db_err(dbenv, "First log record not found");
 			goto out;
 		}
-	}
+		open_lsn = ckp_lsn;
+	} else if ((ret = __txn_ckp_read(data.data, &ckp_args)) != 0) {
+		__db_err(dbenv, "Invalid checkpoint record at [%ld][%ld]\n",
+		    (u_long)ckp_lsn.file, (u_long)ckp_lsn.offset);
+		goto out;
+	} else if (IS_ZERO_LSN(ckp_args->last_ckp) ||
+		(ret = log_get(lp, &ckp_args->last_ckp, &data, DB_SET)) != 0)
+		goto first;
+	else
+		open_lsn = ckp_args->last_ckp;
 
 	/*
 	 * Now, ckp_lsn is either the lsn of the last checkpoint or the lsn
-	 * of the first record in the log.  Begin the TXN_OPENFILES pass from
-	 * that lsn, and proceed to the end of the log.
+	 * of the first record in the log.  Open_lsn is the second to last
+	 * checkpoint or the beginning of the log; begin the TXN_OPENFILES
+	 * pass from that lsn, and proceed to the end of the log.
 	 */
-	lsn = ckp_lsn;
+	lsn = open_lsn;
 	for (;;) {
-		ret = __db_dispatch(lp, &data, &lsn, TXN_OPENFILES, txninfo);
+		if (dbenv->tx_recover != NULL)
+			ret = dbenv->tx_recover(lp,
+			    &data, &lsn, TXN_OPENFILES, txninfo);
+		else
+			ret = __db_dispatch(lp,
+			    &data, &lsn, TXN_OPENFILES, txninfo);
 		if (ret != 0 && ret != DB_TXN_CKP)
 			goto msgerr;
 		if ((ret = log_get(lp, &lsn, &data, DB_NEXT)) != 0) {
@@ -148,8 +172,12 @@ __db_apprec(dbenv, flags)
 	for (ret = log_get(lp, &lsn, &data, DB_LAST);
 	    ret == 0 && log_compare(&lsn, &first_lsn) > 0;
 	    ret = log_get(lp, &lsn, &data, DB_PREV)) {
-		ret = __db_dispatch(lp,
-		    &data, &lsn, TXN_BACKWARD_ROLL, txninfo);
+		if (dbenv->tx_recover != NULL)
+			ret = dbenv->tx_recover(lp,
+			    &data, &lsn, TXN_BACKWARD_ROLL, txninfo);
+		else
+			ret = __db_dispatch(lp,
+			    &data, &lsn, TXN_BACKWARD_ROLL, txninfo);
 		if (ret != 0) {
 			if (ret != DB_TXN_CKP)
 				goto msgerr;
@@ -165,7 +193,12 @@ __db_apprec(dbenv, flags)
 	 */
 	for (ret = log_get(lp, &lsn, &data, DB_NEXT);
 	    ret == 0; ret = log_get(lp, &lsn, &data, DB_NEXT)) {
-		ret = __db_dispatch(lp, &data, &lsn, TXN_FORWARD_ROLL, txninfo);
+		if (dbenv->tx_recover != NULL)
+			ret = dbenv->tx_recover(lp,
+			    &data, &lsn, TXN_FORWARD_ROLL, txninfo);
+		else
+			ret = __db_dispatch(lp,
+			    &data, &lsn, TXN_FORWARD_ROLL, txninfo);
 		if (ret != 0) {
 			if (ret != DB_TXN_CKP)
 				goto msgerr;
@@ -207,6 +240,8 @@ msgerr:		__db_err(dbenv, "Recovery function for LSN %lu %lu failed",
 
 out:	F_SET(lp, is_thread);
 	__db_txnlist_end(txninfo);
+	if (ckp_args != NULL)
+		__os_free(ckp_args, sizeof(*ckp_args));
 
 	return (ret);
 }
diff --git a/db2/common/db_err.c b/db2/common/db_err.c
index 98a414279e..e935ddfcc5 100644
--- a/db2/common/db_err.c
+++ b/db2/common/db_err.c
@@ -8,13 +8,15 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)db_err.c	10.25 (Sleepycat) 5/2/98";
+static const char sccsid[] = "@(#)db_err.c	10.42 (Sleepycat) 11/24/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
 #include <sys/types.h>
 
 #include <errno.h>
+#include <stdio.h>
+#include <string.h>
 
 #ifdef __STDC__
 #include <stdarg.h>
@@ -24,10 +26,67 @@ static const char sccsid[] = "@(#)db_err.c	10.25 (Sleepycat) 5/2/98";
 #endif
 
 #include "db_int.h"
+#include "shqueue.h"
+#include "db_shash.h"
+#include "lock.h"
+#include "lock_ext.h"
+#include "log.h"
+#include "log_ext.h"
+#include "mp.h"
+#include "mp_ext.h"
+#include "txn.h"
+#include "txn_ext.h"
 #include "common_ext.h"
+#include "clib_ext.h"
 
-static int __db_keyempty __P((const DB_ENV *));
-static int __db_rdonly __P((const DB_ENV *, const char *));
+/*
+ * __db_fchk --
+ *	General flags checking routine.
+ *
+ * PUBLIC: int __db_fchk __P((DB_ENV *, const char *, u_int32_t, u_int32_t));
+ */
+int
+__db_fchk(dbenv, name, flags, ok_flags)
+	DB_ENV *dbenv;
+	const char *name;
+	u_int32_t flags, ok_flags;
+{
+	return (flags & ~ok_flags ?  __db_ferr(dbenv, name, 0) : 0);
+}
+
+/*
+ * __db_fcchk --
+ *	General combination flags checking routine.
+ *
+ * PUBLIC: int __db_fcchk
+ * PUBLIC:    __P((DB_ENV *, const char *, u_int32_t, u_int32_t, u_int32_t));
+ */
+int
+__db_fcchk(dbenv, name, flags, flag1, flag2)
+	DB_ENV *dbenv;
+	const char *name;
+	u_int32_t flags, flag1, flag2;
+{
+	return ((flags & flag1) &&
+	    (flags & flag2) ?  __db_ferr(dbenv, name, 1) : 0);
+}
+
+/*
+ * __db_ferr --
+ *	Common flag errors.
+ *
+ * PUBLIC: int __db_ferr __P((const DB_ENV *, const char *, int));
+ */
+int
+__db_ferr(dbenv, name, iscombo)
+	const DB_ENV *dbenv;
+	const char *name;
+	int iscombo;
+{
+	__db_err(dbenv, "illegal flag %sspecified to %s",
+	    iscombo ? "combination " : "", name);
+	return (EINVAL);
+}
 
 /*
  * __db_err --
@@ -55,561 +114,98 @@ __db_err(dbenv, fmt, va_alist)
 	if (dbenv == NULL)
 		return;
 
+	if (dbenv->db_errcall != NULL) {
 #ifdef __STDC__
-	va_start(ap, fmt);
+         	va_start(ap, fmt);
 #else
-	va_start(ap);
+	        va_start(ap);
 #endif
-	if (dbenv->db_errcall != NULL) {
 		(void)vsnprintf(errbuf, sizeof(errbuf), fmt, ap);
 		dbenv->db_errcall(dbenv->db_errpfx, errbuf);
+		va_end(ap);
 	}
 	if (dbenv->db_errfile != NULL) {
 		if (dbenv->db_errpfx != NULL)
 			(void)fprintf(dbenv->db_errfile, "%s: ",
 			    dbenv->db_errpfx);
+#ifdef __STDC__
+         	va_start(ap, fmt);
+#else
+	        va_start(ap);
+#endif
 		(void)vfprintf(dbenv->db_errfile, fmt, ap);
 		(void)fprintf(dbenv->db_errfile, "\n");
 		(void)fflush(dbenv->db_errfile);
+		va_end(ap);
 	}
-	va_end(ap);
-}
-
-/*
- * XXX
- * Provide ANSI C prototypes for the panic functions.  Some compilers, (e.g.,
- * MS VC 4.2) get upset if they aren't here, even though the K&R declaration
- * appears before the assignment in the __db__panic() call.
- */
-static int __db_ecursor __P((DB *, DB_TXN *, DBC **));
-static int __db_edel __P((DB *, DB_TXN *, DBT *, u_int32_t));
-static int __db_efd __P((DB *, int *));
-static int __db_egp __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t));
-static int __db_estat __P((DB *, void *, void *(*)(size_t), u_int32_t));
-static int __db_esync __P((DB *, u_int32_t));
-
-/*
- * __db_ecursor --
- *	After-panic cursor routine.
- */
-static int
-__db_ecursor(a, b, c)
-	DB *a;
-	DB_TXN *b;
-	DBC **c;
-{
-	COMPQUIET(a, NULL);
-	COMPQUIET(b, NULL);
-	COMPQUIET(c, NULL);
-
-	return (EPERM);
-}
-
-/*
- * __db_edel --
- *	After-panic delete routine.
- */
-static int
-__db_edel(a, b, c, d)
-	DB *a;
-	DB_TXN *b;
-	DBT *c;
-	u_int32_t d;
-{
-	COMPQUIET(a, NULL);
-	COMPQUIET(b, NULL);
-	COMPQUIET(c, NULL);
-	COMPQUIET(d, 0);
-
-	return (EPERM);
 }
 
 /*
- * __db_efd --
- *	After-panic fd routine.
- */
-static int
-__db_efd(a, b)
-	DB *a;
-	int *b;
-{
-	COMPQUIET(a, NULL);
-	COMPQUIET(b, NULL);
-
-	return (EPERM);
-}
-
-/*
- * __db_egp --
- *	After-panic get/put routine.
- */
-static int
-__db_egp(a, b, c, d, e)
-	DB *a;
-	DB_TXN *b;
-	DBT *c, *d;
-	u_int32_t e;
-{
-	COMPQUIET(a, NULL);
-	COMPQUIET(b, NULL);
-	COMPQUIET(c, NULL);
-	COMPQUIET(d, NULL);
-	COMPQUIET(e, 0);
-
-	return (EPERM);
-}
-
-/*
- * __db_estat --
- *	After-panic stat routine.
- */
-static int
-__db_estat(a, b, c, d)
-	DB *a;
-	void *b;
-	void *(*c) __P((size_t));
-	u_int32_t d;
-{
-	COMPQUIET(a, NULL);
-	COMPQUIET(b, NULL);
-	COMPQUIET(c, NULL);
-	COMPQUIET(d, 0);
-
-	return (EPERM);
-}
-
-/*
- * __db_esync --
- *	After-panic sync routine.
- */
-static int
-__db_esync(a, b)
-	DB *a;
-	u_int32_t b;
-{
-	COMPQUIET(a, NULL);
-	COMPQUIET(b, 0);
-
-	return (EPERM);
-}
-
-/*
- * __db_panic --
- *	Lock out the tree due to unrecoverable error.
+ * __db_pgerr --
+ *	Error when unable to retrieve a specified page.
  *
- * PUBLIC: int __db_panic __P((DB *));
+ * PUBLIC: int __db_pgerr __P((DB *, db_pgno_t));
  */
 int
-__db_panic(dbp)
+__db_pgerr(dbp, pgno)
 	DB *dbp;
+	db_pgno_t pgno;
 {
 	/*
-	 * XXX
-	 * We should shut down all of the process's cursors, too.
-	 *
-	 * We should call mpool and have it shut down the file, so we get
-	 * other processes sharing this file as well.
-	 *
-	 *	Chaos reigns within.
-	 *	Reflect, repent, and reboot.
-	 *	Order shall return.
+	 * Three things are certain:
+	 * Death, taxes, and lost data.
+	 * Guess which has occurred.
 	 */
-	dbp->cursor = __db_ecursor;
-	dbp->del = __db_edel;
-	dbp->fd = __db_efd;
-	dbp->get = __db_egp;
-	dbp->put = __db_egp;
-	dbp->stat = __db_estat;
-	dbp->sync = __db_esync;
-
-	return (EPERM);
+	__db_err(dbp->dbenv,
+	    "unable to create/retrieve page %lu", (u_long)pgno);
+	return (__db_panic(dbp->dbenv, EIO));
 }
 
-/* Check for invalid flags. */
-#undef	DB_CHECK_FLAGS
-#define	DB_CHECK_FLAGS(dbenv, name, flags, ok_flags)			\
-	if ((flags) & ~(ok_flags))					\
-		return (__db_ferr(dbenv, name, 0));
-/* Check for invalid flag combinations. */
-#undef	DB_CHECK_FCOMBO
-#define	DB_CHECK_FCOMBO(dbenv, name, flags, flag1, flag2)		\
-	if ((flags) & (flag1) && (flags) & (flag2))			\
-		return (__db_ferr(dbenv, name, 1));
-
 /*
- * __db_fchk --
- *	General flags checking routine.
+ * __db_pgfmt --
+ *	Error when a page has the wrong format.
  *
- * PUBLIC: int __db_fchk __P((DB_ENV *, const char *, u_int32_t, u_int32_t));
+ * PUBLIC: int __db_pgfmt __P((DB *, db_pgno_t));
  */
 int
-__db_fchk(dbenv, name, flags, ok_flags)
-	DB_ENV *dbenv;
-	const char *name;
-	u_int32_t flags, ok_flags;
+__db_pgfmt(dbp, pgno)
+	DB *dbp;
+	db_pgno_t pgno;
 {
-	DB_CHECK_FLAGS(dbenv, name, flags, ok_flags);
-	return (0);
+	__db_err(dbp->dbenv,
+	    "page %lu: illegal page type or format", (u_long)pgno);
+	return (__db_panic(dbp->dbenv, EINVAL));
 }
 
 /*
- * __db_fcchk --
- *	General combination flags checking routine.
+ * __db_panic --
+ *	Lock out the tree due to unrecoverable error.
  *
- * PUBLIC: int __db_fcchk
- * PUBLIC:    __P((DB_ENV *, const char *, u_int32_t, u_int32_t, u_int32_t));
+ * PUBLIC: int __db_panic __P((DB_ENV *, int));
  */
 int
-__db_fcchk(dbenv, name, flags, flag1, flag2)
+__db_panic(dbenv, errval)
 	DB_ENV *dbenv;
-	const char *name;
-	u_int32_t flags, flag1, flag2;
+	int errval;
 {
-	DB_CHECK_FCOMBO(dbenv, name, flags, flag1, flag2);
-	return (0);
-}
+	if (dbenv != NULL) {
+		dbenv->db_panic = errval;
 
-/*
- * __db_cdelchk --
- *	Common cursor delete argument checking routine.
- *
- * PUBLIC: int __db_cdelchk __P((const DB *, u_int32_t, int, int));
- */
-int
-__db_cdelchk(dbp, flags, isrdonly, isvalid)
-	const DB *dbp;
-	u_int32_t flags;
-	int isrdonly, isvalid;
-{
-	/* Check for changes to a read-only tree. */
-	if (isrdonly)
-		return (__db_rdonly(dbp->dbenv, "c_del"));
+		(void)__log_panic(dbenv);
+		(void)__memp_panic(dbenv);
+		(void)__lock_panic(dbenv);
+		(void)__txn_panic(dbenv);
 
-	/* Check for invalid dbc->c_del() function flags. */
-	DB_CHECK_FLAGS(dbp->dbenv, "c_del", flags, 0);
-
-	/*
-	 * The cursor must be initialized, return -1 for an invalid cursor,
-	 * otherwise 0.
-	 */
-	return (isvalid ? 0 : EINVAL);
-}
+		__db_err(dbenv, "PANIC: %s", strerror(errval));
 
-/*
- * __db_cgetchk --
- *	Common cursor get argument checking routine.
- *
- * PUBLIC: int __db_cgetchk __P((const DB *, DBT *, DBT *, u_int32_t, int));
- */
-int
-__db_cgetchk(dbp, key, data, flags, isvalid)
-	const DB *dbp;
-	DBT *key, *data;
-	u_int32_t flags;
-	int isvalid;
-{
-	int key_einval, key_flags;
-
-	key_flags = key_einval = 0;
-
-	/* Check for invalid dbc->c_get() function flags. */
-	switch (flags) {
-	case DB_CURRENT:
-	case DB_FIRST:
-	case DB_LAST:
-	case DB_NEXT:
-	case DB_PREV:
-		key_flags = 1;
-		break;
-	case DB_SET_RANGE:
-		key_einval = key_flags = 1;
-		break;
-	case DB_SET:
-		key_einval = 1;
-		break;
-	case DB_GET_RECNO:
-		if (!F_ISSET(dbp, DB_BT_RECNUM))
-			goto err;
-		break;
-	case DB_SET_RECNO:
-		if (!F_ISSET(dbp, DB_BT_RECNUM))
-			goto err;
-		key_einval = key_flags = 1;
-		break;
-	default:
-err:		return (__db_ferr(dbp->dbenv, "c_get", 0));
+		if (dbenv->db_paniccall != NULL)
+			dbenv->db_paniccall(dbenv, errval);
 	}
 
-	/* Check for invalid key/data flags. */
-	if (key_flags)
-		DB_CHECK_FLAGS(dbp->dbenv, "key", key->flags,
-		    DB_DBT_MALLOC | DB_DBT_USERMEM | DB_DBT_PARTIAL);
-	DB_CHECK_FLAGS(dbp->dbenv, "data", data->flags,
-	    DB_DBT_MALLOC | DB_DBT_USERMEM | DB_DBT_PARTIAL);
-
-	/* Check dbt's for valid flags when multi-threaded. */
-	if (F_ISSET(dbp, DB_AM_THREAD)) {
-		if (!F_ISSET(data, DB_DBT_USERMEM | DB_DBT_MALLOC))
-			return (__db_ferr(dbp->dbenv, "threaded data", 1));
-		if (key_flags &&
-		    !F_ISSET(key, DB_DBT_USERMEM | DB_DBT_MALLOC))
-			return (__db_ferr(dbp->dbenv, "threaded key", 1));
-	}
-
-	/* Check for missing keys. */
-	if (key_einval && (key->data == NULL || key->size == 0))
-		return (__db_keyempty(dbp->dbenv));
-
 	/*
-	 * The cursor must be initialized for DB_CURRENT, return -1 for an
-	 * invalid cursor, otherwise 0.
+	 * Chaos reigns within.
+	 * Reflect, repent, and reboot.
+	 * Order shall return.
 	 */
-	return (isvalid || flags != DB_CURRENT ? 0 : EINVAL);
-}
-
-/*
- * __db_cputchk --
- *	Common cursor put argument checking routine.
- *
- * PUBLIC: int __db_cputchk __P((const DB *,
- * PUBLIC:    const DBT *, DBT *, u_int32_t, int, int));
- */
-int
-__db_cputchk(dbp, key, data, flags, isrdonly, isvalid)
-	const DB *dbp;
-	const DBT *key;
-	DBT *data;
-	u_int32_t flags;
-	int isrdonly, isvalid;
-{
-	int key_einval, key_flags;
-
-	/* Check for changes to a read-only tree. */
-	if (isrdonly)
-		return (__db_rdonly(dbp->dbenv, "c_put"));
-
-	/* Check for invalid dbc->c_put() function flags. */
-	key_einval = key_flags = 0;
-	switch (flags) {
-	case DB_AFTER:
-	case DB_BEFORE:
-		if (dbp->type == DB_RECNO && !F_ISSET(dbp, DB_RE_RENUMBER))
-			goto err;
-		if (dbp->type != DB_RECNO && !F_ISSET(dbp, DB_AM_DUP))
-			goto err;
-		break;
-	case DB_CURRENT:
-		break;
-	case DB_KEYFIRST:
-	case DB_KEYLAST:
-		if (dbp->type == DB_RECNO)
-			goto err;
-		key_einval = key_flags = 1;
-		break;
-	default:
-err:		return (__db_ferr(dbp->dbenv, "c_put", 0));
-	}
-
-	/* Check for invalid key/data flags. */
-	if (key_flags)
-		DB_CHECK_FLAGS(dbp->dbenv, "key", key->flags,
-		    DB_DBT_MALLOC | DB_DBT_USERMEM | DB_DBT_PARTIAL);
-	DB_CHECK_FLAGS(dbp->dbenv, "data", data->flags,
-	    DB_DBT_MALLOC | DB_DBT_USERMEM | DB_DBT_PARTIAL);
-
-	/* Check for missing keys. */
-	if (key_einval && (key->data == NULL || key->size == 0))
-		return (__db_keyempty(dbp->dbenv));
-
-	/*
-	 * The cursor must be initialized for anything other than DB_KEYFIRST
-	 * and DB_KEYLAST, return -1 for an invalid cursor, otherwise 0.
-	 */
-	return (isvalid ||
-	    (flags != DB_KEYFIRST && flags != DB_KEYLAST) ? 0 : EINVAL);
-}
-
-/*
- * __db_delchk --
- *	Common delete argument checking routine.
- *
- * PUBLIC: int __db_delchk __P((const DB *, DBT *, u_int32_t, int));
- */
-int
-__db_delchk(dbp, key, flags, isrdonly)
-	const DB *dbp;
-	DBT *key;
-	u_int32_t flags;
-	int isrdonly;
-{
-	/* Check for changes to a read-only tree. */
-	if (isrdonly)
-		return (__db_rdonly(dbp->dbenv, "delete"));
-
-	/* Check for invalid db->del() function flags. */
-	DB_CHECK_FLAGS(dbp->dbenv, "delete", flags, 0);
-
-	/* Check for missing keys. */
-	if (key->data == NULL || key->size == 0)
-		return (__db_keyempty(dbp->dbenv));
-
-	return (0);
-}
-
-/*
- * __db_getchk --
- *	Common get argument checking routine.
- *
- * PUBLIC: int __db_getchk __P((const DB *, const DBT *, DBT *, u_int32_t));
- */
-int
-__db_getchk(dbp, key, data, flags)
-	const DB *dbp;
-	const DBT *key;
-	DBT *data;
-	u_int32_t flags;
-{
-	/* Check for invalid db->get() function flags. */
-	DB_CHECK_FLAGS(dbp->dbenv,
-	    "get", flags, F_ISSET(dbp, DB_BT_RECNUM) ? DB_SET_RECNO : 0);
-
-	/* Check for invalid key/data flags. */
-	DB_CHECK_FLAGS(dbp->dbenv, "key", key->flags, 0);
-	DB_CHECK_FLAGS(dbp->dbenv, "data", data->flags,
-	    DB_DBT_MALLOC | DB_DBT_USERMEM | DB_DBT_PARTIAL);
-	DB_CHECK_FCOMBO(dbp->dbenv,
-	    "data", data->flags, DB_DBT_MALLOC, DB_DBT_USERMEM);
-	if (F_ISSET(dbp, DB_AM_THREAD) &&
-	    !F_ISSET(data, DB_DBT_MALLOC | DB_DBT_USERMEM))
-		return (__db_ferr(dbp->dbenv, "threaded data", 1));
-
-	/* Check for missing keys. */
-	if (key->data == NULL || key->size == 0)
-		return (__db_keyempty(dbp->dbenv));
-
-	return (0);
-}
-
-/*
- * __db_putchk --
- *	Common put argument checking routine.
- *
- * PUBLIC: int __db_putchk
- * PUBLIC:    __P((const DB *, DBT *, const DBT *, u_int32_t, int, int));
- */
-int
-__db_putchk(dbp, key, data, flags, isrdonly, isdup)
-	const DB *dbp;
-	DBT *key;
-	const DBT *data;
-	u_int32_t flags;
-	int isrdonly, isdup;
-{
-	/* Check for changes to a read-only tree. */
-	if (isrdonly)
-		return (__db_rdonly(dbp->dbenv, "put"));
-
-	/* Check for invalid db->put() function flags. */
-	DB_CHECK_FLAGS(dbp->dbenv, "put", flags,
-	    DB_NOOVERWRITE | (dbp->type == DB_RECNO ? DB_APPEND : 0));
-
-	/* Check for invalid key/data flags. */
-	DB_CHECK_FLAGS(dbp->dbenv, "key", key->flags, 0);
-	DB_CHECK_FLAGS(dbp->dbenv, "data", data->flags,
-	    DB_DBT_MALLOC | DB_DBT_USERMEM | DB_DBT_PARTIAL);
-	DB_CHECK_FCOMBO(dbp->dbenv,
-	    "data", data->flags, DB_DBT_MALLOC, DB_DBT_USERMEM);
-
-	/* Check for missing keys. */
-	if (key->data == NULL || key->size == 0)
-		return (__db_keyempty(dbp->dbenv));
-
-	/* Check for partial puts in the presence of duplicates. */
-	if (isdup && F_ISSET(data, DB_DBT_PARTIAL)) {
-		__db_err(dbp->dbenv,
-"a partial put in the presence of duplicates requires a cursor operation");
-		return (EINVAL);
-	}
-
-	return (0);
-}
-
-/*
- * __db_statchk --
- *	Common stat argument checking routine.
- *
- * PUBLIC: int __db_statchk __P((const DB *, u_int32_t));
- */
-int
-__db_statchk(dbp, flags)
-	const DB *dbp;
-	u_int32_t flags;
-{
-	/* Check for invalid db->stat() function flags. */
-	DB_CHECK_FLAGS(dbp->dbenv, "stat", flags, DB_RECORDCOUNT);
-
-	if (LF_ISSET(DB_RECORDCOUNT) &&
-	    dbp->type == DB_BTREE && !F_ISSET(dbp, DB_BT_RECNUM))
-		return (__db_ferr(dbp->dbenv, "stat", 0));
-
-	return (0);
-}
-
-/*
- * __db_syncchk --
- *	Common sync argument checking routine.
- *
- * PUBLIC: int __db_syncchk __P((const DB *, u_int32_t));
- */
-int
-__db_syncchk(dbp, flags)
-	const DB *dbp;
-	u_int32_t flags;
-{
-	/* Check for invalid db->sync() function flags. */
-	DB_CHECK_FLAGS(dbp->dbenv, "sync", flags, 0);
-
-	return (0);
-}
-
-/*
- * __db_ferr --
- *	Common flag errors.
- *
- * PUBLIC: int __db_ferr __P((const DB_ENV *, const char *, int));
- */
-int
-__db_ferr(dbenv, name, iscombo)
-	const DB_ENV *dbenv;
-	const char *name;
-	int iscombo;
-{
-	__db_err(dbenv, "illegal flag %sspecified to %s",
-	    iscombo ? "combination " : "", name);
-	return (EINVAL);
-}
-
-/*
- * __db_rdonly --
- *	Common readonly message.
- */
-static int
-__db_rdonly(dbenv, name)
-	const DB_ENV *dbenv;
-	const char *name;
-{
-	__db_err(dbenv, "%s: attempt to modify a read-only tree", name);
-	return (EACCES);
-}
-
-/*
- * __db_keyempty --
- *	Common missing or empty key value message.
- */
-static int
-__db_keyempty(dbenv)
-	const DB_ENV *dbenv;
-{
-	__db_err(dbenv, "missing or empty key value specified");
-	return (EINVAL);
+	return (DB_RUNRECOVERY);
 }
diff --git a/db2/common/db_region.c b/db2/common/db_region.c
index 284af6176a..12abfa524d 100644
--- a/db2/common/db_region.c
+++ b/db2/common/db_region.c
@@ -8,7 +8,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)db_region.c	10.46 (Sleepycat) 5/26/98";
+static const char sccsid[] = "@(#)db_region.c	10.53 (Sleepycat) 11/10/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -46,7 +46,7 @@ __db_rattach(infop)
 	ret = retry_cnt = 0;
 
 	/* Round off the requested size to the next page boundary. */
-	DB_ROUNDOFF(infop->size);
+	DB_ROUNDOFF(infop->size, DB_VMPAGESIZE);
 
 	/* Some architectures have hard limits on the maximum region size. */
 #ifdef DB_REGIONSIZE_MAX
@@ -61,7 +61,7 @@ loop:	infop->addr = NULL;
 	infop->fd = -1;
 	infop->segid = INVALID_SEGID;
 	if (infop->name != NULL) {
-		FREES(infop->name);
+		__os_freestr(infop->name);
 		infop->name = NULL;
 	}
 	F_CLR(infop, REGION_CANGROW | REGION_CREATED);
@@ -74,6 +74,11 @@ loop:	infop->addr = NULL;
 	 * (Theoretically, we could probably get a file descriptor to lock
 	 * other types of shared regions, but I don't see any reason to
 	 * bother.)
+	 *
+	 * Since we may be using shared memory regions, e.g., shmget(2),
+	 * and not mmap of regular files, the backing file may be only a
+	 * few tens of bytes in length.  So, this depends on the ability
+	 * to fcntl lock file offsets much larger than the physical file.
 	 */
 	malloc_possible = 0;
 #endif
@@ -91,15 +96,16 @@ loop:	infop->addr = NULL;
 	 * than either anonymous memory or a shared file.
 	 */
 	if (malloc_possible && F_ISSET(infop, REGION_PRIVATE)) {
-		if ((infop->addr = __db_malloc(infop->size)) == NULL)
-			return (ENOMEM);
+		if ((ret = __os_malloc(infop->size, NULL, &infop->addr)) != 0)
+			return (ret);
 
 		/*
-		 * It's sometimes significantly faster to page-fault in all
-		 * of the region's pages before we run the application, as
-		 * we can see fairly nasty side-effects when we page-fault
-		 * while holding various locks, i.e., the lock takes a long
-		 * time, and other threads convoy behind the lock holder.
+		 * It's sometimes significantly faster to page-fault in all of
+		 * the region's pages before we run the application, as we see
+		 * nasty side-effects when we page-fault while holding various
+		 * locks, i.e., the lock takes a long time to acquire because
+		 * of the underlying page fault, and the other threads convoy
+		 * behind the lock holder.
 		 */
 		if (DB_GLOBAL(db_region_init))
 			for (p = infop->addr;
@@ -159,7 +165,7 @@ loop:	infop->addr = NULL;
 	 *    3. Memory backed by a regular file (mmap(2)).
 	 *
 	 * We instantiate a backing file in all cases, which contains at least
-	 * the RLAYOUT structure, and in case #4, contains the actual region.
+	 * the RLAYOUT structure, and in case #3, contains the actual region.
 	 * This is necessary for a couple of reasons:
 	 *
 	 * First, the mpool region uses temporary files to name regions, and
@@ -218,7 +224,7 @@ loop:	infop->addr = NULL;
 		 * And yes, this makes me want to take somebody and kill them,
 		 * but I can't think of any other solution.
 		 */
-		if ((ret = __db_ioinfo(infop->name,
+		if ((ret = __os_ioinfo(infop->name,
 		    infop->fd, &mbytes, &bytes, NULL)) != 0)
 			goto errmsg;
 		size = mbytes * MEGABYTE + bytes;
@@ -233,7 +239,7 @@ loop:	infop->addr = NULL;
 			if (size < sizeof(RLAYOUT))
 				goto retry;
 			if ((ret =
-			    __db_read(infop->fd, &rl, sizeof(rl), &nr)) != 0)
+			    __os_read(infop->fd, &rl, sizeof(rl), &nr)) != 0)
 				goto retry;
 			if (rl.valid != DB_REGIONMAGIC)
 				goto retry;
@@ -284,6 +290,7 @@ loop:	infop->addr = NULL;
 		} else
 			goto err;
 	}
+
 region_init:
 	/*
 	 * Initialize the common region information.
@@ -321,6 +328,7 @@ region_init:
 		rlp->refcnt = 1;
 		rlp->size = infop->size;
 		db_version(&rlp->majver, &rlp->minver, &rlp->patch);
+		rlp->panic = 0;
 		rlp->segid = infop->segid;
 		rlp->flags = 0;
 		if (F_ISSET(infop, REGION_ANONYMOUS))
@@ -347,13 +355,19 @@ region_init:
 		 * the file.
 		 */
 		if (F_ISSET(infop, REGION_ANONYMOUS)) {
-			if ((ret = __db_seek(infop->fd, 0, 0, 0, 0, 0)) != 0)
+			if ((ret = __os_seek(infop->fd, 0, 0, 0, 0, 0)) != 0)
 				goto err;
 			if ((ret =
-			    __db_write(infop->fd, rlp, sizeof(*rlp), &nw)) != 0)
+			    __os_write(infop->fd, rlp, sizeof(*rlp), &nw)) != 0)
 				goto err;
 		}
 	} else {
+		/* Check to see if the region has had catastrophic failure. */
+		if (rlp->panic) {
+			ret = DB_RUNRECOVERY;
+			goto err;
+		}
+
 		/*
 		 * Check the valid flag to ensure the region is initialized.
 		 * If the valid flag has not been set, the mutex may not have
@@ -380,18 +394,6 @@ region_init:
 		}
 
 		/*
-		 * Problem #2: We want a bigger region than has previously been
-		 * created.  Detected by checking if the region is smaller than
-		 * our caller requested.  If it is, we grow the region, (which
-		 * does the detach and re-attach for us).
-		 */
-		if (grow_region != 0 &&
-		    (ret = __db_rgrow(infop, grow_region)) != 0) {
-			(void)__db_mutex_unlock(&rlp->lock, infop->fd);
-			goto err;
-		}
-
-		/*
 		 * Problem #3: when we checked the size of the file, it was
 		 * still growing as part of creation.  Detected by the fact
 		 * that infop->size isn't the same size as the region.
@@ -419,16 +421,16 @@ retry:		/* Discard the region. */
 
 		/* Discard the backing file. */
 		if (infop->fd != -1) {
-			(void)__db_close(infop->fd);
+			(void)__os_close(infop->fd);
 			infop->fd = -1;
 
 			if (F_ISSET(infop, REGION_CREATED))
-				(void)__db_unlink(infop->name);
+				(void)__os_unlink(infop->name);
 		}
 
 		/* Discard the name. */
 		if (infop->name != NULL) {
-			FREES(infop->name);
+			__os_freestr(infop->name);
 			infop->name = NULL;
 		}
 
@@ -438,7 +440,7 @@ retry:		/* Discard the region. */
 		 */
 		if (ret == 0) {
 			if (++retry_cnt <= 3) {
-				__db_sleep(retry_cnt * 2, 0);
+				__os_sleep(retry_cnt * 2, 0);
 				goto loop;
 			}
 			ret = EAGAIN;
@@ -481,10 +483,11 @@ retry:		/* Discard the region. */
 			F_SET(infop, REGION_REMOVED);
 			F_CLR(infop, REGION_CANGROW);
 
-			(void)__db_close(infop->fd);
-			(void)__db_unlink(infop->name);
+			(void)__os_close(infop->fd);
+			(void)__os_unlink(infop->name);
 		}
 	}
+
 	return (ret);
 }
 
@@ -514,7 +517,7 @@ __db_rdetach(infop)
 	 * action required is freeing the memory.
 	 */
 	if (F_ISSET(infop, REGION_MALLOC)) {
-		__db_free(infop->addr);
+		__os_free(infop->addr, 0);
 		goto done;
 	}
 
@@ -549,7 +552,7 @@ __db_rdetach(infop)
 	(void)__db_mutex_unlock(&rlp->lock, infop->fd);
 
 	/* Close the backing file descriptor. */
-	(void)__db_close(infop->fd);
+	(void)__os_close(infop->fd);
 	infop->fd = -1;
 
 	/* Discard our mapping of the region. */
@@ -561,13 +564,13 @@ __db_rdetach(infop)
 		if ((t_ret =
 		    __db_unlinkregion(infop->name, infop) != 0) && ret == 0)
 			ret = t_ret;
-		if ((t_ret = __db_unlink(infop->name) != 0) && ret == 0)
+		if ((t_ret = __os_unlink(infop->name) != 0) && ret == 0)
 			ret = t_ret;
 	}
 
 done:	/* Discard the name. */
 	if (infop->name != NULL) {
-		FREES(infop->name);
+		__os_freestr(infop->name);
 		infop->name = NULL;
 	}
 
@@ -629,8 +632,8 @@ __db_runlink(infop, force)
 	 * (REGION_PRIVATE) ones, regardless of whether or not it's used to
 	 * back the region.  If that file doesn't exist, we're done.
 	 */
-	if (__db_exists(name, NULL) != 0) {
-		FREES(name);
+	if (__os_exists(name, NULL) != 0) {
+		__os_freestr(name);
 		return (0);
 	}
 
@@ -641,12 +644,12 @@ __db_runlink(infop, force)
 	 */
 	if ((ret = __db_open(name, DB_RDONLY, DB_RDONLY, 0, &fd)) != 0)
 		goto errmsg;
-	if ((ret = __db_ioinfo(name, fd, &mbytes, &bytes, NULL)) != 0)
+	if ((ret = __os_ioinfo(name, fd, &mbytes, &bytes, NULL)) != 0)
 		goto errmsg;
 	size = mbytes * MEGABYTE + bytes;
 
 	if (size <= sizeof(RLAYOUT)) {
-		if ((ret = __db_read(fd, &rl, sizeof(rl), &nr)) != 0)
+		if ((ret = __os_read(fd, &rl, sizeof(rl), &nr)) != 0)
 			goto errmsg;
 		if (rl.valid != DB_REGIONMAGIC) {
 			__db_err(infop->dbenv,
@@ -673,16 +676,16 @@ __db_runlink(infop, force)
 	 * because some architectures (e.g., Win32) won't unlink a file if
 	 * open file descriptors remain.
 	 */
-	(void)__db_close(fd);
-	if ((t_ret = __db_unlink(name)) != 0 && ret == 0)
+	(void)__os_close(fd);
+	if ((t_ret = __os_unlink(name)) != 0 && ret == 0)
 		ret = t_ret;
 
 	if (0) {
 errmsg:		__db_err(infop->dbenv, "%s: %s", name, strerror(ret));
-err:		(void)__db_close(fd);
+err:		(void)__os_close(fd);
 	}
 
-	FREES(name);
+	__os_freestr(name);
 	return (ret);
 }
 
@@ -715,7 +718,7 @@ __db_rgrow(infop, new_size)
 	 * determine the additional space required.
 	 */
 	rlp = (RLAYOUT *)infop->addr;
-	DB_ROUNDOFF(new_size);
+	DB_ROUNDOFF(new_size, DB_VMPAGESIZE);
 	increment = new_size - rlp->size;
 
 	if ((ret = __db_growregion(infop, increment)) != 0)
@@ -745,7 +748,7 @@ __db_growregion(infop, increment)
 	char buf[DB_VMPAGESIZE];
 
 	/* Seek to the end of the region. */
-	if ((ret = __db_seek(infop->fd, 0, 0, 0, 0, SEEK_END)) != 0)
+	if ((ret = __os_seek(infop->fd, 0, 0, 0, 0, SEEK_END)) != 0)
 		goto err;
 
 	/* Write nuls to the new bytes. */
@@ -760,7 +763,7 @@ __db_growregion(infop, increment)
 		/* Extend the region by writing each new page. */
 		for (i = 0; i < increment; i += DB_VMPAGESIZE) {
 			if ((ret =
-			    __db_write(infop->fd, buf, sizeof(buf), &nw)) != 0)
+			    __os_write(infop->fd, buf, sizeof(buf), &nw)) != 0)
 				goto err;
 			if (nw != sizeof(buf))
 				goto eio;
@@ -776,36 +779,44 @@ __db_growregion(infop, increment)
 		 */
 		pages = (increment - DB_VMPAGESIZE) / MEGABYTE;
 		relative = (increment - DB_VMPAGESIZE) % MEGABYTE;
-		if ((ret = __db_seek(infop->fd,
+		if ((ret = __os_seek(infop->fd,
 		    MEGABYTE, pages, relative, 0, SEEK_CUR)) != 0)
 			goto err;
-		if ((ret = __db_write(infop->fd, buf, sizeof(buf), &nw)) != 0)
+		if ((ret = __os_write(infop->fd, buf, sizeof(buf), &nw)) != 0)
 			goto err;
 		if (nw != sizeof(buf))
 			goto eio;
 
 		/*
-		 * It's sometimes significantly faster to page-fault in all
-		 * of the region's pages before we run the application, as
-		 * we can see fairly nasty side-effects when we page-fault
-		 * while holding various locks, i.e., the lock takes a long
-		 * time, and other threads convoy behind the lock holder.
+		 * It's sometimes significantly faster to page-fault in all of
+		 * the region's pages before we run the application, as we see
+		 * nasty side-effects when we page-fault while holding various
+		 * locks, i.e., the lock takes a long time to acquire because
+		 * of the underlying page fault, and the other threads convoy
+		 * behind the lock holder.
+		 *
+		 * We also use REGION_INIT to guarantee that there is enough
+		 * disk space for the region, so we also write a byte to each
+		 * page.  Reading the byte is insufficient as some systems
+		 * (e.g., Solaris) do not instantiate disk pages to satisfy
+		 * a read, and so we don't know if there is enough disk space
+		 * or not.
 		 */
 		if (DB_GLOBAL(db_region_init)) {
 			pages = increment / MEGABYTE;
 			relative = increment % MEGABYTE;
-			if ((ret = __db_seek(infop->fd,
+			if ((ret = __os_seek(infop->fd,
 			    MEGABYTE, pages, relative, 1, SEEK_END)) != 0)
 				goto err;
 
-			/* Read a byte from each page. */
+			/* Write a byte to each page. */
 			for (i = 0; i < increment; i += DB_VMPAGESIZE) {
 				if ((ret =
-				    __db_read(infop->fd, buf, 1, &nr)) != 0)
+				    __os_write(infop->fd, buf, 1, &nr)) != 0)
 					goto err;
 				if (nr != 1)
 					goto eio;
-				if ((ret = __db_seek(infop->fd,
+				if ((ret = __os_seek(infop->fd,
 				    0, 0, DB_VMPAGESIZE - 1, 0, SEEK_CUR)) != 0)
 					goto err;
 			}
diff --git a/db2/common/db_salloc.c b/db2/common/db_salloc.c
index c02d7e18e9..d58b79f3c4 100644
--- a/db2/common/db_salloc.c
+++ b/db2/common/db_salloc.c
@@ -8,7 +8,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)db_salloc.c	10.13 (Sleepycat) 5/10/98";
+static const char sccsid[] = "@(#)db_salloc.c	10.14 (Sleepycat) 11/16/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -170,7 +170,7 @@ __db_shalloc_free(regionp, ptr)
 
 	/* Trash the returned memory. */
 #ifdef DIAGNOSTIC
-	memset(ptr, 0xff, free_size);
+	memset(ptr, 0xdb, free_size);
 #endif
 
 	/*