about summary refs log tree commit diff
path: root/db2/log
diff options
context:
space:
mode:
Diffstat (limited to 'db2/log')
-rw-r--r--db2/log/log.c438
-rw-r--r--db2/log/log.src53
-rw-r--r--db2/log/log_archive.c413
-rw-r--r--db2/log/log_auto.c351
-rw-r--r--db2/log/log_compare.c34
-rw-r--r--db2/log/log_findckp.c130
-rw-r--r--db2/log/log_get.c355
-rw-r--r--db2/log/log_put.c484
-rw-r--r--db2/log/log_rec.c332
-rw-r--r--db2/log/log_register.c199
10 files changed, 2789 insertions, 0 deletions
diff --git a/db2/log/log.c b/db2/log/log.c
new file mode 100644
index 0000000000..1684ce8cc8
--- /dev/null
+++ b/db2/log/log.c
@@ -0,0 +1,438 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *	Sleepycat Software.  All rights reserved.
+ */
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)log.c	10.24 (Sleepycat) 8/16/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#endif
+
+#include "db_int.h"
+#include "shqueue.h"
+#include "db_shash.h"
+#include "log.h"
+#include "db_dispatch.h"
+#include "txn_auto.h"
+#include "common_ext.h"
+
+static int __log_recover __P((DB_ENV *, DB_LOG *));
+
+/*
+ * log_open --
+ *	Initialize and/or join a log.
+ *
+ *	path, mode: handed to the region create/open routines; mode is also
+ *		recorded in a newly created region.
+ *	flags:	DB_CREATE (plus DB_THREAD when HAVE_SPINLOCKS is defined).
+ *	dbenv:	environment; may be NULL, in which case the default maximum
+ *		log file size is used for a new region.
+ *	lpp:	set to the newly allocated DB_LOG handle on success.
+ *
+ *	Returns 0 on success or a non-zero error value.  On failure *lpp is
+ *	not modified and the partially built DB_LOG handle is released.
+ */
+int
+log_open(path, flags, mode, dbenv, lpp)
+	const char *path;
+	int flags;
+	int mode;
+	DB_ENV *dbenv;
+	DB_LOG **lpp;
+{
+	DB_LOG *dblp;
+	LOG *lp;
+	size_t len;
+	int fd, newregion, ret, retry_cnt;
+
+	/* Validate arguments. */
+#ifdef HAVE_SPINLOCKS
+#define	OKFLAGS	(DB_CREATE | DB_THREAD)
+#else
+#define	OKFLAGS	(DB_CREATE)
+#endif
+	if ((ret = __db_fchk(dbenv, "log_open", flags, OKFLAGS)) != 0)
+		return (ret);
+
+	/*
+	 * We store 4-byte offsets into the file, so the maximum file
+	 * size can't be larger than that.
+	 */
+	if (dbenv != NULL && dbenv->lg_max > UINT32_T_MAX) {
+		__db_err(dbenv, "log_open: maximum file size too large");
+		return (EINVAL);
+	}
+
+	/* Create and initialize the DB_LOG structure. */
+	if ((dblp = (DB_LOG *)calloc(1, sizeof(DB_LOG))) == NULL)
+		return (ENOMEM);
+
+	dblp->dbenv = dbenv;
+	dblp->lfd = -1;
+	ZERO_LSN(dblp->c_lsn);
+	dblp->c_fd = -1;
+	if (LF_ISSET(DB_THREAD)) {
+		F_SET(dblp, DB_AM_THREAD);
+		(void)__db_mutex_init(&dblp->mutex, -1);
+	}
+
+	/*
+	 * The log region isn't fixed size because we store the registered
+	 * file names there.  Make it fairly large so that we don't have to
+	 * grow it.
+	 */
+	len = 30 * 1024;
+
+	/* Map in the region. */
+	retry_cnt = newregion = 0;
+retry:	if (LF_ISSET(DB_CREATE)) {
+		ret = __db_rcreate(dbenv, DB_APP_LOG, path,
+		    DB_DEFAULT_LOG_FILE, mode, len, &fd, &dblp->maddr);
+		if (ret == 0) {
+			/* Put the LOG structure first in the region. */
+			lp = dblp->maddr;
+
+			/* Initialize the rest of the region as free space. */
+			dblp->addr = (u_int8_t *)dblp->maddr + sizeof(LOG);
+			__db_shalloc_init(dblp->addr, len - sizeof(LOG));
+
+			/* Initialize the LOG structure. */
+			lp->persist.lg_max = dbenv == NULL ? 0 : dbenv->lg_max;
+			if (lp->persist.lg_max == 0)
+				lp->persist.lg_max = DEFAULT_MAX;
+			lp->persist.magic = DB_LOGMAGIC;
+			lp->persist.version = DB_LOGVERSION;
+			lp->persist.mode = mode;
+			SH_TAILQ_INIT(&lp->fq);
+
+			/* Initialize LOG LSNs. */
+			lp->lsn.file = 1;
+			lp->lsn.offset = 0;
+
+			newregion = 1;
+		} else if (ret != EEXIST)
+			goto err;
+	}
+
+	/* If we didn't or couldn't create the region, try and join it. */
+	if (!newregion &&
+	    (ret = __db_ropen(dbenv, DB_APP_LOG,
+	    path, DB_DEFAULT_LOG_FILE, 0, &fd, &dblp->maddr)) != 0) {
+		/*
+		 * If we fail because the file isn't available, wait a
+		 * second and try again.
+		 */
+		if (ret == EAGAIN && ++retry_cnt < 3) {
+			(void)__db_sleep(1, 0);
+			goto retry;
+		}
+		goto err;
+	}
+
+	/* Set up the common information. */
+	dblp->lp = dblp->maddr;
+	dblp->addr = (u_int8_t *)dblp->maddr + sizeof(LOG);
+	dblp->fd = fd;
+
+	/*
+	 * If doing recovery, try and recover any previous log files
+	 * before releasing the lock.
+	 */
+	if (newregion) {
+		if ((ret = __log_recover(dbenv, dblp)) != 0) {
+			/* Best effort: the recovery error is what we report. */
+			(void)log_unlink(path, 1, dbenv);
+			goto err;
+		}
+		UNLOCK_LOGREGION(dblp);
+	}
+	*lpp = dblp;
+	return (0);
+
+err:	/* Release the handle; nothing else hangs off it at this point. */
+	FREE(dblp, sizeof(DB_LOG));
+	return (ret);
+}
+
+/*
+ * __log_recover --
+ *	Recover a log.
+ *
+ *	Finds the last valid log file, reads it to locate the end of the
+ *	log and the most recent checkpoint record, and fills in the LSN and
+ *	buffer fields of the shared LOG structure (dblp->lp).  If no log
+ *	file is found, the region is left exactly as the caller set it up.
+ *
+ *	Returns 0 on success, or the error returned by __log_find or by a
+ *	__log_get call that was not expected to fail.
+ */
+static int
+__log_recover(dbenv, dblp)
+	DB_ENV *dbenv;
+	DB_LOG *dblp;
+{
+	DBT dbt;
+	DB_LSN lsn;
+	LOG *lp;
+	u_int32_t chk;
+	int cnt, found_checkpoint, ret;
+
+	lp = dblp->lp;
+
+	/*
+	 * Find a log file.  If none exist, we simply return, leaving
+	 * everything initialized to a new log.
+	 */
+	if ((ret = __log_find(dbenv, lp, &cnt)) != 0)
+		return (ret);
+	if (cnt == 0)
+		return (0);
+
+	/*
+	 * We have a log file name, find the last one.  Probe successive
+	 * file numbers until one fails validation, then step back to the
+	 * last number that validated.
+	 */
+	while (cnt < MAXLFNAME)
+		if (__log_valid(dbenv, lp, ++cnt) != 0) {
+			--cnt;
+			break;
+		}
+
+	/*
+	 * We have the last useful log file and we've loaded any persistent
+	 * information.  Pretend that the log is larger than it can possibly
+	 * be, and read this file, looking for a checkpoint and its end.
+	 */
+	dblp->c_lsn.file = cnt;
+	dblp->c_lsn.offset = 0;
+	lsn = dblp->c_lsn;
+	lp->lsn.file = cnt + 1;
+	lp->lsn.offset = 0;
+
+	/* Set the cursor.  Shouldn't fail, leave error messages on. */
+	memset(&dbt, 0, sizeof(dbt));
+	if ((ret = __log_get(dblp, &lsn, &dbt, DB_SET, 0)) != 0)
+		return (ret);
+
+	/*
+	 * Read to the end of the file, saving checkpoints.  This will fail
+	 * at some point, so turn off error messages.
+	 */
+	found_checkpoint = 0;
+	while (__log_get(dblp, &lsn, &dbt, DB_NEXT, 1) == 0) {
+		/* Too short to carry a record type: nothing to inspect. */
+		if (dbt.size < sizeof(u_int32_t))
+			continue;
+		/* The record type is the first u_int32_t of the record. */
+		memcpy(&chk, dbt.data, sizeof(u_int32_t));
+		if (chk == DB_txn_ckp) {
+			lp->c_lsn = lsn;
+			found_checkpoint = 1;
+		}
+	}
+
+	/*
+	 * We know where the end of the log is.  Since that record is on disk,
+	 * it's also the last-synced LSN.
+	 */
+	lp->lsn = lsn;
+	lp->lsn.offset += dblp->c_len;
+	lp->s_lsn = lp->lsn;
+
+	/* Set up the current buffer information, too. */
+	lp->len = dblp->c_len;
+	lp->b_off = 0;
+	lp->w_off = lp->lsn.offset;
+
+	/*
+	 * It's possible that we didn't find a checkpoint because there wasn't
+	 * one in the last log file.  Start searching.  Each pass steps back
+	 * one file number, stopping once file 1 has been read.
+	 */
+	while (!found_checkpoint && cnt > 1) {
+		dblp->c_lsn.file = --cnt;
+		dblp->c_lsn.offset = 0;
+		lsn = dblp->c_lsn;
+
+		/* Set the cursor.  Shouldn't fail, leave error messages on. */
+		if ((ret = __log_get(dblp, &lsn, &dbt, DB_SET, 0)) != 0)
+			return (ret);
+
+		/*
+		 * Read to the end of the file, saving checkpoints.  Shouldn't
+		 * fail, leave error messages on.
+		 */
+		while (__log_get(dblp, &lsn, &dbt, DB_NEXT, 0) == 0) {
+			if (dbt.size < sizeof(u_int32_t))
+				continue;
+			memcpy(&chk, dbt.data, sizeof(u_int32_t));
+			if (chk == DB_txn_ckp) {
+				lp->c_lsn = lsn;
+				found_checkpoint = 1;
+			}
+		}
+	}
+
+	/*
+	 * If we never find a checkpoint, that's okay; point the checkpoint
+	 * LSN at the start of file 1.
+	 */
+	if (!found_checkpoint) {
+		lp->c_lsn.file = 1;
+		lp->c_lsn.offset = 0;
+	}
+
+	/* Informational message, reported through the error channel. */
+	__db_err(dbenv,
+	    "Recovering the log: last valid LSN: file: %lu offset %lu",
+	    (u_long)lp->lsn.file, (u_long)lp->lsn.offset);
+
+	/* Reset the cursor.  */
+	ZERO_LSN(dblp->c_lsn);
+
+	return (0);
+}
+
+/*
+ * __log_find --
+ *	Try to find a log file.
+ *
+ *	Lists the directory that holds the log files and looks, starting
+ *	from the end of the listing, for an entry named "log.<number>"
+ *	that passes __log_valid.  *valp receives that number, or 0 when no
+ *	such file exists.  Returns 0 unless the name or directory lookup
+ *	itself fails.
+ *
+ * PUBLIC: int __log_find __P((DB_ENV *, LOG *, int *));
+ */
+int
+__log_find(dbenv, lp, valp)
+	DB_ENV *dbenv;
+	LOG *lp;
+	int *valp;
+{
+	int idx, nfiles, num, ret;
+	const char *dir;
+	char **names, *path, *sep;
+
+	/* Build a log file name, then trim it down to its directory. */
+	ret = __log_name(dbenv, 1, &path);
+	if (ret != 0)
+		return (ret);
+	sep = __db_rpath(path);
+	if (sep != NULL) {
+		*sep = '\0';
+		dir = path;
+	} else
+		dir = PATH_DOT;
+
+	/* Read the directory; the path is no longer needed afterwards. */
+	ret = __db_dir(dbenv, dir, &names, &nfiles);
+	FREES(path);
+	if (ret != 0)
+		return (ret);
+
+	/* Until proven otherwise, there is no log file. */
+	*valp = 0;
+	for (idx = nfiles - 1; idx >= 0; --idx) {
+		if (strncmp(names[idx], "log.", sizeof("log.") - 1) != 0)
+			continue;
+
+		/* A suffix that converts to 0 is not a usable file number. */
+		num = atoi(names[idx] + 4);
+		if (num == 0)
+			continue;
+		if (__log_valid(dbenv, lp, num) != 0)
+			continue;
+
+		*valp = num;
+		break;
+	}
+
+	/* Discard the list. */
+	__db_dirf(dbenv, names, nfiles);
+
+	return (ret);
+}
+
+/*
+ * __log_valid --
+ *	Validate a log file.
+ *
+ *	Opens log file number cnt, positions it using sizeof(HDR) as the
+ *	offset, reads a LOGP structure and checks its magic number and
+ *	version.  When the file is acceptable and lp is not NULL, the
+ *	file's lg_max and mode are copied into lp->persist.
+ *
+ *	Returns 0 for a valid file and a non-zero error value otherwise.
+ *
+ * PUBLIC: int __log_valid __P((DB_ENV *, LOG *, int));
+ */
+int
+__log_valid(dbenv, lp, cnt)
+	DB_ENV *dbenv;
+	LOG *lp;
+	int cnt;
+{
+	LOGP persist;
+	ssize_t nr;
+	int fd, ret;
+	char *name;
+
+	if ((ret = __log_name(dbenv, cnt, &name)) != 0)
+		return (ret);
+
+	/* Open, seek and read; each step runs only if the last one worked. */
+	fd = -1;
+	ret = __db_fdopen(name,
+	    DB_RDONLY | DB_SEQUENTIAL,
+	    DB_RDONLY | DB_SEQUENTIAL, 0, &fd);
+	if (ret == 0)
+		ret = __db_lseek(fd, 0, 0, sizeof(HDR), SEEK_SET);
+	if (ret == 0)
+		ret = __db_read(fd, &persist, sizeof(LOGP), &nr);
+	if (ret == 0 && nr != sizeof(LOGP))
+		ret = EIO;		/* Short read. */
+
+	if (ret != 0) {
+		/* Only complain about files that could be opened. */
+		if (fd != -1) {
+			(void)__db_close(fd);
+			__db_err(dbenv,
+			    "Ignoring log file: %s: %s", name, strerror(ret));
+		}
+		goto err;
+	}
+	(void)__db_close(fd);
+
+	if (persist.magic != DB_LOGMAGIC) {
+		__db_err(dbenv,
+		    "Ignoring log file: %s: magic number %lx, not %lx",
+		    name, (u_long)persist.magic, (u_long)DB_LOGMAGIC);
+		ret = EINVAL;
+	} else if (persist.version < DB_LOGOLDVER ||
+	    persist.version > DB_LOGVERSION) {
+		__db_err(dbenv,
+		    "Ignoring log file: %s: unsupported log version %lu",
+		    name, (u_long)persist.version);
+		ret = EINVAL;
+	} else {
+		/* Good file: hand its persistent settings to the caller. */
+		if (lp != NULL) {
+			lp->persist.lg_max = persist.lg_max;
+			lp->persist.mode = persist.mode;
+		}
+		ret = 0;
+	}
+
+err:	FREES(name);
+	return (ret);
+}
+
+/*
+ * log_close --
+ *	Close a log.
+ *
+ *	Closes the region and any open file descriptors, then releases the
+ *	cursor buffer, the dbentry table and the DB_LOG handle itself, so
+ *	dblp must not be used after this call.  Every step is attempted;
+ *	the first error encountered is the one returned.
+ */
+int
+log_close(dblp)
+	DB_LOG *dblp;
+{
+	int first_err, rc;
+
+	/* Close the region. */
+	first_err = __db_rclose(dblp->dbenv, dblp->fd, dblp->maddr);
+
+	/* Close open files, release allocated memory. */
+	if (dblp->lfd != -1) {
+		rc = __db_close(dblp->lfd);
+		if (rc != 0 && first_err == 0)
+			first_err = rc;
+	}
+	if (dblp->c_dbt.data != NULL) {
+		FREE(dblp->c_dbt.data, dblp->c_dbt.ulen);
+	}
+	if (dblp->c_fd != -1) {
+		rc = __db_close(dblp->c_fd);
+		if (rc != 0 && first_err == 0)
+			first_err = rc;
+	}
+
+	/* Free the structure. */
+	if (dblp->dbentry != NULL) {
+		FREE(dblp->dbentry, (dblp->dbentry_cnt * sizeof(DB_ENTRY)));
+	}
+	FREE(dblp, sizeof(DB_LOG));
+
+	return (first_err);
+}
+
+/*
+ * log_unlink --
+ *	Remove a log region.
+ *
+ *	Hands path, force and dbenv to __db_runlink for the default log
+ *	region file and returns whatever that call returns.
+ */
+int
+log_unlink(path, force, dbenv)
+	const char *path;
+	int force;
+	DB_ENV *dbenv;
+{
+	int ret;
+
+	ret = __db_runlink(dbenv,
+	    DB_APP_LOG, path, DB_DEFAULT_LOG_FILE, force);
+	return (ret);
+}
diff --git a/db2/log/log.src b/db2/log/log.src
new file mode 100644
index 0000000000..9f4829179b
--- /dev/null
+++ b/db2/log/log.src
@@ -0,0 +1,53 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *	Sleepycat Software.  All rights reserved.
+ *
+ *	@(#)log.src	10.3 (Sleepycat) 8/20/97
+ *
+ * This is the source file used to create the logging functions for the
+ * log package.  Each access method (or set of routines wishing to register
+ * record types with the transaction system) should have a file like this.
+ * Each type of log record and its parameters is defined.  The basic
+ * format of a record definition is:
+ *
+ * BEGIN	<RECORD_TYPE>
+ * ARG|DBT|PTR	<variable name>	<variable type> <printf format>
+ * ...
+ * END
+ * ARG the argument is a simple parameter of the type specified.
+ * DBT the argument is a DBT (db.h) containing a length and pointer.
+ * PTR the argument is a pointer to the data type specified; the entire
+ *     type should be logged.
+ *
+ * There are a set of shell scripts of the form xxx.sh that generate c
+ * code and or h files to process these.  (This is probably better done
+ * in a single PERL script, but for now, this works.)
+ *
+ * The DB recovery system requires the following three fields appear in
+ * every record, and will assign them to the per-record-type structures
+ * as well as making them the first parameters to the appropriate logging
+ * call.
+ * rectype:	record-type, identifies the structure and log/read call
+ * txnid:	transaction id, a DBT in this implementation
+ * prev:	the last LSN for this transaction
+ */
+
+/*
+ * Use the argument of PREFIX as the prefix for all record types,
+ * routines, id numbers, etc.
+ */
+PREFIX	log
+
+/* Used for registering new name/id translations. */
+BEGIN	register
+DBT	name		DBT		s
+DBT	uid		DBT		s
+ARG	id		u_int32_t	lu
+ARG	ftype		DBTYPE		lx
+END
+
+BEGIN	unregister
+ARG	id		u_int32_t	lu
+END
diff --git a/db2/log/log_archive.c b/db2/log/log_archive.c
new file mode 100644
index 0000000000..d70d4c64c0
--- /dev/null
+++ b/db2/log/log_archive.c
@@ -0,0 +1,413 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1997
+ *	Sleepycat Software.  All rights reserved.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)log_archive.c	10.23 (Sleepycat) 8/23/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#endif
+
+#include "db_int.h"
+#include "db_dispatch.h"
+#include "shqueue.h"
+#include "log.h"
+#include "clib_ext.h"
+#include "common_ext.h"
+
+static int absname __P((char *, char *, char **));
+static int build_data __P((DB_LOG *, char *, char ***, void *(*)(size_t)));
+static int cmpfunc __P((const void *, const void *));
+static int usermem __P((char ***, void *(*)(size_t)));
+
+/*
+ * log_archive --
+ *	Supporting function for db_archive(1).
+ *
+ *	logp:	   log handle.
+ *	listp:	   set to a NULL-terminated, sorted list of file names held
+ *		   in a single allocation, or to NULL when there is nothing
+ *		   to return.
+ *	flags:	   0, or DB_ARCH_ABS optionally combined with one of
+ *		   DB_ARCH_DATA and DB_ARCH_LOG.
+ *	db_malloc: allocator used for the returned list; malloc is used
+ *		   when this is NULL.
+ *
+ *	Returns 0 on success or a non-zero error value.  All intermediate
+ *	allocations, including the current file name, are released on
+ *	every exit path.
+ */
+int
+log_archive(logp, listp, flags, db_malloc)
+	DB_LOG *logp;
+	char ***listp;
+	int flags;
+	void *(*db_malloc) __P((size_t));
+{
+	DBT rec;
+	DB_LSN stable_lsn;
+	u_int32_t fnum;
+	int array_size, n, ret;
+	char **array, **arrayp, **tmp, *name, *p, *pref, buf[MAXPATHLEN];
+
+	fnum = 0;				/* XXX: Shut the compiler up. */
+
+#define	OKFLAGS	(DB_ARCH_ABS | DB_ARCH_DATA | DB_ARCH_LOG)
+	if (flags != 0) {
+		if ((ret =
+		    __db_fchk(logp->dbenv, "log_archive", flags, OKFLAGS)) != 0)
+			return (ret);
+		if ((ret =
+		    __db_fcchk(logp->dbenv,
+		        "log_archive", flags, DB_ARCH_DATA, DB_ARCH_LOG)) != 0)
+			return (ret);
+	}
+
+	/*
+	 * Get the absolute pathname of the current directory.  It would
+	 * be nice to get the shortest pathname of the database directory,
+	 * but that's just not possible.
+	 */
+	if (LF_ISSET(DB_ARCH_ABS)) {
+		errno = 0;
+		if ((pref = getcwd(buf, sizeof(buf))) == NULL)
+			return (errno == 0 ? ENOMEM : errno);
+	} else
+		pref = NULL;
+
+	switch (LF_ISSET(~DB_ARCH_ABS)) {
+	case DB_ARCH_DATA:
+		return (build_data(logp, pref, listp, db_malloc));
+	case DB_ARCH_LOG:
+		memset(&rec, 0, sizeof(rec));
+		if (F_ISSET(logp, DB_AM_THREAD))
+			F_SET(&rec, DB_DBT_MALLOC);
+		if ((ret = log_get(logp, &stable_lsn, &rec, DB_LAST)) != 0)
+			return (ret);
+		/* Only the LSN is wanted; drop the record data. */
+		if (F_ISSET(logp, DB_AM_THREAD))
+			free(rec.data);
+		fnum = stable_lsn.file;
+		break;
+	case 0:
+		if ((ret = __log_findckp(logp, &stable_lsn)) != 0) {
+			if (ret != DB_NOTFOUND)
+				return (ret);
+			*listp = NULL;
+			return (0);
+		}
+		/* Remove any log files before the last stable LSN. */
+		fnum = stable_lsn.file - 1;
+		break;
+	}
+
+#define	LIST_INCREMENT	64
+	/* Get some initial space. */
+	if ((array =
+	    (char **)malloc(sizeof(char *) * (array_size = 10))) == NULL)
+		return (ENOMEM);
+	array[0] = NULL;
+
+	/*
+	 * Build an array of the file names, walking file numbers downwards
+	 * until one is missing.  The array is kept NULL-terminated at slot
+	 * n throughout so the error path can always walk it.
+	 */
+	for (n = 0; fnum > 0; --fnum) {
+		if ((ret = __log_name(logp->dbenv, fnum, &name)) != 0)
+			goto err;
+		if (__db_exists(name, NULL) != 0) {
+			FREES(name);
+			break;
+		}
+
+		if (n >= array_size - 1) {
+			array_size += LIST_INCREMENT;
+			/*
+			 * Grow through a temporary so the existing list is
+			 * still reachable (and freed) if realloc fails.
+			 */
+			if ((tmp = (char **)realloc(array,
+			    sizeof(char *) * array_size)) == NULL) {
+				FREES(name);
+				ret = ENOMEM;
+				goto err;
+			}
+			array = tmp;
+		}
+
+		if (LF_ISSET(DB_ARCH_ABS)) {
+			ret = absname(pref, name, &array[n]);
+			FREES(name);
+			if (ret != 0)
+				goto err;
+		} else if ((p = __db_rpath(name)) != NULL) {
+			array[n] = (char *)strdup(p + 1);
+			FREES(name);
+			if (array[n] == NULL) {
+				ret = ENOMEM;
+				goto err;
+			}
+		} else
+			array[n] = name;	/* The list now owns name. */
+
+		array[++n] = NULL;
+	}
+
+	/* If there's nothing to return, we're done. */
+	if (n == 0) {
+		*listp = NULL;
+		ret = 0;
+		goto err;
+	}
+
+	/* Sort the list. */
+	qsort(array, (size_t)n, sizeof(char *), cmpfunc);
+
+	/* Rework the memory. */
+	if ((ret = usermem(&array, db_malloc)) != 0)
+		goto err;
+
+	*listp = array;
+	return (0);
+
+err:	if (array != NULL) {
+		for (arrayp = array; *arrayp != NULL; ++arrayp)
+			FREES(*arrayp);
+		free(array);
+	}
+	return (ret);
+}
+
+/*
+ * build_data --
+ *	Build a list of datafiles for return.
+ *
+ *	Walks the whole log collecting the file name from every
+ *	DB_log_register record, sorts the names, drops duplicates and
+ *	files that do not exist, and rewrites each survivor either as an
+ *	absolute path (pref != NULL) or as the final path component.
+ *
+ *	logp:	   log handle.
+ *	pref:	   directory prefix for absolute names, or NULL.
+ *	listp:	   set to the resulting NULL-terminated list, or to NULL
+ *		   when no register records were found.
+ *	db_malloc: allocator used for the returned list (malloc if NULL).
+ *
+ *	Returns 0 on success or a non-zero error value; on error every
+ *	intermediate allocation is released exactly once.
+ */
+static int
+build_data(logp, pref, listp, db_malloc)
+	DB_LOG *logp;
+	char *pref, ***listp;
+	void *(*db_malloc) __P((size_t));
+{
+	DBT rec;
+	DB_LSN lsn;
+	__log_register_args *argp;
+	u_int32_t rectype;
+	int array_size, last, n, nxt, ret;
+	char **array, **arrayp, **tmp, *p, *real_name;
+
+	/* Get some initial space. */
+	if ((array =
+	    (char **)malloc(sizeof(char *) * (array_size = 10))) == NULL)
+		return (ENOMEM);
+	array[0] = NULL;
+
+	memset(&rec, 0, sizeof(rec));
+	if (F_ISSET(logp, DB_AM_THREAD))
+		F_SET(&rec, DB_DBT_MALLOC);
+	for (n = 0, ret = log_get(logp, &lsn, &rec, DB_FIRST);
+	    ret == 0; ret = log_get(logp, &lsn, &rec, DB_NEXT)) {
+		if (rec.size < sizeof(rectype)) {
+			ret = EINVAL;
+			__db_err(logp->dbenv, "log_archive: bad log record");
+			goto lg_free;
+		}
+
+		memcpy(&rectype, rec.data, sizeof(rectype));
+		if (rectype != DB_log_register) {
+			if (F_ISSET(logp, DB_AM_THREAD)) {
+				free(rec.data);
+				rec.data = NULL;
+			}
+			continue;
+		}
+		if ((ret = __log_register_read(rec.data, &argp)) != 0) {
+			ret = EINVAL;
+			__db_err(logp->dbenv,
+			    "log_archive: unable to read log record");
+			goto lg_free;
+		}
+
+		if (n >= array_size - 1) {
+			array_size += LIST_INCREMENT;
+			/*
+			 * Grow through a temporary so the existing list is
+			 * still reachable (and freed) if realloc fails.
+			 */
+			if ((tmp = (char **)realloc(array,
+			    sizeof(char *) * array_size)) == NULL) {
+				free(argp);
+				ret = ENOMEM;
+				goto lg_free;
+			}
+			array = tmp;
+		}
+
+		/*
+		 * The name is copied out of the record buffer, so the parsed
+		 * argument structure can be released whether or not the copy
+		 * succeeded.
+		 */
+		array[n] = (char *)strdup(argp->name.data);
+		free(argp);
+		if (array[n] == NULL) {
+			ret = ENOMEM;
+			goto lg_free;
+		}
+
+		array[++n] = NULL;
+
+		if (F_ISSET(logp, DB_AM_THREAD)) {
+			free(rec.data);
+			rec.data = NULL;
+		}
+	}
+
+	/* If there's nothing to return, we're done. */
+	if (n == 0) {
+		ret = 0;
+		*listp = NULL;
+		goto err1;
+	}
+
+	/* Sort the list. */
+	qsort(array, (size_t)n, sizeof(char *), cmpfunc);
+
+	/*
+	 * Build the real pathnames, discarding nonexistent files and
+	 * duplicates.
+	 */
+	for (last = nxt = 0; nxt < n;) {
+		/*
+		 * Discard duplicates.  Last is the next slot we're going
+		 * to return to the user, nxt is the next slot that we're
+		 * going to consider.
+		 */
+		if (last != nxt) {
+			array[last] = array[nxt];
+			array[nxt] = NULL;
+		}
+		for (++nxt; nxt < n &&
+		    strcmp(array[last], array[nxt]) == 0; ++nxt) {
+			FREES(array[nxt]);
+			array[nxt] = NULL;
+		}
+
+		/* Get the real name. */
+		if ((ret = __db_appname(logp->dbenv,
+		    DB_APP_DATA, NULL, array[last], NULL, &real_name)) != 0)
+			goto err2;
+
+		/* If the file doesn't exist, ignore it. */
+		if (__db_exists(real_name, NULL) != 0) {
+			FREES(real_name);
+			FREES(array[last]);
+			array[last] = NULL;
+			continue;
+		}
+
+		/* Rework the name as requested by the user. */
+		FREES(array[last]);
+		array[last] = NULL;
+		if (pref != NULL) {
+			ret = absname(pref, real_name, &array[last]);
+			FREES(real_name);
+			if (ret != 0)
+				goto err2;
+		} else if ((p = __db_rpath(real_name)) != NULL) {
+			array[last] = (char *)strdup(p + 1);
+			FREES(real_name);
+			if (array[last] == NULL) {
+				/* ret is still 0 from __db_appname. */
+				ret = ENOMEM;
+				goto err2;
+			}
+		} else
+			array[last] = real_name;
+		++last;
+	}
+
+	/* NULL-terminate the list. */
+	array[last] = NULL;
+
+	/* Rework the memory. */
+	if ((ret = usermem(&array, db_malloc)) != 0)
+		goto err1;
+
+	*listp = array;
+	return (0);
+
+lg_free:
+	/* Release a record buffer that log_get allocated for us. */
+	if (F_ISSET(&rec, DB_DBT_MALLOC) && rec.data != NULL)
+		free(rec.data);
+	goto err1;
+
+err2:	/*
+	 * XXX
+	 * We've possibly inserted NULLs into the array list, so clean up a
+	 * bit so that the other error processing works.  Each freed slot is
+	 * cleared so that the walk below cannot free it a second time.
+	 */
+	if (array != NULL)
+		for (; nxt < n; ++nxt) {
+			FREES(array[nxt]);
+			array[nxt] = NULL;
+		}
+	/* FALLTHROUGH */
+
+err1:	if (array != NULL) {
+		for (arrayp = array; *arrayp != NULL; ++arrayp)
+			FREES(*arrayp);
+		free(array);
+	}
+	return (ret);
+}
+
+/*
+ * absname --
+ *	Return an absolute path name for the file.
+ *
+ *	Concatenates pref and name into freshly malloc'd memory, inserting
+ *	the first PATH_SEPARATOR character between them unless pref already
+ *	ends in a separator.  An empty pref contributes nothing, so the
+ *	result is simply a copy of name.
+ *
+ *	On success 0 is returned and *newnamep holds the new string, which
+ *	the caller must free.  ENOMEM is returned if the allocation fails,
+ *	in which case *newnamep is not modified.
+ */
+static int
+absname(pref, name, newnamep)
+	char *pref, *name, **newnamep;
+{
+	size_t l_pref, l_name;
+	char *newname;
+
+	l_pref = strlen(pref);
+	l_name = strlen(name);
+
+	/* Malloc space for concatenating the two (separator plus NUL). */
+	if ((newname = (char *)malloc(l_pref + l_name + 2)) == NULL)
+		return (ENOMEM);
+
+	/* Build the name. */
+	memcpy(newname, pref, l_pref);
+	/* Never look at newname[-1] when the prefix is empty. */
+	if (l_pref != 0 &&
+	    strchr(PATH_SEPARATOR, newname[l_pref - 1]) == NULL)
+		newname[l_pref++] = PATH_SEPARATOR[0];
+	memcpy(newname + l_pref, name, l_name + 1);
+	*newnamep = newname;
+
+	return (0);
+}
+
+/*
+ * usermem --
+ *	Create a single chunk of memory that holds the returned information.
+ *	If the user has their own malloc routine, use it.
+ *
+ *	The chunk starts with the NULL-terminated pointer table and is
+ *	followed by the string bytes.  On success the original strings and
+ *	the original table are freed and *listp is replaced by the new
+ *	chunk.  On allocation failure ENOMEM is returned and the original
+ *	list is left untouched.
+ */
+static int
+usermem(listp, func)
+	char ***listp;
+	void *(*func) __P((size_t));
+{
+	size_t bytes, cnt, i, slen;
+	char **newlist, **old, *strp;
+
+	old = *listp;
+
+	/* One pointer plus the string for each entry, and the final NULL. */
+	bytes = sizeof(char *);
+	for (cnt = 0; old[cnt] != NULL; ++cnt)
+		bytes += sizeof(char *) + strlen(old[cnt]) + 1;
+
+	/*
+	 * Allocate it and set up the pointers.
+	 *
+	 * XXX
+	 * Don't simplify this expression, SunOS compilers don't like it.
+	 */
+	if (func == NULL)
+		newlist = (char **)malloc(bytes);
+	else
+		newlist = (char **)func(bytes);
+	if (newlist == NULL)
+		return (ENOMEM);
+
+	/* String storage begins right after the pointer table. */
+	strp = (char *)(newlist + cnt + 1);
+
+	/* Copy the original information into the new memory. */
+	for (i = 0; i < cnt; ++i) {
+		slen = strlen(old[i]) + 1;
+		memcpy(strp, old[i], slen);
+		newlist[i] = strp;
+		strp += slen;
+
+		FREES(old[i]);
+	}
+
+	/* NULL-terminate the list. */
+	newlist[cnt] = NULL;
+
+	free(old);
+	*listp = newlist;
+
+	return (0);
+}
+
+/*
+ * cmpfunc --
+ *	qsort comparison routine for an array of C strings: each argument
+ *	points at an array element (a char *), and the strings are ordered
+ *	with strcmp.
+ */
+static int
+cmpfunc(p1, p2)
+	const void *p1, *p2;
+{
+	char *left, *right;
+
+	left = *(char **)p1;
+	right = *(char **)p2;
+	return (strcmp(left, right));
+}
diff --git a/db2/log/log_auto.c b/db2/log/log_auto.c
new file mode 100644
index 0000000000..59400087ca
--- /dev/null
+++ b/db2/log/log_auto.c
@@ -0,0 +1,351 @@
+/* Do not edit: automatically built by dist/db_gen.sh. */
+#include "config.h"
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <ctype.h>
+#include <errno.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#endif
+
+#include "db_int.h"
+#include "shqueue.h"
+#include "db_page.h"
+#include "db_dispatch.h"
+#include "log.h"
+#include "db_am.h"
+#include "common_ext.h"
+
+/*
+ * __log_register_log --
+ *	Build a DB_log_register record and write it with __log_put.
+ *
+ *	The record is laid out, in order, as: the record type, the
+ *	transaction id (0 when txnid is NULL), the transaction's last LSN
+ *	(a zero LSN when txnid is NULL), a u_int32_t length followed by the
+ *	bytes of name, the same for uid, then id and ftype.  A NULL name or
+ *	uid is written as a zero length with no bytes.
+ *
+ *	Returns ENOMEM if the record buffer cannot be allocated, otherwise
+ *	whatever __log_put returns.
+ *
+ * PUBLIC: int __log_register_log
+ * PUBLIC:     __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
+ * PUBLIC:     DBT *, DBT *, u_int32_t, DBTYPE));
+ */
+int __log_register_log(logp, txnid, ret_lsnp, flags,
+	name, uid, id, ftype)
+	DB_LOG *logp;
+	DB_TXN *txnid;
+	DB_LSN *ret_lsnp;
+	u_int32_t flags;
+	DBT *name;
+	DBT *uid;
+	u_int32_t id;
+	DBTYPE ftype;
+{
+	DBT logrec;
+	DB_LSN *lsnp, null_lsn;
+	u_int32_t zero;
+	u_int32_t rectype, txn_num;
+	int ret;
+	u_int8_t *bp;
+
+	rectype = DB_log_register;
+	txn_num = txnid == NULL ? 0 : txnid->txnid;
+	/* Without a transaction there is no previous LSN to chain to. */
+	if (txnid == NULL) {
+		null_lsn.file = 0;
+		null_lsn.offset = 0;
+		lsnp = &null_lsn;
+	} else
+		lsnp = &txnid->last_lsn;
+	/* Total size: fixed header, two length-prefixed DBTs, id, ftype. */
+	logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN)
+	    + sizeof(u_int32_t) + (name == NULL ? 0 : name->size)
+	    + sizeof(u_int32_t) + (uid == NULL ? 0 : uid->size)
+	    + sizeof(id)
+	    + sizeof(ftype);
+	if ((logrec.data = (void *)malloc(logrec.size)) == NULL)
+		return (ENOMEM);
+
+	/* Serialize the fields back to back; bp is the write cursor. */
+	bp = logrec.data;
+	memcpy(bp, &rectype, sizeof(rectype));
+	bp += sizeof(rectype);
+	memcpy(bp, &txn_num, sizeof(txn_num));
+	bp += sizeof(txn_num);
+	memcpy(bp, lsnp, sizeof(DB_LSN));
+	bp += sizeof(DB_LSN);
+	if (name == NULL) {
+		zero = 0;
+		memcpy(bp, &zero, sizeof(u_int32_t));
+		bp += sizeof(u_int32_t);
+	} else {
+		memcpy(bp, &name->size, sizeof(name->size));
+		bp += sizeof(name->size);
+		memcpy(bp, name->data, name->size);
+		bp += name->size;
+	}
+	if (uid == NULL) {
+		zero = 0;
+		memcpy(bp, &zero, sizeof(u_int32_t));
+		bp += sizeof(u_int32_t);
+	} else {
+		memcpy(bp, &uid->size, sizeof(uid->size));
+		bp += sizeof(uid->size);
+		memcpy(bp, uid->data, uid->size);
+		bp += uid->size;
+	}
+	memcpy(bp, &id, sizeof(id));
+	bp += sizeof(id);
+	memcpy(bp, &ftype, sizeof(ftype));
+	bp += sizeof(ftype);
+#ifdef DEBUG
+	/* Sanity check: the cursor must land exactly on the computed size. */
+	if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size)
+		fprintf(stderr, "Error in log record length");
+#endif
+	/*
+	 * NOTE(review): only the size and data fields of logrec are set
+	 * before it is passed on, and the transaction's last LSN is updated
+	 * from *ret_lsnp even when __log_put returns an error -- confirm
+	 * both are intended.
+	 */
+	ret = __log_put(logp, ret_lsnp, (DBT *)&logrec, flags);
+	if (txnid != NULL)
+		txnid->last_lsn = *ret_lsnp;
+	free(logrec.data);
+	return (ret);
+}
+
+/*
+ * __log_register_print --
+ *	Print a DB_log_register record to standard output.
+ *
+ *	dbtp holds the raw record and lsnp its LSN; the remaining
+ *	parameters are unused.  The name and uid bytes are printed as
+ *	characters when printable (or newline) and as hexadecimal
+ *	otherwise.  Returns 0, or the error from __log_register_read.
+ *
+ * PUBLIC: int __log_register_print
+ * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+ */
+
+int
+__log_register_print(notused1, dbtp, lsnp, notused3, notused4)
+	DB_LOG *notused1;
+	DBT *dbtp;
+	DB_LSN *lsnp;
+	int notused3;
+	void *notused4;
+{
+	__log_register_args *argp;
+	u_int32_t i;
+	int c, ret;
+
+	/* The unused parameters are assigned to quiet the compiler. */
+	i = 0;
+	c = 0;
+	notused1 = NULL;
+	notused3 = 0;
+	notused4 = NULL;
+
+	if((ret = __log_register_read(dbtp->data, &argp)) != 0)
+		return (ret);
+	printf("[%lu][%lu]log_register: rec: %lu txnid %lx prevlsn [%lu][%lu]\n",
+	    (u_long)lsnp->file,
+	    (u_long)lsnp->offset,
+	    (u_long)argp->type,
+	    (u_long)argp->txnid->txnid,
+	    (u_long)argp->prev_lsn.file,
+	    (u_long)argp->prev_lsn.offset);
+	printf("\tname: ");
+	for (i = 0; i < argp->name.size; i++) {
+		/*
+		 * Read the byte as unsigned: a negative value is not a valid
+		 * isprint() argument and would print sign-extended in hex.
+		 */
+		c = ((u_int8_t *)argp->name.data)[i];
+		if (isprint(c) || c == 0xa)
+			putchar(c);
+		else
+			printf("%#x ", c);
+	}
+	printf("\n");
+	printf("\tuid: ");
+	for (i = 0; i < argp->uid.size; i++) {
+		c = ((u_int8_t *)argp->uid.data)[i];
+		if (isprint(c) || c == 0xa)
+			putchar(c);
+		else
+			printf("%#x ", c);
+	}
+	printf("\n");
+	printf("\tid: %lu\n", (u_long)argp->id);
+	printf("\tftype: 0x%lx\n", (u_long)argp->ftype);
+	printf("\n");
+	free(argp);
+	return (0);
+}
+
+/*
+ * __log_register_read --
+ *	Decode a DB_log_register record from recbuf.
+ *
+ *	A single allocation holds the argument structure followed by a
+ *	DB_TXN, which argp->txnid is pointed at.  The name and uid DBTs are
+ *	not copied: their data pointers refer into recbuf, so recbuf must
+ *	stay valid for as long as they are used.  On success *argpp is set
+ *	and 0 is returned; the caller releases the result with free().
+ *	Returns ENOMEM if the allocation fails.
+ *
+ *	NOTE(review): the length of recbuf is not passed in, so the sizes
+ *	read from the record are trusted as-is.
+ *
+ * PUBLIC: int __log_register_read __P((void *, __log_register_args **));
+ */
+int
+__log_register_read(recbuf, argpp)
+	void *recbuf;
+	__log_register_args **argpp;
+{
+	__log_register_args *argp;
+	u_int8_t *bp;
+
+	argp = (__log_register_args *)malloc(sizeof(__log_register_args) +
+	    sizeof(DB_TXN));
+	if (argp == NULL)
+		return (ENOMEM);
+	/* The DB_TXN lives directly behind the argument structure. */
+	argp->txnid = (DB_TXN *)&argp[1];
+	/* Walk the record in the order __log_register_log wrote it. */
+	bp = recbuf;
+	memcpy(&argp->type, bp, sizeof(argp->type));
+	bp += sizeof(argp->type);
+	memcpy(&argp->txnid->txnid,  bp, sizeof(argp->txnid->txnid));
+	bp += sizeof(argp->txnid->txnid);
+	memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN));
+	bp += sizeof(DB_LSN);
+	/* Length-prefixed name: data points into the record itself. */
+	memcpy(&argp->name.size, bp, sizeof(u_int32_t));
+	bp += sizeof(u_int32_t);
+	argp->name.data = bp;
+	bp += argp->name.size;
+	/* Length-prefixed uid, handled the same way. */
+	memcpy(&argp->uid.size, bp, sizeof(u_int32_t));
+	bp += sizeof(u_int32_t);
+	argp->uid.data = bp;
+	bp += argp->uid.size;
+	memcpy(&argp->id, bp, sizeof(argp->id));
+	bp += sizeof(argp->id);
+	memcpy(&argp->ftype, bp, sizeof(argp->ftype));
+	bp += sizeof(argp->ftype);
+	*argpp = argp;
+	return (0);
+}
+
+/*
+ * __log_unregister_log --
+ *	Build a DB_log_unregister record and write it with __log_put.
+ *
+ *	The record is laid out, in order, as: the record type, the
+ *	transaction id (0 when txnid is NULL), the transaction's last LSN
+ *	(a zero LSN when txnid is NULL) and id.
+ *
+ *	Returns ENOMEM if the record buffer cannot be allocated, otherwise
+ *	whatever __log_put returns.
+ *
+ * PUBLIC: int __log_unregister_log
+ * PUBLIC:     __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
+ * PUBLIC:     u_int32_t));
+ */
+int __log_unregister_log(logp, txnid, ret_lsnp, flags,
+	id)
+	DB_LOG *logp;
+	DB_TXN *txnid;
+	DB_LSN *ret_lsnp;
+	u_int32_t flags;
+	u_int32_t id;
+{
+	DBT logrec;
+	DB_LSN *lsnp, null_lsn;
+	u_int32_t rectype, txn_num;
+	int ret;
+	u_int8_t *bp;
+
+	rectype = DB_log_unregister;
+	txn_num = txnid == NULL ? 0 : txnid->txnid;
+	/* Without a transaction there is no previous LSN to chain to. */
+	if (txnid == NULL) {
+		null_lsn.file = 0;
+		null_lsn.offset = 0;
+		lsnp = &null_lsn;
+	} else
+		lsnp = &txnid->last_lsn;
+	logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN)
+	    + sizeof(id);
+	if ((logrec.data = (void *)malloc(logrec.size)) == NULL)
+		return (ENOMEM);
+
+	/* Serialize the fields back to back; bp is the write cursor. */
+	bp = logrec.data;
+	memcpy(bp, &rectype, sizeof(rectype));
+	bp += sizeof(rectype);
+	memcpy(bp, &txn_num, sizeof(txn_num));
+	bp += sizeof(txn_num);
+	memcpy(bp, lsnp, sizeof(DB_LSN));
+	bp += sizeof(DB_LSN);
+	memcpy(bp, &id, sizeof(id));
+	bp += sizeof(id);
+#ifdef DEBUG
+	/* Sanity check: the cursor must land exactly on the computed size. */
+	if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size)
+		fprintf(stderr, "Error in log record length");
+#endif
+	/*
+	 * NOTE(review): the transaction's last LSN is updated from
+	 * *ret_lsnp even when __log_put returns an error -- confirm this
+	 * is intended.
+	 */
+	ret = __log_put(logp, ret_lsnp, (DBT *)&logrec, flags);
+	if (txnid != NULL)
+		txnid->last_lsn = *ret_lsnp;
+	free(logrec.data);
+	return (ret);
+}
+
+/*
+ * __log_unregister_print --
+ *	Print a DB_log_unregister record to standard output.
+ *
+ *	dbtp holds the raw record and lsnp its LSN; the remaining
+ *	parameters are unused.  Returns 0, or the error from
+ *	__log_unregister_read.
+ *
+ * PUBLIC: int __log_unregister_print
+ * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+ */
+
+int
+__log_unregister_print(notused1, dbtp, lsnp, notused3, notused4)
+	DB_LOG *notused1;
+	DBT *dbtp;
+	DB_LSN *lsnp;
+	int notused3;
+	void *notused4;
+{
+	__log_unregister_args *rec;
+	int ret;
+
+	/* The unused parameters are assigned to quiet the compiler. */
+	notused1 = NULL;
+	notused3 = 0;
+	notused4 = NULL;
+
+	ret = __log_unregister_read(dbtp->data, &rec);
+	if (ret != 0)
+		return (ret);
+
+	/* Header line: record LSN, type, transaction and previous LSN. */
+	printf("[%lu][%lu]log_unregister: rec: %lu txnid %lx prevlsn [%lu][%lu]\n",
+	    (u_long)lsnp->file,
+	    (u_long)lsnp->offset,
+	    (u_long)rec->type,
+	    (u_long)rec->txnid->txnid,
+	    (u_long)rec->prev_lsn.file,
+	    (u_long)rec->prev_lsn.offset);
+	printf("\tid: %lu\n", (u_long)rec->id);
+	printf("\n");
+
+	free(rec);
+	return (0);
+}
+
+/*
+ * __log_unregister_read --
+ *	Decode a DB_log_unregister record from recbuf.
+ *
+ *	A single allocation holds the argument structure followed by a
+ *	DB_TXN, which argp->txnid is pointed at.  On success *argpp is set
+ *	and 0 is returned; the caller releases the result with free().
+ *	Returns ENOMEM if the allocation fails.
+ *
+ * PUBLIC: int __log_unregister_read __P((void *, __log_unregister_args **));
+ */
+int
+__log_unregister_read(recbuf, argpp)
+	void *recbuf;
+	__log_unregister_args **argpp;
+{
+	__log_unregister_args *argp;
+	u_int8_t *bp;
+
+	argp = (__log_unregister_args *)malloc(sizeof(__log_unregister_args) +
+	    sizeof(DB_TXN));
+	if (argp == NULL)
+		return (ENOMEM);
+	/* The DB_TXN lives directly behind the argument structure. */
+	argp->txnid = (DB_TXN *)&argp[1];
+	/* Walk the record in the order __log_unregister_log wrote it. */
+	bp = recbuf;
+	memcpy(&argp->type, bp, sizeof(argp->type));
+	bp += sizeof(argp->type);
+	memcpy(&argp->txnid->txnid,  bp, sizeof(argp->txnid->txnid));
+	bp += sizeof(argp->txnid->txnid);
+	memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN));
+	bp += sizeof(DB_LSN);
+	memcpy(&argp->id, bp, sizeof(argp->id));
+	bp += sizeof(argp->id);
+	*argpp = argp;
+	return (0);
+}
+
+/*
+ * __log_init_print --
+ *	Register the print routines for the log record types with
+ *	__db_add_recovery.  Stops at, and returns, the first error;
+ *	returns 0 when both registrations succeed.
+ *
+ * PUBLIC: int __log_init_print __P((DB_ENV *));
+ */
+int
+__log_init_print(dbenv)
+	DB_ENV *dbenv;
+{
+	int ret;
+
+	ret = __db_add_recovery(dbenv,
+	    __log_register_print, DB_log_register);
+	if (ret == 0)
+		ret = __db_add_recovery(dbenv,
+		    __log_unregister_print, DB_log_unregister);
+	return (ret);
+}
+
+/*
+ * __log_init_recover --
+ *	Register the recovery routines for the log record types with
+ *	__db_add_recovery.  Stops at, and returns, the first error;
+ *	returns 0 when both registrations succeed.
+ *
+ * PUBLIC: int __log_init_recover __P((DB_ENV *));
+ */
+int
+__log_init_recover(dbenv)
+	DB_ENV *dbenv;
+{
+	int ret;
+
+	ret = __db_add_recovery(dbenv,
+	    __log_register_recover, DB_log_register);
+	if (ret == 0)
+		ret = __db_add_recovery(dbenv,
+		    __log_unregister_recover, DB_log_unregister);
+	return (ret);
+}
+
diff --git a/db2/log/log_compare.c b/db2/log/log_compare.c
new file mode 100644
index 0000000000..601b25c626
--- /dev/null
+++ b/db2/log/log_compare.c
@@ -0,0 +1,34 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *	Sleepycat Software.  All rights reserved.
+ */
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)log_compare.c	10.2 (Sleepycat) 6/21/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+#endif
+
+#include "db_int.h"
+
+/*
+ * log_compare --
+ *	Compare two LSN's.
+ *
+ *	LSNs are ordered by file number first and by offset within the
+ *	file second.  Returns -1, 0 or 1 when lsn0 is respectively less
+ *	than, equal to or greater than lsn1.
+ */
+int
+log_compare(lsn0, lsn1)
+	const DB_LSN *lsn0, *lsn1;
+{
+	/* The file number decides unless both LSNs share a file. */
+	if (lsn0->file < lsn1->file)
+		return (-1);
+	if (lsn0->file > lsn1->file)
+		return (1);
+
+	/* Same file: fall back to the offset. */
+	if (lsn0->offset < lsn1->offset)
+		return (-1);
+	if (lsn0->offset > lsn1->offset)
+		return (1);
+
+	return (0);
+}
diff --git a/db2/log/log_findckp.c b/db2/log/log_findckp.c
new file mode 100644
index 0000000000..67fe9c9f50
--- /dev/null
+++ b/db2/log/log_findckp.c
@@ -0,0 +1,130 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *	Sleepycat Software.  All rights reserved.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)log_findckp.c	10.10 (Sleepycat) 7/30/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#endif
+
+#include "db_int.h"
+#include "shqueue.h"
+#include "log.h"
+#include "txn.h"
+#include "common_ext.h"
+
+/*
+ * __log_findckp --
+ *
+ * Looks for the most recent checkpoint that occurs before the most recent
+ * checkpoint LSN.  This is the point from which recovery can start and the
+ * point up to which archival/truncation can take place.  Checkpoints in
+ * the log look like:
+ *
+ * -------------------------------------------------------------------
+ *  | ckp A, ckplsn 100 |  .... record .... | ckp B, ckplsn 600 | ...
+ * -------------------------------------------------------------------
+ *         LSN 500                                 LSN 1000
+ *
+ * If we read what log returns from using the DB_CKP parameter to logput,
+ * we'll get the record at LSN 1000.  The checkpoint LSN there is 600.
+ * Now we have to scan backwards looking for a checkpoint before LSN 600.
+ * We find one at 500.  This means that we can truncate the log before
+ * 500 or run recovery beginning at 500.
+ *
+ * The LSN found is stored through lsnp.  When the log handle is threaded
+ * (DB_AM_THREAD) records are fetched with DB_DBT_MALLOC and each returned
+ * buffer is released here with free().
+ *
+ * Returns 0 if we find a checkpoint.
+ * Returns errno on error.
+ * Returns DB_NOTFOUND if we could not find a suitable start point and
+ * we should start from the beginning.
+ *
+ * PUBLIC: int __log_findckp __P((DB_LOG *, DB_LSN *));
+ */
+int
+__log_findckp(lp, lsnp)
+	DB_LOG *lp;
+	DB_LSN *lsnp;
+{
+	DBT data;
+	DB_LSN ckp_lsn, last_ckp, next_lsn;
+	__txn_ckp_args *ckp_args;
+	int ret, verbose;
+
+	/* Only trace when an environment exists and asks for it. */
+	verbose = lp->dbenv != NULL && lp->dbenv->db_verbose != 0;
+
+	/*
+	 * Need to find the appropriate point from which to begin
+	 * recovery.
+	 */
+	memset(&data, 0, sizeof(data));
+	if (F_ISSET(lp, DB_AM_THREAD))
+		F_SET(&data, DB_DBT_MALLOC);
+	/* No checkpoint recorded (ENOENT) is reported as DB_NOTFOUND. */
+	if ((ret = log_get(lp, &last_ckp, &data, DB_CHECKPOINT)) != 0)
+		return (ret == ENOENT ? DB_NOTFOUND : ret);
+	/* A zero ckp_lsn marks "not yet taken from the newest checkpoint". */
+	ZERO_LSN(ckp_lsn);
+
+	/*
+	 * Walk the chain of checkpoint records backwards, following each
+	 * record's last_ckp field, until one is found at or before ckp_lsn
+	 * or the chain ends (zero LSN).
+	 */
+	next_lsn = last_ckp;
+	do {
+		/* Release the buffer left over from the previous log_get. */
+		if (F_ISSET(lp, DB_AM_THREAD))
+			free(data.data);
+
+		if ((ret = log_get(lp, &next_lsn, &data, DB_SET)) != 0)
+			return (ret);
+		if ((ret = __txn_ckp_read(data.data, &ckp_args)) != 0) {
+			if (F_ISSET(lp, DB_AM_THREAD))
+				free(data.data);
+			return (ret);
+		}
+		/* Only the first (most recent) checkpoint sets ckp_lsn. */
+		if (IS_ZERO_LSN(ckp_lsn))
+			ckp_lsn = ckp_args->ckp_lsn;
+		if (verbose) {
+			/*
+			 * NOTE(review): last_ckp is only advanced below, so
+			 * after the first pass this prints the LSN of the
+			 * record read on the previous pass, not the one just
+			 * read (next_lsn) -- confirm that is intended.
+			 */
+			__db_err(lp->dbenv, "Checkpoint at: [%lu][%lu]",
+			    (u_long)last_ckp.file, (u_long)last_ckp.offset);
+			__db_err(lp->dbenv, "Checkpoint LSN: [%lu][%lu]",
+			    (u_long)ckp_args->ckp_lsn.file,
+			    (u_long)ckp_args->ckp_lsn.offset);
+			__db_err(lp->dbenv, "Previous checkpoint: [%lu][%lu]",
+			    (u_long)ckp_args->last_ckp.file,
+			    (u_long)ckp_args->last_ckp.offset);
+		}
+		/* Remember where this record was and step to its predecessor. */
+		last_ckp = next_lsn;
+		next_lsn = ckp_args->last_ckp;
+		free(ckp_args);
+	} while (!IS_ZERO_LSN(next_lsn) &&
+	    log_compare(&last_ckp, &ckp_lsn) > 0);
+
+	/* Release the buffer from the final log_get of the loop. */
+	if (F_ISSET(lp, DB_AM_THREAD))
+		free(data.data);
+
+	/*
+	 * At this point, either, next_lsn is ZERO or ckp_lsn is the
+	 * checkpoint lsn and last_ckp is the LSN of the last checkpoint
+	 * before ckp_lsn.  If the compare in the loop is still true, then
+	 * next_lsn must be 0 and we need to roll forward from the
+	 * beginning of the log.
+	 */
+	if (log_compare(&last_ckp, &ckp_lsn) > 0) {
+		/* Only the LSN of the first record is wanted, not its data. */
+		if ((ret = log_get(lp, &last_ckp, &data, DB_FIRST)) != 0)
+			return (ret);
+		if (F_ISSET(lp, DB_AM_THREAD))
+			free(data.data);
+	} 
+	*lsnp = last_ckp;
+
+	if (verbose)
+		__db_err(lp->dbenv, "Rolling forward from [%lu][%lu]",
+			(u_long)last_ckp.file, (u_long)last_ckp.offset);
+
+	return (IS_ZERO_LSN(last_ckp) ? DB_NOTFOUND : 0);
+}
diff --git a/db2/log/log_get.c b/db2/log/log_get.c
new file mode 100644
index 0000000000..37eb5cb249
--- /dev/null
+++ b/db2/log/log_get.c
@@ -0,0 +1,355 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *	Sleepycat Software.  All rights reserved.
+ */
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)log_get.c	10.16 (Sleepycat) 8/19/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#endif
+
+#include "db_int.h"
+#include "shqueue.h"
+#include "db_page.h"
+#include "log.h"
+#include "hash.h"
+#include "common_ext.h"
+
+/*
+ * log_get --
+ *	Get a log record.
+ *
+ *	Validates the flags, acquires the log region lock and has __log_get
+ *	fetch the record selected by flags into dbt, storing its LSN in
+ *	alsn.  On a threaded handle (DB_AM_THREAD) the cursor-relative
+ *	flags DB_NEXT, DB_PREV and DB_CURRENT are rejected and dbt must
+ *	specify DB_DBT_USERMEM or DB_DBT_MALLOC.
+ *
+ *	Returns 0 on success, otherwise the value returned by the flag
+ *	checking routines or by __log_get.
+ */
+int
+log_get(dblp, alsn, dbt, flags)
+	DB_LOG *dblp;
+	DB_LSN *alsn;
+	DBT *dbt;
+	int flags;
+{
+	int ret;
+
+	/* Validate arguments. */
+#define	OKFLAGS	(DB_CHECKPOINT | \
+    DB_CURRENT | DB_FIRST | DB_LAST | DB_NEXT | DB_PREV | DB_SET)
+	if (flags != 0) {
+		if ((ret =
+		    __db_fchk(dblp->dbenv, "log_get", flags, OKFLAGS)) != 0)
+			return (ret);
+		/* Exactly one of the known values may be specified. */
+		switch (flags) {
+		case DB_CHECKPOINT:
+		case DB_CURRENT:
+		case DB_FIRST:
+		case DB_LAST:
+		case DB_NEXT:
+		case DB_PREV:
+		case DB_SET:
+			break;
+		default:
+			return (__db_ferr(dblp->dbenv, "log_get", 1));
+		}
+	}
+	if (F_ISSET(dblp, DB_AM_THREAD)) {
+		if (LF_ISSET(DB_NEXT | DB_PREV | DB_CURRENT))
+			return (__db_ferr(dblp->dbenv, "log_get", 1));
+		if (!F_ISSET(dbt, DB_DBT_USERMEM | DB_DBT_MALLOC))
+			return (__db_ferr(dblp->dbenv, "threaded data", 1));
+	}
+
+	LOCK_LOGREGION(dblp);
+
+	/*
+	 * If we get one of the log's header records, repeat the operation.
+	 * This assumes that applications don't ever request the log header
+	 * records by LSN, but that seems reasonable to me.
+	 */
+	ret = __log_get(dblp, alsn, dbt, flags, 0);
+	if (ret == 0 && alsn->offset == 0) {
+		switch (flags) {
+		case DB_FIRST:
+			flags = DB_NEXT;
+			break;
+		case DB_LAST:
+			flags = DB_PREV;
+			break;
+		}
+
+		/*
+		 * The header record is being discarded.  If it was returned
+		 * in memory allocated on the caller's behalf, release that
+		 * memory now: the second call allocates again and would
+		 * otherwise overwrite (and leak) the only reference to it.
+		 */
+		if (F_ISSET(dbt, DB_DBT_MALLOC)) {
+			free(dbt->data);
+			dbt->data = NULL;
+		}
+		ret = __log_get(dblp, alsn, dbt, flags, 0);
+	}
+
+	UNLOCK_LOGREGION(dblp);
+
+	return (ret);
+}
+
+/*
+ * __log_get --
+ *	Get a log record; internal version.
+ *
+ *	Chooses the target LSN from flags (relative to the cursor kept in
+ *	dblp, the region's checkpoint/end-of-log LSNs, or *alsn for DB_SET),
+ *	copies the record into dbt from the in-memory buffer and/or the log
+ *	file, and verifies the record checksum.  On success the cursor in
+ *	dblp is updated and the record's LSN is stored in *alsn.
+ *
+ *	A non-zero silent suppresses the checksum-mismatch message and the
+ *	message normally output on the err1 path.
+ *
+ *	Returns 0 on success, DB_NOTFOUND when the request falls outside
+ *	the log, ENOENT when DB_CHECKPOINT is requested but no checkpoint
+ *	is set, EIO when the record is short or fails its checksum, and
+ *	otherwise the error returned by the failing call.
+ *
+ * PUBLIC: int __log_get __P((DB_LOG *, DB_LSN *, DBT *, int, int));
+ */
+int
+__log_get(dblp, alsn, dbt, flags, silent)
+	DB_LOG *dblp;
+	DB_LSN *alsn;
+	DBT *dbt;
+	int flags, silent;
+{
+	DB_LSN nlsn;
+	HDR hdr;
+	LOG *lp;
+	size_t len;
+	ssize_t nr;
+	int cnt, ret;
+	const char *fail;
+	char *np, *tbuf;
+	void *p, *shortp;
+
+	lp = dblp->lp;
+	fail = np = tbuf = NULL;
+
+	/* Start from the cursor; the switch below adjusts or replaces it. */
+	nlsn = dblp->c_lsn;
+	switch (flags) {
+	case DB_CHECKPOINT:
+		nlsn = dblp->lp->c_lsn;
+		if (IS_ZERO_LSN(nlsn)) {
+			__db_err(dblp->dbenv,
+	"log_get: unable to find checkpoint record: no checkpoint set.");
+			ret = ENOENT;
+			goto err2;
+		}
+		break;
+	case DB_NEXT:				/* Next log record. */
+		if (!IS_ZERO_LSN(nlsn)) {
+			/* Increment the cursor by the cursor record size. */
+			nlsn.offset += dblp->c_len;
+			break;
+		}
+		/* FALLTHROUGH */
+	case DB_FIRST:				/* Find the first log record. */
+		/*
+		 * Find any log file.  Note, we may have only entered records
+		 * in the buffer, and not yet written a log file.
+		 */
+		if ((ret = __log_find(dblp->dbenv, lp, &cnt)) != 0) {
+			__db_err(dblp->dbenv,
+	"log_get: unable to find the first record: no log files found.");
+			goto err2;
+		}
+
+		/* If there's anything in the buffer, it belongs to file 1. */
+		if (cnt == 0)
+			cnt = 1;
+
+		/* Now go backwards to find the smallest one. */
+		for (; cnt > 1; --cnt)
+			if (__log_valid(dblp->dbenv, NULL, cnt) != 0) {
+				++cnt;
+				break;
+			}
+		nlsn.file = cnt;
+		nlsn.offset = 0;
+		break;
+	case DB_CURRENT:			/* Current log record. */
+		break;
+	case DB_PREV:				/* Previous log record. */
+		if (!IS_ZERO_LSN(nlsn)) {
+			/* If at start-of-file, move to the previous file. */
+			if (nlsn.offset == 0) {
+				if (nlsn.file == 1)
+					return (DB_NOTFOUND);
+
+				--nlsn.file;
+				nlsn.offset = dblp->c_off;
+			} else
+				nlsn.offset = dblp->c_off;
+			break;
+		}
+		/* FALLTHROUGH */
+	case DB_LAST:				/* Last log record. */
+		nlsn.file = lp->lsn.file;
+		nlsn.offset = lp->lsn.offset - lp->len;
+		break;
+	case DB_SET:				/* Set log record. */
+		nlsn = *alsn;
+		break;
+	}
+
+retry:
+	/* Return DB_NOTFOUND if the request is past end-of-file. */
+	if (nlsn.file > lp->lsn.file ||
+	    (nlsn.file == lp->lsn.file && nlsn.offset >= lp->lsn.offset))
+		return (DB_NOTFOUND);
+
+	/* If we've switched files, discard the current fd. */
+	if (dblp->c_lsn.file != nlsn.file && dblp->c_fd != -1) {
+		(void)__db_close(dblp->c_fd);
+		dblp->c_fd = -1;
+	}
+
+	/* If the entire record is in the in-memory buffer, copy it out. */
+	if (nlsn.file == lp->lsn.file && nlsn.offset >= lp->w_off) {
+		/* Copy the header. */
+		p = lp->buf + (nlsn.offset - lp->w_off);
+		memcpy(&hdr, p, sizeof(HDR));
+
+		/* Copy the record. */
+		len = hdr.len - sizeof(HDR);
+		if ((ret = __db_retcopy(dbt, (u_int8_t *)p + sizeof(HDR),
+		    len, &dblp->c_dbt.data, &dblp->c_dbt.ulen, NULL)) != 0)
+			goto err1;
+		goto cksum;
+	}
+
+	/*
+	 * Move the file descriptor to the page that has the hdr.  We dealt
+	 * with moving to a previous log file in the flags switch code, but
+	 * we don't yet know if we'll need to move to a subsequent file.
+	 *
+	 * Acquire a file descriptor.
+	 */
+	if (dblp->c_fd == -1) {
+		if ((ret = __log_name(dblp->dbenv, nlsn.file, &np)) != 0)
+			goto err1;
+		if ((ret = __db_fdopen(np, DB_RDONLY | DB_SEQUENTIAL,
+		    DB_RDONLY | DB_SEQUENTIAL, 0, &dblp->c_fd)) != 0) {
+			/* np is reported by err1 and released at err2. */
+			fail = np;
+			goto err1;
+		}
+		free(np);
+		np = NULL;
+	}
+
+	/* Seek to the header offset and read the header. */
+	if ((ret = __db_lseek(dblp->c_fd, 0, 0, nlsn.offset, SEEK_SET)) != 0) {
+		fail = "seek";
+		goto err1;
+	}
+	if ((ret = __db_read(dblp->c_fd, &hdr, sizeof(HDR), &nr)) != 0) {
+		fail = "read";
+		goto err1;
+	}
+	if (nr == sizeof(HDR))
+		shortp = NULL;
+	else {
+		/* If read returns EOF, try the next file. */
+		if (nr == 0) {
+			if (flags != DB_NEXT || nlsn.file == lp->lsn.file)
+				goto corrupt;
+
+			/* Move to the next file. */
+			++nlsn.file;
+			nlsn.offset = 0;
+			goto retry;
+		}
+
+		/*
+		 * If read returns a short count the rest of the record has
+		 * to be in the in-memory buffer.
+		 */
+		if (lp->b_off < sizeof(HDR) - nr)
+			goto corrupt;
+
+		/* Get the rest of the header from the in-memory buffer. */
+		memcpy((u_int8_t *)&hdr + nr, lp->buf, sizeof(HDR) - nr);
+		shortp = lp->buf + (sizeof(HDR) - nr);
+	}
+
+	/*
+	 * Check for buffers of 0's, that's what we usually see during
+	 * recovery, although it's certainly not something on which we
+	 * can depend.
+	 */
+	if (hdr.len <= sizeof(HDR))
+		goto corrupt;
+	len = hdr.len - sizeof(HDR);
+
+	/* If we've already moved to the in-memory buffer, fill from there. */
+	if (shortp != NULL) {
+		if (lp->b_off < ((u_int8_t *)shortp - lp->buf) + len)
+			goto corrupt;
+		if ((ret = __db_retcopy(dbt, shortp, len,
+		    &dblp->c_dbt.data, &dblp->c_dbt.ulen, NULL)) != 0)
+			goto err1;
+		goto cksum;
+	}
+
+	/* Allocate temporary memory to hold the record. */
+	if ((tbuf = (char *)malloc(len)) == NULL) {
+		ret = ENOMEM;
+		goto err1;
+	}
+
+	/*
+	 * Read the record into the buffer.  If read returns a short count,
+	 * there was an error or the rest of the record is in the in-memory
+	 * buffer.  Note, the information may be garbage if we're in recovery,
+	 * so don't read past the end of the buffer's memory.
+	 */
+	if ((ret = __db_read(dblp->c_fd, tbuf, len, &nr)) != 0) {
+		fail = "read";
+		goto err1;
+	}
+	if (len - nr > sizeof(lp->buf))
+		goto corrupt;
+	if (nr != (ssize_t)len) {
+		if (lp->b_off < len - nr)
+			goto corrupt;
+
+		/* Get the rest of the record from the in-memory buffer. */
+		memcpy((u_int8_t *)tbuf + nr, lp->buf, len - nr);
+	}
+
+	/* Copy the record into the user's DBT. */
+	if ((ret = __db_retcopy(dbt, tbuf, len,
+	    &dblp->c_dbt.data, &dblp->c_dbt.ulen, NULL)) != 0)
+		goto err1;
+
+	/*
+	 * Done with the temporary copy.  Clear the pointer as well: a
+	 * checksum failure below goes through the error path, which frees
+	 * tbuf whenever it is non-NULL, and must not free it a second time.
+	 */
+	free(tbuf);
+	tbuf = NULL;
+
+cksum:	if (hdr.cksum != __ham_func4(dbt->data, dbt->size)) {
+		if (!silent)
+			__db_err(dblp->dbenv, "log_get: checksum mismatch");
+		goto corrupt;
+	}
+
+	/* Update the cursor and the return lsn. */
+	dblp->c_off = hdr.prev;
+	dblp->c_len = hdr.len;
+	dblp->c_lsn = *alsn = nlsn;
+
+	return (0);
+
+corrupt:/*
+	 * This is the catchall -- for some reason we didn't find enough
+	 * information or it wasn't reasonable information, and it wasn't
+	 * because a system call failed.
+	 */
+	ret = EIO;
+	fail = "read";
+
+err1:	if (!silent) {
+		if (fail == NULL)
+			__db_err(dblp->dbenv, "log_get: %s", strerror(ret));
+		else
+			__db_err(dblp->dbenv,
+			    "log_get: %s: %s", fail, strerror(ret));
+	}
+err2:	if (np != NULL)
+		free(np);
+	if (tbuf != NULL)
+		free(tbuf);
+	return (ret);
+}
diff --git a/db2/log/log_put.c b/db2/log/log_put.c
new file mode 100644
index 0000000000..db31f9b0e1
--- /dev/null
+++ b/db2/log/log_put.c
@@ -0,0 +1,484 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *	Sleepycat Software.  All rights reserved.
+ */
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)log_put.c	10.12 (Sleepycat) 8/20/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+#endif
+
+#include "db_int.h"
+#include "shqueue.h"
+#include "db_page.h"
+#include "log.h"
+#include "hash.h"
+#include "common_ext.h"
+
+static int __log_fill __P((DB_LOG *, void *, u_int32_t));
+static int __log_newfd __P((DB_LOG *));
+static int __log_write __P((DB_LOG *, void *, u_int32_t));
+static int __log_putr __P((DB_LOG *, const DBT *, u_int32_t));
+
+/*
+ * log_put --
+ *	Write a log record.
+ *
+ *	Public entry point: checks that flags is 0, DB_CHECKPOINT or
+ *	DB_FLUSH, then runs __log_put with the log region locked.
+ */
+int
+log_put(dblp, lsn, dbt, flags)
+	DB_LOG *dblp;
+	DB_LSN *lsn;
+	const DBT *dbt;
+	int flags;
+{
+	int ret;
+
+	/* Validate arguments. */
+#define	OKFLAGS	(DB_CHECKPOINT | DB_FLUSH)
+	if (flags != 0) {
+		ret = __db_fchk(dblp->dbenv, "log_put", flags, OKFLAGS);
+		if (ret != 0)
+			return (ret);
+
+		/* The two flags may not be combined. */
+		if (flags != DB_CHECKPOINT && flags != DB_FLUSH)
+			return (__db_ferr(dblp->dbenv, "log_put", 1));
+	}
+
+	/* The real work happens under the region lock. */
+	LOCK_LOGREGION(dblp);
+	ret = __log_put(dblp, lsn, dbt, flags);
+	UNLOCK_LOGREGION(dblp);
+
+	return (ret);
+}
+
+/*
+ * __log_put --
+ *	Write a log record; internal version.
+ *
+ *	Appends the record in dbt to the log, first switching to a new log
+ *	file if the record would not fit in the current one, and returns
+ *	the record's LSN through lsn.  With DB_CHECKPOINT the registered
+ *	file names are also logged and the log is written and sync'd; with
+ *	DB_FLUSH the log is written and sync'd.
+ *
+ *	Returns 0 on success, EINVAL if the record can never fit in a log
+ *	file, otherwise the error from the failing write/sync/log call.
+ *
+ * PUBLIC: int __log_put __P((DB_LOG *, DB_LSN *, const DBT *, int));
+ */
+int
+__log_put(dblp, lsn, dbt, flags)
+	DB_LOG *dblp;
+	DB_LSN *lsn;
+	const DBT *dbt;
+	int flags;
+{
+	DBT t;
+	DBT fid_dbt;
+	DB_LSN r_unused;
+	FNAME *fnp;
+	LOG *lp;
+	u_int32_t lastoff;
+	int ret;
+
+	lp = dblp->lp;
+
+	/* If this information won't fit in the file, swap files. */
+	if (lp->lsn.offset + sizeof(HDR) + dbt->size > lp->persist.lg_max) {
+		/*
+		 * A new file starts with a LOGP header record, so the record
+		 * has to fit alongside that or it can never be written.
+		 */
+		if (sizeof(HDR) +
+		    sizeof(LOGP) + dbt->size > lp->persist.lg_max) {
+			__db_err(dblp->dbenv,
+			    "log_put: record larger than maximum file size");
+			return (EINVAL);
+		}
+		/* Write and sync whatever is buffered for the old file. */
+		if (lp->b_off != 0) {
+			if ((ret = __log_write(dblp, lp->buf, lp->b_off)) != 0)
+				return (ret);
+			if ((ret = __db_fsync(dblp->lfd)) != 0)
+				return (ret);
+			lp->s_lsn.file = lp->lsn.file;
+			lp->s_lsn.offset = lp->lsn.offset - 1;
+		}
+
+		/*
+		 * Save the last known offset from the previous file, we'll
+		 * need it to initialize the persistent header information.
+		 */
+		lastoff = lp->lsn.offset;
+
+		/* Move the end-of-log and the write offset to the new file. */
+		++lp->lsn.file;
+		lp->lsn.offset = 0;
+		lp->w_off = 0;
+	} else
+		lastoff = 0;
+
+	/*
+	 * Insert persistent information as the first record in every file.
+	 * Note that the previous length is wrong for the very first record
+	 * of the log, but that's okay, we check for it during retrieval.
+	 */
+	if (lp->lsn.offset == 0) {
+		/* Only the data and size fields of t are filled in. */
+		t.data = &lp->persist;
+		t.size = sizeof(LOGP);
+		if ((ret = __log_putr(dblp,
+		    &t, lastoff == 0 ? 0 : lastoff - lp->len)) != 0)
+			return (ret);
+	}
+
+	/* Initialize the LSN information returned to the user. */
+	lsn->file = lp->lsn.file;
+	lsn->offset = lp->lsn.offset;
+
+	/* Put out the user's record. */
+	if ((ret = __log_putr(dblp, dbt, lp->lsn.offset - lp->len)) != 0)
+		return (ret);
+
+	/*
+	 * On a checkpoint, we:
+	 *	Put out the checkpoint record (above).
+	 *	Save the LSN of the checkpoint in the shared region.
+	 *	Append the set of file name information into the log.
+	 *	Flush the current buffer contents to disk.
+	 *	Sync the log to disk.
+	 *	Save the time the checkpoint was written.
+	 *	Reset the bytes written since the last checkpoint.
+	 */
+	if (flags == DB_CHECKPOINT) {
+		lp->c_lsn = *lsn;
+
+		/* Log one register record per entry on the file name queue. */
+		for (fnp = SH_TAILQ_FIRST(&dblp->lp->fq, __fname);
+		    fnp != NULL; fnp = SH_TAILQ_NEXT(fnp, q, __fname)) {
+			t.data = ADDR(dblp, fnp->name_off);
+			t.size = strlen(t.data) + 1;
+			memset(&fid_dbt, 0, sizeof(fid_dbt));
+			fid_dbt.data = ADDR(dblp, fnp->fileid_off);
+			fid_dbt.size = DB_FILE_ID_LEN;
+			if ((ret = __log_register_log(dblp, NULL, &r_unused,
+			    0, &t, &fid_dbt, fnp->id, fnp->s_type)) != 0)
+				return (ret);
+		}
+		if (lp->b_off != 0 &&
+		    (ret = __log_write(dblp, lp->buf, lp->b_off)) != 0)
+			return (ret);
+		(void)time(&lp->chkpt);
+		lp->written = 0;
+
+		if ((ret = __db_fsync(dblp->lfd)) != 0)
+			return (ret);
+		lp->s_lsn.file = lp->lsn.file;
+		lp->s_lsn.offset = lp->lsn.offset - 1;
+	}
+
+	/*
+	 * We always flush on a checkpoint.
+	 *
+	 * NOTE(review): for DB_CHECKPOINT the block above has already
+	 * written the buffer and sync'd, so this repeats the sync and the
+	 * s_lsn update -- looks redundant, confirm before changing.
+	 */
+	if (flags == DB_FLUSH || flags == DB_CHECKPOINT) {
+		if (lp->b_off != 0 &&
+		    (ret = __log_write(dblp, lp->buf, lp->b_off)) != 0)
+			return (ret);
+
+		if ((ret = __db_fsync(dblp->lfd)) != 0)
+			return (ret);
+		lp->s_lsn.file = lp->lsn.file;
+		lp->s_lsn.offset = lp->lsn.offset - 1;
+	}
+
+	/*
+	 * If we just did I/O, i.e., this LSN could have spanned the start of
+	 * the in-core buffer, we remember it so that we can flush correctly
+	 * during a sync.
+	 */
+	if (lsn->offset < lp->w_off && lsn->offset + lp->len > lp->w_off)
+		lp->span_lsn = *lsn;
+	return (0);
+}
+
+/*
+ * __log_putr --
+ *	Actually put a record into the log.
+ *
+ *	Builds the record header (offset of the previous record, total
+ *	length, checksum of the data), then hands header and data to
+ *	__log_fill, advancing the end-of-log offset after each piece and
+ *	finally remembering the length of this record in the region.
+ */
+static int
+__log_putr(dblp, dbt, prev)
+	DB_LOG *dblp;
+	const DBT *dbt;
+	u_int32_t prev;
+{
+	HDR hdr;
+	LOG *lp;
+	int ret;
+
+	lp = dblp->lp;
+
+	/*
+	 * Initialize the header.  If we just switched files, lsn.offset will
+	 * be 0, and what we really want is the offset of the previous record
+	 * in the previous file.  Fortunately, prev holds the value we want.
+	 */
+	hdr.cksum = __ham_func4(dbt->data, dbt->size);
+	hdr.len = sizeof(HDR) + dbt->size;
+	hdr.prev = prev;
+
+	/* Header first ... */
+	ret = __log_fill(dblp, &hdr, sizeof(HDR));
+	if (ret != 0)
+		return (ret);
+	lp->lsn.offset += sizeof(HDR);
+
+	/* ... then the caller's data. */
+	ret = __log_fill(dblp, dbt->data, dbt->size);
+	if (ret != 0)
+		return (ret);
+	lp->lsn.offset += dbt->size;
+
+	lp->len = sizeof(HDR) + dbt->size;
+	return (0);
+}
+
+/*
+ * log_flush --
+ *	Write all records less than or equal to the specified LSN.
+ *
+ *	If lsn is NULL the LSN of the last record put is used, i.e. the
+ *	entire log is flushed.  The work is done with the log region
+ *	locked.
+ *
+ *	Returns 0 on success (including when nothing needed to be done),
+ *	EINVAL if lsn is past the current end of the log, otherwise the
+ *	error from writing the buffer, opening the log file or the sync.
+ */
+int
+log_flush(dblp, lsn)
+	DB_LOG *dblp;
+	const DB_LSN *lsn;
+{
+	DB_LSN t_lsn;
+	LOG *lp;
+	int ret;
+
+	ret = 0;
+	lp = dblp->lp;
+
+	LOCK_LOGREGION(dblp);
+
+	/* If no LSN specified, flush the entire log. */
+	if (lsn == NULL) {
+		t_lsn.file = lp->lsn.file;
+		t_lsn.offset = lp->lsn.offset - lp->len;
+		lsn = &t_lsn;
+	}
+
+	/* If it's a non-existent record, it's an error. */
+	if (lsn->file > lp->lsn.file ||
+	    (lsn->file == lp->lsn.file && lsn->offset > lp->lsn.offset)) {
+		__db_err(dblp->dbenv, "log_flush: LSN past current end-of-log");
+		ret = EINVAL;
+		goto ret1;
+	}
+
+	/*
+	 * If it's from a previous file, we're done because we sync each
+	 * file when we move to a new one.
+	 */
+	if (lsn->file < lp->lsn.file)
+		goto ret1;
+
+	/*
+	 * If it's no greater than the last-sync'd LSN, we've already sync'd
+	 * this LSN.  The offsets are only comparable when both LSNs are in
+	 * the same file: right after a switch to a new log file s_lsn still
+	 * names the previous file, and its (large) offset must not be
+	 * allowed to mask the (small) offsets of unsync'd records in the
+	 * current file.
+	 */
+	if (lsn->file == lp->s_lsn.file && lsn->offset <= lp->s_lsn.offset)
+		goto ret1;
+
+	/*
+	 * We may need to write the current buffer.  We have to write the
+	 * current buffer if the sync LSN is greater than or equal to the
+	 * saved spanning-LSN.  Compare the LSNs as a whole (file first,
+	 * then offset); comparing the two fields independently gives the
+	 * wrong answer when the spanning-LSN is in an earlier file.  There
+	 * is nothing to write if the buffer is empty.
+	 */
+	if (lp->b_off != 0 &&
+	    (lsn->file > lp->span_lsn.file ||
+	    (lsn->file == lp->span_lsn.file &&
+	    lsn->offset >= lp->span_lsn.offset)))
+		if ((ret = __log_write(dblp, lp->buf, lp->b_off)) != 0)
+			goto ret1;
+
+	/*
+	 * Acquire a file descriptor if we don't have one.  Use the same
+	 * test as __log_write: the descriptor may be unset even though
+	 * lfname already matches, e.g. after a failed open in __log_newfd.
+	 */
+	if ((dblp->lfd == -1 || dblp->lfname != dblp->lp->lsn.file) &&
+	    (ret = __log_newfd(dblp)) != 0)
+		goto ret1;
+
+	if ((ret = __db_fsync(dblp->lfd)) != 0)
+		goto ret1;
+
+	/* Remember how far the log is known to be on disk. */
+	lp->s_lsn.file = lp->lsn.file;
+	lp->s_lsn.offset = lsn->offset;
+
+ret1:	UNLOCK_LOGREGION(dblp);
+	return (ret);
+}
+
+/*
+ * __log_fill --
+ *	Write information into the log.
+ *
+ *	Moves len bytes starting at addr into the log.  Data is normally
+ *	staged in the region's buffer, which is written out each time it
+ *	becomes full; when the buffer is empty and at least a buffer's
+ *	worth of data remains, whole buffer-sized chunks are written
+ *	straight from the caller's memory instead.
+ */
+static int
+__log_fill(dblp, addr, len)
+	DB_LOG *dblp;
+	void *addr;
+	u_int32_t len;
+{
+	LOG *lp;
+	u_int32_t nrec;
+	size_t nw, remain;
+	int ret;
+
+	lp = dblp->lp;
+
+	/* Copy out the data. */
+	while (len > 0) {
+		if (lp->b_off == 0 && len >= sizeof(lp->buf)) {
+			/*
+			 * If we're on a buffer boundary and the data is big
+			 * enough, copy as many records as we can directly
+			 * from the data.
+			 */
+			nrec = len / sizeof(lp->buf);
+			ret = __log_write(dblp,
+			    addr, nrec * sizeof(lp->buf));
+			if (ret != 0)
+				return (ret);
+			addr = (u_int8_t *)addr + nrec * sizeof(lp->buf);
+			len -= nrec * sizeof(lp->buf);
+		} else {
+			/* Figure out how many bytes we can copy this time. */
+			remain = sizeof(lp->buf) - lp->b_off;
+			if (remain > len)
+				nw = len;
+			else
+				nw = remain;
+
+			memcpy(lp->buf + lp->b_off, addr, nw);
+			lp->b_off += nw;
+			addr = (u_int8_t *)addr + nw;
+			len -= nw;
+
+			/* If we fill the buffer, flush it. */
+			if (lp->b_off == sizeof(lp->buf)) {
+				ret = __log_write(dblp,
+				    lp->buf, sizeof(lp->buf));
+				if (ret != 0)
+					return (ret);
+			}
+		}
+	}
+	return (0);
+}
+
+/*
+ * __log_write --
+ *	Write the log buffer to disk.
+ *
+ *	Writes len bytes from addr at the region's current write offset in
+ *	the current log file, opening that file first if necessary.  On
+ *	success the write offset and the count of bytes written advance by
+ *	len and the buffer offset is reset.  A short write is reported as
+ *	EIO.
+ */
+static int
+__log_write(dblp, addr, len)
+	DB_LOG *dblp;
+	void *addr;
+	u_int32_t len;
+{
+	LOG *lp;
+	ssize_t nw;
+	int ret;
+
+	lp = dblp->lp;
+
+	/*
+	 * If we haven't opened the log file yet or the current one
+	 * has changed, acquire a new log file.
+	 */
+	if (dblp->lfd == -1 || dblp->lfname != lp->lsn.file) {
+		ret = __log_newfd(dblp);
+		if (ret != 0)
+			return (ret);
+	}
+
+	/*
+	 * Seek to the offset in the file (someone may have written it
+	 * since we last did).
+	 */
+	ret = __db_lseek(dblp->lfd, 0, 0, lp->w_off, SEEK_SET);
+	if (ret != 0)
+		return (ret);
+
+	ret = __db_write(dblp->lfd, addr, len, &nw);
+	if (ret != 0)
+		return (ret);
+	if (nw != (int32_t)len)
+		return (EIO);
+
+	/* Update the seek offset and reset the buffer offset. */
+	lp->w_off += len;
+	lp->written += len;
+	lp->b_off = 0;
+
+	return (0);
+}
+
+/*
+ * log_file --
+ *	Map a DB_LSN to a file name.
+ *
+ *	Builds the name of the log file holding lsn and copies it, with
+ *	its terminating nul, into the len-byte buffer at namep.
+ *
+ *	Returns 0 on success, ENOMEM if the buffer is too small (in which
+ *	case, if the buffer has any room at all, it is set to the empty
+ *	string), otherwise the error from __log_name.
+ */
+int
+log_file(dblp, lsn, namep, len)
+	DB_LOG *dblp;
+	const DB_LSN *lsn;
+	char *namep;
+	size_t len;
+{
+	int ret;
+	char *p;
+
+	LOCK_LOGREGION(dblp);
+
+	ret = __log_name(dblp->dbenv, lsn->file, &p);
+
+	UNLOCK_LOGREGION(dblp);
+
+	if (ret != 0)
+		return (ret);
+
+	/*
+	 * Check to make sure there's enough room and copy the name.  The
+	 * copy needs space for the terminating nul too, and the name is
+	 * allocated memory that has to be released on every path.
+	 */
+	if (len < strlen(p) + 1) {
+		if (len > 0)
+			*namep = '\0';
+		free(p);
+		return (ENOMEM);
+	}
+	(void)strcpy(namep, p);
+	free(p);
+
+	return (0);
+}
+
+/*
+ * __log_newfd --
+ *	Acquire a file descriptor for the current log file.
+ *
+ *	Closes any descriptor currently held in dblp, records the region's
+ *	current file number in dblp->lfname and opens (creating if needed)
+ *	the corresponding log file.  Returns 0 on success, otherwise the
+ *	error from building the name or opening the file; on failure
+ *	dblp->lfd is left at -1.
+ */
+static int
+__log_newfd(dblp)
+	DB_LOG *dblp;
+{
+	int ret;
+	char *p;
+
+	/* Close any previous file descriptor. */
+	if (dblp->lfd != -1) {
+		(void)__db_close(dblp->lfd);
+		dblp->lfd = -1;
+	}
+
+	/* Get the path of the new file and open it. */
+	dblp->lfname = dblp->lp->lsn.file;
+	if ((ret = __log_name(dblp->dbenv, dblp->lfname, &p)) != 0)
+		return (ret);
+
+	/*
+	 * Report the error code the open returned rather than whatever
+	 * errno happens to hold by now, as the log_get code does.
+	 */
+	if ((ret = __db_fdopen(p,
+	    DB_CREATE | DB_SEQUENTIAL,
+	    DB_CREATE | DB_SEQUENTIAL,
+	    dblp->lp->persist.mode, &dblp->lfd)) != 0)
+		__db_err(dblp->dbenv,
+		    "log_put: %s: %s", p, strerror(ret));
+	FREES(p);
+	return (ret);
+}
+
+/*
+ * __log_name --
+ *	Return the log name for a particular file.
+ *
+ *	Formats file number fn with the LFNAME template and passes the
+ *	result to __db_appname, which stores the resulting path in *np.
+ *
+ * PUBLIC: int __log_name __P((DB_ENV *, int, char **));
+ */
+int
+__log_name(dbenv, fn, np)
+	DB_ENV *dbenv;
+	int fn;
+	char **np;
+{
+	char fname[sizeof(LFNAME) + 10];
+	int ret;
+
+	(void)snprintf(fname, sizeof(fname), LFNAME, fn);
+
+	ret = __db_appname(dbenv, DB_APP_LOG, NULL, fname, NULL, np);
+	return (ret);
+}
diff --git a/db2/log/log_rec.c b/db2/log/log_rec.c
new file mode 100644
index 0000000000..dbc5960731
--- /dev/null
+++ b/db2/log/log_rec.c
@@ -0,0 +1,332 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *	Sleepycat Software.  All rights reserved.
+ */
+/*
+ * Copyright (c) 1995, 1996
+ *	The President and Fellows of Harvard University.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)log_rec.c	10.11 (Sleepycat) 8/20/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#endif
+
+#include "db_int.h"
+#include "shqueue.h"
+#include "log.h"
+#include "db_dispatch.h"
+#include "common_ext.h"
+
+static int __log_open_file __P((DB_LOG *, 
+    u_int8_t *, char *, DBTYPE, u_int32_t));
+
+/*
+ * __log_register_recover --
+ *	Recovery routine for log_register records: decode the record and
+ *	make sure the file it names is open and entered in the dbentry
+ *	table under the logged id.  A file that cannot be found is not an
+ *	error (a warning is output during the TXN_OPENFILES pass).
+ *
+ *	The handle is flagged DB_AM_RECOVER for the duration of the call.
+ *	Returns 0 on success, otherwise the error from decoding the record
+ *	or opening the file.
+ *
+ * PUBLIC: int __log_register_recover
+ * PUBLIC:     __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+ */
+int
+__log_register_recover(logp, dbtp, lsnp, redo, info)
+	DB_LOG *logp;
+	DBT *dbtp;
+	DB_LSN *lsnp;
+	int redo;
+	void *info;
+{
+	__log_register_args *argp;
+	int ret;
+
+#ifdef DEBUG_RECOVER
+	__log_register_print(logp, dbtp, lsnp, redo, info);
+#endif
+	info = info;				/* XXX: Shut the compiler up. */
+	lsnp = lsnp;
+
+	/*
+	 * The cleanup code frees argp whenever it is non-NULL, so it must
+	 * have a defined value even if decoding the record fails.
+	 */
+	argp = NULL;
+
+	F_SET(logp, DB_AM_RECOVER);
+
+	if ((ret = __log_register_read(dbtp->data, &argp)) != 0)
+		goto out;
+
+	ret = __log_open_file(logp,
+	    argp->uid.data, argp->name.data, argp->ftype, argp->id);
+	if (ret == ENOENT) {
+		if (redo == TXN_OPENFILES)
+			__db_err(logp->dbenv,
+			    "warning: file %s not found", argp->name.data);
+		ret = 0;
+	}
+
+out:	F_CLR(logp, DB_AM_RECOVER);
+	if (argp != NULL)
+		free(argp);
+	return (ret);
+}
+
+/*
+ * __log_unregister_recover --
+ *	Recovery routine for log_unregister records: drop one reference
+ *	to the dbentry slot named by the logged id, closing the file and
+ *	clearing the slot when the last reference goes away.  Nothing is
+ *	done for the TXN_OPENFILES, TXN_BACKWARD_ROLL and TXN_UNDO passes.
+ *
+ *	Returns 0 on success, EINVAL if the id does not refer to an open
+ *	entry, otherwise the error from decoding the record or from the
+ *	close.
+ *
+ * PUBLIC: int __log_unregister_recover
+ * PUBLIC:     __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+ */
+int
+__log_unregister_recover(logp, dbtp, lsnp, redo, info)
+	DB_LOG *logp;
+	DBT *dbtp;
+	DB_LSN *lsnp;
+	int redo;
+	void *info;
+{
+	__log_unregister_args *argp;
+	int ret;
+
+#ifdef DEBUG_RECOVER
+	__log_unregister_print(logp, dbtp, lsnp, redo, info);
+#endif
+	info = info;				/* XXX: Shut the compiler up. */
+	lsnp = lsnp;
+
+	if (redo == TXN_OPENFILES ||
+	    redo == TXN_BACKWARD_ROLL || redo == TXN_UNDO)
+		return (0);
+
+	/*
+	 * The cleanup code frees argp whenever it is non-NULL, so it must
+	 * have a defined value even if decoding the record fails.
+	 */
+	argp = NULL;
+
+	F_SET(logp, DB_AM_RECOVER);
+	if ((ret = __log_unregister_read(dbtp->data, &argp)) != 0)
+		goto out;
+
+	LOCK_LOGTHREAD(logp);
+	/*
+	 * The id comes from the log; don't index the table with it until
+	 * we know it is inside the table.
+	 */
+	if (argp->id >= logp->dbentry_cnt ||
+	    logp->dbentry[argp->id].dbp == NULL)
+		ret = EINVAL;
+	else if (--logp->dbentry[argp->id].refcount == 0) {
+		ret = logp->dbentry[argp->id].dbp->close(
+		    logp->dbentry[argp->id].dbp, 0);
+		logp->dbentry[argp->id].dbp = NULL;
+	}
+	UNLOCK_LOGTHREAD(logp);
+
+out:	F_CLR(logp, DB_AM_RECOVER);
+	if (argp != NULL)
+		free(argp);
+	return (ret);
+}
+
+/* Hand coded routines. */
+
+/*
+ * __log_open_file --
+ *	Called during log_register recovery.  Make sure that we have an
+ *	entry in the dbentry table for this ndx: if the slot is already in
+ *	use (open, or marked deleted) just take another reference,
+ *	otherwise open the file, check that its file id matches uid and
+ *	enter the result in the table.
+ *
+ * Returns 0 on success, non-zero on error (ENOENT if the file opened
+ * is not the one the log record referred to).
+ */
+static int
+__log_open_file(lp, uid, name, ftype, ndx)
+	DB_LOG *lp;
+	u_int8_t *uid;
+	char *name;
+	DBTYPE ftype;
+	u_int32_t ndx;
+{
+	DB *dbp;
+	int known, ret;
+
+	/* Already have an entry?  Then all that's needed is a reference. */
+	LOCK_LOGTHREAD(lp);
+	known = ndx < lp->dbentry_cnt &&
+	    (lp->dbentry[ndx].deleted == 1 || lp->dbentry[ndx].dbp != NULL);
+	if (known)
+		lp->dbentry[ndx].refcount++;
+	UNLOCK_LOGTHREAD(lp);
+	if (known)
+		return (0);
+
+	/* Need to open file. */
+	dbp = NULL;
+	ret = db_open(name, ftype, 0, 0, lp->dbenv, NULL, &dbp);
+
+	/*
+	 * Verify that we are opening the same file that we were
+	 * referring to when we wrote this log record.
+	 */
+	if (ret == 0 && memcmp(uid, dbp->lock.fileid, DB_FILE_ID_LEN) != 0) {
+		(void)dbp->close(dbp, 0);
+		dbp = NULL;
+		ret = ENOENT;
+	}
+
+	/* Any other failure leaves the table untouched. */
+	if (ret != 0 && ret != ENOENT)
+		return (ret);
+
+	(void)__log_add_logid(lp, dbp, ndx);
+	return (ret);
+}
+
+/*
+ * __log_add_logid --
+ *	Enter dbp in the dbentry table under index ndx, growing the table
+ *	as needed.  A NULL dbp marks the slot as deleted.  If the slot is
+ *	already in use its reference count is incremented instead.
+ *
+ * This function returns:
+ *	0 SUCCESS (the entry was not previously set and is now set or the
+ *		entry was previously set and we just inced the ref count.
+ *	>0 on system error (returns errno value).
+ * PUBLIC: int __log_add_logid __P((DB_LOG *, DB *, u_int32_t));
+ */
+int
+__log_add_logid(logp, dbp, ndx)
+	DB_LOG *logp;
+	DB *dbp;
+	u_int32_t ndx;
+{
+	DB_ENTRY *temp_entryp;
+	u_int32_t i, newcnt;
+	int ret;
+
+	ret = 0;
+
+	LOCK_LOGTHREAD(logp);
+	/*
+	 * Check if we need to grow the table.  Size the new table from
+	 * ndx rather than from the old size: growing by a fixed increment
+	 * is not enough when ndx is further than one increment past the
+	 * end, and the slot is indexed below.
+	 */
+	if (logp->dbentry_cnt <= ndx) {
+		newcnt = ndx + DB_GROW_SIZE;
+		if (logp->dbentry_cnt == 0) {
+			temp_entryp =
+			    (DB_ENTRY *)malloc(newcnt * sizeof(DB_ENTRY));
+			if (temp_entryp == NULL) {
+				ret = ENOMEM;
+				goto err;
+			}
+		} else {
+			/* On failure the existing table is left intact. */
+			temp_entryp = (DB_ENTRY *)realloc(logp->dbentry,
+			    newcnt * sizeof(DB_ENTRY));
+			if (temp_entryp == NULL) {
+				ret = ENOMEM;
+				goto err;
+			}
+		}
+		logp->dbentry = temp_entryp;
+
+		/* Initialize the new entries. */
+		for (i = logp->dbentry_cnt; i < newcnt; i++) {
+			logp->dbentry[i].dbp = NULL;
+			logp->dbentry[i].deleted = 0;
+			logp->dbentry[i].refcount = 0;
+		}
+
+		logp->dbentry_cnt = newcnt;
+	}
+
+	if (logp->dbentry[ndx].deleted == 0 && logp->dbentry[ndx].dbp == NULL) {
+		/* Unused slot: claim it. */
+		logp->dbentry[ndx].dbp = dbp;
+		logp->dbentry[ndx].refcount = 1;
+		logp->dbentry[ndx].deleted = dbp == NULL;
+	} else
+		logp->dbentry[ndx].refcount++;
+
+err:	UNLOCK_LOGTHREAD(logp);
+	return (ret);
+}
+
+
+/*
+ * __db_fileid_to_db --
+ *	Return the DB corresponding to the specified fileid.
+ *
+ *	Looks up slot ndx of the dbentry table.  Returns 0 with the handle
+ *	stored in *dbpp, DB_DELETED if the slot is marked deleted, or
+ *	ENOENT if there is no handle for ndx.  *dbpp is not written when
+ *	DB_DELETED is returned or when ndx lies outside the table.
+ *
+ * PUBLIC: int __db_fileid_to_db __P((DB_LOG *, DB **, u_int32_t));
+ */
+int
+__db_fileid_to_db(logp, dbpp, ndx)
+	DB_LOG *logp;
+	DB **dbpp;
+	u_int32_t ndx;
+{
+	int ret;
+
+	ret = 0;
+	LOCK_LOGTHREAD(logp);
+
+	/*
+	 * An index past the end of the table can't have a DB; don't read
+	 * beyond the table to find that out.
+	 */
+	if (ndx >= logp->dbentry_cnt) {
+		ret = ENOENT;
+		goto err;
+	}
+
+	/*
+	 * Return DB_DELETED if the file has been deleted
+	 * (it's not an error).
+	 */
+	if (logp->dbentry[ndx].deleted) {
+		ret = DB_DELETED;
+		goto err;
+	}
+
+	/*
+	 * Otherwise return 0, but if we don't have a corresponding DB,
+	 * it's an error.
+	 */
+	if ((*dbpp = logp->dbentry[ndx].dbp) == NULL)
+		ret = ENOENT;
+
+err:	UNLOCK_LOGTHREAD(logp);
+	return (ret);
+}
+
+/*
+ * __log_close_files --
+ *	Close files that were opened by the recovery daemon.
+ *
+ *	Every handle still present in the dbentry table is closed (the
+ *	result of the close is ignored) and removed from the table, so
+ *	that no closed handle can be found or closed again later.
+ *
+ * PUBLIC: void __log_close_files __P((DB_LOG *));
+ */
+void
+__log_close_files(logp)
+	DB_LOG *logp;
+{
+	u_int32_t i;
+
+	LOCK_LOGTHREAD(logp);
+	for (i = 0; i < logp->dbentry_cnt; i++)
+		if (logp->dbentry[i].dbp != NULL) {
+			(void)logp->dbentry[i].dbp->close(
+			    logp->dbentry[i].dbp, 0);
+			/* Don't leave a pointer to a closed handle behind. */
+			logp->dbentry[i].dbp = NULL;
+		}
+	UNLOCK_LOGTHREAD(logp);
+}
+
+/*
+ * __log_rem_logid --
+ *	Drop one reference to slot ndx of the dbentry table; when the
+ *	count reaches zero the slot is returned to the unused state (no
+ *	handle, not deleted).
+ *
+ * PUBLIC: void __log_rem_logid __P((DB_LOG *, u_int32_t));
+ */
+void
+__log_rem_logid(logp, ndx)
+	DB_LOG *logp;
+	u_int32_t ndx;
+{
+	DB_ENTRY *entryp;
+
+	LOCK_LOGTHREAD(logp);
+	entryp = &logp->dbentry[ndx];
+	if (--entryp->refcount == 0) {
+		entryp->deleted = 0;
+		entryp->dbp = NULL;
+	}
+	UNLOCK_LOGTHREAD(logp);
+}
diff --git a/db2/log/log_register.c b/db2/log/log_register.c
new file mode 100644
index 0000000000..582eab9408
--- /dev/null
+++ b/db2/log/log_register.c
@@ -0,0 +1,199 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *	Sleepycat Software.  All rights reserved.
+ */
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)log_register.c	10.10 (Sleepycat) 8/20/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#endif
+
+#include "db_int.h"
+#include "shqueue.h"
+#include "log.h"
+#include "common_ext.h"
+
+/*
+ * log_register --
+ *	Register a file name.
+ *
+ *	If an entry with the same unique file id (dbp->lock.fileid) is
+ *	already on the region's file-name queue, its id is reused and its
+ *	reference count is incremented.  Otherwise a new FNAME entry, with
+ *	an id one greater than the largest id on the queue, is allocated
+ *	from the region and inserted at the head of the queue.
+ *
+ *	Unless the log handle has DB_AM_RECOVER set, a new registration is
+ *	also written to the log, and the id is entered in the process local
+ *	table with __log_add_logid.
+ *
+ *	Returns 0 on success, EINVAL for an unknown file type, or the error
+ *	returned by one of the called routines.  If idp is not NULL, the id
+ *	is stored through it whenever the queue was searched, even if a
+ *	later step failed.
+ */
+int
+log_register(dblp, dbp, name, type, idp)
+	DB_LOG *dblp;
+	DB *dbp;
+	const char *name;
+	DBTYPE type;
+	u_int32_t *idp;
+{
+	DBT r_name;
+	DBT fid_dbt;
+	DB_LSN r_unused;
+	FNAME *fnp;
+	size_t len;
+	u_int32_t fid;
+	int inserted, ret;
+	char *fullname;
+	void *fidp, *namep;
+
+	fid = 0;
+	inserted = 0;
+	fullname = NULL;
+	fnp = fidp = namep = NULL;
+
+	/* Check the arguments. */
+	if (type != DB_BTREE && type != DB_HASH && type != DB_RECNO) {
+		__db_err(dblp->dbenv, "log_register: unknown DB file type");
+		return (EINVAL);
+	}
+
+	/* Get the log file id. */
+	if ((ret = __db_appname(dblp->dbenv,
+	    DB_APP_DATA, NULL, name, NULL, &fullname)) != 0)
+		return (ret);
+
+	LOCK_LOGREGION(dblp);
+
+	/*
+	 * See if we've already got this file in the log, computing one
+	 * more than the highest file id currently in use as we do it.
+	 */
+	for (fid = 1, fnp = SH_TAILQ_FIRST(&dblp->lp->fq, __fname);
+	    fnp != NULL; fnp = SH_TAILQ_NEXT(fnp, q, __fname)) {
+		if (fid <= fnp->id)
+			fid = fnp->id + 1;
+		if (memcmp(dbp->lock.fileid,
+		    ADDR(dblp, fnp->fileid_off), DB_FILE_ID_LEN) != 0)
+			continue;
+
+		/*
+		 * Already registered: reuse the id.  fnp is a live entry
+		 * on the shared queue here, so a failure must not go
+		 * through the err cleanup below (which frees fnp).  The
+		 * reference is only taken once the local table has been
+		 * updated, so nothing has to be undone on error.
+		 */
+		fid = fnp->id;
+		if (!F_ISSET(dblp, DB_AM_RECOVER) &&
+		    (ret = __log_add_logid(dblp, dbp, fid)) != 0)
+			goto ret1;
+		++fnp->ref;
+		goto ret1;
+	}
+
+	/* Allocate a new file name structure. */
+	if ((ret = __db_shalloc(dblp->addr, sizeof(FNAME), 0, &fnp)) != 0)
+		goto err;
+	fnp->ref = 1;
+	fnp->id = fid;
+	fnp->s_type = type;
+
+	if ((ret = __db_shalloc(dblp->addr, DB_FILE_ID_LEN, 0, &fidp)) != 0)
+		goto err;
+	/*
+	 * XXX Now that uids are fixed size, we can put them in the fnp
+	 * structure.
+	 */
+	fnp->fileid_off = OFFSET(dblp, fidp);
+	memcpy(fidp, dbp->lock.fileid, DB_FILE_ID_LEN);
+
+	/* Copy the name, including its terminating nul, into the region. */
+	len = strlen(name) + 1;
+	if ((ret = __db_shalloc(dblp->addr, len, 0, &namep)) != 0)
+		goto err;
+	fnp->name_off = OFFSET(dblp, namep);
+	memcpy(namep, name, len);
+
+	SH_TAILQ_INSERT_HEAD(&dblp->lp->fq, fnp, q, __fname);
+	inserted = 1;
+
+	/* Log the registry. */
+	if (!F_ISSET(dblp, DB_AM_RECOVER)) {
+		/* Zero both DBTs so no field is passed uninitialized. */
+		memset(&r_name, 0, sizeof(r_name));
+		r_name.data = (void *)name;		/* XXX: Yuck! */
+		r_name.size = len;
+		memset(&fid_dbt, 0, sizeof(fid_dbt));
+		fid_dbt.data = dbp->lock.fileid;
+		fid_dbt.size = DB_FILE_ID_LEN;
+		if ((ret = __log_register_log(dblp, NULL, &r_unused,
+		    0, &r_name, &fid_dbt, fid, type)) != 0)
+			goto err;
+		if ((ret = __log_add_logid(dblp, dbp, fid)) != 0)
+			goto err;
+	}
+	goto ret1;
+
+err:	/*
+	 * XXX
+	 * We should grow the region.
+	 *
+	 * Only reached while creating a new entry: unlink it if it made
+	 * it onto the queue and release whatever was allocated.
+	 */
+	if (inserted)
+		SH_TAILQ_REMOVE(&dblp->lp->fq, fnp, q, __fname);
+	if (namep != NULL)
+		__db_shalloc_free(dblp->addr, namep);
+	if (fidp != NULL)
+		__db_shalloc_free(dblp->addr, fidp);
+	if (fnp != NULL)
+		__db_shalloc_free(dblp->addr, fnp);
+
+ret1:	UNLOCK_LOGREGION(dblp);
+
+	if (fullname != NULL)
+		FREES(fullname);
+
+	if (idp != NULL)
+		*idp = fid;
+	return (ret);
+}
+
+/*
+ * log_unregister --
+ *	Discard a registered file name.
+ *
+ *	Unless the log handle has DB_AM_RECOVER set, an unregister record
+ *	is written to the log first.  The entry for fid is then looked up
+ *	on the region's file-name queue.  If it has more than one reference
+ *	only its reference count is decremented; otherwise the entry and
+ *	its file id and name are freed, and the id is removed from the
+ *	process local table.
+ *
+ *	Returns 0 on success, EINVAL if fid is not on the queue, or the
+ *	error returned by __log_unregister_log.  The region lock is released
+ *	on every return path.
+ */
+int
+log_unregister(dblp, fid)
+	DB_LOG *dblp;
+	u_int32_t fid;
+{
+	DB_LSN r_unused;
+	FNAME *fnp;
+	int ret;
+
+	ret = 0;
+	LOCK_LOGREGION(dblp);
+
+	/*
+	 * Unlog the registry.  On failure leave through ret1 so that the
+	 * region lock taken above is dropped.
+	 */
+	if (!F_ISSET(dblp, DB_AM_RECOVER) &&
+	    (ret = __log_unregister_log(dblp, NULL, &r_unused, 0, fid)) != 0)
+		goto ret1;
+
+	/* Find the entry in the log. */
+	for (fnp = SH_TAILQ_FIRST(&dblp->lp->fq, __fname);
+	    fnp != NULL; fnp = SH_TAILQ_NEXT(fnp, q, __fname))
+		if (fid == fnp->id)
+			break;
+	if (fnp == NULL) {
+		__db_err(dblp->dbenv, "log_unregister: non-existent file id");
+		ret = EINVAL;
+		goto ret1;
+	}
+
+	/*
+	 * If more than 1 reference, decrement the reference and return.
+	 *
+	 * NOTE(review): log_register calls __log_add_logid on every
+	 * non-recovery registration, but __log_rem_logid is only called
+	 * below when the last shared reference goes away -- confirm that
+	 * this asymmetry is intended.
+	 */
+	if (fnp->ref > 1) {
+		--fnp->ref;
+		goto ret1;
+	}
+
+	/* Free the unique file information, name and structure. */
+	__db_shalloc_free(dblp->addr, ADDR(dblp, fnp->fileid_off));
+	__db_shalloc_free(dblp->addr, ADDR(dblp, fnp->name_off));
+	SH_TAILQ_REMOVE(&dblp->lp->fq, fnp, q, __fname);
+	__db_shalloc_free(dblp->addr, fnp);
+
+	/* Remove from the process local table. */
+	__log_rem_logid(dblp, fid);
+
+ret1:	UNLOCK_LOGREGION(dblp);
+
+	return (ret);
+}