about summary refs log tree commit diff
path: root/db2/btree/bt_recno.c
diff options
context:
space:
mode:
Diffstat (limited to 'db2/btree/bt_recno.c')
-rw-r--r-- db2/btree/bt_recno.c 1195
1 files changed, 1195 insertions, 0 deletions
diff --git a/db2/btree/bt_recno.c b/db2/btree/bt_recno.c
new file mode 100644
index 0000000000..cd8872a064
--- /dev/null
+++ b/db2/btree/bt_recno.c
@@ -0,0 +1,1195 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1997
+ *	Sleepycat Software.  All rights reserved.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)bt_recno.c	10.12 (Sleepycat) 8/25/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <errno.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#endif
+
+#include "db_int.h"
+#include "db_page.h"
+#include "btree.h"
+
+static int __ram_add __P((DB *, db_recno_t *, DBT *, int, int));
+static int __ram_c_close __P((DBC *));
+static int __ram_c_del __P((DBC *, int));
+static int __ram_c_get __P((DBC *, DBT *, DBT *, int));
+static int __ram_c_put __P((DBC *, DBT *, DBT *, int));
+static int __ram_fmap __P((DB *, db_recno_t));
+static int __ram_get __P((DB *, DB_TXN *, DBT *, DBT *, int));
+static int __ram_put __P((DB *, DB_TXN *, DBT *, DBT *, int));
+static int __ram_source __P((DB *, RECNO *, const char *));
+static int __ram_sync __P((DB *, int));
+static int __ram_update __P((DB *, db_recno_t, int));
+static int __ram_vmap __P((DB *, db_recno_t));
+static int __ram_writeback __P((DB *));
+
+/*
+ * If we're renumbering records, then we have to detect in the cursor that a
+ * record was deleted, and adjust the cursor as necessary.  If not renumbering
+ * records, then we can detect this by looking at the actual record, so we
+ * ignore the cursor delete flag.
+ *
+ * CD_SET and CD_CLR are wrapped in do { } while (0) so that each expands to
+ * exactly one statement and stays correct inside an unbraced if/else.
+ * CD_ISSET is an expression and evaluates non-zero only when the database is
+ * renumbering and the cursor's CR_DELETED flag is set.
+ */
+#define	CD_SET(dbp, cp) do {						\
+	if (F_ISSET(dbp, DB_RE_RENUMBER))				\
+		F_SET(cp, CR_DELETED);					\
+} while (0)
+#define	CD_CLR(dbp, cp) do {						\
+	if (F_ISSET(dbp, DB_RE_RENUMBER))				\
+		F_CLR(cp, CR_DELETED);					\
+} while (0)
+#define	CD_ISSET(dbp, cp)						\
+	(F_ISSET(dbp, DB_RE_RENUMBER) && F_ISSET(cp, CR_DELETED))
+
+/*
+ * __ram_open --
+ *	Recno open function.
+ *
+ *	Allocates the private RECNO structure, fills in the delimiter, pad
+ *	and fixed-length settings from dbinfo (or defaults when dbinfo is
+ *	NULL), opens the underlying btree as DB_RECNO and installs the recno
+ *	versions of the cursor/del/get/put/sync methods on dbp.
+ *
+ *	Returns 0 on success.  Returns errno if the RECNO allocation fails,
+ *	EINVAL if DB_RE_FIXEDLEN is set with a zero record length, or the
+ *	error from __ram_source(), __bam_open() or __ram_snapshot()
+ *	(DB_NOTFOUND from the snapshot is not an error).  On failure the
+ *	RECNO structure and any source-file mapping, descriptor and name
+ *	it holds are released.
+ *
+ * PUBLIC: int __ram_open __P((DB *, DBTYPE, DB_INFO *));
+ */
+int
+__ram_open(dbp, type, dbinfo)
+	DB *dbp;
+	DBTYPE type;
+	DB_INFO *dbinfo;
+{
+	BTREE *t;
+	RECNO *rp;
+	int ret;
+
+	ret = 0;
+
+	/* Allocate and initialize the private RECNO structure. */
+	if ((rp = (RECNO *)calloc(1, sizeof(*rp))) == NULL)
+		return (errno);
+
+	/*
+	 * calloc() leaves re_fd as 0, which is a valid descriptor.  Mark it
+	 * as not open up front so that the error path below can never close
+	 * a descriptor this function didn't open.
+	 */
+	rp->re_fd = -1;
+
+	if (dbinfo != NULL) {
+		/*
+		 * If the user specified a source tree, open it and map it in.
+		 *
+		 * !!!
+		 * We don't complain if the user specified transactions or
+		 * threads.  It's possible to make it work, but you'd better
+		 * know what you're doing!
+		 */
+		if (dbinfo->re_source == NULL)
+			F_SET(rp, RECNO_EOF);
+		else if ((ret =
+		    __ram_source(dbp, rp, dbinfo->re_source)) != 0)
+			goto err;
+
+		/* Copy delimiter, length and padding values. */
+		rp->re_delim =
+		    F_ISSET(dbp, DB_RE_DELIMITER) ? dbinfo->re_delim : '\n';
+		rp->re_pad = F_ISSET(dbp, DB_RE_PAD) ? dbinfo->re_pad : ' ';
+
+		if (F_ISSET(dbp, DB_RE_FIXEDLEN)) {
+			if ((rp->re_len = dbinfo->re_len) == 0) {
+				__db_err(dbp->dbenv,
+				    "record length must be greater than 0");
+				ret = EINVAL;
+				goto err;
+			}
+		} else
+			rp->re_len = 0;
+	} else {
+		/* No DB_INFO: newline-delimited records, no backing file. */
+		rp->re_delim = '\n';
+		rp->re_pad = ' ';
+		F_SET(rp, RECNO_EOF);
+	}
+
+	/* Open the underlying btree. */
+	if ((ret = __bam_open(dbp, DB_RECNO, dbinfo)) != 0)
+		goto err;
+
+	/* Set the routines necessary to make it look like a recno tree. */
+	dbp->cursor = __ram_cursor;
+	dbp->del = __ram_delete;
+	dbp->get = __ram_get;
+	dbp->put = __ram_put;
+	dbp->sync = __ram_sync;
+
+	/* Link in the private recno structure. */
+	((BTREE *)dbp->internal)->bt_recno = rp;
+
+	/* If we're snapshotting an underlying source file, do it now. */
+	if (dbinfo != NULL && F_ISSET(dbinfo, DB_SNAPSHOT))
+		if ((ret = __ram_snapshot(dbp)) != 0 && ret != DB_NOTFOUND)
+			goto err;
+
+	return (0);
+
+err:	/* If we mmap'd a source file, discard it. */
+	if (rp->re_smap != NULL)
+		(void)__db_munmap(rp->re_smap, rp->re_msize);
+
+	/* If we opened a source file, discard it. */
+	if (rp->re_fd != -1)
+		(void)__db_close(rp->re_fd);
+	if (rp->re_source != NULL)
+		FREES(rp->re_source);
+
+	/*
+	 * If we allocated room for key/data return, discard it.  We can get
+	 * here before __bam_open() has run, so the btree structure may not
+	 * exist yet.
+	 */
+	if ((t = dbp->internal) != NULL && t->bt_rkey.data != NULL)
+		free(t->bt_rkey.data);
+
+	FREE(rp, sizeof(*rp));
+
+	return (ret);
+}
+
+/*
+ * __ram_cursor --
+ *	Recno db->cursor function.
+ *
+ *	Allocates a DBC and its private RCURSOR, wires in the recno cursor
+ *	methods, queues the cursor on dbp and hands it back through dbcp.
+ *	The new cursor starts out unpositioned (RECNO_OOB).
+ *
+ *	Returns 0 on success or ENOMEM if either allocation fails.
+ *
+ * PUBLIC: int __ram_cursor __P((DB *, DB_TXN *, DBC **));
+ */
+int
+__ram_cursor(dbp, txn, dbcp)
+	DB *dbp;
+	DB_TXN *txn;
+	DBC **dbcp;
+{
+	DBC *handle;
+	RCURSOR *cursor;
+
+	DEBUG_LWRITE(dbp, txn, "ram_cursor", NULL, NULL, 0);
+
+	/* Public cursor handle first, then the recno-private part. */
+	handle = (DBC *)calloc(1, sizeof(DBC));
+	if (handle == NULL)
+		return (ENOMEM);
+
+	cursor = (RCURSOR *)calloc(1, sizeof(RCURSOR));
+	if (cursor == NULL) {
+		free(handle);
+		return (ENOMEM);
+	}
+
+	/* Private state: back pointer and an out-of-bounds position. */
+	cursor->recno = RECNO_OOB;
+	cursor->dbc = handle;
+
+	/* Public state: owning database, transaction and method table. */
+	handle->internal = cursor;
+	handle->txn = txn;
+	handle->dbp = dbp;
+	handle->c_put = __ram_c_put;
+	handle->c_get = __ram_c_get;
+	handle->c_del = __ram_c_del;
+	handle->c_close = __ram_c_close;
+
+	/* Every cursor is linked onto the queue in the main DB structure. */
+	DB_THREAD_LOCK(dbp);
+	TAILQ_INSERT_HEAD(&dbp->curs_queue, handle, links);
+	DB_THREAD_UNLOCK(dbp);
+
+	*dbcp = handle;
+	return (0);
+}
+
+/*
+ * __ram_get --
+ *	Recno db->get function.
+ *
+ *	Looks up the record number held in key and returns the record
+ *	through data.
+ *
+ *	Returns 0 on success, DB_NOTFOUND if the search does not produce an
+ *	exact match, DB_KEYEMPTY if the record is flagged as deleted, or the
+ *	error from __db_getchk(), __ram_getno(), __bam_rsearch() or
+ *	__db_ret().  Every exit taken after the handle has been acquired
+ *	releases both the handle and, if a search succeeded, the page stack.
+ */
+static int
+__ram_get(argdbp, txn, key, data, flags)
+	DB *argdbp;
+	DB_TXN *txn;
+	DBT *key, *data;
+	int flags;
+{
+	BTREE *t;
+	DB *dbp;
+	PAGE *h;
+	db_indx_t indx;
+	db_recno_t recno;
+	int exact, ret, stack;
+
+	stack = 0;
+
+	DEBUG_LWRITE(argdbp, txn, "ram_get", key, NULL, flags);
+
+	/* Check for invalid flags. */
+	if ((ret = __db_getchk(argdbp, key, data, flags)) != 0)
+		return (ret);
+
+	GETHANDLE(argdbp, txn, &dbp, ret);
+	t = dbp->internal;
+
+	/* Check the user's record number and fill in as necessary. */
+	if ((ret = __ram_getno(dbp, key, &recno, 0)) != 0)
+		goto done;
+
+	/* Search the tree for the record. */
+	if ((ret = __bam_rsearch(dbp, &recno, S_FIND, 1, &exact)) != 0)
+		goto done;
+
+	/*
+	 * The search succeeded, so from here on there is a stack to release
+	 * whether or not the match was exact.
+	 */
+	stack = 1;
+	if (!exact) {
+		ret = DB_NOTFOUND;
+		goto done;
+	}
+
+	h = t->bt_csp->page;
+	indx = t->bt_csp->indx;
+
+	/* If the record has already been deleted, we couldn't have found it. */
+	if (GET_BKEYDATA(h, indx)->deleted) {
+		ret = DB_KEYEMPTY;
+		goto done;
+	}
+
+	/* Return the data item. */
+	ret = __db_ret(dbp,
+	    h, indx, data, &t->bt_rdata.data, &t->bt_rdata.ulen);
+	++t->lstat.bt_get;
+
+done:	/* Discard the stack. */
+	if (stack)
+		__bam_stkrel(dbp);
+
+	PUTHANDLE(dbp);
+	return (ret);
+}
+
+/*
+ * __ram_put --
+ *	Recno db->put function.
+ *
+ *	Stores data under the record number held in key or, when DB_APPEND
+ *	is set, appends it and copies the record number that was assigned
+ *	back into key.
+ *
+ *	Returns 0 on success, or the error from __db_putchk(),
+ *	__ram_snapshot(), __ram_getno(), __ram_add() or __db_retcopy().
+ */
+static int
+__ram_put(argdbp, txn, key, data, flags)
+	DB *argdbp;
+	DB_TXN *txn;
+	DBT *key, *data;
+	int flags;
+{
+	BTREE *t;
+	DB *dbp;
+	db_recno_t recno;
+	int ret;
+
+	DEBUG_LWRITE(argdbp, txn, "ram_put", key, data, flags);
+
+	/* Check for invalid flags. */
+	if ((ret = __db_putchk(argdbp,
+	    key, data, flags, F_ISSET(argdbp, DB_AM_RDONLY), 0)) != 0)
+		return (ret);
+
+	GETHANDLE(argdbp, txn, &dbp, ret);
+
+	/*
+	 * If we're appending to the tree, make sure we've read in all of
+	 * the backing source file.  Otherwise, check the user's record
+	 * number and fill in as necessary.
+	 *
+	 * The snapshot reports DB_NOTFOUND when it runs off the end of the
+	 * source file, which is the expected way for it to finish.  As in
+	 * every other caller of __ram_snapshot(), that is not an error.
+	 */
+	if (LF_ISSET(DB_APPEND)) {
+		if ((ret = __ram_snapshot(dbp)) == DB_NOTFOUND)
+			ret = 0;
+	} else
+		ret = __ram_getno(dbp, key, &recno, 1);
+
+	/* Add the record. */
+	if (ret == 0)
+		ret = __ram_add(dbp, &recno, data, flags, 0);
+
+	/* If we're appending to the tree, we have to return the record. */
+	if (ret == 0 && LF_ISSET(DB_APPEND)) {
+		t = dbp->internal;
+		ret = __db_retcopy(key, &recno, sizeof(recno),
+		    &t->bt_rkey.data, &t->bt_rkey.ulen, dbp->db_malloc);
+	}
+
+	PUTHANDLE(dbp);
+	return (ret);
+}
+
+/*
+ * __ram_sync --
+ *	Recno db->sync function.
+ *
+ *	Syncs the underlying btree and then, if that worked, rewrites the
+ *	backing source file.  Returns 0 or the first error encountered.
+ */
+static int
+__ram_sync(argdbp, flags)
+	DB *argdbp;
+	int flags;
+{
+	DB *handle;
+	int ret;
+
+	DEBUG_LWRITE(argdbp, NULL, "ram_sync", NULL, NULL, flags);
+
+	/* The btree goes first; a failure there ends the sync. */
+	ret = __bam_sync(argdbp, flags);
+	if (ret != 0)
+		return (ret);
+
+	/* Then copy the records back out to the backing source file. */
+	GETHANDLE(argdbp, NULL, &handle, ret);
+	ret = __ram_writeback(handle);
+	PUTHANDLE(handle);
+
+	return (ret);
+}
+
+/*
+ * __ram_close --
+ *	Recno db->close function.
+ *
+ *	Releases the source-file mapping, descriptor and name held by the
+ *	private RECNO structure, frees the structure itself, and then closes
+ *	the underlying btree.  Returns whatever __bam_close() returns.
+ *
+ * PUBLIC: int __ram_close __P((DB *));
+ */
+int
+__ram_close(argdbp)
+	DB *argdbp;
+{
+	BTREE *t;
+	RECNO *recno;
+
+	DEBUG_LWRITE(argdbp, NULL, "ram_close", NULL, NULL, 0);
+
+	t = (BTREE *)argdbp->internal;
+	recno = t->bt_recno;
+
+	/* Unmap the source file if it was mapped. */
+	if (recno->re_smap != NULL)
+		(void)__db_munmap(recno->re_smap, recno->re_msize);
+
+	/* Close the source file descriptor if one is open. */
+	if (recno->re_fd != -1)
+		(void)__db_close(recno->re_fd);
+
+	/* Release the source file name if one was recorded. */
+	if (recno->re_source != NULL)
+		FREES(recno->re_source);
+
+	/* Free the structure and unhook it from the btree. */
+	FREE(recno, sizeof(RECNO));
+	t->bt_recno = NULL;
+
+	/* Finally, close the underlying btree. */
+	return (__bam_close(argdbp));
+}
+
+/*
+ * __ram_c_close --
+ *	Recno cursor->close function.
+ *
+ *	Unlinks the cursor from its database's cursor queue and frees both
+ *	the private RCURSOR and the DBC.  Always returns 0.
+ */
+static int
+__ram_c_close(dbc)
+	DBC *dbc;
+{
+	DB *owner;
+
+	DEBUG_LWRITE(dbc->dbp, dbc->txn, "ram_c_close", NULL, NULL, 0);
+
+	owner = dbc->dbp;
+
+	/* Take the cursor off the queue while holding the thread lock. */
+	DB_THREAD_LOCK(owner);
+	TAILQ_REMOVE(&owner->curs_queue, dbc, links);
+	DB_THREAD_UNLOCK(owner);
+
+	/* Private part first, then the handle itself. */
+	FREE(dbc->internal, sizeof(RCURSOR));
+	FREE(dbc, sizeof(DBC));
+
+	return (0);
+}
+
+/*
+ * __ram_c_del --
+ *	Recno cursor->c_del function.
+ *
+ *	Deletes the record the cursor references by building an ordinary
+ *	delete request for its record number.  Returns 0 on success,
+ *	DB_KEYEMPTY if the cursor's record was already deleted, or the error
+ *	from __db_cdelchk() or __ram_delete().
+ */
+static int
+__ram_c_del(dbc, flags)
+	DBC *dbc;
+	int flags;
+{
+	DB *dbp;
+	DBT recno_key;
+	RCURSOR *cursor;
+	int ret;
+
+	DEBUG_LWRITE(dbc->dbp, dbc->txn, "ram_c_del", NULL, NULL, flags);
+
+	dbp = dbc->dbp;
+	cursor = dbc->internal;
+
+	/* Reject invalid flags, read-only databases and unset cursors. */
+	ret = __db_cdelchk(dbp, flags,
+	    F_ISSET(dbp, DB_AM_RDONLY), cursor->recno != RECNO_OOB);
+	if (ret != 0)
+		return (ret);
+
+	/* A record can only be deleted through the cursor once. */
+	if (CD_ISSET(dbp, cursor))
+		return (DB_KEYEMPTY);
+
+	/* Hand the cursor's record number to the normal delete path. */
+	memset(&recno_key, 0, sizeof(recno_key));
+	recno_key.size = sizeof(db_recno_t);
+	recno_key.data = &cursor->recno;
+	ret = __ram_delete(dbp, dbc->txn, &recno_key, 0);
+	if (ret != 0)
+		return (ret);
+
+	/* Remember the delete so the cursor can be adjusted later. */
+	CD_SET(dbp, cursor);
+	return (0);
+}
+
+/*
+ * __ram_c_get --
+ *	Recno cursor->c_get function.
+ *
+ *	Positions the cursor according to flags (DB_CURRENT, DB_FIRST,
+ *	DB_LAST, DB_NEXT, DB_PREV, DB_SET or DB_SET_RANGE) and returns the
+ *	record found there.  For DB_NEXT and DB_PREV, records that come back
+ *	as DB_KEYEMPTY are skipped.
+ *
+ *	Returns 0 on success, DB_KEYEMPTY for DB_CURRENT on a deleted record,
+ *	DB_NOTFOUND when there is no record in the requested direction, or
+ *	the error from one of the routines called.  Once the handle has been
+ *	acquired, every failure restores the cursor to its state on entry
+ *	and releases the handle.
+ */
+static int
+__ram_c_get(dbc, key, data, flags)
+	DBC *dbc;
+	DBT *key, *data;
+	int flags;
+{
+	BTREE *t;
+	DB *dbp;
+	RCURSOR *cp, copy;
+	int ret;
+
+	DEBUG_LREAD(dbc->dbp, dbc->txn, "ram_c_get",
+	    flags == DB_SET || flags == DB_SET_RANGE ? key : NULL,
+	    NULL, flags);
+
+	cp = dbc->internal;
+	dbp = dbc->dbp;
+
+	/* Check for invalid flags. */
+	if ((ret = __db_cgetchk(dbc->dbp,
+	    key, data, flags, cp->recno != RECNO_OOB)) != 0)
+		return (ret);
+
+	GETHANDLE(dbc->dbp, dbc->txn, &dbp, ret);
+	t = dbp->internal;
+
+	/* Save the cursor so that any failure can put it back. */
+	copy = *cp;
+
+retry:	/* Update the record number. */
+	switch (flags) {
+	case DB_CURRENT:
+		if (CD_ISSET(dbp, cp)) {
+			ret = DB_KEYEMPTY;
+			goto err;
+		}
+		break;
+	case DB_NEXT:
+		/*
+		 * After a renumbering delete the cursor already references
+		 * the record that followed the deleted one.
+		 */
+		if (CD_ISSET(dbp, cp))
+			break;
+		if (cp->recno != RECNO_OOB) {
+			++cp->recno;
+			break;
+		}
+		/* FALLTHROUGH */
+	case DB_FIRST:
+		flags = DB_NEXT;
+		cp->recno = 1;
+		break;
+	case DB_PREV:
+		if (cp->recno != RECNO_OOB) {
+			/*
+			 * Running off the front of the tree: this can happen
+			 * on a retry, after the cursor has already moved, so
+			 * leave through err to restore it.
+			 */
+			if (cp->recno == 1) {
+				ret = DB_NOTFOUND;
+				goto err;
+			}
+			--cp->recno;
+			break;
+		}
+		/* FALLTHROUGH */
+	case DB_LAST:
+		flags = DB_PREV;
+		if (((ret = __ram_snapshot(dbp)) != 0) && ret != DB_NOTFOUND)
+			goto err;
+		if ((ret = __bam_nrecs(dbp, &cp->recno)) != 0)
+			goto err;
+		if (cp->recno == 0) {
+			ret = DB_NOTFOUND;
+			goto err;
+		}
+		break;
+	case DB_SET:
+	case DB_SET_RANGE:
+		if ((ret = __ram_getno(dbp, key, &cp->recno, 0)) != 0)
+			goto err;
+		break;
+	}
+
+	/*
+	 * Return the key if the user didn't give us one, and then pass it
+	 * into __ram_get().
+	 */
+	if (flags != DB_SET && flags != DB_SET_RANGE &&
+	    (ret = __db_retcopy(key, &cp->recno, sizeof(cp->recno),
+	    &t->bt_rkey.data, &t->bt_rkey.ulen, dbp->db_malloc)) != 0)
+		goto err;
+
+	/*
+	 * The cursor was reset, so the delete adjustment is no
+	 * longer necessary.
+	 */
+	CD_CLR(dbp, cp);
+
+	/*
+	 * Retrieve the record.
+	 *
+	 * Skip any keys that don't really exist.
+	 */
+	if ((ret = __ram_get(dbp, dbc->txn, key, data, 0)) != 0)
+		if (ret == DB_KEYEMPTY &&
+		    (flags == DB_NEXT || flags == DB_PREV))
+			goto retry;
+
+err:	if (ret != 0)
+		*cp = copy;
+
+	PUTHANDLE(dbp);
+	return (ret);
+}
+
+/*
+ * __ram_c_put --
+ *	Recno cursor->c_put function.
+ *
+ *	Searches for the cursor's current record and hands key, data and
+ *	flags to __bam_iitem().  If the insert reports DB_NEEDSPLIT the
+ *	stack is released, the page is split and the search is repeated.
+ *	For any flag other than DB_CURRENT the record counts are adjusted;
+ *	for DB_AFTER and DB_BEFORE the other cursors are adjusted through
+ *	__ram_ca() and this cursor is pointed at the new record.
+ *
+ *	Returns 0 on success, DB_NOTFOUND if the search is not an exact
+ *	match, or the error from __db_cputchk(), __bam_rsearch(),
+ *	__bam_iitem(), __bam_split(), __bam_stkrel() or __bam_adjust().  On
+ *	any failure after the handle is acquired the cursor is restored to
+ *	its state on entry.
+ */
+static int
+__ram_c_put(dbc, key, data, flags)
+	DBC *dbc;
+	DBT *key, *data;
+	int flags;
+{
+	BTREE *t;
+	RCURSOR *cp, copy;
+	DB *dbp;
+	int exact, ret;
+	void *arg;
+
+	DEBUG_LWRITE(dbc->dbp, dbc->txn, "ram_c_put", NULL, data, flags);
+
+	cp = dbc->internal;
+
+	if ((ret = __db_cputchk(dbc->dbp, key, data, flags,
+	    F_ISSET(dbc->dbp, DB_AM_RDONLY), cp->recno != RECNO_OOB)) != 0)
+		return (ret);
+
+	GETHANDLE(dbc->dbp, dbc->txn, &dbp, ret);
+	t = dbp->internal;
+
+	/* Initialize the cursor for a new retrieval. */
+	copy = *cp;		/* Saved so the err path can restore it. */
+
+	/*
+	 * To split, we need a valid key for the page.  Since it's a cursor,
+	 * we have to build one.
+	 *
+	 * The split code discards all short-term locks and stack pages.
+	 *
+	 * The if (0) body is skipped on the way in; it is only reached by
+	 * the "goto split" below, after which control falls through to the
+	 * search again.
+	 */
+	if (0) {
+split:		arg = &cp->recno;
+		if ((ret = __bam_split(dbp, arg)) != 0)
+			goto err;
+	}
+
+	if ((ret = __bam_rsearch(dbp, &cp->recno, S_INSERT, 1, &exact)) != 0)
+		goto err;
+	if (!exact) {
+		ret = DB_NOTFOUND;
+		goto err;
+	}
+	if ((ret = __bam_iitem(dbp, &t->bt_csp->page,
+	    &t->bt_csp->indx, key, data, flags, 0)) == DB_NEEDSPLIT) {
+		if ((ret = __bam_stkrel(dbp)) != 0)
+			goto err;
+		goto split;
+	}
+	if ((ret = __bam_stkrel(dbp)) != 0)
+		goto err;
+
+	if (flags != DB_CURRENT) {
+		/* Adjust the counts. */
+		if ((ret = __bam_adjust(dbp, t, 1)) != 0)
+			goto err;
+
+		switch (flags) {
+		case DB_AFTER:
+			/* Adjust the cursors. */
+			__ram_ca(dbp, cp->recno, CA_IAFTER);
+
+			/* Set this cursor to reference the new record. */
+			cp->recno = copy.recno + 1;
+			break;
+		case DB_BEFORE:
+			/* Adjust the cursors. */
+			__ram_ca(dbp, cp->recno, CA_IBEFORE);
+
+			/* Set this cursor to reference the new record. */
+			cp->recno = copy.recno;
+			break;
+		}
+
+	}
+
+	/*
+	 * The cursor was reset, so the delete adjustment is no
+	 * longer necessary.
+	 */
+	CD_CLR(dbp, cp);
+
+err:	if (ret != 0)
+		*cp = copy;
+
+	PUTHANDLE(dbp);
+	return (ret);
+}
+
+/*
+ * __ram_ca --
+ *	Adjust cursors.
+ *
+ *	Walks every cursor queued on dbp and renumbers those that reference
+ *	a record whose number changes because of the operation on recno:
+ *
+ *	CA_DELETE	record recno was removed; cursors on later records
+ *			move down by one.
+ *	CA_IAFTER	a record was inserted after recno; cursors on later
+ *			records move up by one.
+ *	CA_IBEFORE	a record was inserted before recno; cursors on recno
+ *			and on later records move up by one.
+ *
+ *	Cursors on earlier records, and unpositioned cursors, are left
+ *	alone: their record numbers do not change.
+ *
+ * PUBLIC: void __ram_ca __P((DB *, db_recno_t, ca_recno_arg));
+ */
+void
+__ram_ca(dbp, recno, op)
+	DB *dbp;
+	db_recno_t recno;
+	ca_recno_arg op;
+{
+	DBC *dbc;
+	RCURSOR *cp;
+
+	/*
+	 * Adjust the cursors.  See the comment in __bam_ca_delete().
+	 */
+	DB_THREAD_LOCK(dbp);
+	for (dbc = TAILQ_FIRST(&dbp->curs_queue);
+	    dbc != NULL; dbc = TAILQ_NEXT(dbc, links)) {
+		cp = (RCURSOR *)dbc->internal;
+		switch (op) {
+		case CA_DELETE:
+			if (cp->recno > recno)
+				--cp->recno;
+			break;
+		case CA_IAFTER:
+			if (cp->recno > recno)
+				++cp->recno;
+			break;
+		case CA_IBEFORE:
+			if (cp->recno >= recno)
+				++cp->recno;
+			break;
+		}
+	}
+	DB_THREAD_UNLOCK(dbp);
+}
+
+#ifdef DEBUG
+/*
+ * __ram_cprint --
+ *	Display the current recno cursor list.
+ *
+ *	Writes one line per cursor queued on dbp to stderr, giving the
+ *	address of the cursor's private structure and its record number.
+ *	Always returns 0.
+ */
+int
+__ram_cprint(dbp)
+	DB *dbp;
+{
+	DBC *dbc;
+	RCURSOR *cp;
+
+	DB_THREAD_LOCK(dbp);
+	for (dbc = TAILQ_FIRST(&dbp->curs_queue);
+	    dbc != NULL; dbc = TAILQ_NEXT(dbc, links)) {
+		cp = (RCURSOR *)dbc->internal;
+		/*
+		 * Print the address with %p: a pointer doesn't necessarily
+		 * fit in a u_int, so casting it would lose the high bits.
+		 */
+		fprintf(stderr,
+		    "%p: recno: %lu\n", (void *)cp, (u_long)cp->recno);
+	}
+	DB_THREAD_UNLOCK(dbp);
+	return (0);
+}
+#endif /* DEBUG */
+
+/*
+ * __ram_getno --
+ *	Check the user's record number, and make sure we've seen it.
+ *
+ *	Reads the record number out of key, rejects 0 with EINVAL, and
+ *	stores the number through rep when rep is not NULL.  For a DB_RECNO
+ *	database the result is whatever __ram_update() returns for that
+ *	record; for any other type the result is 0.
+ *
+ * PUBLIC: int __ram_getno __P((DB *, const DBT *, db_recno_t *, int));
+ */
+int
+__ram_getno(dbp, key, rep, can_create)
+	DB *dbp;
+	const DBT *key;
+	db_recno_t *rep;
+	int can_create;
+{
+	db_recno_t wanted;
+
+	/* Record numbers start at 1; 0 is never legal. */
+	wanted = *(db_recno_t *)key->data;
+	if (wanted == 0) {
+		__db_err(dbp->dbenv, "illegal record number of 0");
+		return (EINVAL);
+	}
+
+	/* Hand the number back to callers that asked for it. */
+	if (rep != NULL)
+		*rep = wanted;
+
+	/*
+	 * Btree can neither create records nor read them in, so there is
+	 * nothing more to do for it.  Recno can do both: see if we can find
+	 * the record.
+	 */
+	if (dbp->type != DB_RECNO)
+		return (0);
+	return (__ram_update(dbp, wanted, can_create));
+}
+
+/*
+ * __ram_snapshot --
+ *	Read in any remaining records from the backing input file.
+ *
+ *	Asks __ram_update() for the highest possible record number without
+ *	permission to create records, and returns its result unchanged.
+ *
+ * PUBLIC: int __ram_snapshot __P((DB *));
+ */
+int
+__ram_snapshot(dbp)
+	DB *dbp;
+{
+	int ret;
+
+	ret = __ram_update(dbp, DB_MAX_RECORDS, 0);
+	return (ret);
+}
+
+/*
+ * __ram_update --
+ *	Ensure the tree has records up to and including the specified one.
+ *
+ *	If the backing source file has not been read to its end and recno is
+ *	beyond the records in the tree, records are read in through the
+ *	re_irec method.  If can_create is set and the tree still stops short
+ *	of recno, records flagged BI_DELETED are added to fill the gap up to
+ *	(but not including) recno.
+ *
+ *	Returns 0 on success, ENOMEM if the fill buffer cannot be grown, or
+ *	the error from __bam_nrecs(), the re_irec method or __ram_add().
+ *	DB_NOTFOUND from re_irec is passed back only when can_create is not
+ *	set.
+ */
+static int
+__ram_update(dbp, recno, can_create)
+	DB *dbp;
+	db_recno_t recno;
+	int can_create;
+{
+	BTREE *t;
+	RECNO *rp;
+	db_recno_t nrecs;
+	int ret;
+	void *p;
+
+	t = dbp->internal;
+	rp = t->bt_recno;
+
+	/*
+	 * If we can't create records and we've read the entire backing input
+	 * file, we're done.
+	 */
+	if (!can_create && F_ISSET(rp, RECNO_EOF))
+		return (0);
+
+	/*
+	 * If we haven't seen this record yet, try to get it from the original
+	 * file.
+	 *
+	 * The reader returns DB_NOTFOUND when the file ends before the
+	 * requested record.  If we're allowed to create records that isn't
+	 * a failure: the missing ones are created below.
+	 */
+	if ((ret = __bam_nrecs(dbp, &nrecs)) != 0)
+		return (ret);
+	if (!F_ISSET(rp, RECNO_EOF) && recno > nrecs) {
+		if ((ret = rp->re_irec(dbp, recno)) != 0 &&
+		    (ret != DB_NOTFOUND || !can_create))
+			return (ret);
+		if ((ret = __bam_nrecs(dbp, &nrecs)) != 0)
+			return (ret);
+	}
+
+	/*
+	 * If we can create records, create empty ones up to the requested
+	 * record.
+	 */
+	if (!can_create || recno <= nrecs + 1)
+		return (0);
+
+	t->bt_rdata.dlen = 0;
+	t->bt_rdata.doff = 0;
+	t->bt_rdata.flags = 0;
+	if (F_ISSET(dbp, DB_RE_FIXEDLEN)) {
+		if (t->bt_rdata.ulen < rp->re_len) {
+			/*
+			 * Grow through a temporary: if realloc fails the old
+			 * buffer is still allocated, so it and its recorded
+			 * length must be left in place.
+			 */
+			p = t->bt_rdata.data == NULL ?
+			    (void *)malloc(rp->re_len) :
+			    (void *)realloc(t->bt_rdata.data, rp->re_len);
+			if (p == NULL)
+				return (ENOMEM);
+			t->bt_rdata.data = p;
+			t->bt_rdata.ulen = rp->re_len;
+		}
+		t->bt_rdata.size = rp->re_len;
+		memset(t->bt_rdata.data, rp->re_pad, rp->re_len);
+	} else
+		t->bt_rdata.size = 0;
+
+	/* Fill the gap with records that are flagged as deleted. */
+	while (recno > ++nrecs)
+		if ((ret = __ram_add(dbp,
+		    &nrecs, &t->bt_rdata, 0, BI_DELETED)) != 0)
+			return (ret);
+	return (0);
+}
+
+/*
+ * __ram_source --
+ *	Load information about the backing file.
+ *
+ *	Resolves fname to a path, opens it (read-only if the database is
+ *	read-only) and, unless it is empty, maps it and selects the
+ *	fixed-length or variable-length record reader.  An empty file just
+ *	sets RECNO_EOF.
+ *
+ *	Returns 0 on success or the error from __db_appname(),
+ *	__db_fdopen(), __db_stat() or __db_mmap().  On failure the path is
+ *	freed and rp->re_source is left NULL; a descriptor that was opened
+ *	is left in rp->re_fd for the caller to close.
+ */
+static int
+__ram_source(dbp, rp, fname)
+	DB *dbp;
+	RECNO *rp;
+	const char *fname;
+{
+	off_t size;
+	int oflags, ret;
+
+	/*
+	 * Until the open succeeds there is no descriptor.  Say so
+	 * explicitly, so a caller that closes rp->re_fd on failure doesn't
+	 * close whatever value the field happened to hold.
+	 */
+	rp->re_fd = -1;
+
+	if ((ret = __db_appname(dbp->dbenv,
+	    DB_APP_DATA, NULL, fname, NULL, &rp->re_source)) != 0)
+		return (ret);
+
+	oflags = F_ISSET(dbp, DB_AM_RDONLY) ? DB_RDONLY : 0;
+	if ((ret =
+	    __db_fdopen(rp->re_source, oflags, oflags, 0, &rp->re_fd)) != 0) {
+		__db_err(dbp->dbenv, "%s: %s", rp->re_source, strerror(ret));
+		goto err;
+	}
+
+	/*
+	 * XXX
+	 * We'd like to test to see if the file is too big to mmap.  Since we
+	 * don't know what size or type off_t's or size_t's are, or the largest
+	 * unsigned integral type is, or what random insanity the local C
+	 * compiler will perpetrate, doing the comparison in a portable way is
+	 * flatly impossible.  Hope that mmap fails if the file is too large.
+	 */
+	if ((ret =
+	    __db_stat(dbp->dbenv, rp->re_source, rp->re_fd, &size, NULL)) != 0)
+		goto err;
+	if (size == 0) {
+		F_SET(rp, RECNO_EOF);
+		return (0);
+	}
+
+	if ((ret = __db_mmap(rp->re_fd, (size_t)size, 1, 1, &rp->re_smap)) != 0)
+		goto err;
+
+	/* Reading starts at the beginning of the map and stops at its end. */
+	rp->re_msize = size;
+	rp->re_cmap = rp->re_smap;
+	rp->re_emap = (u_int8_t *)rp->re_smap + rp->re_msize;
+	rp->re_irec = F_ISSET(dbp, DB_RE_FIXEDLEN) ?  __ram_fmap : __ram_vmap;
+	return (0);
+
+err:	/*
+	 * Clear the pointer once the name is freed: the caller frees
+	 * rp->re_source itself whenever it is not NULL.
+	 */
+	FREES(rp->re_source);
+	rp->re_source = NULL;
+	return (ret);
+}
+
+/*
+ * __ram_writeback --
+ *	Rewrite the backing file.
+ *
+ *	Does nothing if the RECNO_MODIFIED flag is clear or there is no
+ *	backing source file.  Otherwise reads any unread records into the
+ *	tree, drops the mapping and descriptor, reopens the source file
+ *	truncated, and writes the records out in record-number order:
+ *	variable-length records are each followed by the delimiter, and for
+ *	fixed-length databases a DB_KEYEMPTY record is written as a record
+ *	of pad bytes.
+ *
+ *	Returns 0 on success (and clears RECNO_MODIFIED), ENOMEM if the pad
+ *	buffer cannot be allocated, EIO on a short write, or the first error
+ *	from the snapshot, open, get, write or close.
+ */
+static int
+__ram_writeback(dbp)
+	DB *dbp;
+{
+	RECNO *rp;
+	DBT key, data;
+	db_recno_t keyno;
+	ssize_t nw;
+	int fd, ret, t_ret;
+	u_int8_t delim, *pad;
+
+	rp = ((BTREE *)dbp->internal)->bt_recno;
+
+	/* If the file wasn't modified, we're done. */
+	if (!F_ISSET(rp, RECNO_MODIFIED))
+		return (0);
+
+	/* If there's no backing source file, we're done. */
+	if (rp->re_source == NULL) {
+		F_CLR(rp, RECNO_MODIFIED);
+		return (0);
+	}
+
+	/*
+	 * Read any remaining records into the tree.
+	 *
+	 * XXX
+	 * This is why we can't support transactions when applications specify
+	 * backing (re_source) files.  At this point we have to read in the
+	 * rest of the records from the file so that we can write all of the
+	 * records back out again, which could modify a page for which we'd
+	 * have to log changes and which we don't have locked.  This could be
+	 * partially fixed by taking a snapshot of the entire file during the
+	 * db_open(), or, since db_open() isn't transaction protected, as part
+	 * of the first DB operation.  But, if a checkpoint occurs then, the
+	 * part of the log holding the copy of the file could be discarded, and
+	 * that would make it impossible to recover in the face of disaster.
+	 * This could all probably be fixed, but it would require transaction
+	 * protecting the backing source file, i.e. mpool would have to know
+	 * about it, and we don't want to go there.
+	 */
+	if ((ret = __ram_snapshot(dbp)) != 0 && ret != DB_NOTFOUND)
+		return (ret);
+
+	/*
+	 * !!!
+	 * Close any underlying mmap region.  This is required for Windows NT
+	 * (4.0, Service Pack 2) -- if the file is still mapped, the following
+	 * open will fail.
+	 */
+	if (rp->re_smap != NULL) {
+		(void)__db_munmap(rp->re_smap, rp->re_msize);
+		rp->re_smap = NULL;
+	}
+
+	/* Get rid of any backing file descriptor, just on GP's. */
+	if (rp->re_fd != -1) {
+		(void)__db_close(rp->re_fd);
+		rp->re_fd = -1;
+	}
+
+	/* Open the file, truncating it. */
+	if ((ret = __db_fdopen(rp->re_source,
+	    DB_SEQUENTIAL | DB_TRUNCATE,
+	    DB_SEQUENTIAL | DB_TRUNCATE, 0, &fd)) != 0) {
+		__db_err(dbp->dbenv, "%s: %s", rp->re_source, strerror(ret));
+		return (ret);
+	}
+
+	/*
+	 * We step through the records, writing each one out.  Use the record
+	 * number and the dbp->get() function, instead of a cursor, so we find
+	 * and write out "deleted" or non-existent records.
+	 */
+	memset(&key, 0, sizeof(key));
+	memset(&data, 0, sizeof(data));
+	key.size = sizeof(db_recno_t);
+	key.data = &keyno;
+
+	/*
+	 * We'll need the delimiter if we're doing variable-length records,
+	 * and the pad character if we're doing fixed-length records.
+	 */
+	delim = rp->re_delim;
+	pad = NULL;
+	if (F_ISSET(dbp, DB_RE_FIXEDLEN)) {
+		if ((pad = (u_int8_t *)malloc(rp->re_len)) == NULL) {
+			ret = ENOMEM;
+			goto err;
+		}
+		memset(pad, rp->re_pad, rp->re_len);
+	}
+	for (keyno = 1;; ++keyno) {
+		switch (ret = dbp->get(dbp, NULL, &key, &data, 0)) {
+		case 0:
+			if ((ret =
+			    __db_write(fd, data.data, data.size, &nw)) != 0)
+				goto err;
+			if (nw != (ssize_t)data.size) {
+				ret = EIO;
+				goto err;
+			}
+			break;
+		case DB_KEYEMPTY:
+			if (F_ISSET(dbp, DB_RE_FIXEDLEN)) {
+				if ((ret =
+				    __db_write(fd, pad, rp->re_len, &nw)) != 0)
+					goto err;
+				if (nw != (ssize_t) rp->re_len) {
+					ret = EIO;
+					goto err;
+				}
+			}
+			break;
+		case DB_NOTFOUND:
+			/* Ran past the last record: the file is complete. */
+			ret = 0;
+			goto done;
+		default:
+			/*
+			 * Any other return is a real failure.  Stop now
+			 * rather than write a delimiter for a record we
+			 * couldn't read and carry on with the next one.
+			 */
+			goto err;
+		}
+		if (!F_ISSET(dbp, DB_RE_FIXEDLEN)) {
+			if ((ret = __db_write(fd, &delim, 1, &nw)) != 0)
+				goto err;
+			if (nw != 1) {
+				ret = EIO;
+				goto err;
+			}
+		}
+	}
+
+err:
+done:	/*
+	 * Close the file descriptor.  An error from the close is only
+	 * reported if nothing went wrong earlier, so the first failure is
+	 * the one the caller sees.
+	 */
+	if ((t_ret = __db_close(fd)) != 0 && ret == 0)
+		ret = t_ret;
+
+	/* Discard the pad record, if we built one. */
+	if (pad != NULL)
+		free(pad);
+
+	if (ret == 0)
+		F_CLR(rp, RECNO_MODIFIED);
+	return (ret);
+}
+
+/*
+ * __ram_fmap --
+ *	Get fixed length records from a file.
+ *
+ *	Copies re_len-byte records out of the mapped source file, starting
+ *	at the current map position, and adds them to the tree until the
+ *	tree holds more than top records.  A final record that is cut short
+ *	by the end of the map is filled out with the pad character.
+ *
+ *	Returns 0 once enough records are present, DB_NOTFOUND (after
+ *	setting RECNO_EOF) if the map is exhausted first, ENOMEM if the
+ *	record buffer cannot be grown, or the error from __bam_nrecs() or
+ *	__ram_add().
+ */
+static int
+__ram_fmap(dbp, top)
+	DB *dbp;
+	db_recno_t top;
+{
+	BTREE *t;
+	DBT data;
+	RECNO *rp;
+	db_recno_t recno;
+	u_int32_t len;
+	u_int8_t *sp, *ep, *p;
+	int ret;
+	void *newbuf;
+
+	if ((ret = __bam_nrecs(dbp, &recno)) != 0)
+		return (ret);
+
+	t = dbp->internal;
+	rp = t->bt_recno;
+	if (t->bt_rdata.ulen < rp->re_len) {
+		/*
+		 * Grow through a temporary: if realloc fails the old buffer
+		 * is still allocated, so it and its recorded length must be
+		 * left in place.
+		 */
+		newbuf = t->bt_rdata.data == NULL ?
+		    (void *)malloc(rp->re_len) :
+		    (void *)realloc(t->bt_rdata.data, rp->re_len);
+		if (newbuf == NULL)
+			return (ENOMEM);
+		t->bt_rdata.data = newbuf;
+		t->bt_rdata.ulen = rp->re_len;
+	}
+
+	memset(&data, 0, sizeof(data));
+	data.data = t->bt_rdata.data;
+	data.size = rp->re_len;
+
+	sp = (u_int8_t *)rp->re_cmap;
+	ep = (u_int8_t *)rp->re_emap;
+	while (recno <= top) {
+		if (sp >= ep) {
+			F_SET(rp, RECNO_EOF);
+			return (DB_NOTFOUND);
+		}
+
+		/* Copy up to re_len bytes; len is what was left uncopied. */
+		len = rp->re_len;
+		for (p = t->bt_rdata.data;
+		    sp < ep && len > 0; *p++ = *sp++, --len);
+
+		/*
+		 * Another process may have read some portion of the input
+		 * file already, in which case we just want to discard the
+		 * new record.
+		 *
+		 * XXX
+		 * We should just do a seek, since the records are fixed
+		 * length.
+		 */
+		if (rp->re_last >= recno) {
+			if (len != 0)
+				memset(p, rp->re_pad, len);
+
+			++recno;
+			if ((ret = __ram_add(dbp, &recno, &data, 0, 0)) != 0)
+				return (ret);
+		}
+		++rp->re_last;
+	}
+	rp->re_cmap = sp;
+	return (0);
+}
+
+/*
+ * __ram_vmap --
+ *	Get variable length records from a file.
+ *
+ *	Scans the mapped source file from the current map position, treating
+ *	each run of bytes up to the delimiter (or the end of the map) as one
+ *	record, and adds records to the tree until the tree holds more than
+ *	top records.  The record data is not copied; it is passed to
+ *	__ram_add() in place.
+ *
+ *	Returns 0 once enough records are present, DB_NOTFOUND (after
+ *	setting RECNO_EOF) if the map is exhausted first, or the error from
+ *	__bam_nrecs() or __ram_add().
+ */
+static int
+__ram_vmap(dbp, top)
+	DB *dbp;
+	db_recno_t top;
+{
+	BTREE *t;
+	DBT data;
+	RECNO *rp;
+	db_recno_t recno;
+	u_int8_t *sp, *ep;
+	int delim, ret;
+
+	t = dbp->internal;
+	rp = t->bt_recno;
+
+	if ((ret = __bam_nrecs(dbp, &recno)) != 0)
+		return (ret);
+
+	memset(&data, 0, sizeof(data));
+
+	delim = rp->re_delim;
+
+	sp = (u_int8_t *)rp->re_cmap;
+	ep = (u_int8_t *)rp->re_emap;
+	while (recno <= top) {
+		if (sp >= ep) {
+			F_SET(rp, RECNO_EOF);
+			return (DB_NOTFOUND);
+		}
+		for (data.data = sp; sp < ep && *sp != delim; ++sp);
+
+		/*
+		 * Another process may have read some portion of the input
+		 * file already, in which case we just want to discard the
+		 * new record.
+		 */
+		if (rp->re_last >= recno) {
+			data.size = sp - (u_int8_t *)data.data;
+			++recno;
+			if ((ret = __ram_add(dbp, &recno, &data, 0, 0)) != 0)
+				return (ret);
+		}
+		++rp->re_last;
+
+		/*
+		 * Step over the delimiter.  If the last record ran to the
+		 * end of the map there is no delimiter to skip, and moving
+		 * on would push the pointer beyond the end of the mapping.
+		 */
+		if (sp < ep)
+			++sp;
+	}
+	rp->re_cmap = sp;
+	return (0);
+}
+
+/*
+ * __ram_add --
+ *	Add records into the tree.
+ *
+ *	Finds the slot for *recnop (or the append position when flags has
+ *	DB_APPEND set) and stores data there, splitting the page and
+ *	searching again if the insert reports DB_NEEDSPLIT.  bi_flags is
+ *	passed through to __bam_iitem().  When a new record is inserted,
+ *	rather than an existing one overwritten, the record counts are
+ *	adjusted.
+ *
+ *	Returns 0 on success, DB_KEYEXIST if the record exists and
+ *	DB_NOOVERWRITE is set, or the error from __bam_rsearch(),
+ *	__bam_iitem(), __bam_split() or __bam_adjust().
+ */
+static int
+__ram_add(dbp, recnop, data, flags, bi_flags)
+	DB *dbp;
+	db_recno_t *recnop;
+	DBT *data;
+	int flags, bi_flags;
+{
+	BTREE *t;
+	PAGE *h;
+	db_indx_t indx;
+	int exact, ret, stack;
+
+	t = dbp->internal;
+
+retry:	/* Find the slot for insertion. */
+	if ((ret = __bam_rsearch(dbp, recnop,
+	    S_INSERT | (LF_ISSET(DB_APPEND) ? S_APPEND : 0), 1, &exact)) != 0)
+		return (ret);
+	h = t->bt_csp->page;
+	indx = t->bt_csp->indx;
+	stack = 1;
+
+	/*
+	 * The recno access method doesn't currently support duplicates, so
+	 * if an identical key is already in the tree we're either overwriting
+	 * it or an error is returned.
+	 */
+	if (exact && LF_ISSET(DB_NOOVERWRITE)) {
+		ret = DB_KEYEXIST;
+		goto err;
+	}
+
+	/*
+	 * Select the arguments for __bam_iitem() and do the insert.  If the
+	 * key is an exact match, we're replacing the data item with a new
+	 * data item.  If the key isn't an exact match, we're inserting a new
+	 * key/data pair, before the search location.
+	 */
+	if ((ret = __bam_iitem(dbp, &h, &indx, NULL,
+	    data, exact ? DB_CURRENT : DB_BEFORE, bi_flags)) == DB_NEEDSPLIT) {
+		(void)__bam_stkrel(dbp);
+		stack = 0;
+		if ((ret = __bam_split(dbp, recnop)) != 0)
+			goto err;
+		goto retry;
+	}
+
+	/*
+	 * A new record changes the record counts.  If that adjustment
+	 * fails, the caller has to hear about it.
+	 */
+	if (!exact && ret == 0)
+		ret = __bam_adjust(dbp, t, 1);
+
+err:	if (stack)
+		__bam_stkrel(dbp);
+	return (ret);
+}