summary refs log tree commit diff
path: root/db2/include
diff options
context:
space:
mode:
Diffstat (limited to 'db2/include')
-rw-r--r-- db2/include/btree.h 233
-rw-r--r-- db2/include/btree_ext.h 76
-rw-r--r-- db2/include/clib_ext.h 6
-rw-r--r-- db2/include/common_ext.h 22
-rw-r--r-- db2/include/db.h.src 994
-rw-r--r-- db2/include/db_am.h 39
-rw-r--r-- db2/include/db_auto.h 13
-rw-r--r-- db2/include/db_cxx.h 158
-rw-r--r-- db2/include/db_ext.h 65
-rw-r--r-- db2/include/db_int.h.src 402
-rw-r--r-- db2/include/db_join.h 23
-rw-r--r-- db2/include/db_page.h 56
-rw-r--r-- db2/include/hash.h 156
-rw-r--r-- db2/include/hash_ext.h 61
-rw-r--r-- db2/include/lock.h 22
-rw-r--r-- db2/include/lock_ext.h 3
-rw-r--r-- db2/include/log.h 37
-rw-r--r-- db2/include/log_ext.h 5
-rw-r--r-- db2/include/mp.h 24
-rw-r--r-- db2/include/mp_ext.h 4
-rw-r--r-- db2/include/os.h 24
-rw-r--r-- db2/include/os_ext.h 28
-rw-r--r-- db2/include/os_jump.h (renamed from db2/include/os_func.h) 31
-rw-r--r-- db2/include/txn.h 33
-rw-r--r-- db2/include/txn_auto.h 26
-rw-r--r-- db2/include/txn_ext.h 22
-rw-r--r-- db2/include/xa.h 179
-rw-r--r-- db2/include/xa_ext.h 13
28 files changed, 764 insertions, 1991 deletions
diff --git a/db2/include/btree.h b/db2/include/btree.h
index 1660d331e7..b0c04b1508 100644
--- a/db2/include/btree.h
+++ b/db2/include/btree.h
@@ -43,38 +43,19 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- *	@(#)btree.h	10.21 (Sleepycat) 5/23/98
+ *	@(#)btree.h	10.26 (Sleepycat) 12/16/98
  */
 
 /* Forward structure declarations. */
 struct __btree;		typedef struct __btree BTREE;
 struct __cursor;	typedef struct __cursor CURSOR;
 struct __epg;		typedef struct __epg EPG;
-struct __rcursor;	typedef struct __rcursor RCURSOR;
 struct __recno;		typedef struct __recno RECNO;
 
-#undef	DEFMINKEYPAGE			/* Minimum keys per page */
 #define	DEFMINKEYPAGE	 (2)
 
-#undef	ISINTERNAL			/* If an internal page. */
-#define	ISINTERNAL(p)	(TYPE(p) == P_IBTREE || TYPE(p) ==  P_IRECNO)
-#undef	ISLEAF				/* If a leaf page. */
-#define	ISLEAF(p)	(TYPE(p) == P_LBTREE || TYPE(p) ==  P_LRECNO)
-
-/* Allocate and discard thread structures. */
-#define	GETHANDLE(dbp, set_txn, dbpp, ret) {				\
-	if (F_ISSET(dbp, DB_AM_THREAD)) {				\
-		if ((ret = __db_gethandle(dbp, __bam_bdup, dbpp)) != 0)	\
-			return (ret);					\
-	} else								\
-		*dbpp = dbp;						\
-	*dbpp->txn = set_txn;						\
-}
-#define	PUTHANDLE(dbp) {						\
-	dbp->txn = NULL;						\
-	if (F_ISSET(dbp, DB_AM_THREAD))					\
-		__db_puthandle(dbp);					\
-}
+#define	ISINTERNAL(p)	(TYPE(p) == P_IBTREE || TYPE(p) == P_IRECNO)
+#define	ISLEAF(p)	(TYPE(p) == P_LBTREE || TYPE(p) == P_LRECNO)
 
 /*
  * If doing transactions we have to hold the locks associated with a data item
@@ -82,15 +63,15 @@ struct __recno;		typedef struct __recno RECNO;
  * locks associated with walking the tree.  Distinguish between the two so that
  * we don't tie up the internal pages of the tree longer than necessary.
  */
-#define	__BT_LPUT(dbp, lock)						\
-	(F_ISSET((dbp), DB_AM_LOCKING) ?				\
-	    lock_put((dbp)->dbenv->lk_info, lock) : 0)
-#define	__BT_TLPUT(dbp, lock)						\
-	(F_ISSET((dbp), DB_AM_LOCKING) && (dbp)->txn == NULL ?		\
-	    lock_put((dbp)->dbenv->lk_info, lock) : 0)
+#define	__BT_LPUT(dbc, lock)						\
+	(F_ISSET((dbc)->dbp, DB_AM_LOCKING) ?				\
+	    lock_put((dbc)->dbp->dbenv->lk_info, lock) : 0)
+#define	__BT_TLPUT(dbc, lock)						\
+	(F_ISSET((dbc)->dbp, DB_AM_LOCKING) && (dbc)->txn == NULL ?	\
+	    lock_put((dbc)->dbp->dbenv->lk_info, lock) : 0)
 
 /*
- * Flags to __bt_search() and __rec_search().
+ * Flags to __bam_search() and __bam_rsearch().
  *
  * Note, internal page searches must find the largest record less than key in
  * the tree so that descents work.  Leaf page searches must find the smallest
@@ -113,22 +94,19 @@ struct __recno;		typedef struct __recno RECNO;
 #define	S_EXACT		0x00400		/* Exact items only. */
 #define	S_PARENT	0x00800		/* Lock page pair. */
 #define	S_STACK		0x01000		/* Need a complete stack. */
+#define	S_PAST_EOF	0x02000		/* If doing insert search (or keyfirst
+					 * or keylast operations), or a split
+					 * on behalf of an insert, it's okay to
+					 * return an entry one past end-of-page.
+					 */
 
 #define	S_DELETE	(S_WRITE | S_DUPFIRST | S_DELNO | S_EXACT | S_STACK)
 #define	S_FIND		(S_READ | S_DUPFIRST | S_DELNO)
-#define	S_INSERT	(S_WRITE | S_DUPLAST | S_STACK)
-#define	S_KEYFIRST	(S_WRITE | S_DUPFIRST | S_STACK)
-#define	S_KEYLAST	(S_WRITE | S_DUPLAST | S_STACK)
-#define	S_WRPAIR	(S_WRITE | S_DUPLAST | S_PARENT)
-
-/*
- * If doing insert search (including keyfirst or keylast operations) or a
- * split search on behalf of an insert, it's okay to return the entry one
- * past the end of the page.
- */
-#define	PAST_END_OK(f)							\
-	((f) == S_INSERT ||						\
-	(f) == S_KEYFIRST || (f) == S_KEYLAST || (f) == S_WRPAIR)
+#define	S_FIND_WR	(S_WRITE | S_DUPFIRST | S_DELNO)
+#define	S_INSERT	(S_WRITE | S_DUPLAST | S_PAST_EOF | S_STACK)
+#define	S_KEYFIRST	(S_WRITE | S_DUPFIRST | S_PAST_EOF | S_STACK)
+#define	S_KEYLAST	(S_WRITE | S_DUPLAST | S_PAST_EOF | S_STACK)
+#define	S_WRPAIR	(S_WRITE | S_DUPLAST | S_PAST_EOF | S_PARENT)
 
 /*
  * Flags to __bam_iitem().
@@ -149,23 +127,32 @@ struct __epg {
 };
 
 /*
- * All cursors are queued from the master DB structure.  Convert the user's
- * DB reference to the master DB reference.  We lock the master DB mutex
- * so that we can walk the cursor queue.  There's no race in accessing the
- * cursors, because if we're modifying a page, we have a write lock on it,
- * and therefore no other thread than the current one can have a cursor that
- * references the page.
+ * We maintain a stack of the pages that we're locking in the tree.  Btree's
+ * (currently) only save two levels of the tree at a time, so the default
+ * stack is always large enough.  Recno trees have to lock the entire tree to
+ * do inserts/deletes, however.  Grow the stack as necessary.
  */
-#define	CURSOR_SETUP(dbp) {						\
-	(dbp) = (dbp)->master;						\
-	DB_THREAD_LOCK(dbp);						\
-}
-#define	CURSOR_TEARDOWN(dbp)						\
-	DB_THREAD_UNLOCK(dbp);
+#define	BT_STK_CLR(c)							\
+	((c)->csp = (c)->sp)
+
+#define	BT_STK_ENTER(c, pagep, page_indx, lock, ret) do {		\
+	if ((ret =							\
+	    (c)->csp == (c)->esp ? __bam_stkgrow(c) : 0) == 0) {	\
+		(c)->csp->page = pagep;					\
+		(c)->csp->indx = page_indx;				\
+		(c)->csp->lock = lock;					\
+	}								\
+} while (0)
+
+#define	BT_STK_PUSH(c, pagep, page_indx, lock, ret) do {		\
+	BT_STK_ENTER(c, pagep, page_indx, lock, ret);			\
+	++(c)->csp;							\
+} while (0)
+
+#define	BT_STK_POP(c)							\
+	((c)->csp == (c)->stack ? NULL : --(c)->csp)
 
 /*
- * Btree cursor.
- *
  * Arguments passed to __bam_ca_replace().
  */
 typedef enum {
@@ -173,9 +160,27 @@ typedef enum {
 	REPLACE_SUCCESS,
 	REPLACE_FAILED
 } ca_replace_arg;
+
+/* Arguments passed to __ram_ca(). */
+typedef enum {
+	CA_DELETE,
+	CA_IAFTER,
+	CA_IBEFORE
+} ca_recno_arg;
+
+#define	RECNO_OOB	0		/* Illegal record number. */
+
+/* Btree/Recno cursor. */
 struct __cursor {
 	DBC		*dbc;		/* Enclosing DBC. */
 
+	/* Per-thread information: shared by btree/recno. */
+	EPG		*sp;		/* Stack pointer. */
+	EPG	 	*csp;		/* Current stack entry. */
+	EPG		*esp;		/* End stack pointer. */
+	EPG		 stack[5];
+
+	/* Per-thread information: btree private. */
 	PAGE		*page;		/* Cursor page. */
 
 	db_pgno_t	 pgno;		/* Page. */
@@ -187,90 +192,25 @@ struct __cursor {
 	DB_LOCK		 lock;		/* Cursor read lock. */
 	db_lockmode_t	 mode;		/* Lock mode. */
 
-	/*
-	 * If a cursor record is deleted, the key/data pair has to remain on
-	 * the page so that subsequent inserts/deletes don't interrupt the
-	 * cursor progression through the file.  This results in interesting
-	 * cases when "standard" operations, e.g., dbp->put() are done in the
-	 * context of "deleted" cursors.
-	 *
-	 * C_DELETED -- The item referenced by the cursor has been "deleted"
-	 *		but not physically removed from the page.
-	 * C_REPLACE -- The "deleted" item referenced by a cursor has been
-	 *		replaced by a dbp->put(), so the cursor is no longer
-	 *		responsible for physical removal from the page.
-	 * C_REPLACE_SETUP --
-	 *		We are about to overwrite a "deleted" item, flag any
-	 *		cursors referencing it for transition to C_REPLACE
-	 *		state.
-	 */
-#define	C_DELETED	0x0001
-#define	C_REPLACE	0x0002
-#define	C_REPLACE_SETUP	0x0004
-
-	/*
-	 * Internal cursor held for DB->get; don't hold locks unless involved
-	 * in a TXN.
-	 */
-#define	C_INTERNAL	0x0008
-	u_int32_t	 flags;
-};
-
-/*
- * Recno cursor.
- *
- * Arguments passed to __ram_ca().
- */
-typedef enum {
-	CA_DELETE,
-	CA_IAFTER,
-	CA_IBEFORE
-} ca_recno_arg;
-struct __rcursor {
-	DBC		*dbc;		/* Enclosing DBC. */
-
+	/* Per-thread information: recno private. */
 	db_recno_t	 recno;		/* Current record number. */
 
 	/*
-	 * Cursors referencing "deleted" records are positioned between
-	 * two records, and so must be specially adjusted until they are
-	 * moved.
+	 * Btree:
+	 * We set a flag in the cursor structure if the underlying object has
+	 * been deleted.  It's not strictly necessary, we could get the same
+	 * information by looking at the page itself.
+	 *
+	 * Recno:
+	 * When renumbering recno databases during deletes, cursors referencing
+	 * "deleted" records end up positioned between two records, and so must
+	 * be specially adjusted on the next operation.
 	 */
-#define	CR_DELETED	0x0001		/* Record deleted. */
+#define	C_DELETED	0x0001		/* Record was deleted. */
 	u_int32_t	 flags;
 };
 
 /*
- * We maintain a stack of the pages that we're locking in the tree.  Btree's
- * (currently) only save two levels of the tree at a time, so the default
- * stack is always large enough.  Recno trees have to lock the entire tree to
- * do inserts/deletes, however.  Grow the stack as necessary.
- */
-#undef	BT_STK_CLR
-#define	BT_STK_CLR(t)							\
-	((t)->bt_csp = (t)->bt_sp)
-
-#undef	BT_STK_ENTER
-#define	BT_STK_ENTER(t, pagep, page_indx, lock, ret) do {		\
-	if ((ret =							\
-	    (t)->bt_csp == (t)->bt_esp ? __bam_stkgrow(t) : 0) == 0) {	\
-		(t)->bt_csp->page = pagep;				\
-		(t)->bt_csp->indx = page_indx;				\
-		(t)->bt_csp->lock = lock;				\
-	}								\
-} while (0)
-
-#undef	BT_STK_PUSH
-#define	BT_STK_PUSH(t, pagep, page_indx, lock, ret) do {		\
-	BT_STK_ENTER(t, pagep, page_indx, lock, ret);			\
-	++(t)->bt_csp;							\
-} while (0)
-
-#undef	BT_STK_POP
-#define	BT_STK_POP(t)							\
-	((t)->bt_csp == (t)->bt_stack ? NULL : --(t)->bt_csp)
-
-/*
  * The in-memory recno data structure.
  *
  * !!!
@@ -278,9 +218,6 @@ struct __rcursor {
  * are no transaction semantics associated with backing files, nor is there
  * any thread protection.
  */
-#undef	RECNO_OOB
-#define	RECNO_OOB	0		/* Illegal record number. */
-
 struct __recno {
 	int		 re_delim;	/* Variable-length delimiting byte. */
 	int		 re_pad;	/* Fixed-length padding byte. */
@@ -294,7 +231,7 @@ struct __recno {
 	void		*re_emap;	/* End of mapped space. */
 	size_t		 re_msize;	/* Size of mapped region. */
 					/* Recno input function. */
-	int (*re_irec) __P((DB *, db_recno_t));
+	int (*re_irec) __P((DBC *, db_recno_t));
 
 #define	RECNO_EOF	0x0001		/* EOF on backing source file. */
 #define	RECNO_MODIFIED	0x0002		/* Tree was modified. */
@@ -302,31 +239,11 @@ struct __recno {
 };
 
 /*
- * The in-memory btree data structure.
+ * The in-memory, per-tree btree data structure.
  */
 struct __btree {
-/*
- * These fields are per-thread and are initialized when the BTREE structure
- * is created.
- */
 	db_pgno_t	 bt_lpgno;	/* Last insert location. */
 
-	DBT		 bt_rkey;	/* Returned key. */
-	DBT		 bt_rdata;	/* Returned data. */
-
-	EPG		*bt_sp;		/* Stack pointer. */
-	EPG	 	*bt_csp;	/* Current stack entry. */
-	EPG		*bt_esp;	/* End stack pointer. */
-	EPG		 bt_stack[5];
-
-	RECNO		*bt_recno;	/* Private recno structure. */
-
-	DB_BTREE_LSTAT lstat;		/* Btree local statistics. */
-
-/*
- * These fields are copied from the original BTREE structure and never
- * change.
- */
 	db_indx_t 	 bt_maxkey;	/* Maximum keys per page. */
 	db_indx_t 	 bt_minkey;	/* Minimum keys per page. */
 
@@ -336,6 +253,8 @@ struct __btree {
 	    __P((const DBT *, const DBT *));
 
 	db_indx_t	 bt_ovflsize;	/* Maximum key/data on-page size. */
+
+	RECNO		*recno;		/* Private recno structure. */
 };
 
 #include "btree_auto.h"
diff --git a/db2/include/btree_ext.h b/db2/include/btree_ext.h
index b8a137364c..fbc2ed958f 100644
--- a/db2/include/btree_ext.h
+++ b/db2/include/btree_ext.h
@@ -1,45 +1,41 @@
 /* DO NOT EDIT: automatically built by dist/distrib. */
 #ifndef _btree_ext_h_
 #define _btree_ext_h_
-int __bam_close __P((DB *));
-int __bam_sync __P((DB *, u_int32_t));
-int __bam_cmp __P((DB *, const DBT *, EPG *));
+int __bam_cmp __P((DB *, const DBT *,
+   PAGE *, u_int32_t, int (*)(const DBT *, const DBT *)));
 int __bam_defcmp __P((const DBT *, const DBT *));
 size_t __bam_defpfx __P((const DBT *, const DBT *));
 int __bam_pgin __P((db_pgno_t, void *, DBT *));
 int __bam_pgout __P((db_pgno_t, void *, DBT *));
 int __bam_mswap __P((PAGE *));
-int __bam_cursor __P((DB *, DB_TXN *, DBC **));
-int __bam_c_iclose __P((DB *, DBC *));
-int __bam_get __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t));
-int __bam_ovfl_chk __P((DB *, CURSOR *, u_int32_t, int));
 int __bam_cprint __P((DB *));
-int __bam_ca_delete __P((DB *, db_pgno_t, u_int32_t, CURSOR *, int));
+int __bam_ca_delete __P((DB *, db_pgno_t, u_int32_t, int));
 void __bam_ca_di __P((DB *, db_pgno_t, u_int32_t, int));
 void __bam_ca_dup __P((DB *,
    db_pgno_t, u_int32_t, u_int32_t, db_pgno_t, u_int32_t));
-void __bam_ca_move __P((DB *, db_pgno_t, db_pgno_t));
-void __bam_ca_replace
-   __P((DB *, db_pgno_t, u_int32_t, ca_replace_arg));
+void __bam_ca_rsplit __P((DB *, db_pgno_t, db_pgno_t));
 void __bam_ca_split __P((DB *,
    db_pgno_t, db_pgno_t, db_pgno_t, u_int32_t, int));
+int __bam_c_init __P((DBC *));
+int __bam_dup __P((DBC *, CURSOR *, u_int32_t, int));
 int __bam_delete __P((DB *, DB_TXN *, DBT *, u_int32_t));
-int __ram_delete __P((DB *, DB_TXN *, DBT *, u_int32_t));
-int __bam_ditem __P((DB *, PAGE *, u_int32_t));
-int __bam_adjindx __P((DB *, PAGE *, u_int32_t, u_int32_t, int));
-int __bam_dpage __P((DB *, const DBT *));
-int __bam_open __P((DB *, DBTYPE, DB_INFO *));
-int __bam_bdup __P((DB *, DB *));
-int __bam_new __P((DB *, u_int32_t, PAGE **));
-int __bam_free __P((DB *, PAGE *));
-int __bam_lt __P((DB *));
-int __bam_lget __P((DB *, int, db_pgno_t, db_lockmode_t, DB_LOCK *));
-int __bam_lput __P((DB *, DB_LOCK));
-int __bam_pget __P((DB *, PAGE **, db_pgno_t *, u_int32_t));
-int __bam_put __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t));
-int __bam_iitem __P((DB *,
+int __bam_ditem __P((DBC *, PAGE *, u_int32_t));
+int __bam_adjindx __P((DBC *, PAGE *, u_int32_t, u_int32_t, int));
+int __bam_dpage __P((DBC *, const DBT *));
+int __bam_dpages __P((DBC *));
+int __bam_open __P((DB *, DB_INFO *));
+int __bam_close __P((DB *));
+void __bam_setovflsize __P((DB *));
+int __bam_read_root __P((DB *));
+int __bam_new __P((DBC *, u_int32_t, PAGE **));
+int __bam_lput __P((DBC *, DB_LOCK));
+int __bam_free __P((DBC *, PAGE *));
+int __bam_lt __P((DBC *));
+int __bam_lget
+   __P((DBC *, int, db_pgno_t, db_lockmode_t, DB_LOCK *));
+int __bam_iitem __P((DBC *,
    PAGE **, db_indx_t *, DBT *, DBT *, u_int32_t, u_int32_t));
-int __bam_ritem __P((DB *, PAGE *, u_int32_t, DBT *));
+int __bam_ritem __P((DBC *, PAGE *, u_int32_t, DBT *));
 int __bam_pg_alloc_recover
   __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
 int __bam_pg_free_recover
@@ -56,28 +52,24 @@ int __bam_cdel_recover
   __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
 int __bam_repl_recover
   __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
-int __ram_open __P((DB *, DBTYPE, DB_INFO *));
-int __ram_cursor __P((DB *, DB_TXN *, DBC **));
+int __ram_open __P((DB *, DB_INFO *));
 int __ram_close __P((DB *));
-int __ram_c_iclose __P((DB *, DBC *));
+int __ram_c_del __P((DBC *, u_int32_t));
+int __ram_c_get __P((DBC *, DBT *, DBT *, u_int32_t));
+int __ram_c_put __P((DBC *, DBT *, DBT *, u_int32_t));
 void __ram_ca __P((DB *, db_recno_t, ca_recno_arg));
-int __ram_cprint __P((DB *));
-int __ram_getno __P((DB *, const DBT *, db_recno_t *, int));
-int __ram_snapshot __P((DB *));
-int __bam_rsearch __P((DB *, db_recno_t *, u_int32_t, int, int *));
-int __bam_adjust __P((DB *, BTREE *, int32_t));
-int __bam_nrecs __P((DB *, db_recno_t *));
+int __ram_getno __P((DBC *, const DBT *, db_recno_t *, int));
+int __bam_rsearch __P((DBC *, db_recno_t *, u_int32_t, int, int *));
+int __bam_adjust __P((DBC *, int32_t));
+int __bam_nrecs __P((DBC *, db_recno_t *));
 db_recno_t __bam_total __P((PAGE *));
-int __bam_search __P((DB *,
+int __bam_search __P((DBC *,
     const DBT *, u_int32_t, int, db_recno_t *, int *));
-int __bam_stkrel __P((DB *));
-int __bam_stkgrow __P((BTREE *));
-int __bam_split __P((DB *, void *));
-int __bam_broot __P((DB *, PAGE *, PAGE *, PAGE *));
-int __ram_root __P((DB *, PAGE *, PAGE *, PAGE *));
+int __bam_stkrel __P((DBC *, int));
+int __bam_stkgrow __P((CURSOR *));
+int __bam_split __P((DBC *, void *));
 int __bam_copy __P((DB *, PAGE *, PAGE *, u_int32_t, u_int32_t));
 int __bam_stat __P((DB *, void *, void *(*)(size_t), u_int32_t));
-void __bam_add_mstat __P((DB_BTREE_LSTAT *, DB_BTREE_LSTAT *));
 int __bam_pg_alloc_log
     __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
     u_int32_t, DB_LSN *, DB_LSN *, db_pgno_t,
diff --git a/db2/include/clib_ext.h b/db2/include/clib_ext.h
index f5510a1629..2566b849ce 100644
--- a/db2/include/clib_ext.h
+++ b/db2/include/clib_ext.h
@@ -37,12 +37,6 @@ void *memcpy __P((void *, const void *, size_t));
 #ifndef HAVE_MEMMOVE
 void *memmove __P((void *, const void *, size_t));
 #endif
-#ifndef HAVE_MEMCPY
-void *memcpy __P((void *, const void *, size_t));
-#endif
-#ifndef HAVE_MEMMOVE
-void *memmove __P((void *, const void *, size_t));
-#endif
 #ifndef HAVE_RAISE
 int raise __P((int));
 #endif
diff --git a/db2/include/common_ext.h b/db2/include/common_ext.h
index 4674f9ce01..33fb0cb218 100644
--- a/db2/include/common_ext.h
+++ b/db2/include/common_ext.h
@@ -5,26 +5,18 @@ int __db_appname __P((DB_ENV *,
    APPNAME, const char *, const char *, u_int32_t, int *, char **));
 int __db_apprec __P((DB_ENV *, u_int32_t));
 int __db_byteorder __P((DB_ENV *, int));
+int __db_fchk __P((DB_ENV *, const char *, u_int32_t, u_int32_t));
+int __db_fcchk
+   __P((DB_ENV *, const char *, u_int32_t, u_int32_t, u_int32_t));
+int __db_ferr __P((const DB_ENV *, const char *, int));
 #ifdef __STDC__
 void __db_err __P((const DB_ENV *dbenv, const char *fmt, ...));
 #else
 void __db_err();
 #endif
-int __db_panic __P((DB *));
-int __db_fchk __P((DB_ENV *, const char *, u_int32_t, u_int32_t));
-int __db_fcchk
-   __P((DB_ENV *, const char *, u_int32_t, u_int32_t, u_int32_t));
-int __db_cdelchk __P((const DB *, u_int32_t, int, int));
-int __db_cgetchk __P((const DB *, DBT *, DBT *, u_int32_t, int));
-int __db_cputchk __P((const DB *,
-   const DBT *, DBT *, u_int32_t, int, int));
-int __db_delchk __P((const DB *, DBT *, u_int32_t, int));
-int __db_getchk __P((const DB *, const DBT *, DBT *, u_int32_t));
-int __db_putchk
-   __P((const DB *, DBT *, const DBT *, u_int32_t, int, int));
-int __db_statchk __P((const DB *, u_int32_t));
-int __db_syncchk __P((const DB *, u_int32_t));
-int __db_ferr __P((const DB_ENV *, const char *, int));
+int __db_pgerr __P((DB *, db_pgno_t));
+int __db_pgfmt __P((DB *, db_pgno_t));
+int __db_panic __P((DB_ENV *, int));
 u_int32_t __db_log2 __P((u_int32_t));
 int __db_rattach __P((REGINFO *));
 int __db_rdetach __P((REGINFO *));
diff --git a/db2/include/db.h.src b/db2/include/db.h.src
deleted file mode 100644
index 97ad55693f..0000000000
--- a/db2/include/db.h.src
+++ /dev/null
@@ -1,994 +0,0 @@
-/*-
- * See the file LICENSE for redistribution information.
- *
- * Copyright (c) 1996, 1997, 1998
- *	Sleepycat Software.  All rights reserved.
- *
- *	@(#)db.h.src	10.131 (Sleepycat) 6/2/98
- */
-
-#ifndef _DB_H_
-#define	_DB_H_
-
-#ifndef __NO_SYSTEM_INCLUDES
-#include <sys/types.h>
-
-#include <stdio.h>
-#endif
-
-/*
- * XXX
- * MacOS: ensure that Metrowerks C makes enumeration types int sized.
- */
-#ifdef __MWERKS__
-#pragma enumsalwaysint on
-#endif
-
-/*
- * XXX
- * Handle function prototypes and the keyword "const".  This steps on name
- * space that DB doesn't control, but all of the other solutions are worse.
- *
- * XXX
- * While Microsoft's compiler is ANSI C compliant, it doesn't have _STDC_
- * defined by default, you specify a command line flag or #pragma to turn
- * it on.  Don't do that, however, because some of Microsoft's own header
- * files won't compile.
- */
-#undef	__P
-#if defined(__STDC__) || defined(__cplusplus) || defined(_MSC_VER)
-#define	__P(protos)	protos		/* ANSI C prototypes */
-#else
-#define	const
-#define	__P(protos)	()		/* K&R C preprocessor */
-#endif
-
-/*
- * !!!
- * DB needs basic information about specifically sized types.  If they're
- * not provided by the system, typedef them here.
- *
- * We protect them against multiple inclusion using __BIT_TYPES_DEFINED__,
- * as does BIND and Kerberos, since we don't know for sure what #include
- * files the user is using.
- *
- * !!!
- * We also provide the standard u_int, u_long etc., if they're not provided
- * by the system.
- */
-#ifndef	__BIT_TYPES_DEFINED__
-#define	__BIT_TYPES_DEFINED__
-@u_int8_decl@
-@int16_decl@
-@u_int16_decl@
-@int32_decl@
-@u_int32_decl@
-#endif
-
-@u_char_decl@
-@u_short_decl@
-@u_int_decl@
-@u_long_decl@
-
-#define	DB_VERSION_MAJOR	2
-#define	DB_VERSION_MINOR	4
-#define	DB_VERSION_PATCH	14
-#define	DB_VERSION_STRING	"Sleepycat Software: DB 2.4.14: (6/2/98)"
-
-typedef	u_int32_t	db_pgno_t;	/* Page number type. */
-typedef	u_int16_t	db_indx_t;	/* Page offset type. */
-#define	DB_MAX_PAGES	0xffffffff	/* >= # of pages in a file */
-
-typedef	u_int32_t	db_recno_t;	/* Record number type. */
-typedef size_t		DB_LOCK;	/* Object returned by lock manager. */
-#define	DB_MAX_RECORDS	0xffffffff	/* >= # of records in a tree */
-
-#define	DB_FILE_ID_LEN		20	/* DB file ID length. */
-
-/* Forward structure declarations, so applications get type checking. */
-struct __db;		typedef struct __db DB;
-#ifdef DB_DBM_HSEARCH
-			typedef struct __db DBM;
-#endif
-struct __db_bt_stat;	typedef struct __db_bt_stat DB_BTREE_STAT;
-struct __db_dbt;	typedef struct __db_dbt DBT;
-struct __db_env;	typedef struct __db_env DB_ENV;
-struct __db_info;	typedef struct __db_info DB_INFO;
-struct __db_lock_stat;	typedef struct __db_lock_stat DB_LOCK_STAT;
-struct __db_lockregion;	typedef struct __db_lockregion DB_LOCKREGION;
-struct __db_lockreq;	typedef struct __db_lockreq DB_LOCKREQ;
-struct __db_locktab;	typedef struct __db_locktab DB_LOCKTAB;
-struct __db_log;	typedef struct __db_log DB_LOG;
-struct __db_log_stat;	typedef struct __db_log_stat DB_LOG_STAT;
-struct __db_lsn;	typedef struct __db_lsn DB_LSN;
-struct __db_mpool;	typedef struct __db_mpool DB_MPOOL;
-struct __db_mpool_finfo;typedef struct __db_mpool_finfo DB_MPOOL_FINFO;
-struct __db_mpool_fstat;typedef struct __db_mpool_fstat DB_MPOOL_FSTAT;
-struct __db_mpool_stat;	typedef struct __db_mpool_stat DB_MPOOL_STAT;
-struct __db_mpoolfile;	typedef struct __db_mpoolfile DB_MPOOLFILE;
-struct __db_txn;	typedef struct __db_txn DB_TXN;
-struct __db_txn_active;	typedef struct __db_txn_active DB_TXN_ACTIVE;
-struct __db_txn_stat;	typedef struct __db_txn_stat DB_TXN_STAT;
-struct __db_txnmgr;	typedef struct __db_txnmgr DB_TXNMGR;
-struct __db_txnregion;	typedef struct __db_txnregion DB_TXNREGION;
-struct __dbc;		typedef struct __dbc DBC;
-
-/* Key/data structure -- a Data-Base Thang. */
-struct __db_dbt {
-	void	 *data;			/* key/data */
-	u_int32_t size;			/* key/data length */
-	u_int32_t ulen;			/* RO: length of user buffer. */
-	u_int32_t dlen;			/* RO: get/put record length. */
-	u_int32_t doff;			/* RO: get/put record offset. */
-
-#define	DB_DBT_INTERNAL	0x01		/* Perform any mallocs using regular
-					   malloc, not the user's malloc. */
-#define	DB_DBT_MALLOC	0x02		/* Return in allocated memory. */
-#define	DB_DBT_PARTIAL	0x04		/* Partial put/get. */
-#define	DB_DBT_USERMEM	0x08		/* Return in user's memory. */
-	u_int32_t flags;
-};
-
-/*
- * DB internal configuration.
- *
- * There are a set of functions that the application can replace with its
- * own versions, and some other knobs which can be turned at run-time.
- */
-#define	DB_FUNC_CALLOC	 1	/* DELETED: ANSI C calloc. */
-#define	DB_FUNC_CLOSE	 2		/* POSIX 1003.1 close. */
-#define	DB_FUNC_DIRFREE	 3		/* DB: free directory list. */
-#define	DB_FUNC_DIRLIST	 4		/* DB: create directory list. */
-#define	DB_FUNC_EXISTS	 5		/* DB: return if file exists. */
-#define	DB_FUNC_FREE	 6		/* ANSI C free. */
-#define	DB_FUNC_FSYNC	 7		/* POSIX 1003.1 fsync. */
-#define	DB_FUNC_IOINFO	 8		/* DB: return file I/O information. */
-#define	DB_FUNC_MALLOC	 9		/* ANSI C malloc. */
-#define	DB_FUNC_MAP	10		/* DB: map file into shared memory. */
-#define	DB_FUNC_OPEN	11		/* POSIX 1003.1 open. */
-#define	DB_FUNC_READ	12		/* POSIX 1003.1 read. */
-#define	DB_FUNC_REALLOC	13		/* ANSI C realloc. */
-#define	DB_FUNC_SEEK	14		/* POSIX 1003.1 lseek. */
-#define	DB_FUNC_SLEEP	15		/* DB: sleep secs/usecs. */
-#define	DB_FUNC_STRDUP	16	/* DELETED: DB: strdup(3). */
-#define	DB_FUNC_UNLINK	17		/* POSIX 1003.1 unlink. */
-#define	DB_FUNC_UNMAP	18		/* DB: unmap shared memory file. */
-#define	DB_FUNC_WRITE	19		/* POSIX 1003.1 write. */
-#define	DB_FUNC_YIELD	20		/* DB: yield thread to scheduler. */
-#define	DB_TSL_SPINS	21		/* DB: initialize spin count. */
-#define	DB_FUNC_RUNLINK	22		/* DB: remove a shared region. */
-#define	DB_REGION_ANON	23		/* DB: anonymous, unnamed regions. */
-#define	DB_REGION_INIT	24		/* DB: page-fault regions in create. */
-#define	DB_REGION_NAME	25		/* DB: anonymous, named regions. */
-#define	DB_MUTEXLOCKS	26		/* DB: turn off all mutex locks. */
-#define	DB_PAGEYIELD	27		/* DB: yield the CPU on pool get. */
-
-/*
- * Database configuration and initialization.
- */
- /*
-  * Flags understood by both db_open(3) and db_appinit(3).
-  */
-#define	DB_CREATE	      0x000001	/* O_CREAT: create file as necessary. */
-#define	DB_NOMMAP	      0x000002	/* Don't mmap underlying file. */
-#define	DB_THREAD	      0x000004	/* Free-thread DB package handles. */
-
-/*
- * Flags understood by db_appinit(3).
- */
-/*			      0x000007	   COMMON MASK. */
-#define	DB_INIT_LOCK	      0x000008	/* Initialize locking. */
-#define	DB_INIT_LOG	      0x000010	/* Initialize logging. */
-#define	DB_INIT_MPOOL	      0x000020	/* Initialize mpool. */
-#define	DB_INIT_TXN	      0x000040	/* Initialize transactions. */
-#define	DB_MPOOL_PRIVATE      0x000080	/* Mpool: private memory pool. */
-#define	__UNUSED_100	      0x000100
-#define	DB_RECOVER	      0x000200	/* Run normal recovery. */
-#define	DB_RECOVER_FATAL      0x000400	/* Run catastrophic recovery. */
-#define	DB_TXN_NOSYNC	      0x000800	/* Do not sync log on commit. */
-#define	DB_USE_ENVIRON	      0x001000	/* Use the environment. */
-#define	DB_USE_ENVIRON_ROOT   0x002000	/* Use the environment if root. */
-
-/* CURRENTLY UNUSED LOCK FLAGS. */
-#define	DB_TXN_LOCK_2PL	      0x000000	/* Two-phase locking. */
-#define	DB_TXN_LOCK_OPTIMIST  0x000000	/* Optimistic locking. */
-#define	DB_TXN_LOCK_MASK      0x000000	/* Lock flags mask. */
-
-/* CURRENTLY UNUSED LOG FLAGS. */
-#define	DB_TXN_LOG_REDO	      0x000000	/* Redo-only logging. */
-#define	DB_TXN_LOG_UNDO	      0x000000	/* Undo-only logging. */
-#define	DB_TXN_LOG_UNDOREDO   0x000000	/* Undo/redo write-ahead logging. */
-#define	DB_TXN_LOG_MASK	      0x000000	/* Log flags mask. */
-
-/*
- * Flags understood by db_open(3).
- *
- * DB_EXCL and DB_TEMPORARY are internal only, and are not documented.
- * DB_SEQUENTIAL is currently internal, but may be exported some day.
- */
-/*			      0x000007	   COMMON MASK. */
-/*			      0x003fff	   ALREADY USED. */
-#define	__UNUSED_4000	      0x004000
-#define	DB_EXCL		      0x008000	/* O_EXCL: exclusive open. */
-#define	DB_RDONLY	      0x010000	/* O_RDONLY: read-only. */
-#define	DB_SEQUENTIAL	      0x020000	/* Indicate sequential access. */
-#define	DB_TEMPORARY	      0x040000	/* Remove on last close. */
-#define	DB_TRUNCATE	      0x080000	/* O_TRUNCATE: replace existing DB. */
-
-/*
- * Deadlock detector modes; used in the DBENV structure to configure the
- * locking subsystem.
- */
-#define	DB_LOCK_NORUN		0x0
-#define	DB_LOCK_DEFAULT		0x1	/* Default policy. */
-#define	DB_LOCK_OLDEST		0x2	/* Abort oldest transaction. */
-#define	DB_LOCK_RANDOM		0x3	/* Abort random transaction. */
-#define	DB_LOCK_YOUNGEST	0x4	/* Abort youngest transaction. */
-
-struct __db_env {
-	int		 db_lorder;	/* Byte order. */
-
-					/* Error message callback. */
-	void (*db_errcall) __P((const char *, char *));
-	FILE		*db_errfile;	/* Error message file stream. */
-	const char	*db_errpfx;	/* Error message prefix. */
-	int		 db_verbose;	/* Generate debugging messages. */
-
-	/* User paths. */
-	char		*db_home;	/* Database home. */
-	char		*db_log_dir;	/* Database log file directory. */
-	char		*db_tmp_dir;	/* Database tmp file directory. */
-
-	char	       **db_data_dir;	/* Database data file directories. */
-	int		 data_cnt;	/* Database data file slots. */
-	int		 data_next;	/* Next Database data file slot. */
-
-	/* Locking. */
-	DB_LOCKTAB	*lk_info;	/* Return from lock_open(). */
-	u_int8_t	*lk_conflicts;	/* Two dimensional conflict matrix. */
-	u_int32_t	 lk_modes;	/* Number of lock modes in table. */
-	u_int32_t	 lk_max;	/* Maximum number of locks. */
-	u_int32_t	 lk_detect;	/* Deadlock detect on all conflicts. */
-
-	/* Logging. */
-	DB_LOG		*lg_info;	/* Return from log_open(). */
-	u_int32_t	 lg_max;	/* Maximum file size. */
-
-	/* Memory pool. */
-	DB_MPOOL	*mp_info;	/* Return from memp_open(). */
-	size_t		 mp_mmapsize;	/* Maximum file size for mmap. */
-	size_t		 mp_size;	/* Bytes in the mpool cache. */
-
-	/* Transactions. */
-	DB_TXNMGR	*tx_info;	/* Return from txn_open(). */
-	u_int32_t	 tx_max;	/* Maximum number of transactions. */
-	int (*tx_recover)		/* Dispatch function for recovery. */
-	    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
-
-#define	DB_ENV_APPINIT		0x01	/* Paths initialized by db_appinit(). */
-#define	DB_ENV_STANDALONE	0x02	/* Test: freestanding environment. */
-#define	DB_ENV_THREAD		0x04	/* DB_ENV is multi-threaded. */
-	u_int32_t	 flags;		/* Flags. */
-};
-
-/*******************************************************
- * Access methods.
- *******************************************************/
-/*
- * XXX
- * Changes here must be reflected in java/src/com/sleepycat/db/Db.java.
- */
-typedef enum {
-	DB_BTREE=1,			/* B+tree. */
-	DB_HASH,			/* Extended Linear Hashing. */
-	DB_RECNO,			/* Fixed and variable-length records. */
-	DB_UNKNOWN			/* Figure it out on open. */
-} DBTYPE;
-
-#define	DB_BTREEVERSION	6		/* Current btree version. */
-#define	DB_BTREEOLDVER	6		/* Oldest btree version supported. */
-#define	DB_BTREEMAGIC	0x053162
-
-#define	DB_HASHVERSION	5		/* Current hash version. */
-#define	DB_HASHOLDVER	4		/* Oldest hash version supported. */
-#define	DB_HASHMAGIC	0x061561
-
-#define	DB_LOGVERSION	2		/* Current log version. */
-#define	DB_LOGOLDVER	2		/* Oldest log version supported. */
-#define	DB_LOGMAGIC	0x040988
-
-struct __db_info {
-	int		 db_lorder;	/* Byte order. */
-	size_t		 db_cachesize;	/* Underlying cache size. */
-	size_t		 db_pagesize;	/* Underlying page size. */
-
-					/* Local heap allocation. */
-	void *(*db_malloc) __P((size_t));
-
-	/* Btree access method. */
-	u_int32_t	 bt_maxkey;	/* Maximum keys per page. */
-	u_int32_t	 bt_minkey;	/* Minimum keys per page. */
-	int (*bt_compare)		/* Comparison function. */
-	    __P((const DBT *, const DBT *));
-	size_t (*bt_prefix)		/* Prefix function. */
-	    __P((const DBT *, const DBT *));
-
-	/* Hash access method. */
-	u_int32_t 	 h_ffactor;	/* Fill factor. */
-	u_int32_t	 h_nelem;	/* Number of elements. */
-	u_int32_t      (*h_hash)	/* Hash function. */
-	    __P((const void *, u_int32_t));
-
-	/* Recno access method. */
-	int		 re_pad;	/* Fixed-length padding byte. */
-	int		 re_delim;	/* Variable-length delimiting byte. */
-	u_int32_t	 re_len;	/* Length for fixed-length records. */
-	char		*re_source;	/* Source file name. */
-
-#define	DB_DELIMITER		0x0001	/* Recno: re_delim set. */
-#define	DB_DUP			0x0002	/* Btree, Hash: duplicate keys. */
-#define	DB_FIXEDLEN		0x0004	/* Recno: fixed-length records. */
-#define	DB_PAD			0x0008	/* Recno: re_pad set. */
-#define	DB_RECNUM		0x0010	/* Btree: record numbers. */
-#define	DB_RENUMBER		0x0020	/* Recno: renumber on insert/delete. */
-#define	DB_SNAPSHOT		0x0040	/* Recno: snapshot the input. */
-	u_int32_t	 flags;
-};
-
-/*
- * DB access method and cursor operation codes.  These are implemented as
- * bit fields for future flexibility, but currently only a single one may
- * be specified to any function.
- */
-#define	DB_AFTER	0x000001	/* c_put() */
-#define	DB_APPEND	0x000002	/* put() */
-#define	DB_BEFORE	0x000004	/* c_put() */
-#define	DB_CHECKPOINT	0x000008	/* log_put(), log_get() */
-#define	DB_CURRENT	0x000010	/* c_get(), c_put(), log_get() */
-#define	DB_FIRST	0x000020	/* c_get(), log_get() */
-#define	DB_FLUSH	0x000040	/* log_put() */
-#define	DB_GET_RECNO	0x000080	/* get(), c_get() */
-#define	DB_KEYFIRST	0x000100	/* c_put() */
-#define	DB_KEYLAST	0x000200	/* c_put() */
-#define	DB_LAST		0x000400	/* c_get(), log_get() */
-#define	DB_NEXT		0x000800	/* c_get(), log_get() */
-#define	DB_NOOVERWRITE	0x001000	/* put() */
-#define	DB_NOSYNC	0x002000	/* close() */
-#define	DB_PREV		0x004000	/* c_get(), log_get() */
-#define	DB_RECORDCOUNT	0x008000	/* stat() */
-#define	DB_SET		0x010000	/* c_get(), log_get() */
-#define	DB_SET_RANGE	0x020000	/* c_get() */
-#define	DB_SET_RECNO	0x040000	/* c_get() */
-#define	DB_CURLSN	0x080000	/* log_put() */
-
-/*
- * DB (user visible) error return codes.
- *
- * XXX
- * Changes to any of the user visible error return codes must be reflected
- * in java/src/com/sleepycat/db/Db.java.
- */
-#define	DB_INCOMPLETE		( -1)	/* Sync didn't finish. */
-#define	DB_KEYEMPTY		( -2)	/* The key/data pair was deleted or
-					   was never created by the user. */
-#define	DB_KEYEXIST		( -3)	/* The key/data pair already exists. */
-#define	DB_LOCK_DEADLOCK	( -4)	/* Locker killed to resolve deadlock. */
-#define	DB_LOCK_NOTGRANTED	( -5)	/* Lock unavailable, no-wait set. */
-#define	DB_LOCK_NOTHELD		( -6)	/* Lock not held by locker. */
-#define	DB_NOTFOUND		( -7)	/* Key/data pair not found (EOF). */
-
-/* DB (private) error return codes. */
-#define	DB_DELETED		( -8)	/* Recovery file marked deleted. */
-#define	DB_NEEDSPLIT		( -9)	/* Page needs to be split. */
-#define	DB_REGISTERED		(-10)	/* Entry was previously registered. */
-#define	DB_SWAPBYTES		(-11)	/* Database needs byte swapping. */
-#define DB_TXN_CKP		(-12)	/* Encountered ckp record in log. */
-
-struct __db_ilock {			/* Internal DB access method lock. */
-	db_pgno_t	pgno;		/* Page being locked. */
-					/* File id. */
-	u_int8_t	fileid[DB_FILE_ID_LEN];
-};
-
-/* DB access method description structure. */
-struct __db {
-	void	*mutexp;		/* Synchronization for free threading */
-	DBTYPE	 type;			/* DB access method. */
-	DB_ENV	*dbenv;			/* DB_ENV structure. */
-	DB_ENV	*mp_dbenv;		/* DB_ENV for local mpool creation. */
-
-	DB	*master;		/* Original DB created by db_open. */
-	void	*internal;		/* Access method private. */
-
-	DB_MPOOL	*mp;		/* The access method's mpool. */
-	DB_MPOOLFILE	*mpf;		/* The access method's mpool file. */
-
-	/*
-	 * XXX
-	 * Explicit representations of structures in queue.h.
-	 *
-	 * TAILQ_HEAD(curs_queue, __dbc);
-	 */
-	struct {
-		struct __dbc *tqh_first;
-		struct __dbc **tqh_last;
-	} curs_queue;
-
-	/*
-	 * XXX
-	 * Explicit representations of structures in queue.h.
-	 *
-	 * LIST_HEAD(handleq, __db);
-	 * LIST_ENTRY(__db);
-	 */
-	struct {
-		struct __db *lh_first;
-	} handleq;			/* List of handles for this DB. */
-	struct {
-		struct __db *le_next;
-		struct __db **le_prev;
-	} links;			/* Links for the handle list. */
-
-	u_int32_t log_fileid;		/* Logging file id. */
-
-	DB_TXN	 *txn;			/* Current transaction. */
-	u_int32_t locker;		/* Default process' locker id. */
-	DBT	  lock_dbt;		/* DBT referencing lock. */
-	struct __db_ilock lock;		/* Lock. */
-
-	size_t	  pgsize;		/* Logical page size of file. */
-
-					/* Local heap allocation. */
-	void *(*db_malloc) __P((size_t));
-
-					/* Functions. */
-	int (*close)	__P((DB *, u_int32_t));
-	int (*cursor)	__P((DB *, DB_TXN *, DBC **));
-	int (*del)	__P((DB *, DB_TXN *, DBT *, u_int32_t));
-	int (*fd)	__P((DB *, int *));
-	int (*get)	__P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t));
-	int (*put)	__P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t));
-	int (*stat)	__P((DB *, void *, void *(*)(size_t), u_int32_t));
-	int (*sync)	__P((DB *, u_int32_t));
-
-#define	DB_AM_DUP	0x000001	/* DB_DUP (internal). */
-#define	DB_AM_INMEM	0x000002	/* In-memory; no sync on close. */
-#define	DB_AM_LOCKING	0x000004	/* Perform locking. */
-#define	DB_AM_LOGGING	0x000008	/* Perform logging. */
-#define	DB_AM_MLOCAL	0x000010	/* Database memory pool is local. */
-#define	DB_AM_PGDEF	0x000020	/* Page size was defaulted. */
-#define	DB_AM_RDONLY	0x000040	/* Database is readonly. */
-#define	DB_AM_RECOVER	0x000080	/* In recovery (do not log or lock). */
-#define	DB_AM_SWAP	0x000100	/* Pages need to be byte-swapped. */
-#define	DB_AM_THREAD	0x000200	/* DB is multi-threaded. */
-#define	DB_BT_RECNUM	0x000400	/* DB_RECNUM (internal) */
-#define	DB_HS_DIRTYMETA 0x000800	/* Hash: Metadata page modified. */
-#define	DB_RE_DELIMITER	0x001000	/* DB_DELIMITER (internal). */
-#define	DB_RE_FIXEDLEN	0x002000	/* DB_FIXEDLEN (internal). */
-#define	DB_RE_PAD	0x004000	/* DB_PAD (internal). */
-#define	DB_RE_RENUMBER	0x008000	/* DB_RENUMBER (internal). */
-#define	DB_RE_SNAPSHOT	0x010000	/* DB_SNAPSHOT (internal). */
-	u_int32_t flags;
-};
-
-/* Cursor description structure. */
-struct __dbc {
-	DB *dbp;			/* Related DB access method. */
-	DB_TXN	 *txn;			/* Associated transaction. */
-
-	/*
-	 * XXX
-	 * Explicit representations of structures in queue.h.
-	 *
-	 * TAILQ_ENTRY(__dbc);
-	 */
-	struct {
-		struct __dbc *tqe_next;
-		struct __dbc **tqe_prev;
-	} links;
-
-	void	 *internal;		/* Access method private. */
-
-	int (*c_close)	__P((DBC *));
-	int (*c_del)	__P((DBC *, u_int32_t));
-	int (*c_get)	__P((DBC *, DBT *, DBT *, u_int32_t));
-	int (*c_put)	__P((DBC *, DBT *, DBT *, u_int32_t));
-};
-
-/* Btree/recno statistics structure. */
-struct __db_bt_stat {
-	u_int32_t bt_flags;		/* Open flags. */
-	u_int32_t bt_maxkey;		/* Maxkey value. */
-	u_int32_t bt_minkey;		/* Minkey value. */
-	u_int32_t bt_re_len;		/* Fixed-length record length. */
-	u_int32_t bt_re_pad;		/* Fixed-length record pad. */
-	u_int32_t bt_pagesize;		/* Page size. */
-	u_int32_t bt_levels;		/* Tree levels. */
-	u_int32_t bt_nrecs;		/* Number of records. */
-	u_int32_t bt_int_pg;		/* Internal pages. */
-	u_int32_t bt_leaf_pg;		/* Leaf pages. */
-	u_int32_t bt_dup_pg;		/* Duplicate pages. */
-	u_int32_t bt_over_pg;		/* Overflow pages. */
-	u_int32_t bt_free;		/* Pages on the free list. */
-	u_int32_t bt_freed;		/* Pages freed for reuse. */
-	u_int32_t bt_int_pgfree;	/* Bytes free in internal pages. */
-	u_int32_t bt_leaf_pgfree;	/* Bytes free in leaf pages. */
-	u_int32_t bt_dup_pgfree;	/* Bytes free in duplicate pages. */
-	u_int32_t bt_over_pgfree;	/* Bytes free in overflow pages. */
-	u_int32_t bt_pfxsaved;		/* Bytes saved by prefix compression. */
-	u_int32_t bt_split;		/* Total number of splits. */
-	u_int32_t bt_rootsplit;		/* Root page splits. */
-	u_int32_t bt_fastsplit;		/* Fast splits. */
-	u_int32_t bt_added;		/* Items added. */
-	u_int32_t bt_deleted;		/* Items deleted. */
-	u_int32_t bt_get;		/* Items retrieved. */
-	u_int32_t bt_cache_hit;		/* Hits in fast-insert code. */
-	u_int32_t bt_cache_miss;	/* Misses in fast-insert code. */
-	u_int32_t bt_magic;		/* Magic number. */
-	u_int32_t bt_version;		/* Version number. */
-};
-
-#if defined(__cplusplus)
-extern "C" {
-#endif
-int   db_appinit __P((const char *, char * const *, DB_ENV *, u_int32_t));
-int   db_appexit __P((DB_ENV *));
-int   db_jump_set __P((void *, int));
-int   db_open __P((const char *,
-	  DBTYPE, u_int32_t, int, DB_ENV *, DB_INFO *, DB **));
-int   db_value_set __P((int, int));
-char *db_version __P((int *, int *, int *));
-#if defined(__cplusplus)
-}
-#endif
-
-/*******************************************************
- * Locking
- *******************************************************/
-#define	DB_LOCKVERSION	1
-#define	DB_LOCKMAGIC	0x090193
-
-/* Flag values for lock_vec(). */
-#define	DB_LOCK_NOWAIT		0x01	/* Don't wait on unavailable lock. */
-
-/* Flag values for lock_detect(). */
-#define	DB_LOCK_CONFLICT	0x01	/* Run on any conflict. */
-
-/*
- * Request types.
- *
- * XXX
- * Changes here must be reflected in java/src/com/sleepycat/db/Db.java.
- */
-typedef enum {
-	DB_LOCK_DUMP=0,			/* Display held locks. */
-	DB_LOCK_GET,			/* Get the lock. */
-	DB_LOCK_PUT,			/* Release the lock. */
-	DB_LOCK_PUT_ALL,		/* Release locker's locks. */
-	DB_LOCK_PUT_OBJ			/* Release locker's locks on obj. */
-} db_lockop_t;
-
-/*
- * Simple R/W lock modes, plus modes for multi-granularity intention locking.
- *
- * XXX
- * Changes here must be reflected in java/src/com/sleepycat/db/Db.java.
- */
-typedef enum {
-	DB_LOCK_NG=0,			/* Not granted. */
-	DB_LOCK_READ,			/* Shared/read. */
-	DB_LOCK_WRITE,			/* Exclusive/write. */
-	DB_LOCK_IREAD,			/* Intent to share/read. */
-	DB_LOCK_IWRITE,			/* Intent exclusive/write. */
-	DB_LOCK_IWR			/* Intent to read and write. */
-} db_lockmode_t;
-
-/*
- * Status of a lock.
- */
-typedef enum {
-	DB_LSTAT_ABORTED,		/* Lock belongs to an aborted txn. */
-	DB_LSTAT_ERR,			/* Lock is bad. */
-	DB_LSTAT_FREE,			/* Lock is unallocated. */
-	DB_LSTAT_HELD,			/* Lock is currently held. */
-	DB_LSTAT_NOGRANT,		/* Lock was not granted. */
-	DB_LSTAT_PENDING,		/* Lock was waiting and has been
-					 * promoted; waiting for the owner
-					 * to run and upgrade it to held. */
-	DB_LSTAT_WAITING		/* Lock is on the wait queue. */
-} db_status_t;
-
-/* Lock request structure. */
-struct __db_lockreq {
-	db_lockop_t	 op;		/* Operation. */
-	db_lockmode_t	 mode;		/* Requested mode. */
-	u_int32_t	 locker;	/* Locker identity. */
-	DBT		*obj;		/* Object being locked. */
-	DB_LOCK		 lock;		/* Lock returned. */
-};
-
-/*
- * Commonly used conflict matrices.
- *
- * Standard Read/Write (or exclusive/shared) locks.
- */
-#define	DB_LOCK_RW_N	3
-extern const u_int8_t db_rw_conflicts[];
-
-/* Multi-granularity locking. */
-#define	DB_LOCK_RIW_N	6
-extern const u_int8_t db_riw_conflicts[];
-
-struct __db_lock_stat {
-	u_int32_t st_magic;		/* Lock file magic number. */
-	u_int32_t st_version;		/* Lock file version number. */
-	u_int32_t st_maxlocks;		/* Maximum number of locks in table. */
-	u_int32_t st_nmodes;		/* Number of lock modes. */
-	u_int32_t st_numobjs;		/* Number of objects. */
-	u_int32_t st_nlockers;		/* Number of lockers. */
-	u_int32_t st_nconflicts;	/* Number of lock conflicts. */
-	u_int32_t st_nrequests;		/* Number of lock gets. */
-	u_int32_t st_nreleases;		/* Number of lock puts. */
-	u_int32_t st_ndeadlocks;	/* Number of lock deadlocks. */
-	u_int32_t st_region_wait;	/* Region lock granted after wait. */
-	u_int32_t st_region_nowait;	/* Region lock granted without wait. */
-	u_int32_t st_refcnt;		/* Region reference count. */
-	u_int32_t st_regsize;		/* Region size. */
-};
-
-#if defined(__cplusplus)
-extern "C" {
-#endif
-int	  lock_close __P((DB_LOCKTAB *));
-int	  lock_detect __P((DB_LOCKTAB *, u_int32_t, u_int32_t));
-int	  lock_get __P((DB_LOCKTAB *,
-	    u_int32_t, u_int32_t, const DBT *, db_lockmode_t, DB_LOCK *));
-int	  lock_id __P((DB_LOCKTAB *, u_int32_t *));
-int	  lock_open __P((const char *,
-	    u_int32_t, int, DB_ENV *, DB_LOCKTAB **));
-int	  lock_put __P((DB_LOCKTAB *, DB_LOCK));
-int	  lock_stat __P((DB_LOCKTAB *, DB_LOCK_STAT **, void *(*)(size_t)));
-int	  lock_unlink __P((const char *, int, DB_ENV *));
-int	  lock_vec __P((DB_LOCKTAB *,
-	    u_int32_t, u_int32_t, DB_LOCKREQ *, int, DB_LOCKREQ **));
-#if defined(__cplusplus)
-}
-#endif
-
-/*******************************************************
- * Logging.
- *******************************************************/
-/* Flag values for log_archive(). */
-#define	DB_ARCH_ABS		0x001	/* Absolute pathnames. */
-#define	DB_ARCH_DATA		0x002	/* Data files. */
-#define	DB_ARCH_LOG		0x004	/* Log files. */
-
-/*
- * A DB_LSN has two parts, a fileid which identifies a specific file, and an
- * offset within that file.  The fileid is an unsigned 4-byte quantity that
- * uniquely identifies a file within the log directory -- currently a simple
- * counter inside the log.  The offset is also an unsigned 4-byte value.  The
- * log manager guarantees the offset is never more than 4 bytes by switching
- * to a new log file before the maximum length imposed by an unsigned 4-byte
- * offset is reached.
- */
-struct __db_lsn {
-	u_int32_t	file;		/* File ID. */
-	u_int32_t	offset;		/* File offset. */
-};
-
-/* Log statistics structure. */
-struct __db_log_stat {
-	u_int32_t st_magic;		/* Log file magic number. */
-	u_int32_t st_version;		/* Log file version number. */
-	int st_mode;			/* Log file mode. */
-	u_int32_t st_lg_max;		/* Maximum log file size. */
-	u_int32_t st_w_bytes;		/* Bytes to log. */
-	u_int32_t st_w_mbytes;		/* Megabytes to log. */
-	u_int32_t st_wc_bytes;		/* Bytes to log since checkpoint. */
-	u_int32_t st_wc_mbytes;		/* Megabytes to log since checkpoint. */
-	u_int32_t st_wcount;		/* Total writes to the log. */
-	u_int32_t st_scount;		/* Total syncs to the log. */
-	u_int32_t st_region_wait;	/* Region lock granted after wait. */
-	u_int32_t st_region_nowait;	/* Region lock granted without wait. */
-	u_int32_t st_cur_file;		/* Current log file number. */
-	u_int32_t st_cur_offset;	/* Current log file offset. */
-	u_int32_t st_refcnt;		/* Region reference count. */
-	u_int32_t st_regsize;		/* Region size. */
-};
-
-#if defined(__cplusplus)
-extern "C" {
-#endif
-int	 log_archive __P((DB_LOG *, char **[], u_int32_t, void *(*)(size_t)));
-int	 log_close __P((DB_LOG *));
-int	 log_compare __P((const DB_LSN *, const DB_LSN *));
-int	 log_file __P((DB_LOG *, const DB_LSN *, char *, size_t));
-int	 log_flush __P((DB_LOG *, const DB_LSN *));
-int	 log_get __P((DB_LOG *, DB_LSN *, DBT *, u_int32_t));
-int	 log_open __P((const char *, u_int32_t, int, DB_ENV *, DB_LOG **));
-int	 log_put __P((DB_LOG *, DB_LSN *, const DBT *, u_int32_t));
-int	 log_register __P((DB_LOG *, DB *, const char *, DBTYPE, u_int32_t *));
-int	 log_stat __P((DB_LOG *, DB_LOG_STAT **, void *(*)(size_t)));
-int	 log_unlink __P((const char *, int, DB_ENV *));
-int	 log_unregister __P((DB_LOG *, u_int32_t));
-#if defined(__cplusplus)
-}
-#endif
-
-/*******************************************************
- * Mpool
- *******************************************************/
-/* Flag values for memp_fget(). */
-#define	DB_MPOOL_CREATE		0x001	/* Create a page. */
-#define	DB_MPOOL_LAST		0x002	/* Return the last page. */
-#define	DB_MPOOL_NEW		0x004	/* Create a new page. */
-
-/* Flag values for memp_fput(), memp_fset(). */
-#define	DB_MPOOL_CLEAN		0x001	/* Clear modified bit. */
-#define	DB_MPOOL_DIRTY		0x002	/* Page is modified. */
-#define	DB_MPOOL_DISCARD	0x004	/* Don't cache the page. */
-
-/* Mpool statistics structure. */
-struct __db_mpool_stat {
-	size_t st_cachesize;		/* Cache size. */
-	u_int32_t st_cache_hit;		/* Pages found in the cache. */
-	u_int32_t st_cache_miss;	/* Pages not found in the cache. */
-	u_int32_t st_map;		/* Pages from mapped files. */
-	u_int32_t st_page_create;	/* Pages created in the cache. */
-	u_int32_t st_page_in;		/* Pages read in. */
-	u_int32_t st_page_out;		/* Pages written out. */
-	u_int32_t st_ro_evict;		/* Clean pages forced from the cache. */
-	u_int32_t st_rw_evict;		/* Dirty pages forced from the cache. */
-	u_int32_t st_hash_buckets;	/* Number of hash buckets. */
-	u_int32_t st_hash_searches;	/* Total hash chain searches. */
-	u_int32_t st_hash_longest;	/* Longest hash chain searched. */
-	u_int32_t st_hash_examined;	/* Total hash entries searched. */
-	u_int32_t st_page_clean;	/* Clean pages. */
-	u_int32_t st_page_dirty;	/* Dirty pages. */
-	u_int32_t st_page_trickle;	/* Pages written by memp_trickle. */
-	u_int32_t st_region_wait;	/* Region lock granted after wait. */
-	u_int32_t st_region_nowait;	/* Region lock granted without wait. */
-	u_int32_t st_refcnt;		/* Region reference count. */
-	u_int32_t st_regsize;		/* Region size. */
-};
-
-/* Mpool file open information structure. */
-struct __db_mpool_finfo {
-	int	   ftype;		/* File type. */
-	DBT	  *pgcookie;		/* Byte-string passed to pgin/pgout. */
-	u_int8_t  *fileid;		/* Unique file ID. */
-	int32_t	   lsn_offset;		/* LSN offset in page. */
-	u_int32_t  clear_len;		/* Cleared length on created pages. */
-};
-
-/* Mpool file statistics structure. */
-struct __db_mpool_fstat {
-	char *file_name;		/* File name. */
-	size_t st_pagesize;		/* Page size. */
-	u_int32_t st_cache_hit;		/* Pages found in the cache. */
-	u_int32_t st_cache_miss;	/* Pages not found in the cache. */
-	u_int32_t st_map;		/* Pages from mapped files. */
-	u_int32_t st_page_create;	/* Pages created in the cache. */
-	u_int32_t st_page_in;		/* Pages read in. */
-	u_int32_t st_page_out;		/* Pages written out. */
-};
-
-#if defined(__cplusplus)
-extern "C" {
-#endif
-int	memp_close __P((DB_MPOOL *));
-int	memp_fclose __P((DB_MPOOLFILE *));
-int	memp_fget __P((DB_MPOOLFILE *, db_pgno_t *, u_int32_t, void *));
-int	memp_fopen __P((DB_MPOOL *, const char *,
-	    u_int32_t, int, size_t, DB_MPOOL_FINFO *, DB_MPOOLFILE **));
-int	memp_fput __P((DB_MPOOLFILE *, void *, u_int32_t));
-int	memp_fset __P((DB_MPOOLFILE *, void *, u_int32_t));
-int	memp_fsync __P((DB_MPOOLFILE *));
-int	memp_open __P((const char *, u_int32_t, int, DB_ENV *, DB_MPOOL **));
-int	memp_register __P((DB_MPOOL *, int,
-	    int (*)(db_pgno_t, void *, DBT *),
-	    int (*)(db_pgno_t, void *, DBT *)));
-int	memp_stat __P((DB_MPOOL *,
-	    DB_MPOOL_STAT **, DB_MPOOL_FSTAT ***, void *(*)(size_t)));
-int	memp_sync __P((DB_MPOOL *, DB_LSN *));
-int	memp_trickle __P((DB_MPOOL *, int, int *));
-int	memp_unlink __P((const char *, int, DB_ENV *));
-#if defined(__cplusplus)
-}
-#endif
-
-/*******************************************************
- * Transactions.
- *******************************************************/
-#define	DB_TXNVERSION	1
-#define	DB_TXNMAGIC	0x041593
-
-/* Operations values to the tx_recover() function. */
-#define	DB_TXN_BACKWARD_ROLL	1	/* Read the log backwards. */
-#define	DB_TXN_FORWARD_ROLL	2	/* Read the log forwards. */
-#define	DB_TXN_OPENFILES	3	/* Read for open files. */
-#define	DB_TXN_REDO		4	/* Redo the operation. */
-#define	DB_TXN_UNDO		5	/* Undo the operation. */
-
-/* Internal transaction status values. */
-
-/* Transaction statistics structure. */
-struct __db_txn_active {
-	u_int32_t	txnid;		/* Transaction ID */
-	DB_LSN		lsn;		/* Lsn of the begin record */
-};
-
-struct __db_txn_stat {
-	DB_LSN	  st_last_ckp;		/* lsn of the last checkpoint */
-	DB_LSN	  st_pending_ckp;	/* last checkpoint did not finish */
-	time_t	  st_time_ckp;		/* time of last checkpoint */
-	u_int32_t st_last_txnid;	/* last transaction id given out */
-	u_int32_t st_maxtxns;	/* maximum number of active txns */
-	u_int32_t st_naborts;	/* number of aborted transactions */
-	u_int32_t st_nbegins;	/* number of begun transactions */
-	u_int32_t st_ncommits;	/* number of committed transactions */
-	u_int32_t st_nactive;	/* number of active transactions */
-	DB_TXN_ACTIVE
-		 *st_txnarray;	/* array of active transactions */
-	u_int32_t st_region_wait;	/* Region lock granted after wait. */
-	u_int32_t st_region_nowait;	/* Region lock granted without wait. */
-	u_int32_t st_refcnt;		/* Region reference count. */
-	u_int32_t st_regsize;		/* Region size. */
-};
-
-#if defined(__cplusplus)
-extern "C" {
-#endif
-int	  txn_abort __P((DB_TXN *));
-int	  txn_begin __P((DB_TXNMGR *, DB_TXN *, DB_TXN **));
-int	  txn_checkpoint __P((const DB_TXNMGR *, u_int32_t, u_int32_t));
-int	  txn_commit __P((DB_TXN *));
-int	  txn_close __P((DB_TXNMGR *));
-u_int32_t txn_id __P((DB_TXN *));
-int	  txn_open __P((const char *, u_int32_t, int, DB_ENV *, DB_TXNMGR **));
-int	  txn_prepare __P((DB_TXN *));
-int	  txn_stat __P((DB_TXNMGR *, DB_TXN_STAT **, void *(*)(size_t)));
-int	  txn_unlink __P((const char *, int, DB_ENV *));
-#if defined(__cplusplus)
-}
-#endif
-
-#ifndef DB_DBM_HSEARCH
-#define	DB_DBM_HSEARCH	0		/* No historic interfaces by default. */
-#endif
-#if DB_DBM_HSEARCH != 0
-/*******************************************************
- * Dbm/Ndbm historic interfaces.
- *******************************************************/
-#define	DBM_INSERT	0		/* Flags to dbm_store(). */
-#define	DBM_REPLACE	1
-
-/*
- * The db(3) support for ndbm(3) always appends this suffix to the
- * file name to avoid overwriting the user's original database.
- */
-#define	DBM_SUFFIX	".db"
-
-#if defined(_XPG4_2)
-typedef struct {
-	char *dptr;
-	size_t dsize;
-} datum;
-#else
-typedef struct {
-	char *dptr;
-	int dsize;
-} datum;
-#endif
-
-/*
- * Translate DBM calls into DB calls so that DB doesn't step on the
- * application's name space.
- *
- * The global variables dbrdonly, dirf and pagf were not retained when
- * 4BSD replaced the dbm interface with ndbm, and are not supported here.
- */
-#define	dbminit(a)	__db_dbm_init(a)
-#if !defined(__cplusplus)
-#define	delete(a)	__db_dbm_delete(a)
-#endif
-#define	fetch(a)	__db_dbm_fetch(a)
-#define	firstkey	__db_dbm_firstkey
-#define	nextkey(a)	__db_dbm_nextkey(a)
-#define	store(a, b)	__db_dbm_store(a, b)
-
-/* Prototype the DB calls. */
-#if defined(__cplusplus)
-extern "C" {
-#endif
-int	 __db_dbm_init __P((char *));
-int	 __db_dbm_delete __P((datum));
-int	 __db_dbm_dbrdonly __P((void));
-int	 __db_dbm_dirf __P((void));
-datum	 __db_dbm_fetch __P((datum));
-datum	 __db_dbm_firstkey __P((void));
-datum	 __db_dbm_nextkey __P((datum));
-int	 __db_dbm_pagf __P((void));
-int	 __db_dbm_store __P((datum, datum));
-#if defined(__cplusplus)
-}
-#endif
-
-/*
- * Translate NDBM calls into DB calls so that DB doesn't step on the
- * application's name space.
- */
-#define	dbm_clearerr(a)		__db_ndbm_clearerr(a)
-#define	dbm_close(a)		__db_ndbm_close(a)
-#define	dbm_delete(a, b)	__db_ndbm_delete(a, b)
-#define	dbm_dirfno(a)		__db_ndbm_dirfno(a)
-#define	dbm_error(a)		__db_ndbm_error(a)
-#define	dbm_fetch(a, b)		__db_ndbm_fetch(a, b)
-#define	dbm_firstkey(a)		__db_ndbm_firstkey(a)
-#define	dbm_nextkey(a)		__db_ndbm_nextkey(a)
-#define	dbm_open(a, b, c)	__db_ndbm_open(a, b, c)
-#define	dbm_pagfno(a)		__db_ndbm_pagfno(a)
-#define	dbm_rdonly(a)		__db_ndbm_rdonly(a)
-#define	dbm_store(a, b, c, d)	__db_ndbm_store(a, b, c, d)
-
-/* Prototype the DB calls. */
-#if defined(__cplusplus)
-extern "C" {
-#endif
-int	 __db_ndbm_clearerr __P((DBM *));
-void	 __db_ndbm_close __P((DBM *));
-int	 __db_ndbm_delete __P((DBM *, datum));
-int	 __db_ndbm_dirfno __P((DBM *));
-int	 __db_ndbm_error __P((DBM *));
-datum	 __db_ndbm_fetch __P((DBM *, datum));
-datum	 __db_ndbm_firstkey __P((DBM *));
-datum	 __db_ndbm_nextkey __P((DBM *));
-DBM	*__db_ndbm_open __P((const char *, int, int));
-int	 __db_ndbm_pagfno __P((DBM *));
-int	 __db_ndbm_rdonly __P((DBM *));
-int	 __db_ndbm_store __P((DBM *, datum, datum, int));
-#if defined(__cplusplus)
-}
-#endif
-
-/*******************************************************
- * Hsearch historic interface.
- *******************************************************/
-typedef enum {
-	FIND, ENTER
-} ACTION;
-
-typedef struct entry {
-	char *key;
-	char *data;
-} ENTRY;
-
-/*
- * Translate HSEARCH calls into DB calls so that DB doesn't step on the
- * application's name space.
- */
-#define	hcreate(a)	__db_hcreate(a)
-#define	hdestroy	__db_hdestroy
-#define	hsearch(a, b)	__db_hsearch(a, b)
-
-/* Prototype the DB calls. */
-#if defined(__cplusplus)
-extern "C" {
-#endif
-int	 __db_hcreate __P((size_t));
-void	 __db_hdestroy __P((void));
-ENTRY	*__db_hsearch __P((ENTRY, ACTION));
-#if defined(__cplusplus)
-}
-#endif
-#endif /* DB_DBM_HSEARCH */
-
-/*
- * XXX
- * MacOS: Reset Metrowerks C enum sizes.
- */
-#ifdef __MWERKS__
-#pragma enumsalwaysint reset
-#endif
-#endif /* !_DB_H_ */
diff --git a/db2/include/db_am.h b/db2/include/db_am.h
index 0c189244a2..fe2176d772 100644
--- a/db2/include/db_am.h
+++ b/db2/include/db_am.h
@@ -4,7 +4,7 @@
  * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  *
- *	@(#)db_am.h	10.9 (Sleepycat) 4/10/98
+ *	@(#)db_am.h	10.15 (Sleepycat) 11/22/98
  */
 #ifndef _DB_AM_H
 #define _DB_AM_H
@@ -16,6 +16,8 @@
 #define	DB_REM_BIG	0x40
 #define	DB_SPLITOLD	0x50
 #define	DB_SPLITNEW	0x60
+#define	DB_ADD_PAGE	0x70
+#define	DB_REM_PAGE	0x80
 
 /*
  * Standard initialization and shutdown macros for all recovery functions.
@@ -27,34 +29,31 @@
  *	int ret;
  */
 #define	REC_INTRO(func) {						\
-	file_dbp = mdbp = NULL;						\
+	file_dbp = NULL;						\
+	dbc = NULL;							\
 	if ((ret = func(dbtp->data, &argp)) != 0)			\
 		goto out;						\
-	if ((ret = __db_fileid_to_db(logp, &mdbp, argp->fileid)) != 0) {\
-		if (ret	== DB_DELETED)					\
+	if ((ret =							\
+	    __db_fileid_to_db(logp, &file_dbp, argp->fileid)) != 0) {	\
+		if (ret	== DB_DELETED) {				\
 			ret = 0;					\
+			goto done;					\
+		}							\
 		goto out;						\
 	}								\
-	if (mdbp == NULL)						\
+	if (file_dbp == NULL)						\
 		goto out;						\
-	if (F_ISSET(mdbp, DB_AM_THREAD)) {				\
-		if ((ret = __db_gethandle(mdbp,				\
-		    mdbp->type == DB_HASH ? __ham_hdup : __bam_bdup,	\
-		    &file_dbp)) != 0)					\
-			goto out;					\
-	} else								\
-		file_dbp = mdbp;					\
-	F_SET(file_dbp, DB_AM_RECOVER);					\
+	if ((ret = file_dbp->cursor(file_dbp, NULL, &dbc, 0)) != 0)	\
+		goto out;						\
+	F_SET(dbc, DBC_RECOVER);					\
 	mpf = file_dbp->mpf;						\
 }
+
 #define	REC_CLOSE {							\
 	if (argp != NULL)						\
-		__db_free(argp);					\
-	if (file_dbp != NULL) {						\
-		F_CLR(file_dbp, DB_AM_RECOVER);				\
-		if (F_ISSET(file_dbp, DB_AM_THREAD))			\
-			__db_puthandle(file_dbp);			\
-	}								\
+		__os_free(argp, sizeof(*argp));				\
+	if (dbc != NULL)						\
+		dbc->c_close(dbc);					\
 	return (ret);							\
 }
 
@@ -67,7 +66,7 @@
 }
 #define	REC_NOOP_CLOSE {						\
 	if (argp != NULL)						\
-		__db_free(argp);					\
+		__os_free(argp, sizeof(*argp));				\
 	return (ret);							\
 }
 
diff --git a/db2/include/db_auto.h b/db2/include/db_auto.h
index 1b07c748e8..0d1e43a26a 100644
--- a/db2/include/db_auto.h
+++ b/db2/include/db_auto.h
@@ -70,6 +70,7 @@ typedef struct _db_relink_args {
 	u_int32_t type;
 	DB_TXN *txnid;
 	DB_LSN prev_lsn;
+	u_int32_t	opcode;
 	u_int32_t	fileid;
 	db_pgno_t	pgno;
 	DB_LSN 	lsn;
@@ -107,16 +108,4 @@ typedef struct _db_debug_args {
 	u_int32_t	arg_flags;
 } __db_debug_args;
 
-
-#define	DB_db_noop	(DB_db_BEGIN + 8)
-
-typedef struct _db_noop_args {
-	u_int32_t type;
-	DB_TXN *txnid;
-	DB_LSN prev_lsn;
-	u_int32_t	fileid;
-	db_pgno_t	pgno;
-	DB_LSN 	prevlsn;
-} __db_noop_args;
-
 #endif
diff --git a/db2/include/db_cxx.h b/db2/include/db_cxx.h
index fc04d5d66b..f415d594b5 100644
--- a/db2/include/db_cxx.h
+++ b/db2/include/db_cxx.h
@@ -4,7 +4,7 @@
  * Copyright (c) 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  *
- *	@(#)db_cxx.h	10.17 (Sleepycat) 5/2/98
+ *	@(#)db_cxx.h	10.30 (Sleepycat) 11/22/98
  */
 
 #ifndef _DB_CXX_H_
@@ -49,7 +49,8 @@
 // Forward declarations
 //
 
-#include "db.h"
+#include <iostream.h>
+#include <db.h>
 
 class Db;                                        // forward
 class Dbc;                                       // forward
@@ -66,6 +67,19 @@ class Dbt;                                       // forward
 class DbTxn;                                     // forward
 class DbTxnMgr;                                  // forward
 
+// These classes are not defined here and should be invisible
+// to the user, but some compilers require forward references.
+// There is one for each use of the DEFINE_DB_CLASS macro.
+
+class DbLockTabImp;
+class DbLogImp;
+class DbMpoolImp;
+class DbMpoolFileImp;
+class DbImp;
+class DbTxnImp;
+class DbTxnMgrImp;
+
+
 ////////////////////////////////////////////////////////////////
 ////////////////////////////////////////////////////////////////
 //
@@ -175,15 +189,11 @@ private:
 
 class _exported DbLock
 {
-    friend DbLockTab;
+    friend class DbLockTab;
 
 public:
-    DbLock(u_int);
     DbLock();
 
-    u_int get_lock_id();
-    void set_lock_id(u_int);
-
     int put(DbLockTab *locktab);
 
     DbLock(const DbLock &);
@@ -194,18 +204,21 @@ protected:
     // since its contained class is not allocated by db.
     // (see comment at top)
 
+    DbLock(DB_LOCK);
     DB_LOCK lock_;
 };
 
 class _exported DbLockTab
 {
-friend DbEnv;
+    friend class DbEnv;
+
 public:
     int close();
     int detect(u_int32_t flags, int atype);
     int get(u_int32_t locker, u_int32_t flags, const Dbt *obj,
             db_lockmode_t lock_mode, DbLock *lock);
     int id(u_int32_t *idp);
+    int stat(DB_LOCK_STAT **statp, void *(*db_malloc)(size_t));
     int vec(u_int32_t locker, u_int32_t flags, DB_LOCKREQ list[],
 	    int nlist, DB_LOCKREQ **elistp);
 
@@ -244,13 +257,14 @@ private:
 
 class _exported DbLsn : protected DB_LSN
 {
-    friend DbLog;               // friendship needed to cast to base class
-    friend DbMpool;
+    friend class DbLog;          // friendship needed to cast to base class
+    friend class DbMpool;
 };
 
 class _exported DbLog
 {
-friend DbEnv;
+    friend class DbEnv;
+
 public:
     int archive(char **list[], u_int32_t flags, void *(*db_malloc)(size_t));
     int close();
@@ -300,7 +314,8 @@ private:
 
 class _exported DbMpoolFile
 {
-friend DbEnv;
+    friend class DbEnv;
+
 public:
     int close();
     int get(db_pgno_t *pgnoaddr, u_int32_t flags, void *pagep);
@@ -337,7 +352,8 @@ private:
 
 class _exported DbMpool
 {
-friend DbEnv;
+    friend class DbEnv;
+
 public:
     int close();
 
@@ -388,7 +404,8 @@ private:
 
 class _exported DbTxnMgr
 {
-friend DbEnv;
+    friend class DbEnv;
+
 public:
     int begin(DbTxn *pid, DbTxn **tid);
     int checkpoint(u_int32_t kbyte, u_int32_t min) const;
@@ -422,7 +439,8 @@ private:
 
 class _exported DbTxn
 {
-friend DbTxnMgr;
+    friend class DbTxnMgr;
+
 public:
     int abort();
     int commit();
@@ -461,90 +479,78 @@ private:
 //
 class _exported DbInfo : protected DB_INFO
 {
-    friend DbEnv;
-    friend Db;
+    friend class DbEnv;
+    friend class Db;
 
 public:
     DbInfo();
     ~DbInfo();
 
     // Byte order.
-    int	get_lorder() const;
     void set_lorder(int);
 
     // Underlying cache size.
-    size_t get_cachesize() const;
     void set_cachesize(size_t);
 
     // Underlying page size.
-    size_t get_pagesize() const;
     void set_pagesize(size_t);
 
     // Local heap allocation.
     typedef void *(*db_malloc_fcn)(size_t);
-    db_malloc_fcn get_malloc() const;
     void set_malloc(db_malloc_fcn);
 
+    // Duplicate compare function.
+    typedef int (*dup_compare_fcn)(const DBT *, const DBT *);
+    void set_dup_compare(dup_compare_fcn);
+
     ////////////////////////////////////////////////////////////////
     // Btree access method.
 
     // Maximum keys per page.
-    int	get_bt_maxkey() const;
     void set_bt_maxkey(int);
 
     // Minimum keys per page.
-    int	get_bt_minkey() const;
     void set_bt_minkey(int);
 
     // Comparison function.
     typedef int (*bt_compare_fcn)(const DBT *, const DBT *);
-    bt_compare_fcn get_bt_compare() const;
     void set_bt_compare(bt_compare_fcn);
 
     // Prefix function.
     typedef size_t (*bt_prefix_fcn)(const DBT *, const DBT *);
-    bt_prefix_fcn get_bt_prefix() const;
     void set_bt_prefix(bt_prefix_fcn);
 
     ////////////////////////////////////////////////////////////////
     // Hash access method.
 
     // Fill factor.
-    u_int32_t get_h_ffactor() const;
     void set_h_ffactor(u_int32_t);
 
     // Number of elements.
-    u_int32_t get_h_nelem() const;
     void set_h_nelem(u_int32_t);
 
     // Hash function.
     typedef u_int32_t (*h_hash_fcn)(const void *, u_int32_t);
-    h_hash_fcn get_h_hash() const;
     void set_h_hash(h_hash_fcn);
 
     ////////////////////////////////////////////////////////////////
     // Recno access method.
 
     // Fixed-length padding byte.
-    int	get_re_pad() const;
     void set_re_pad(int);
 
     // Variable-length delimiting byte.
-    int	get_re_delim() const;
     void set_re_delim(int);
 
     // Length for fixed-length records.
-    u_int32_t get_re_len() const;
     void set_re_len(u_int32_t);
 
     // Source file name.
-    char *get_re_source() const;
     void set_re_source(char *);
 
     // Note: some flags are set as side effects of calling
     // above "set" methods.
     //
-    u_int32_t get_flags() const;
     void set_flags(u_int32_t);
 
 
@@ -570,11 +576,11 @@ private:
 //
 class _exported DbEnv : protected DB_ENV
 {
-friend DbTxnMgr;
-friend DbLog;
-friend DbLockTab;
-friend DbMpool;
-friend Db;
+    friend class DbTxnMgr;
+    friend class DbLog;
+    friend class DbLockTab;
+    friend class DbMpool;
+    friend class Db;
 
 public:
 
@@ -603,6 +609,10 @@ public:
     //
     int appexit();
 
+    // Version information.  A static method so it can be obtained anytime.
+    //
+    static char *version(int *major, int *minor, int *patch);
+
     ////////////////////////////////////////////////////////////////
     // simple get/set access methods
     //
@@ -610,74 +620,41 @@ public:
     // use the default constructor along with appinit().
 
     // Byte order.
-    int	get_lorder() const;
     void set_lorder(int);
 
+    // Panic callback.
+    typedef void (*db_paniccall_fcn)(DbEnv *, int);
+    void set_paniccall(db_paniccall_fcn);
+
     // Error message callback.
     typedef void (*db_errcall_fcn)(const char *, char *);
-    db_errcall_fcn get_errcall() const;
     void set_errcall(db_errcall_fcn);
 
     // Error message file stream.
-    FILE *get_errfile() const;
     void set_errfile(FILE *);
 
     // Error message prefix.
-    const char *get_errpfx() const;
     void set_errpfx(const char *);
 
     // Generate debugging messages.
-    int get_verbose() const;
     void set_verbose(int);
 
     ////////////////////////////////////////////////////////////////
-    // User paths.
-
-    // Database home.
-    char *get_home() const;
-    void set_home(char *);
-
-    // Database log file directory.
-    char *get_log_dir() const;
-    void set_log_dir(char *);
-
-    // Database tmp file directory.
-    char *get_tmp_dir() const;
-    void set_tmp_dir(char *);
-
-    // Database data file directories.
-    char **get_data_dir() const;
-    void set_data_dir(char **);
-
-    // Database data file slots.
-    int get_data_cnt() const;
-    void set_data_cnt(int);
-
-    // Next Database data file slot.
-    int get_data_next() const;
-    void set_data_next(int);
-
-
-    ////////////////////////////////////////////////////////////////
     // Locking.
 
     // Return from lock_open().
     DbLockTab *get_lk_info() const;
 
     // Two dimensional conflict matrix.
-    u_int8_t *get_lk_conflicts() const;
     void set_lk_conflicts(u_int8_t *);
 
     // Number of lock modes in table.
-    int get_lk_modes() const;
     void set_lk_modes(int);
 
     // Maximum number of locks.
-    u_int32_t get_lk_max() const;
     void set_lk_max(u_int32_t);
 
     // Deadlock detect on every conflict.
-    u_int32_t get_lk_detect() const;
     void set_lk_detect(u_int32_t);
 
 
@@ -688,7 +665,6 @@ public:
     DbLog *get_lg_info() const;
 
     // Maximum file size.
-    u_int32_t get_lg_max() const;
     void set_lg_max(u_int32_t);
 
 
@@ -699,11 +675,9 @@ public:
     DbMpool *get_mp_info() const;
 
     // Maximum file size for mmap.
-    size_t get_mp_mmapsize() const;
     void set_mp_mmapsize(size_t);
 
     // Bytes in the mpool cache.
-    size_t get_mp_size() const;
     void set_mp_size(size_t);
 
 
@@ -714,16 +688,13 @@ public:
     DbTxnMgr *get_tx_info() const;
 
     // Maximum number of transactions.
-    u_int32_t get_tx_max() const;
     void set_tx_max(u_int32_t);
 
     // Dispatch function for recovery.
     typedef int (*tx_recover_fcn)(DB_LOG *, DBT *, DB_LSN *, int, void *);
-    tx_recover_fcn get_tx_recover() const;
     void set_tx_recover(tx_recover_fcn);
 
     // Flags.
-    u_int32_t get_flags() const;
     void set_flags(u_int32_t);
 
     ////////////////////////////////////////////////////////////////
@@ -736,7 +707,6 @@ public:
     //
     enum ErrorModel { Exception, ErrorReturn };
     void set_error_model(ErrorModel);
-    ErrorModel get_error_model() const;
 
     // If an error is detected and the error call function
     // or stream is set, a message is dispatched or printed.
@@ -747,11 +717,11 @@ public:
     // call set_error_stream() to force all errors to a C++ stream.
     // It is unwise to mix these approaches.
     //
-    class ostream* get_error_stream() const;
     void set_error_stream(class ostream*);
 
     // used internally
-    static int runtime_error(const char *caller, int err, int in_destructor = 0);
+    static int runtime_error(const char *caller, int err,
+                             int in_destructor = 0, int force_throw = 0);
 
 private:
     // We can add data to this class if needed
@@ -778,23 +748,27 @@ private:
 //
 class _exported Db
 {
-    friend DbEnv;
+    friend class DbEnv;
 
 public:
     int close(u_int32_t flags);
-    int cursor(DbTxn *txnid, Dbc **cursorp);
+    int cursor(DbTxn *txnid, Dbc **cursorp, u_int32_t flags);
     int del(DbTxn *txnid, Dbt *key, u_int32_t flags);
     int fd(int *fdp);
     int get(DbTxn *txnid, Dbt *key, Dbt *data, u_int32_t flags);
+    int join(Dbc **curslist, u_int32_t flags, Dbc **dbcp);
     int put(DbTxn *txnid, Dbt *key, Dbt *data, u_int32_t flags);
     int stat(void *sp, void *(*db_malloc)(size_t), u_int32_t flags);
     int sync(u_int32_t flags);
 
+    int get_byteswapped() const;
     DBTYPE get_type() const;
 
     static int open(const char *fname, DBTYPE type, u_int32_t flags,
                     int mode, DbEnv *dbenv, DbInfo *info, Db **dbpp);
 
+    static int xa_open(const char *fname, DBTYPE type, u_int32_t flags,
+                    int mode, DbInfo *info, Db **dbpp);
 private:
     // We can add data to this class if needed
     // since it is implemented via a pointer.
@@ -817,11 +791,11 @@ private:
 //
 class _exported Dbt : private DBT
 {
-    friend Dbc;
-    friend Db;
-    friend DbLog;
-    friend DbMpoolFile;
-    friend DbLockTab;
+    friend class Dbc;
+    friend class Db;
+    friend class DbLog;
+    friend class DbMpoolFile;
+    friend class DbLockTab;
 
 public:
 
@@ -863,7 +837,7 @@ private:
 
 class _exported Dbc : protected DBC
 {
-    friend Db;
+    friend class Db;
 
 public:
     int close();
diff --git a/db2/include/db_ext.h b/db2/include/db_ext.h
index 8a03db9f64..1ad1643bfa 100644
--- a/db2/include/db_ext.h
+++ b/db2/include/db_ext.h
@@ -1,8 +1,11 @@
 /* DO NOT EDIT: automatically built by dist/distrib. */
 #ifndef _db_ext_h_
 #define _db_ext_h_
-int __db_pgerr __P((DB *, db_pgno_t));
-int __db_pgfmt __P((DB *, db_pgno_t));
+int __db_close __P((DB *, u_int32_t));
+int __db_init_wrapper __P((DB *));
+int __db_cprint __P((DB *));
+int __db_c_destroy __P((DBC *));
+int __db_sync __P((DB *, u_int32_t));
 int __db_addrem_log
     __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
     u_int32_t, u_int32_t, db_pgno_t, u_int32_t,
@@ -33,8 +36,8 @@ int __db_ovref_print
 int __db_ovref_read __P((void *, __db_ovref_args **));
 int __db_relink_log
     __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
-    u_int32_t, db_pgno_t, DB_LSN *, db_pgno_t,
-    DB_LSN *, db_pgno_t, DB_LSN *));
+    u_int32_t, u_int32_t, db_pgno_t, DB_LSN *,
+    db_pgno_t, DB_LSN *, db_pgno_t, DB_LSN *));
 int __db_relink_print
    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
 int __db_relink_read __P((void *, __db_relink_args **));
@@ -52,12 +55,6 @@ int __db_debug_log
 int __db_debug_print
    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
 int __db_debug_read __P((void *, __db_debug_args **));
-int __db_noop_log
-    __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
-    u_int32_t, db_pgno_t, DB_LSN *));
-int __db_noop_print
-   __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
-int __db_noop_read __P((void *, __db_noop_args **));
 int __db_init_print __P((DB_ENV *));
 int __db_init_recover __P((DB_ENV *));
 int __db_pgin __P((db_pgno_t, size_t, void *));
@@ -71,23 +68,40 @@ int __db_txnlist_find __P((void *, u_int32_t));
 void __db_txnlist_end __P((void *));
 void __db_txnlist_gen __P((void *, int));
 void __db_txnlist_print __P((void *));
-int __db_dput __P((DB *,
-   DBT *, PAGE **, db_indx_t *, int (*)(DB *, u_int32_t, PAGE **)));
-int __db_drem __P((DB *,
-   PAGE **, u_int32_t, int (*)(DB *, PAGE *)));
-int __db_dend __P((DB *, db_pgno_t, PAGE **));
- int __db_ditem __P((DB *, PAGE *, u_int32_t, u_int32_t));
+int __db_dput __P((DBC *, DBT *,
+   PAGE **, db_indx_t *, int (*)(DBC *, u_int32_t, PAGE **)));
+int __db_drem __P((DBC *,
+   PAGE **, u_int32_t, int (*)(DBC *, PAGE *)));
+int __db_dend __P((DBC *, db_pgno_t, PAGE **));
+ int __db_ditem __P((DBC *, PAGE *, u_int32_t, u_int32_t));
 int __db_pitem
-    __P((DB *, PAGE *, u_int32_t, u_int32_t, DBT *, DBT *));
-int __db_relink __P((DB *, PAGE *, PAGE **, int));
-int __db_ddup __P((DB *, db_pgno_t, int (*)(DB *, PAGE *)));
+    __P((DBC *, PAGE *, u_int32_t, u_int32_t, DBT *, DBT *));
+int __db_relink __P((DBC *, u_int32_t, PAGE *, PAGE **, int));
+int __db_ddup __P((DBC *, db_pgno_t, int (*)(DBC *, PAGE *)));
+int __db_dsearch __P((DBC *,
+    int, DBT *, db_pgno_t, db_indx_t *, PAGE **, int *));
+int __db_cdelchk __P((const DB *, u_int32_t, int, int));
+int __db_cgetchk __P((const DB *, DBT *, DBT *, u_int32_t, int));
+int __db_cputchk __P((const DB *,
+   const DBT *, DBT *, u_int32_t, int, int));
+int __db_closechk __P((const DB *, u_int32_t));
+int __db_delchk __P((const DB *, DBT *, u_int32_t, int));
+int __db_getchk __P((const DB *, const DBT *, DBT *, u_int32_t));
+int __db_joinchk __P((const DB *, u_int32_t));
+int __db_putchk
+   __P((const DB *, DBT *, const DBT *, u_int32_t, int, int));
+int __db_statchk __P((const DB *, u_int32_t));
+int __db_syncchk __P((const DB *, u_int32_t));
+int __db_eopnotsup __P((const DB_ENV *));
+int __db_join __P((DB *, DBC **, u_int32_t, DBC **));
 int __db_goff __P((DB *, DBT *,
     u_int32_t, db_pgno_t, void **, u_int32_t *));
-int __db_poff __P((DB *, const DBT *, db_pgno_t *,
-    int (*)(DB *, u_int32_t, PAGE **)));
-int __db_ovref __P((DB *, db_pgno_t, int32_t));
-int __db_doff __P((DB *, db_pgno_t, int (*)(DB *, PAGE *)));
-int __db_moff __P((DB *, const DBT *, db_pgno_t));
+int __db_poff __P((DBC *, const DBT *, db_pgno_t *,
+    int (*)(DBC *, u_int32_t, PAGE **)));
+int __db_ovref __P((DBC *, db_pgno_t, int32_t));
+int __db_doff __P((DBC *, db_pgno_t, int (*)(DBC *, PAGE *)));
+int __db_moff __P((DB *, const DBT *, db_pgno_t, u_int32_t,
+    int (*)(const DBT *, const DBT *), int *));
 void __db_loadme __P((void));
 FILE *__db_prinit __P((FILE *));
 int __db_dump __P((DB *, char *, int));
@@ -111,11 +125,8 @@ int __db_relink_recover
 int __db_addpage_recover
    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
 int __db_debug_recover __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
-int __db_noop_recover __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
 int __db_ret __P((DB *,
    PAGE *, u_int32_t, DBT *, void **, u_int32_t *));
 int __db_retcopy __P((DBT *,
    void *, u_int32_t, void **, u_int32_t *, void *(*)(size_t)));
-int __db_gethandle __P((DB *, int (*)(DB *, DB *), DB **));
-int __db_puthandle __P((DB *));
 #endif /* _db_ext_h_ */
diff --git a/db2/include/db_int.h.src b/db2/include/db_int.h.src
deleted file mode 100644
index d67e2c428c..0000000000
--- a/db2/include/db_int.h.src
+++ /dev/null
@@ -1,402 +0,0 @@
-/*-
- * See the file LICENSE for redistribution information.
- *
- * Copyright (c) 1996, 1997, 1998
- *	Sleepycat Software.  All rights reserved.
- *
- *	@(#)db_int.h.src	10.62 (Sleepycat) 5/23/98
- */
-
-#ifndef _DB_INTERNAL_H_
-#define	_DB_INTERNAL_H_
-
-#include "db.h"				/* Standard DB include file. */
-#include "queue.h"
-
-/*******************************************************
- * General purpose constants and macros.
- *******************************************************/
-#define	UINT16_T_MAX	    0xffff	/* Maximum 16 bit unsigned. */
-#define	UINT32_T_MAX	0xffffffff	/* Maximum 32 bit unsigned. */
-
-#define	DB_MIN_PGSIZE	0x000200	/* Minimum page size. */
-#define	DB_MAX_PGSIZE	0x010000	/* Maximum page size. */
-
-#define	DB_MINCACHE	10		/* Minimum cached pages */
-
-#define	MEGABYTE	1048576
-
-/*
- * If we are unable to determine the underlying filesystem block size, use
- * 8K on the grounds that most OS's use less than 8K as their VM page size.
- */
-#define	DB_DEF_IOSIZE	(8 * 1024)
-
-/*
- * Aligning items to particular sizes or in pages or memory.  ALIGNP is a
- * separate macro, as we've had to cast the pointer to different integral
- * types on different architectures.
- *
- * We cast pointers into unsigned longs when manipulating them because C89
- * guarantees that u_long is the largest available integral type and further,
- * to never generate overflows.  However, neither C89 or C9X  requires that
- * any integer type be large enough to hold a pointer, although C9X created
- * the intptr_t type, which is guaranteed to hold a pointer but may or may
- * not exist.  At some point in the future, we should test for intptr_t and
- * use it where available.
- */
-#undef	ALIGNTYPE
-#define	ALIGNTYPE		u_long
-#undef	ALIGNP
-#define	ALIGNP(value, bound)	ALIGN((ALIGNTYPE)value, bound)
-#undef	ALIGN
-#define	ALIGN(value, bound)	(((value) + (bound) - 1) & ~((bound) - 1))
-
-/*
- * There are several on-page structures that are declared to have a number of
- * fields followed by a variable length array of items.  The structure size
- * without including the variable length array or the address of the first of
- * those elements can be found using SSZ.
- *
- * This macro can also be used to find the offset of a structure element in a
- * structure.  This is used in various places to copy structure elements from
- * unaligned memory references, e.g., pointers into a packed page.
- *
- * There are two versions because compilers object if you take the address of
- * an array.
- */
-#undef	SSZ
-#define SSZ(name, field)	((int)&(((name *)0)->field))
-
-#undef	SSZA
-#define SSZA(name, field)	((int)&(((name *)0)->field[0]))
-
-/* Macros to return per-process address, offsets based on shared regions. */
-#define	R_ADDR(base, offset)	((void *)((u_int8_t *)((base)->addr) + offset))
-#define	R_OFFSET(base, p)	((u_int8_t *)(p) - (u_int8_t *)(base)->addr)
-
-/* Free and free-string macros that overwrite memory. */
-#ifdef DIAGNOSTIC
-#undef	FREE
-#define	FREE(p, len) {							\
-	memset(p, 0xff, len);						\
-	__db_free(p);							\
-}
-#undef	FREES
-#define	FREES(p) {							\
-	FREE(p, strlen(p));						\
-}
-#else
-#undef	FREE
-#define	FREE(p, len) {							\
-	__db_free(p);							\
-}
-#undef	FREES
-#define	FREES(p) {							\
-	__db_free(p);							\
-}
-#endif
-
-/* Structure used to print flag values. */
-typedef struct __fn {
-	u_int32_t mask;			/* Flag value. */
-	const char *name;		/* Flag name. */
-} FN;
-
-/* Set, clear and test flags. */
-#define	F_SET(p, f)	(p)->flags |= (f)
-#define	F_CLR(p, f)	(p)->flags &= ~(f)
-#define	F_ISSET(p, f)	((p)->flags & (f))
-#define	LF_SET(f)	(flags |= (f))
-#define	LF_CLR(f)	(flags &= ~(f))
-#define	LF_ISSET(f)	(flags & (f))
-
-/* Display separator string. */
-#undef	DB_LINE
-#define	DB_LINE "=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-="
-
-/* Global variables. */
-typedef struct __db_globals {
-	int db_mutexlocks;		/* DB_MUTEXLOCKS */
-	int db_region_anon;		/* DB_REGION_ANON, DB_REGION_NAME */
-	int db_region_init;		/* DB_REGION_INIT */
-	int db_tsl_spins;		/* DB_TSL_SPINS */
-	int db_pageyield;		/* DB_PAGEYIELD */
-} DB_GLOBALS;
-extern	DB_GLOBALS	__db_global_values;
-#define	DB_GLOBAL(v)	__db_global_values.v
-
-/* Unused, or not-used-yet variable.  "Shut that bloody compiler up!" */
-#define	COMPQUIET(n, v)	(n) = (v)
-
-/*
- * Win16 needs specific syntax on callback functions.  Nobody else cares.
- */
-#ifndef	DB_CALLBACK
-#define	DB_CALLBACK	/* Nothing. */
-#endif
-
-/*******************************************************
- * Files.
- *******************************************************/
- /*
-  * We use 1024 as the maximum path length.  It's too hard to figure out what
-  * the real path length is, as it was traditionally stored in <sys/param.h>,
-  * and that file isn't always available.
-  */
-#undef	MAXPATHLEN
-#define	MAXPATHLEN	1024
-
-#define	PATH_DOT	"."	/* Current working directory. */
-#define	PATH_SEPARATOR	"/"	/* Path separator character. */
-
-/*******************************************************
- * Mutex support.
- *******************************************************/
-@spin_line1@
-@spin_line2@
-@spin_line3@
-
-/*
- * !!!
- * Various systems require different alignments for mutexes (the worst we've
- * seen so far is 16-bytes on some HP architectures).  The mutex (tsl_t) must
- * be first in the db_mutex_t structure, which must itself be first in the
- * region.  This ensures the alignment is as returned by mmap(2), which should
- * be sufficient.  All other mutex users must ensure proper alignment locally.
- */
-#define	MUTEX_ALIGNMENT	@mutex_align@
-
-/*
- * The offset of a mutex in memory.
- *
- * !!!
- * Not an off_t, so backing file offsets MUST be less than 4Gb.  See the
- * off field of the db_mutex_t as well.
- */
-#define	MUTEX_LOCK_OFFSET(a, b)	((u_int32_t)((u_int8_t *)b - (u_int8_t *)a))
-
-typedef struct _db_mutex_t {
-#ifdef HAVE_SPINLOCKS
-	tsl_t	  tsl_resource;		/* Resource test and set. */
-#ifdef DIAGNOSTIC
-	u_int32_t pid;			/* Lock holder: 0 or process pid. */
-#endif
-#else
-	u_int32_t off;			/* Backing file offset. */
-	u_int32_t pid;			/* Lock holder: 0 or process pid. */
-#endif
-	u_int32_t spins;		/* Spins before block. */
-	u_int32_t mutex_set_wait;	/* Granted after wait. */
-	u_int32_t mutex_set_nowait;	/* Granted without waiting. */
-} db_mutex_t;
-
-#include "mutex_ext.h"
-
-/*******************************************************
- * Access methods.
- *******************************************************/
-/* Lock/unlock a DB thread. */
-#define	DB_THREAD_LOCK(dbp)						\
-	if (F_ISSET(dbp, DB_AM_THREAD))					\
-	    (void)__db_mutex_lock((db_mutex_t *)(dbp)->mutexp, -1);
-#define	DB_THREAD_UNLOCK(dbp)						\
-	if (F_ISSET(dbp, DB_AM_THREAD))					\
-	    (void)__db_mutex_unlock((db_mutex_t *)(dbp)->mutexp, -1);
-
-/* Btree/recno local statistics structure. */
-struct __db_bt_lstat;	typedef struct __db_bt_lstat DB_BTREE_LSTAT;
-struct __db_bt_lstat {
-	u_int32_t bt_freed;		/* Pages freed for reuse. */
-	u_int32_t bt_pfxsaved;		/* Bytes saved by prefix compression. */
-	u_int32_t bt_split;		/* Total number of splits. */
-	u_int32_t bt_rootsplit;		/* Root page splits. */
-	u_int32_t bt_fastsplit;		/* Fast splits. */
-	u_int32_t bt_added;		/* Items added. */
-	u_int32_t bt_deleted;		/* Items deleted. */
-	u_int32_t bt_get;		/* Items retrieved. */
-	u_int32_t bt_cache_hit;		/* Hits in fast-insert code. */
-	u_int32_t bt_cache_miss;	/* Misses in fast-insert code. */
-};
-
-/*******************************************************
- * Environment.
- *******************************************************/
-/* Type passed to __db_appname(). */
-typedef enum {
-	DB_APP_NONE=0,			/* No type (region). */
-	DB_APP_DATA,			/* Data file. */
-	DB_APP_LOG,			/* Log file. */
-	DB_APP_TMP			/* Temporary file. */
-} APPNAME;
-
-/*******************************************************
- * Shared memory regions.
- *******************************************************/
-/*
- * The shared memory regions share an initial structure so that the general
- * region code can handle races between the region being deleted and other
- * processes waiting on the region mutex.
- *
- * !!!
- * Note, the mutex must be the first entry in the region; see comment above.
- */
-typedef struct _rlayout {
-	db_mutex_t lock;		/* Region mutex. */
-#define	DB_REGIONMAGIC	0x120897
-	u_int32_t  valid;		/* Valid magic number. */
-	u_int32_t  refcnt;		/* Region reference count. */
-	size_t	   size;		/* Region length. */
-	int	   majver;		/* Major version number. */
-	int	   minver;		/* Minor version number. */
-	int	   patch;		/* Patch version number. */
-#define	INVALID_SEGID	-1
-	int	   segid;		/* shmget(2) ID, or Win16 segment ID. */
-
-#define	REGION_ANONYMOUS	0x01	/* Region is/should be in anon mem. */
-	u_int32_t  flags;
-} RLAYOUT;
-
-/*
- * DB creates all regions on 4K boundaries out of sheer paranoia, so that
- * we don't make the underlying VM unhappy.
- */
-#define	DB_VMPAGESIZE	(4 * 1024)
-#define	DB_ROUNDOFF(i) {						\
-	(i) += DB_VMPAGESIZE - 1;					\
-	(i) -= (i) % DB_VMPAGESIZE;					\
-}
-
-/*
- * The interface to region attach is nasty, there is a lot of complex stuff
- * going on, which has to be retained between create/attach and detach.  The
- * REGINFO structure keeps track of it.
- */
-struct __db_reginfo;	typedef struct __db_reginfo REGINFO;
-struct __db_reginfo {
-					/* Arguments. */
-	DB_ENV	   *dbenv;		/* Region naming info. */
-	APPNAME	    appname;		/* Region naming info. */
-	char	   *path;		/* Region naming info. */
-	const char *file;		/* Region naming info. */
-	int	    mode;		/* Region mode, if a file. */
-	size_t	    size;		/* Region size. */
-	u_int32_t   dbflags;		/* Region file open flags, if a file. */
-
-					/* Results. */
-	char	   *name;		/* Region name. */
-	void	   *addr;		/* Region address. */
-	int	    fd;			/* Fcntl(2) locking file descriptor.
-					   NB: this is only valid if a regular
-					   file is backing the shared region,
-					   and mmap(2) is being used to map it
-					   into our address space. */
-	int	    segid;		/* shmget(2) ID, or Win16 segment ID. */
-
-					/* Shared flags. */
-/*				0x0001	COMMON MASK with RLAYOUT structure. */
-#define	REGION_CANGROW		0x0002	/* Can grow. */
-#define	REGION_CREATED		0x0004	/* Created. */
-#define	REGION_HOLDINGSYS	0x0008	/* Holding system resources. */
-#define	REGION_LASTDETACH	0x0010	/* Delete on last detach. */
-#define	REGION_MALLOC		0x0020	/* Created in malloc'd memory. */
-#define	REGION_PRIVATE		0x0040	/* Private to thread/process. */
-#define	REGION_REMOVED		0x0080	/* Already deleted. */
-#define	REGION_SIZEDEF		0x0100	/* Use default region size if exists. */
-	u_int32_t   flags;
-};
-
-/*******************************************************
- * Mpool.
- *******************************************************/
-/*
- * File types for DB access methods.  Negative numbers are reserved to DB.
- */
-#define	DB_FTYPE_BTREE		-1	/* Btree. */
-#define	DB_FTYPE_HASH		-2	/* Hash. */
-
-/* Structure used as the DB pgin/pgout pgcookie. */
-typedef struct __dbpginfo {
-	size_t	db_pagesize;		/* Underlying page size. */
-	int	needswap;		/* If swapping required. */
-} DB_PGINFO;
-
-/*******************************************************
- * Log.
- *******************************************************/
-/* Initialize an LSN to 'zero'. */
-#define	ZERO_LSN(LSN) {							\
-	(LSN).file = 0;							\
-	(LSN).offset = 0;						\
-}
-
-/* Return 1 if LSN is a 'zero' lsn, otherwise return 0. */
-#define	IS_ZERO_LSN(LSN)	((LSN).file == 0)
-
-/* Test if we need to log a change. */
-#define	DB_LOGGING(dbp)							\
-	(F_ISSET(dbp, DB_AM_LOGGING) && !F_ISSET(dbp, DB_AM_RECOVER))
-
-#ifdef DIAGNOSTIC
-/*
- * Debugging macro to log operations.
- *	If DEBUG_WOP is defined, log operations that modify the database.
- *	If DEBUG_ROP is defined, log operations that read the database.
- *
- * D dbp
- * T txn
- * O operation (string)
- * K key
- * A data
- * F flags
- */
-#define	LOG_OP(D, T, O, K, A, F) {					\
-	DB_LSN _lsn;							\
-	DBT _op;							\
-	if (DB_LOGGING((D))) {						\
-		memset(&_op, 0, sizeof(_op));				\
-		_op.data = O;						\
-		_op.size = strlen(O) + 1;				\
-		(void)__db_debug_log((D)->dbenv->lg_info,		\
-		    T, &_lsn, 0, &_op, (D)->log_fileid, K, A, F);	\
-	}								\
-}
-#ifdef DEBUG_ROP
-#define	DEBUG_LREAD(D, T, O, K, A, F)	LOG_OP(D, T, O, K, A, F)
-#else
-#define	DEBUG_LREAD(D, T, O, K, A, F)
-#endif
-#ifdef DEBUG_WOP
-#define	DEBUG_LWRITE(D, T, O, K, A, F)	LOG_OP(D, T, O, K, A, F)
-#else
-#define	DEBUG_LWRITE(D, T, O, K, A, F)
-#endif
-#else
-#define	DEBUG_LREAD(D, T, O, K, A, F)
-#define	DEBUG_LWRITE(D, T, O, K, A, F)
-#endif /* DIAGNOSTIC */
-
-/*******************************************************
- * Transactions and recovery.
- *******************************************************/
-/*
- * Out of band value for a lock.  The locks are returned to callers as offsets
- * into the lock regions.  Since the RLAYOUT structure begins all regions, an
- * offset of 0 is guaranteed not to be a valid lock.
- */
-#define	LOCK_INVALID	0
-
-/* The structure allocated for every transaction. */
-struct __db_txn {
-	DB_TXNMGR	*mgrp;		/* Pointer to transaction manager. */
-	DB_TXN		*parent;	/* Pointer to transaction's parent. */
-	DB_LSN		last_lsn;	/* Lsn of last log write. */
-	u_int32_t	txnid;		/* Unique transaction id. */
-	size_t		off;		/* Detail structure within region. */
-	TAILQ_ENTRY(__db_txn) links;
-};
-
-#include "os_func.h"
-#include "os_ext.h"
-
-#endif /* !_DB_INTERNAL_H_ */
diff --git a/db2/include/db_join.h b/db2/include/db_join.h
new file mode 100644
index 0000000000..cb27e21f68
--- /dev/null
+++ b/db2/include/db_join.h
@@ -0,0 +1,23 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1998
+ *	Sleepycat Software.  All rights reserved.
+ *
+ *	@(#)db_join.h	10.2 (Sleepycat) 10/4/98
+ */
+
+#ifndef _DB_JOIN_H
+#define _DB_JOIN_H
+/*
+ * Joins use a join cursor that is similar to a regular DB cursor except
+ * that it only supports c_get and c_close functionality.  Also, it does
+ * not support the full range of flags for get.
+ */
+typedef struct __join_cursor {
+	u_int32_t j_init;		/* Set when cursor is initialized. */
+	DBC 	**j_curslist;		/* Array of cursors in the join. */
+	DB	 *j_primary;		/* Primary dbp. */
+	DBT	  j_key;		/* Used to do lookups. */
+} JOIN_CURSOR;
+#endif
diff --git a/db2/include/db_page.h b/db2/include/db_page.h
index e1846cbbbd..5c9ca674f1 100644
--- a/db2/include/db_page.h
+++ b/db2/include/db_page.h
@@ -4,7 +4,7 @@
  * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  *
- *	@(#)db_page.h	10.15 (Sleepycat) 5/1/98
+ *	@(#)db_page.h	10.18 (Sleepycat) 12/2/98
  */
 
 #ifndef _DB_PAGE_H_
@@ -43,14 +43,6 @@
 
 /*
  * Btree metadata page layout:
- *
- *	+-----------------------------------+
- *	|    lsn    |   pgno    |   magic   |
- *	+-----------------------------------+
- *	|   version |  pagesize |   free    |
- *	+-----------------------------------+
- *	|    flags  |  unused ...	    |
- *	+-----------------------------------+
  */
 typedef struct _btmeta {
 	DB_LSN	  lsn;		/* 00-07: LSN. */
@@ -72,10 +64,6 @@ typedef struct _btmeta {
 	u_int32_t re_pad;	/* 44-47: Recno: fixed-length record pad. */
 				/* 48-67: Unique file ID. */
 	u_int8_t  uid[DB_FILE_ID_LEN];
-
-	u_int32_t spare[13];	/* 68-123: Save some room for growth. */
-
-	DB_BTREE_LSTAT stat;	/* 124-163: Statistics. */
 } BTMETA;
 
 /************************************************************************
@@ -84,18 +72,6 @@ typedef struct _btmeta {
 
 /*
  * Hash metadata page layout:
- *
- *	+-----------------------------------+
- *	|    lsn    |   magic   |  version  |
- *	+-----------------------------------+
- *	|  pagesize | ovfl_point| last_freed|
- *	+-----------------------------------+
- *	| max_bucket| high_mask | low_mask  |
- *	+-----------------------------------+
- * 	| ffactor   |   nelem   | charkey   |
- *	+-----------------------------------+
- *	| spares[32]|   flags   | unused    |
- *	+-----------------------------------+
  */
 /* Hash Table Information */
 typedef struct hashhdr {	/* Disk resident portion */
@@ -359,10 +335,6 @@ typedef struct _hkeydata {
 
 /*
  * The third type is the H_OFFPAGE, represented by the HOFFPAGE structure:
- *
- *	+-----------------------------------+
- *	|   type    |  pgno_t   | total len |
- *	+-----------------------------------+
  */
 typedef struct _hoffpage {
 	u_int8_t  type;		/*    00: Page type and delete flag. */
@@ -383,10 +355,6 @@ typedef struct _hoffpage {
 
 /*
  * The fourth type is H_OFFDUP represented by the HOFFDUP structure:
- *
- *	+-----------------------+
- *	|   type    |  pgno_t   |
- *	+-----------------------+
  */
 typedef struct _hoffdup {
 	u_int8_t  type;		/*    00: Page type and delete flag. */
@@ -431,10 +399,6 @@ typedef struct _hoffdup {
 
 /*
  * The first type is B_KEYDATA, represented by the BKEYDATA structure:
- *
- *	+-----------------------------------+
- *	|   length  |    type   | key/data  |
- *	+-----------------------------------+
  */
 typedef struct _bkeydata {
 	db_indx_t len;		/* 00-01: Key/data item length. */
@@ -457,13 +421,7 @@ typedef struct _bkeydata {
 
 /*
  * The second and third types are B_DUPLICATE and B_OVERFLOW, represented
- * by the BOVERFLOW structure:
- *
- *	+-----------------------------------+
- *	| total len |    type   |   unused  |
- *	+-----------------------------------+
- *	| nxt: page |  nxt: off | nxt: len  |
- *	+-----------------------------------+
+ * by the BOVERFLOW structure.
  */
 typedef struct _boverflow {
 	db_indx_t unused1;	/* 00-01: Padding, unused. */
@@ -501,10 +459,6 @@ typedef struct _boverflow {
 
 /*
  * Btree internal entry.
- *
- *	+-----------------------------------+
- *	| leaf pgno |   type    | data ...  |
- *	+-----------------------------------+
  */
 typedef struct _binternal {
 	db_indx_t  len;		/* 00-01: Key/data item length. */
@@ -535,12 +489,8 @@ typedef struct _binternal {
 /*
  * The recno internal entry.
  *
- *	+-----------------------+
- *	| leaf pgno | # of recs |
- *	+-----------------------+
- *
  * XXX
- * Why not fold this into the db_indx_t structure, it's fixed length.
+ * Why not fold this into the db_indx_t structure, it's fixed length?
  */
 typedef struct _rinternal {
 	db_pgno_t  pgno;	/* 00-03: Page number of referenced page. */
diff --git a/db2/include/hash.h b/db2/include/hash.h
index e55c2102cb..5d85a2a3a7 100644
--- a/db2/include/hash.h
+++ b/db2/include/hash.h
@@ -43,13 +43,22 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- *	@(#)hash.h	10.8 (Sleepycat) 4/10/98
+ *	@(#)hash.h	10.14 (Sleepycat) 10/4/98
  */
 
 /* Cursor structure definitions. */
 typedef struct cursor_t {
-	DBC		*db_cursor;
+	DBC		*dbc;
+
+	/* Per-thread information */
+	DB_LOCK hlock;			/* Metadata page lock. */
+	HASHHDR *hdr;			/* Pointer to meta-data page. */
+	PAGE *split_buf;		/* Temporary buffer for splits. */
+	struct __db_h_stat stats;	/* Hash statistics. */
+
+	/* Hash cursor information */
 	db_pgno_t	bucket;		/* Bucket we are traversing. */
+	db_pgno_t	lbucket;	/* Bucket for which we are locked. */
 	DB_LOCK		lock;		/* Lock held on the current bucket. */
 	PAGE		*pagep;		/* The current page. */
 	db_pgno_t	pgno;		/* Current page number. */
@@ -62,104 +71,83 @@ typedef struct cursor_t {
 	db_indx_t	dup_tlen;	/* Total length of duplicate entry. */
 	u_int32_t	seek_size;	/* Number of bytes we need for add. */
 	db_pgno_t	seek_found_page;/* Page on which we can insert. */
-	u_int32_t	big_keylen;	/* Length of big_key buffer. */
-	void		*big_key;	/* Temporary buffer for big keys. */
-	u_int32_t	big_datalen;	/* Length of big_data buffer. */
-	void		*big_data;	/* Temporary buffer for big data. */
-#define	H_OK		0x0001
-#define	H_NOMORE	0x0002
-#define	H_DELETED	0x0004
-#define	H_ISDUP		0x0008
-#define	H_EXPAND	0x0020
-	u_int32_t	flags;		/* Is cursor inside a dup set. */
+
+#define	H_DELETED	0x0001		/* Cursor item is deleted. */
+#define	H_DUPONLY	0x0002		/* Dups only; do not change key. */
+#define	H_EXPAND	0x0004		/* Table expanded. */
+#define	H_ISDUP		0x0008		/* Cursor is within duplicate set. */
+#define	H_NOMORE	0x0010		/* No more entries in bucket. */
+#define	H_OK		0x0020		/* Request succeeded. */
+#define H_DIRTY		0x0040		/* Meta-data page needs to be written */
+#define	H_ORIGINAL	0x0080		/* Bucket lock existed on entry. */
+	u_int32_t	flags;
 } HASH_CURSOR;
 
 #define	IS_VALID(C) ((C)->bucket != BUCKET_INVALID)
 
+#define	SAVE_CURSOR(ORIG, COPY) {					\
+	F_SET((ORIG), H_ORIGINAL);					\
+	*(COPY) = *(ORIG);						\
+}
 
-typedef struct htab {		/* Memory resident data structure. */
-	DB *dbp;		/* Pointer to parent db structure. */
-	DB_LOCK hlock;		/* Metadata page lock. */
-	HASHHDR *hdr;		/* Pointer to meta-data page. */
-	u_int32_t (*hash) __P((const void *, u_int32_t)); /* Hash Function */
-	PAGE *split_buf;	/* Temporary buffer for splits. */
-	int local_errno;	/* Error Number -- for DBM compatability */
-	u_long hash_accesses;	/* Number of accesses to this table. */
-	u_long hash_collisions;	/* Number of collisions on search. */
-	u_long hash_expansions;	/* Number of times we added a bucket. */
-	u_long hash_overflows;	/* Number of overflow pages. */
-	u_long hash_bigpages;	/* Number of big key/data pages. */
-} HTAB;
-
-/*
- * Macro used for interface functions to set the txnid in the DBP.
- */
-#define	SET_LOCKER(D, T) ((D)->txn = (T))
+#define	RESTORE_CURSOR(D, ORIG, COPY, RET) {				\
+	if ((RET) == 0) {						\
+		if ((ORIG)->dbc->txn == NULL &&				\
+		    (COPY)->lock != 0 && (ORIG)->lock != (COPY)->lock)	\
+			(void)lock_put((D)->dbenv->lk_info, (COPY)->lock); \
+	} else {							\
+		if ((ORIG)->dbc->txn == NULL &&				\
+		    (ORIG)->lock != 0 && (ORIG)->lock != (COPY)->lock)	\
+			(void)lock_put((D)->dbenv->lk_info, (ORIG)->lock); \
+		*ORIG = *COPY;						\
+	}								\
+}
 
 /*
  * More interface macros used to get/release the meta data page.
  */
-#define	GET_META(D, H) {						\
-	int _r;								\
-	if (F_ISSET(D, DB_AM_LOCKING) && !F_ISSET(D, DB_AM_RECOVER)) {	\
-		(D)->lock.pgno = BUCKET_INVALID;			\
-	    	if ((_r = lock_get((D)->dbenv->lk_info,			\
-	    	    (D)->txn == NULL ? (D)->locker : (D)->txn->txnid,	\
-		    0, &(D)->lock_dbt, DB_LOCK_READ,			\
-		    &(H)->hlock)) != 0)					\
-			return (_r < 0 ? EAGAIN : _r);			\
+#define	GET_META(D, I, R) {						\
+	if (F_ISSET(D, DB_AM_LOCKING) &&				\
+	    !F_ISSET((I)->dbc, DBC_RECOVER)) {				\
+		(I)->dbc->lock.pgno = BUCKET_INVALID;			\
+		(R) = lock_get((D)->dbenv->lk_info, (I)->dbc->locker, 	\
+		    0, &(I)->dbc->lock_dbt, DB_LOCK_READ, &(I)->hlock);	\
+		(R) = (R) < 0 ? EAGAIN : (R);				\
 	}								\
-	if ((_r = __ham_get_page(D, 0, (PAGE **)&((H)->hdr))) != 0) {	\
-		if ((H)->hlock) {					\
-			(void)lock_put((D)->dbenv->lk_info, (H)->hlock);\
-			(H)->hlock = 0;					\
-		}							\
-		return (_r);						\
+	if ((R) == 0 && 						\
+	    ((R) = __ham_get_page(D, 0, (PAGE **)&((I)->hdr))) != 0 &&  \
+	    (I)->hlock != LOCK_INVALID) {				\
+		(void)lock_put((D)->dbenv->lk_info, (I)->hlock);	\
+		(I)->hlock = LOCK_INVALID;				\
 	}								\
 }
 
-#define	RELEASE_META(D, H) {						\
-	if (!F_ISSET(D, DB_AM_RECOVER) &&				\
-	    (D)->txn == NULL && (H)->hlock)				\
-		(void)lock_put((H)->dbp->dbenv->lk_info, (H)->hlock);	\
-	(H)->hlock = 0;							\
-	if ((H)->hdr)							\
-		(void)__ham_put_page(D, (PAGE *)(H)->hdr,		\
-		    F_ISSET(D, DB_HS_DIRTYMETA) ? 1 : 0);		\
-	(H)->hdr = NULL;						\
-	F_CLR(D, DB_HS_DIRTYMETA);					\
+#define	RELEASE_META(D, I) {						\
+	if ((I)->hdr)							\
+		(void)__ham_put_page(D, (PAGE *)(I)->hdr,		\
+		    F_ISSET(I, H_DIRTY) ? 1 : 0);			\
+	(I)->hdr = NULL;						\
+	if (!F_ISSET((I)->dbc, DBC_RECOVER) &&				\
+	    (I)->dbc->txn == NULL && (I)->hlock)			\
+		(void)lock_put((D)->dbenv->lk_info, (I)->hlock);	\
+	(I)->hlock = LOCK_INVALID;					\
+	F_CLR(I, H_DIRTY);						\
 }
 
-#define	DIRTY_META(H, R) {						\
-	if (F_ISSET((H)->dbp, DB_AM_LOCKING) &&				\
-	    !F_ISSET((H)->dbp, DB_AM_RECOVER)) {			\
+#define	DIRTY_META(D, I, R) {						\
+	if (F_ISSET(D, DB_AM_LOCKING) &&				\
+	    !F_ISSET((I)->dbc, DBC_RECOVER)) {				\
 		DB_LOCK _tmp;						\
-		(H)->dbp->lock.pgno = BUCKET_INVALID;			\
-	    	if (((R) = lock_get((H)->dbp->dbenv->lk_info,		\
-	    	    (H)->dbp->txn ? (H)->dbp->txn->txnid :		\
-	    	    (H)->dbp->locker, 0, &(H)->dbp->lock_dbt,		\
+		(I)->dbc->lock.pgno = BUCKET_INVALID;			\
+	    	if (((R) = lock_get((D)->dbenv->lk_info,		\
+	    	    (I)->dbc->locker, 0, &(I)->dbc->lock_dbt,		\
 	    	    DB_LOCK_WRITE, &_tmp)) == 0)			\
-			(R) = lock_put((H)->dbp->dbenv->lk_info,	\
-			    (H)->hlock);				\
+			(R) = lock_put((D)->dbenv->lk_info, (I)->hlock);\
 		else if ((R) < 0)					\
 			(R) = EAGAIN;					\
-		(H)->hlock = _tmp;					\
+		(I)->hlock = _tmp;					\
 	}								\
-	F_SET((H)->dbp, DB_HS_DIRTYMETA);				\
-}
-
-/* Allocate and discard thread structures. */
-#define	H_GETHANDLE(dbp, dbpp, ret)					\
-	if (F_ISSET(dbp, DB_AM_THREAD))					\
-		ret = __db_gethandle(dbp, __ham_hdup, dbpp);		\
-	else {								\
-		ret = 0;						\
-		*dbpp = dbp;						\
-	}
-
-#define	H_PUTHANDLE(dbp) {						\
-	if (F_ISSET(dbp, DB_AM_THREAD))					\
-		__db_puthandle(dbp);					\
+	F_SET((I), H_DIRTY);						\
 }
 
 /* Test string. */
@@ -171,16 +159,16 @@ typedef struct htab {		/* Memory resident data structure. */
  * the table, we can allocate extra pages.  We keep track of how many pages
  * we've allocated at each point to calculate bucket to page number mapping.
  */
-#define	BUCKET_TO_PAGE(H, B) \
-	((B) + 1 + ((B) ? (H)->hdr->spares[__db_log2((B)+1)-1] : 0))
+#define	BUCKET_TO_PAGE(I, B) \
+	((B) + 1 + ((B) ? (I)->hdr->spares[__db_log2((B)+1)-1] : 0))
 
-#define	PGNO_OF(H, S, O) (BUCKET_TO_PAGE((H), (1 << (S)) - 1) + (O))
+#define	PGNO_OF(I, S, O) (BUCKET_TO_PAGE((I), (1 << (S)) - 1) + (O))
 
 /* Constraints about number of pages and how much data goes on a page. */
 
 #define	MAX_PAGES(H)	UINT32_T_MAX
 #define	MINFILL		4
-#define	ISBIG(H, N)	(((N) > ((H)->hdr->pagesize / MINFILL)) ? 1 : 0)
+#define	ISBIG(I, N)	(((N) > ((I)->hdr->pagesize / MINFILL)) ? 1 : 0)
 
 /* Shorthands for accessing structure */
 #define	NDX_INVALID	0xFFFF
diff --git a/db2/include/hash_ext.h b/db2/include/hash_ext.h
index 7086adcc44..fe17dc7b39 100644
--- a/db2/include/hash_ext.h
+++ b/db2/include/hash_ext.h
@@ -3,13 +3,11 @@
 #define _hash_ext_h_
 int __ham_open __P((DB *, DB_INFO *));
 int __ham_close __P((DB *));
-int __ham_c_iclose __P((DB *, DBC *));
-int __ham_expand_table __P((HTAB *));
-u_int32_t __ham_call_hash __P((HTAB *, u_int8_t *, int32_t));
+int __ham_c_init __P((DBC *));
+u_int32_t __ham_call_hash __P((HASH_CURSOR *, u_int8_t *, int32_t));
 int __ham_init_dbt __P((DBT *, u_int32_t, void **, u_int32_t *));
 void __ham_c_update
    __P((HASH_CURSOR *, db_pgno_t, u_int32_t, int, int));
-int  __ham_hdup __P((DB *, DB *));
 int __ham_insdel_log
     __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
     u_int32_t, u_int32_t, db_pgno_t, u_int32_t,
@@ -72,48 +70,45 @@ int __ham_init_recover __P((DB_ENV *));
 int __ham_pgin __P((db_pgno_t, void *, DBT *));
 int __ham_pgout __P((db_pgno_t, void *, DBT *));
 int __ham_mswap __P((void *));
-#ifdef DEBUG
-void __ham_dump_bucket __P((HTAB *, u_int32_t));
-#endif
-int __ham_add_dup __P((HTAB *, HASH_CURSOR *, DBT *, u_int32_t));
-void __ham_move_offpage __P((HTAB *, PAGE *, u_int32_t, db_pgno_t));
+int __ham_add_dup __P((DBC *, DBT *, u_int32_t));
+void __ham_move_offpage __P((DBC *, PAGE *, u_int32_t, db_pgno_t));
+void __ham_dsearch __P((DBC *, DBT *, u_int32_t *, int *));
 u_int32_t __ham_func2 __P((const void *, u_int32_t));
 u_int32_t __ham_func3 __P((const void *, u_int32_t));
 u_int32_t __ham_func4 __P((const void *, u_int32_t));
 u_int32_t __ham_func5 __P((const void *, u_int32_t));
-int __ham_item __P((HTAB *, HASH_CURSOR *, db_lockmode_t));
-int __ham_item_reset __P((HTAB *, HASH_CURSOR *));
+int __ham_item __P((DBC *, db_lockmode_t));
+int __ham_item_reset __P((DBC *));
 void __ham_item_init __P((HASH_CURSOR *));
-int __ham_item_done __P((HTAB *, HASH_CURSOR *, int));
-int __ham_item_last __P((HTAB *, HASH_CURSOR *, db_lockmode_t));
-int __ham_item_first __P((HTAB *, HASH_CURSOR *, db_lockmode_t));
-int __ham_item_prev __P((HTAB *, HASH_CURSOR *, db_lockmode_t));
-int __ham_item_next __P((HTAB *, HASH_CURSOR *, db_lockmode_t));
+int __ham_item_done __P((DBC *, int));
+int __ham_item_last __P((DBC *, db_lockmode_t));
+int __ham_item_first __P((DBC *, db_lockmode_t));
+int __ham_item_prev __P((DBC *, db_lockmode_t));
+int __ham_item_next __P((DBC *, db_lockmode_t));
 void __ham_putitem __P((PAGE *p, const DBT *, int));
 void __ham_reputpair
    __P((PAGE *p, u_int32_t, u_int32_t, const DBT *, const DBT *));
-int __ham_del_pair __P((HTAB *, HASH_CURSOR *, int));
-int __ham_replpair __P((HTAB *, HASH_CURSOR *, DBT *, u_int32_t));
+int __ham_del_pair __P((DBC *, int));
+int __ham_replpair __P((DBC *, DBT *, u_int32_t));
 void __ham_onpage_replace __P((PAGE *, size_t, u_int32_t, int32_t,
     int32_t,  DBT *));
-int __ham_split_page __P((HTAB *, u_int32_t, u_int32_t));
-int __ham_add_el
-   __P((HTAB *, HASH_CURSOR *, const DBT *, const DBT *, int));
-void __ham_copy_item __P((HTAB *, PAGE *, u_int32_t, PAGE *));
-int __ham_add_ovflpage __P((HTAB *, PAGE *, int, PAGE **));
-int __ham_new_page __P((HTAB *, u_int32_t, u_int32_t, PAGE **));
-int __ham_del_page __P((DB *, PAGE *));
+int __ham_split_page __P((DBC *, u_int32_t, u_int32_t));
+int __ham_add_el __P((DBC *, const DBT *, const DBT *, int));
+void __ham_copy_item __P((size_t, PAGE *, u_int32_t, PAGE *));
+int __ham_add_ovflpage __P((DBC *, PAGE *, int, PAGE **));
+int __ham_new_page __P((DB *, u_int32_t, u_int32_t, PAGE **));
+int __ham_del_page __P((DBC *, PAGE *));
 int __ham_put_page __P((DB *, PAGE *, int32_t));
-int __ham_dirty_page __P((HTAB *, PAGE *));
+int __ham_dirty_page __P((DB *, PAGE *));
 int __ham_get_page __P((DB *, db_pgno_t, PAGE **));
-int __ham_overflow_page __P((DB *, u_int32_t, PAGE **));
+int __ham_overflow_page
+    __P((DBC *, u_int32_t, PAGE **));
 #ifdef DEBUG
-db_pgno_t __bucket_to_page __P((HTAB *, db_pgno_t));
+db_pgno_t __bucket_to_page __P((HASH_CURSOR *, db_pgno_t));
 #endif
-void __ham_init_ovflpages __P((HTAB *));
-int __ham_get_cpage __P((HTAB *, HASH_CURSOR *, db_lockmode_t));
-int __ham_next_cpage
-   __P((HTAB *, HASH_CURSOR *, db_pgno_t, int, u_int32_t));
+void __ham_init_ovflpages __P((DBC *));
+int __ham_get_cpage __P((DBC *, db_lockmode_t));
+int __ham_next_cpage __P((DBC *, db_pgno_t, int, u_int32_t));
 void __ham_dpair __P((DB *, PAGE *, u_int32_t));
 int __ham_insdel_recover
     __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
@@ -131,5 +126,5 @@ int __ham_ovfl_recover
     __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
 int __ham_copypage_recover
   __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
-int __ham_stat __P((DB *, FILE *));
+int __ham_stat __P((DB *, void *, void *(*)(size_t), u_int32_t));
 #endif /* _hash_ext_h_ */
diff --git a/db2/include/lock.h b/db2/include/lock.h
index 47a38b8783..13364ca7a5 100644
--- a/db2/include/lock.h
+++ b/db2/include/lock.h
@@ -4,7 +4,7 @@
  * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  *
- *	@(#)lock.h	10.15 (Sleepycat) 5/10/98
+ *	@(#)lock.h	10.17 (Sleepycat) 1/3/99
  */
 
 typedef struct __db_lockobj	DB_LOCKOBJ;
@@ -22,6 +22,12 @@ typedef struct __db_lockobj	DB_LOCKOBJ;
  */
 #define DB_LOCK_MAXID		0x7fffffff
 
+/* Check for region catastrophic shutdown. */
+#define	LOCK_PANIC_CHECK(lt) {						\
+	if ((lt)->region->hdr.panic)					\
+		return (DB_RUNRECOVERY);				\
+}
+
 /*
  * The lock region consists of:
  *	The DB_LOCKREGION structure (sizeof(DB_LOCKREGION)).
@@ -135,10 +141,24 @@ struct __db_lock {
 	u_int32_t	refcount;	/* Reference count the lock. */
 	db_lockmode_t	mode;		/* What sort of lock. */
 	ssize_t		obj;		/* Relative offset of object struct. */
+	size_t		txnoff;		/* Offset of holding transaction. */
 	db_status_t	status;		/* Status of this lock. */
 };
 
 /*
+ * This is a serious layering violation.  To support nested transactions, we
+ * need to be able to tell that a lock is held by a transaction (as opposed to
+ * some other locker) and to be able to traverse the parent/descendent chain.
+ * In order to do this, each lock held by a transaction maintains a reference
+ * to the shared memory transaction structure so it can be accessed during lock
+ * promotion.  As the structure is in shared memory, we cannot store a pointer
+ * to it, so we use the offset within the region.  As nothing lives at region
+ * offset 0, we use that to indicate that there is no transaction associated
+ * with the current lock.
+ */
+#define TXN_IS_HOLDING(L)	((L)->txnoff != 0 /* INVALID_REG_OFFSET */)
+
+/*
  * We cannot return pointers to the user (else we cannot easily grow regions),
  * so we return offsets in the region.  These must be converted to and from
  * regular pointers.  Always use the macros below.
diff --git a/db2/include/lock_ext.h b/db2/include/lock_ext.h
index 1e0522c6b5..ce7994774a 100644
--- a/db2/include/lock_ext.h
+++ b/db2/include/lock_ext.h
@@ -6,6 +6,9 @@ int __lock_is_locked
 void __lock_printlock __P((DB_LOCKTAB *, struct __db_lock *, int));
 int __lock_getobj  __P((DB_LOCKTAB *,
     u_int32_t, const DBT *, u_int32_t type, DB_LOCKOBJ **));
+int __lock_downgrade __P((DB_LOCKTAB *,
+    DB_LOCK, db_lockmode_t, u_int32_t));
+void __lock_panic __P((DB_ENV *));
 int __lock_validate_region __P((DB_LOCKTAB *));
 int __lock_grow_region __P((DB_LOCKTAB *, int, size_t));
 void __lock_dump_region __P((DB_LOCKTAB *, char *, FILE *));
diff --git a/db2/include/log.h b/db2/include/log.h
index 7d5161cc9d..50309085aa 100644
--- a/db2/include/log.h
+++ b/db2/include/log.h
@@ -4,7 +4,7 @@
  * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  *
- *	@(#)log.h	10.25 (Sleepycat) 4/10/98
+ *	@(#)log.h	10.30 (Sleepycat) 10/11/98
  */
 
 #ifndef _LOG_H_
@@ -16,8 +16,10 @@ struct __log;		typedef struct __log LOG;
 struct __log_persist;	typedef struct __log_persist LOGP;
 
 #ifndef MAXLFNAME
-#define	MAXLFNAME	99999		/* Maximum log file name. */
-#define	LFNAME		"log.%05d"	/* Log file name template. */
+#define	LFPREFIX	"log."		/* Log file name prefix. */
+#define	LFNAME		"log.%010d"	/* Log file name template. */
+#define	LFNAME_V1	"log.%05d"	/* Log file name template, rev 1. */
+#define	MAXLFNAME	2000000000	/* Maximum log file name. */
 #endif
 					/* Default log name. */
 #define DB_DEFAULT_LOG_FILE	"__db_log.share"
@@ -38,6 +40,12 @@ struct __log_persist;	typedef struct __log_persist LOGP;
 	(void)__db_mutex_unlock(&((RLAYOUT *)(dblp)->lp)->lock,		\
 	    (dblp)->reginfo.fd)
 
+/* Check for region catastrophic shutdown. */
+#define	LOG_PANIC_CHECK(dblp) {						\
+	if ((dblp)->lp->rlayout.panic)					\
+		return (DB_RUNRECOVERY);				\
+}
+
 /*
  * The per-process table that maps log file-id's to DB structures.
  */
@@ -84,7 +92,28 @@ struct __db_log {
 
 	char	 *dir;			/* Directory argument. */
 
-	u_int32_t flags;		/* Support the DB_AM_XXX flags. */
+/*
+ * These fields are used by XA; since XA forbids threaded execution, these
+ * do not have to be protected.
+ */
+	void 	*xa_info;		/* Committed transaction list that
+					 * has to be carried between calls
+					 * to xa_recover. */
+	DB_LSN	xa_lsn;			/* Position of an XA recovery scan. */
+	DB_LSN	xa_first;		/* LSN to which we need to roll back
+					   for this XA recovery scan. */
+
+	/*
+	 * !!!
+	 * Currently used to hold:
+	 *	DB_AM_THREAD	(a DB flag)
+	 *	DBC_RECOVER	(a DBC flag)
+	 * If they are ever the same bits, we're in serious trouble.
+	 */
+#if DB_AM_THREAD == DBC_RECOVER
+	DB_AM_THREAD, DBC_RECOVER, FLAG MISMATCH
+#endif
+	u_int32_t flags;
 };
 
 /*
diff --git a/db2/include/log_ext.h b/db2/include/log_ext.h
index bf3bcb02ce..842a3f4265 100644
--- a/db2/include/log_ext.h
+++ b/db2/include/log_ext.h
@@ -1,8 +1,9 @@
 /* DO NOT EDIT: automatically built by dist/distrib. */
 #ifndef _log_ext_h_
 #define _log_ext_h_
+void __log_panic __P((DB_ENV *));
 int __log_find __P((DB_LOG *, int, int *));
-int __log_valid __P((DB_LOG *, LOG *, int));
+int __log_valid __P((DB_LOG *, u_int32_t, int));
 int __log_register_log
     __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
     u_int32_t, const DBT *, const DBT *, u_int32_t,
@@ -15,7 +16,7 @@ int __log_init_recover __P((DB_ENV *));
 int __log_findckp __P((DB_LOG *, DB_LSN *));
 int __log_get __P((DB_LOG *, DB_LSN *, DBT *, u_int32_t, int));
 int __log_put __P((DB_LOG *, DB_LSN *, const DBT *, u_int32_t));
-int __log_name __P((DB_LOG *, int, char **));
+int __log_name __P((DB_LOG *, u_int32_t, char **, int *, u_int32_t));
 int __log_register_recover
     __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
 int __log_add_logid __P((DB_LOG *, DB *, u_int32_t));
diff --git a/db2/include/mp.h b/db2/include/mp.h
index 8635efa722..904bccfe98 100644
--- a/db2/include/mp.h
+++ b/db2/include/mp.h
@@ -4,7 +4,7 @@
  * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  *
- *	@(#)mp.h	10.33 (Sleepycat) 5/4/98
+ *	@(#)mp.h	10.37 (Sleepycat) 1/1/99
  */
 
 struct __bh;		typedef struct __bh BH;
@@ -16,11 +16,11 @@ struct __mpoolfile;	typedef struct __mpoolfile MPOOLFILE;
 #define	DB_DEFAULT_MPOOL_FILE	"__db_mpool.share"
 
 /*
- * We default to 128K (16 8K pages) if the user doesn't specify, and
+ * We default to 256K (32 8K pages) if the user doesn't specify, and
  * require a minimum of 20K.
  */
 #ifndef	DB_CACHESIZE_DEF
-#define	DB_CACHESIZE_DEF	(128 * 1024)
+#define	DB_CACHESIZE_DEF	(256 * 1024)
 #endif
 #define	DB_CACHESIZE_MIN	( 20 * 1024)
 
@@ -106,6 +106,12 @@ struct __mpoolfile;	typedef struct __mpoolfile MPOOLFILE;
 	if (F_ISSET(dbmp, MP_LOCKREGION))				\
 		(void)__db_mutex_unlock(&(bhp)->mutex, (dbmp)->reginfo.fd)
 
+/* Check for region catastrophic shutdown. */
+#define	MP_PANIC_CHECK(dbmp) {						\
+	if ((dbmp)->mp->rlayout.panic)					\
+		return (DB_RUNRECOVERY);				\
+}
+
 /*
  * DB_MPOOL --
  *	Per-process memory pool structure.
@@ -158,6 +164,18 @@ struct __db_mpoolfile {
 
 	int	   fd;			/* Underlying file descriptor. */
 
+	u_int32_t ref;			/* Reference count. */
+
+	/*
+	 * !!!
+	 * This field is a special case -- it's protected by the region lock
+	 * NOT the thread lock.  The reason for this is that we always have
+	 * the region lock immediately before or after we modify the field,
+	 * and we don't want to use the structure lock to protect it because
+	 * then I/O (which is done with the structure lock held because of
+	 * the race between the seek and write of the file descriptor) will
+	 * block any other put/get calls using this DB_MPOOLFILE structure.
+	 */
 	u_int32_t pinref;		/* Pinned block reference count. */
 
 /* These fields are not protected. */
diff --git a/db2/include/mp_ext.h b/db2/include/mp_ext.h
index 3650839475..8b46334408 100644
--- a/db2/include/mp_ext.h
+++ b/db2/include/mp_ext.h
@@ -9,10 +9,12 @@ int __memp_pg __P((DB_MPOOLFILE *, BH *, int));
 void __memp_bhfree __P((DB_MPOOL *, MPOOLFILE *, BH *, int));
 int __memp_fopen __P((DB_MPOOL *, MPOOLFILE *, const char *,
    u_int32_t, int, size_t, int, DB_MPOOL_FINFO *, DB_MPOOLFILE **));
+void __memp_panic __P((DB_ENV *));
 char * __memp_fn __P((DB_MPOOLFILE *));
 char * __memp_fns __P((DB_MPOOL *, MPOOLFILE *));
 void __memp_dump_region __P((DB_MPOOL *, char *, FILE *));
-int __memp_ralloc __P((DB_MPOOL *, size_t, size_t *, void *));
+int __memp_reg_alloc __P((DB_MPOOL *, size_t, size_t *, void *));
+int __memp_alloc __P((DB_MPOOL *, size_t, size_t *, void *));
 int __memp_ropen
    __P((DB_MPOOL *, const char *, size_t, int, int, u_int32_t));
 int __mp_xxx_fd __P((DB_MPOOLFILE *, int *));
diff --git a/db2/include/os.h b/db2/include/os.h
new file mode 100644
index 0000000000..f173d1f610
--- /dev/null
+++ b/db2/include/os.h
@@ -0,0 +1,24 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1997, 1998
+ *	Sleepycat Software.  All rights reserved.
+ *
+ *	@(#)os.h	10.11 (Sleepycat) 10/12/98
+ */
+
+/*
+ * We group seek/write calls into a single function so that we can use
+ * pread(2)/pwrite(2) where they're available.
+ */
+#define	DB_IO_READ	1
+#define	DB_IO_WRITE	2
+typedef struct __io {
+	int	    fd_io;		/* I/O file descriptor. */
+	int	    fd_lock;		/* Locking file descriptor. */
+	db_mutex_t *mutexp;		/* Mutex to lock. */
+	size_t	    pagesize;		/* Page size. */
+	db_pgno_t   pgno;		/* Page number. */
+	u_int8_t   *buf;		/* Buffer. */
+	size_t	    bytes;		/* Bytes read/written. */
+} DB_IO;
diff --git a/db2/include/os_ext.h b/db2/include/os_ext.h
index 889a45a44e..346210975f 100644
--- a/db2/include/os_ext.h
+++ b/db2/include/os_ext.h
@@ -1,15 +1,17 @@
 /* DO NOT EDIT: automatically built by dist/distrib. */
 #ifndef _os_ext_h_
 #define _os_ext_h_
-int __db_abspath __P((const char *));
-char *__db_strdup __P((const char *));
-void *__db_calloc __P((size_t, size_t));
-void *__db_malloc __P((size_t));
-void *__db_realloc __P((void *, size_t));
+int __os_abspath __P((const char *));
+int __os_strdup __P((const char *, void *));
+int __os_calloc __P((size_t, size_t, void *));
+int __os_malloc __P((size_t, void *(*)(size_t), void *));
+int __os_realloc __P((void *, size_t));
+void __os_free __P((void *, size_t));
+void __os_freestr __P((void *));
 int __os_dirlist __P((const char *, char ***, int *));
 void __os_dirfree __P((char **, int));
-int __db_fileid __P((DB_ENV *, const char *, int, u_int8_t *));
-int __db_fsync __P((int));
+int __os_fileid __P((DB_ENV *, const char *, int, u_int8_t *));
+int __os_fsync __P((int));
 int __db_mapanon_ok __P((int));
 int __db_mapinit __P((void));
 int __db_mapregion __P((char *, REGINFO *));
@@ -20,15 +22,19 @@ int __db_unmapfile __P((void *, size_t));
 u_int32_t __db_oflags __P((int));
 int __db_omode __P((const char *));
 int __db_open __P((const char *, u_int32_t, u_int32_t, int, int *));
-int __db_close __P((int));
+int __os_open __P((const char *, int, int, int *));
+int __os_close __P((int));
 char *__db_rpath __P((const char *));
-int __db_read __P((int, void *, size_t, ssize_t *));
-int __db_write __P((int, void *, size_t, ssize_t *));
+int __os_io __P((DB_IO *, int, ssize_t *));
+int __os_read __P((int, void *, size_t, ssize_t *));
+int __os_write __P((int, const void *, size_t, ssize_t *));
 int __os_seek __P((int, size_t, db_pgno_t, u_int32_t, int, int));
 int __os_sleep __P((u_long, u_long));
 int __os_spin __P((void));
+void __os_yield __P((u_long));
 int __os_exists __P((const char *, int *));
 int __os_ioinfo
    __P((const char *, int, u_int32_t *, u_int32_t *, u_int32_t *));
-int __db_unlink __P((const char *));
+int __os_tmpdir __P((DB_ENV *, u_int32_t));
+int __os_unlink __P((const char *));
 #endif /* _os_ext_h_ */
diff --git a/db2/include/os_func.h b/db2/include/os_jump.h
index 12794d550d..e2d577ff10 100644
--- a/db2/include/os_func.h
+++ b/db2/include/os_jump.h
@@ -4,7 +4,7 @@
  * Copyright (c) 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  *
- *	@(#)os_func.h	10.8 (Sleepycat) 4/19/98
+ *	@(#)os_jump.h	10.1 (Sleepycat) 10/17/98
  */
 
 /* Calls which can be replaced by the application. */
@@ -38,32 +38,3 @@ struct __db_jumptab {
 };
 
 extern struct __db_jumptab __db_jump;
-
-/*
- * Names used by DB to call through the jump table.
- *
- * The naming scheme goes like this: if the functionality the application can
- * replace is the same as the DB functionality, e.g., malloc, or dirlist, then
- * we use the name __db_XXX, and the application is expected to replace the
- * complete functionality, which may or may not map directly to an ANSI C or
- * POSIX 1003.1 interface.  If the functionality that the aplication replaces
- * only underlies what the DB os directory exports to other parts of DB, e.g.,
- * read, then the name __os_XXX is used, and the application can only replace
- * the underlying functionality.  Under most circumstances, the os directory
- * part of DB is the only code that should use the __os_XXX names, all other
- * parts of DB should be calling __db_XXX functions.
- */
-#define	__os_close	__db_jump.j_close	/* __db_close is a wrapper. */
-#define	__db_dirfree	__db_jump.j_dirfree
-#define	__db_dirlist	__db_jump.j_dirlist
-#define	__db_exists	__db_jump.j_exists
-#define	__db_free	__db_jump.j_free
-#define	__os_fsync	__db_jump.j_fsync	/* __db_fsync is a wrapper. */
-#define	__db_ioinfo	__db_jump.j_ioinfo
-#define	__os_open	__db_jump.j_open	/* __db_open is a wrapper. */
-#define	__os_read	__db_jump.j_read	/* __db_read is a wrapper. */
-#define	__db_seek	__db_jump.j_seek
-#define	__db_sleep	__db_jump.j_sleep
-#define	__os_unlink	__db_jump.j_unlink	/* __db_unlink is a wrapper. */
-#define	__os_write	__db_jump.j_write	/* __db_write is a wrapper. */
-#define	__db_yield	__db_jump.j_yield
diff --git a/db2/include/txn.h b/db2/include/txn.h
index a2512ed152..a6fa4db8de 100644
--- a/db2/include/txn.h
+++ b/db2/include/txn.h
@@ -4,11 +4,13 @@
  * Copyright (c) 1996, 1997, 1998
  *	Sleepycat Software.  All rights reserved.
  *
- *	@(#)txn.h	10.15 (Sleepycat) 4/21/98
+ *	@(#)txn.h	10.18 (Sleepycat) 1/3/99
  */
 #ifndef	_TXN_H_
 #define	_TXN_H_
 
+#include "xa.h"
+
 /*
  * The name of the transaction shared memory region is DEFAULT_TXN_FILE and
  * the region is always created group RW of the group owning the directory.
@@ -25,6 +27,8 @@
 /*
  * Internal data maintained in shared memory for each transaction.
  */
+typedef char DB_XID[XIDDATASIZE];
+
 typedef struct __txn_detail {
 	u_int32_t txnid;		/* current transaction id
 					   used to link free list also */
@@ -32,12 +36,31 @@ typedef struct __txn_detail {
 	DB_LSN	begin_lsn;		/* lsn of begin record */
 	size_t	last_lock;		/* offset in lock region of last lock
 					   for this transaction. */
+	size_t	parent;			/* Offset of transaction's parent. */
 #define	TXN_UNALLOC	0
 #define	TXN_RUNNING	1
 #define	TXN_ABORTED	2
 #define	TXN_PREPARED	3
+#define	TXN_COMMITTED	4
 	u_int32_t status;		/* status of the transaction */
 	SH_TAILQ_ENTRY	links;		/* free/active list */
+
+#define	TXN_XA_ABORTED		1
+#define	TXN_XA_DEADLOCKED	2
+#define	TXN_XA_ENDED		3
+#define	TXN_XA_PREPARED		4
+#define	TXN_XA_STARTED		5
+#define	TXN_XA_SUSPENDED	6
+	u_int32_t xa_status;		/* XA status */
+
+	/*
+	 * XID (xid_t) structure: because these fields are logged, the
+	 * sizes have to be explicit.
+	 */
+	DB_XID xid;			/* XA global transaction id */
+	u_int32_t bqual;		/* bqual_length from XID */
+	u_int32_t gtrid;		/* gtrid_length from XID */
+	int32_t format;			/* XA format */
 } TXN_DETAIL;
 
 /*
@@ -105,6 +128,12 @@ struct __db_txnregion {
 #define	UNLOCK_TXNREGION(tmgrp)						\
 	(void)__db_mutex_unlock(&(tmgrp)->region->hdr.lock, (tmgrp)->reginfo.fd)
 
+/* Check for region catastrophic shutdown. */
+#define	TXN_PANIC_CHECK(tmgrp) {					\
+	if ((tmgrp)->region->hdr.panic)					\
+		return (DB_RUNRECOVERY);				\
+}
+
 /*
  * Log record types.
  */
@@ -114,4 +143,6 @@ struct __db_txnregion {
 
 #include "txn_auto.h"
 #include "txn_ext.h"
+
+#include "xa_ext.h"
 #endif /* !_TXN_H_ */
diff --git a/db2/include/txn_auto.h b/db2/include/txn_auto.h
index fd5a456115..bb3de4eb17 100644
--- a/db2/include/txn_auto.h
+++ b/db2/include/txn_auto.h
@@ -22,4 +22,30 @@ typedef struct _txn_ckp_args {
 	DB_LSN 	last_ckp;
 } __txn_ckp_args;
 
+
+#define	DB_txn_xa_regop	(DB_txn_BEGIN + 3)
+
+typedef struct _txn_xa_regop_args {
+	u_int32_t type;
+	DB_TXN *txnid;
+	DB_LSN prev_lsn;
+	u_int32_t	opcode;
+	DBT	xid;
+	int32_t	formatID;
+	u_int32_t	gtrid;
+	u_int32_t	bqual;
+	DB_LSN 	begin_lsn;
+} __txn_xa_regop_args;
+
+
+#define	DB_txn_child	(DB_txn_BEGIN + 4)
+
+typedef struct _txn_child_args {
+	u_int32_t type;
+	DB_TXN *txnid;
+	DB_LSN prev_lsn;
+	u_int32_t	opcode;
+	u_int32_t	parent;
+} __txn_child_args;
+
 #endif
diff --git a/db2/include/txn_ext.h b/db2/include/txn_ext.h
index 7d694f070d..e0d69c360d 100644
--- a/db2/include/txn_ext.h
+++ b/db2/include/txn_ext.h
@@ -1,6 +1,9 @@
 /* DO NOT EDIT: automatically built by dist/distrib. */
 #ifndef _txn_ext_h_
 #define _txn_ext_h_
+void __txn_panic __P((DB_ENV *));
+int __txn_xa_begin __P((DB_ENV *, DB_TXN *));
+int __txn_is_ancestor __P((DB_TXNMGR *, size_t, size_t));
 int __txn_regop_log
     __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
     u_int32_t));
@@ -13,9 +16,26 @@ int __txn_ckp_log
 int __txn_ckp_print
    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
 int __txn_ckp_read __P((void *, __txn_ckp_args **));
+int __txn_xa_regop_log
+    __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
+    u_int32_t, const DBT *, int32_t, u_int32_t,
+    u_int32_t, DB_LSN *));
+int __txn_xa_regop_print
+   __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+int __txn_xa_regop_read __P((void *, __txn_xa_regop_args **));
+int __txn_child_log
+    __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
+    u_int32_t, u_int32_t));
+int __txn_child_print
+   __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+int __txn_child_read __P((void *, __txn_child_args **));
 int __txn_init_print __P((DB_ENV *));
 int __txn_init_recover __P((DB_ENV *));
 int __txn_regop_recover
-    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+   __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+int __txn_xa_regop_recover
+   __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
 int __txn_ckp_recover __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+int __txn_child_recover
+   __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
 #endif /* _txn_ext_h_ */
diff --git a/db2/include/xa.h b/db2/include/xa.h
new file mode 100644
index 0000000000..ae822f3e75
--- /dev/null
+++ b/db2/include/xa.h
@@ -0,0 +1,179 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1998
+ *	Sleepycat Software.  All rights reserved.
+ *
+ *	@(#)xa.h	10.1 (Sleepycat) 6/22/98
+ */
+/*
+ * Start of xa.h header
+ *
+ * Define a symbol to prevent multiple inclusions of this header file
+ */
+#ifndef	XA_H
+#define	XA_H
+
+/*
+ * Transaction branch identification: XID and NULLXID:
+ */
+#define	XIDDATASIZE	128		/* size in bytes */
+#define	MAXGTRIDSIZE	 64		/* maximum size in bytes of gtrid */
+#define	MAXBQUALSIZE	 64		/* maximum size in bytes of bqual */
+
+struct xid_t {
+	long formatID;			/* format identifier */
+	long gtrid_length;		/* value from 1 through 64 */
+	long bqual_length;		/* value from 1 through 64 */
+	char data[XIDDATASIZE];
+};
+typedef	struct xid_t XID;
+/*
+ * A value of -1 in formatID means that the XID is null.
+ */
+
+/*
+ * Declarations of routines by which RMs call TMs:
+ */
+extern int ax_reg __P((int, XID *, long));
+extern int ax_unreg __P((int, long));
+
+/*
+ * XA Switch Data Structure
+ */
+#define	RMNAMESZ	32		/* length of resource manager name, */
+					/* including the null terminator */
+#define	MAXINFOSIZE	256		/* maximum size in bytes of xa_info */
+					/* strings, including the null
+					terminator */
+struct xa_switch_t {
+	char name[RMNAMESZ];		/* name of resource manager */
+	long flags;			/* resource manager specific options */
+	long version;			/* must be 0 */
+	int (*xa_open_entry)		/* xa_open function pointer */
+	    __P((char *, int, long));
+	int (*xa_close_entry)		/* xa_close function pointer */
+	    __P((char *, int, long));
+	int (*xa_start_entry)		/* xa_start function pointer */
+	    __P((XID *, int, long));
+	int (*xa_end_entry)		/* xa_end function pointer */
+	    __P((XID *, int, long));
+	int (*xa_rollback_entry)	/* xa_rollback function pointer */
+	    __P((XID *, int, long));
+	int (*xa_prepare_entry)		/* xa_prepare function pointer */
+	    __P((XID *, int, long));
+	int (*xa_commit_entry)		/* xa_commit function pointer */
+	    __P((XID *, int, long));
+	int (*xa_recover_entry)		/* xa_recover function pointer */
+	    __P((XID *, long, int, long));
+	int (*xa_forget_entry)		/* xa_forget function pointer */
+	    __P((XID *, int, long));
+	int (*xa_complete_entry)	/* xa_complete function pointer */
+	    __P((int *, int *, int, long));
+};
+
+/*
+ * Flag definitions for the RM switch
+ */
+#define	TMNOFLAGS	0x00000000L	/* no resource manager features
+					selected */
+#define	TMREGISTER	0x00000001L	/* resource manager dynamically
+					registers */
+#define	TMNOMIGRATE	0x00000002L	/* resource manager does not support
+					association migration */
+#define	TMUSEASYNC	0x00000004L	/* resource manager supports
+					asynchronous operations */
+/*
+ * Flag definitions for xa_ and ax_ routines
+ */
+/* use TMNOFLAGS, defined above, when not specifying other flags */
+#define	TMASYNC		0x80000000L	/* perform routine asynchronously */
+#define	TMONEPHASE	0x40000000L	/* caller is using one-phase commit
+					optimisation */
+#define	TMFAIL		0x20000000L	/* dissociates caller and marks
+					transaction branch rollback-only */
+#define	TMNOWAIT	0x10000000L	/* return if blocking condition
+					exists */
+#define	TMRESUME	0x08000000L	/* caller is resuming association with
+					suspended transaction branch */
+#define	TMSUCCESS	0x04000000L	/* dissociate caller from transaction
+					branch */
+#define	TMSUSPEND	0x02000000L	/* caller is suspending, not ending,
+					association */
+#define	TMSTARTRSCAN	0x01000000L	/* start a recovery scan */
+#define	TMENDRSCAN	0x00800000L	/* end a recovery scan */
+#define	TMMULTIPLE	0x00400000L	/* wait for any asynchronous
+					operation */
+#define	TMJOIN		0x00200000L	/* caller is joining existing
+					transaction branch */
+#define	TMMIGRATE	0x00100000L	/* caller intends to perform
+					migration */
+
+/*
+ * ax_() return codes (transaction manager reports to resource manager)
+ */
+#define	TM_JOIN		2		/* caller is joining existing
+					transaction branch */
+#define	TM_RESUME	1		/* caller is resuming association with
+					suspended transaction branch */
+#define	TM_OK		0		/* normal execution */
+#define	TMER_TMERR	-1		/* an error occurred in the transaction
+					manager */
+#define	TMER_INVAL	-2		/* invalid arguments were given */
+#define	TMER_PROTO	-3		/* routine invoked in an improper
+					context */
+
+/*
+ * xa_() return codes (resource manager reports to transaction manager)
+ */
+#define	XA_RBBASE	100		/* The inclusive lower bound of the
+					rollback codes */
+#define	XA_RBROLLBACK	XA_RBBASE	/* The rollback was caused by an
+					unspecified reason */
+#define	XA_RBCOMMFAIL	(XA_RBBASE+1)	/* The rollback was caused by a
+					communication failure */
+#define	XA_RBDEADLOCK	(XA_RBBASE+2)	/* A deadlock was detected */
+#define	XA_RBINTEGRITY	(XA_RBBASE+3)	/* A condition that violates the
+					integrity of the resources was
+					detected */
+#define	XA_RBOTHER	(XA_RBBASE+4)	/* The resource manager rolled back the
+					transaction branch for a reason not
+					on this list */
+#define	XA_RBPROTO	(XA_RBBASE+5)	/* A protocol error occurred in the
+					resource manager */
+#define	XA_RBTIMEOUT	(XA_RBBASE+6)	/* A transaction branch took too long */
+#define	XA_RBTRANSIENT	(XA_RBBASE+7)	/* May retry the transaction branch */
+#define	XA_RBEND	XA_RBTRANSIENT	/* The inclusive upper bound of the
+					rollback codes */
+#define	XA_NOMIGRATE	9		/* resumption must occur where
+					suspension occurred */
+#define	XA_HEURHAZ	8		/* the transaction branch may have
+					been heuristically completed */
+#define	XA_HEURCOM	7		/* the transaction branch has been
+					heuristically committed */
+#define	XA_HEURRB	6		/* the transaction branch has been
+					heuristically rolled back */
+#define	XA_HEURMIX	5		/* the transaction branch has been
+					heuristically committed and rolled
+					back */
+#define	XA_RETRY	4		/* routine returned with no effect and
+					may be re-issued */
+#define	XA_RDONLY	3		/* the transaction branch was read-only
+					and has been committed */
+#define	XA_OK		0		/* normal execution */
+#define	XAER_ASYNC	-2		/* asynchronous operation already
+					outstanding */
+#define	XAER_RMERR	-3		/* a resource manager error occurred in
+					 the transaction branch */
+#define	XAER_NOTA	-4		/* the XID is not valid */
+#define	XAER_INVAL	-5		/* invalid arguments were given */
+#define	XAER_PROTO	-6		/* routine invoked in an improper
+					context */
+#define	XAER_RMFAIL	-7		/* resource manager unavailable */
+#define	XAER_DUPID	-8		/* the XID already exists */
+#define	XAER_OUTSIDE	-9		/* resource manager doing work outside
+					transaction */
+#endif /* ifndef XA_H */
+/*
+ * End of xa.h header
+ */
diff --git a/db2/include/xa_ext.h b/db2/include/xa_ext.h
new file mode 100644
index 0000000000..00369ccaae
--- /dev/null
+++ b/db2/include/xa_ext.h
@@ -0,0 +1,13 @@
+/* DO NOT EDIT: automatically built by dist/distrib. */
+#ifndef _xa_ext_h_
+#define _xa_ext_h_
+int __db_rmid_to_env __P((int rmid, DB_ENV **envp, int open_ok));
+int __db_xid_to_txn __P((DB_ENV *, XID *, size_t *));
+int __db_map_rmid __P((int, DB_ENV *));
+int __db_unmap_rmid __P((int));
+int __db_map_xid __P((DB_ENV *, XID *, size_t));
+void __db_unmap_xid __P((DB_ENV *, XID *, size_t));
+int __db_map_rmid_name __P((int, char *));
+int __db_rmid_to_name __P((int, char **));
+ void __db_unmap_rmid_name __P((int));
+#endif /* _xa_ext_h_ */