diff options
Diffstat (limited to 'db2/include')
34 files changed, 6043 insertions, 0 deletions
/*-
 * See the file LICENSE for redistribution information.
 *
 * Copyright (c) 1996, 1997
 *	Sleepycat Software.  All rights reserved.
 */
/*
 * Copyright (c) 1990, 1993, 1994, 1995, 1996
 *	Keith Bostic.  All rights reserved.
 */
/*
 * Copyright (c) 1990, 1993, 1994, 1995
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Mike Olson.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)btree.h	10.16 (Sleepycat) 8/24/97
 */

/* Forward structure declarations. */
struct __btree;		typedef struct __btree BTREE;
struct __cursor;	typedef struct __cursor CURSOR;
struct __epg;		typedef struct __epg EPG;
struct __rcursor;	typedef struct __rcursor RCURSOR;
struct __recno;		typedef struct __recno RECNO;

#undef	DEFMINKEYPAGE			/* Minimum keys per page */
#define	DEFMINKEYPAGE	 (2)

#undef	ISINTERNAL			/* If an internal page. */
#define	ISINTERNAL(p)	(TYPE(p) == P_IBTREE || TYPE(p) == P_IRECNO)
#undef	ISLEAF				/* If a leaf page. */
#define	ISLEAF(p)	(TYPE(p) == P_LBTREE || TYPE(p) == P_LRECNO)

/*
 * Allocate and discard thread structures.
 *
 * GETHANDLE(dbp, set_txn, dbpp, ret) --
 *	Store the handle to work on in *dbpp and set its txn field to set_txn.
 *	If DB_AM_THREAD is set on dbp the handle comes from __db_gethandle();
 *	a non-zero result of that call is assigned to ret and RETURNED FROM
 *	THE CALLING FUNCTION.  Otherwise *dbpp is simply dbp.
 *
 * PUTHANDLE(dbp) --
 *	Clear dbp's txn field and, if DB_AM_THREAD is set, hand the handle
 *	to __db_puthandle().
 *
 * Every argument is parenthesized in the expansion: "*dbpp->txn" parses as
 * "*(dbpp->txn)", so the unparenthesized form was only correct when the
 * caller passed exactly "&name" for dbpp.  Arguments are still evaluated
 * more than once, so they must not have side effects.
 *
 * The bodies are brace blocks (not do/while), as before, so existing call
 * sites are unaffected; do not use them as the unbraced body of an "if"
 * that has an "else".
 */
#define	GETHANDLE(dbp, set_txn, dbpp, ret) {				\
	if (F_ISSET((dbp), DB_AM_THREAD)) {				\
		if (((ret) =						\
		    __db_gethandle((dbp), __bam_bdup, (dbpp))) != 0)	\
			return (ret);					\
	} else								\
		*(dbpp) = (dbp);					\
	(*(dbpp))->txn = (set_txn);					\
}
#define	PUTHANDLE(dbp) {						\
	(dbp)->txn = NULL;						\
	if (F_ISSET((dbp), DB_AM_THREAD))				\
		__db_puthandle(dbp);					\
}

/*
 * If doing transactions we have to hold the locks associated with a data item
 * from a page for the entire transaction.  However, we don't have to hold the
 * locks associated with walking the tree.  Distinguish between the two so that
 * we don't tie up the internal pages of the tree longer than necessary.
 *
 * __BT_LPUT releases the lock whenever locking is configured; __BT_TLPUT
 * releases it only if, additionally, the handle has no transaction.  Both
 * evaluate to 0 when they do nothing.
 */
#define	__BT_LPUT(dbp, lock)						\
	(F_ISSET((dbp), DB_AM_LOCKING) ?				\
	    lock_put((dbp)->dbenv->lk_info, (lock)) : 0)
#define	__BT_TLPUT(dbp, lock)						\
	(F_ISSET((dbp), DB_AM_LOCKING) && (dbp)->txn == NULL ?		\
	    lock_put((dbp)->dbenv->lk_info, (lock)) : 0)

/*
 * Flags to __bam_search() and __bam_rsearch().
 *
 * Note, internal page searches must find the largest record less than key in
 * the tree so that descents work.  Leaf page searches must find the smallest
 * record greater than key so that the returned index is the record's correct
 * position for insertion.
 *
 * The flags parameter to the search routines describes three aspects of the
 * search: the type of locking required (including if we're locking a pair of
 * pages), the item to return in the presence of duplicates and whether or not
 * to return deleted entries.  To simplify both the mnemonic representation
 * and the code that checks for various cases, we construct a set of bitmasks.
 */
#define	S_READ		0x0001		/* Read locks. */
#define	S_WRITE		0x0002		/* Write locks. */

#define	S_APPEND	0x0040		/* Append to the tree. */
#define	S_DELNO		0x0080		/* Don't return deleted items. */
#define	S_DUPFIRST	0x0100		/* Return first duplicate. */
#define	S_DUPLAST	0x0200		/* Return last duplicate. */
#define	S_EXACT		0x0400		/* Exact items only. */
#define	S_PARENT	0x0800		/* Lock page pair. */

/* Composite masks built from the single-bit flags above. */
#define	S_DELETE	(S_WRITE | S_DUPFIRST | S_DELNO | S_EXACT)
#define	S_FIND		(S_READ | S_DUPFIRST | S_DELNO)
#define	S_INSERT	(S_WRITE | S_DUPLAST)
#define	S_KEYFIRST	(S_WRITE | S_DUPFIRST)
#define	S_KEYLAST	(S_WRITE | S_DUPLAST)
#define	S_WRPAIR	(S_WRITE | S_DUPLAST | S_PARENT)

/*
 * Flags to __bam_iitem().
 */
#define	BI_NEWKEY	0x01		/* New key. */
#define	BI_DELETED	0x02		/* Key/data pair only placeholder. */
/*
 * Various routines pass around page references.  A page reference can be a
 * pointer to the page or a page number; for either, an indx can designate
 * an item on the page.
 *
 * EPG is also the element type of the per-tree page stack (bt_stack and the
 * bt_sp/bt_csp/bt_esp pointers in struct __btree).
 */
struct __epg {
	PAGE	 *page;			/* The page. */
	db_indx_t indx;			/* The index on the page. */
	DB_LOCK	  lock;			/* The page's lock. */
};

/*
 * Btree cursor.
 *
 * Arguments passed to __bam_ca_replace().
 */
typedef enum {
	REPLACE_SETUP,
	REPLACE_SUCCESS,
	REPLACE_FAILED
} ca_replace_arg;
struct __cursor {
	DBC		*dbc;		/* Enclosing DBC. */

	PAGE		*page;		/* Cursor page. */

	db_pgno_t	 pgno;		/* Page. */
	db_indx_t	 indx;		/* Page item ref'd by the cursor. */

	db_pgno_t	 dpgno;		/* Duplicate page. */
	db_indx_t	 dindx;		/* Page item ref'd by the cursor. */

	DB_LOCK		 lock;		/* Cursor read lock. */
	db_lockmode_t	 mode;		/* Lock mode. */

	/*
	 * If a cursor record is deleted, the key/data pair has to remain on
	 * the page so that subsequent inserts/deletes don't interrupt the
	 * cursor progression through the file.  This results in interesting
	 * cases when "standard" operations, e.g., dbp->put() are done in the
	 * context of "deleted" cursors.
	 *
	 * C_DELETED -- The item referenced by the cursor has been "deleted"
	 *		but not physically removed from the page.
	 * C_REPLACE -- The "deleted" item referenced by a cursor has been
	 *		replaced by a dbp->put(), so the cursor is no longer
	 *		responsible for physical removal from the page.
	 * C_REPLACE_SETUP --
	 *		We are about to overwrite a "deleted" item, flag any
	 *		cursors referencing it for transition to C_REPLACE
	 *		state.
	 */
#define	C_DELETED	0x0001
#define	C_REPLACE	0x0002
#define	C_REPLACE_SETUP	0x0004
	u_int32_t	 flags;		/* C_* bits defined just above. */
};

/*
 * Recno cursor.
 *
 * Arguments passed to __ram_ca().
 */
typedef enum {
	CA_DELETE,
	CA_IAFTER,
	CA_IBEFORE
} ca_recno_arg;
struct __rcursor {
	DBC		*dbc;		/* Enclosing DBC. */

	db_recno_t	 recno;		/* Current record number. */

	/*
	 * Cursors referencing "deleted" records are positioned between
	 * two records, and so must be specially adjusted until they are
	 * moved.
	 */
#define	CR_DELETED	0x0001		/* Record deleted. */
	u_int32_t	 flags;		/* CR_* bits defined just above. */
};
/*
 * We maintain a stack of the pages that we're locking in the tree.  Btrees
 * (currently) only save two levels of the tree at a time, so the default
 * stack is always large enough.  Recno trees have to lock the entire tree to
 * do inserts/deletes, however.  Grow the stack as necessary.
 *
 * In all of these macros t is a BTREE *; bt_sp is the stack base, bt_csp
 * the current entry and bt_esp the end-of-stack pointer.  Arguments may be
 * evaluated more than once and must not have side effects.
 */

/* BT_STK_CLR -- reset the current entry to the base of the stack. */
#undef	BT_STK_CLR
#define	BT_STK_CLR(t)							\
	((t)->bt_csp = (t)->bt_sp)

/*
 * BT_STK_ENTER --
 *	Fill in the current stack entry without advancing past it.  If the
 *	current entry is the end-of-stack slot, __bam_stkgrow() is called
 *	first; ret is set to its return value (or to 0 when no growth was
 *	needed) and the entry is only written when ret is 0.
 *
 *	The lock argument is deliberately NOT named "lock": a macro parameter
 *	spelled like the EPG member would also be substituted into
 *	"bt_csp->lock", which compiled only when the caller's variable
 *	happened to be called "lock" as well.
 */
#undef	BT_STK_ENTER
#define	BT_STK_ENTER(t, pagep, page_indx, lock_arg, ret) do {		\
	if (((ret) =							\
	    (t)->bt_csp == (t)->bt_esp ? __bam_stkgrow(t) : 0) == 0) {	\
		(t)->bt_csp->page = (pagep);				\
		(t)->bt_csp->indx = (page_indx);			\
		(t)->bt_csp->lock = (lock_arg);				\
	}								\
} while (0)

/*
 * BT_STK_PUSH --
 *	BT_STK_ENTER followed by advancing the current-entry pointer.
 *
 *	NOTE: bt_csp is advanced even when ret comes back non-zero (i.e. when
 *	__bam_stkgrow() failed and nothing was stored); callers must check ret
 *	before touching the stack again.
 */
#undef	BT_STK_PUSH
#define	BT_STK_PUSH(t, pagep, page_indx, lock_arg, ret) do {		\
	BT_STK_ENTER(t, pagep, page_indx, lock_arg, ret);		\
	++(t)->bt_csp;							\
} while (0)

/*
 * BT_STK_POP --
 *	Step back one entry and yield a pointer to it, or yield NULL (leaving
 *	bt_csp untouched) when the current entry is already bt_stack[0].
 */
#undef	BT_STK_POP
#define	BT_STK_POP(t)							\
	((t)->bt_csp == (t)->bt_stack ? NULL : --(t)->bt_csp)
/*
 * The in-memory recno data structure.
 *
 * !!!
 * These fields are ignored as far as multi-threading is concerned.  There
 * are no transaction semantics associated with backing files, nor is there
 * any thread protection.
 */
#undef	RECNO_OOB
#define	RECNO_OOB	0		/* Illegal record number. */

struct __recno {
	int		 re_delim;	/* Variable-length delimiting byte. */
	int		 re_pad;	/* Fixed-length padding byte. */
	u_int32_t	 re_len;	/* Length for fixed-length records. */

	char		*re_source;	/* Source file name. */
	int		 re_fd;		/* Source file descriptor */
	db_recno_t	 re_last;	/* Last record number read. */
	void		*re_cmap;	/* Current point in mapped space. */
	void		*re_smap;	/* Start of mapped space. */
	void		*re_emap;	/* End of mapped space. */
	size_t		 re_msize;	/* Size of mapped region. */
					/* Recno input function. */
	int (*re_irec) __P((DB *, db_recno_t));

#define	RECNO_EOF	0x0001		/* EOF on backing source file. */
#define	RECNO_MODIFIED	0x0002		/* Tree was modified. */
	u_int32_t	 flags;		/* RECNO_* bits defined just above. */
};

/*
 * The in-memory btree data structure.
 */
struct __btree {
/*
 * These fields are per-thread and are initialized when the BTREE structure
 * is created.
 */
	db_pgno_t	 bt_lpgno;	/* Last insert location. */

	DBT		 bt_rkey;	/* Returned key. */
	DBT		 bt_rdata;	/* Returned data. */

	/* Page stack; manipulated through the BT_STK_* macros. */
	EPG		*bt_sp;		/* Stack pointer. */
	EPG		*bt_csp;	/* Current stack entry. */
	EPG		*bt_esp;	/* End stack pointer. */
	EPG		 bt_stack[5];	/* Built-in stack entries. */

	RECNO		*bt_recno;	/* Private recno structure. */

	DB_BTREE_LSTAT	 lstat;		/* Btree local statistics. */

/*
 * These fields are copied from the original BTREE structure and never
 * change.
 */
	db_indx_t	 bt_maxkey;	/* Maximum keys per page. */
	db_indx_t	 bt_minkey;	/* Minimum keys per page. */

	int (*bt_compare)		/* Comparison function. */
	    __P((const DBT *, const DBT *));
	size_t(*bt_prefix)		/* Prefix function. */
	    __P((const DBT *, const DBT *));

	db_indx_t	 bt_ovflsize;	/* Maximum key/data on-page size. */
};

/*
 * btree_auto.h must precede btree_ext.h: the prototypes in btree_ext.h
 * use the __bam_*_args types that btree_auto.h defines.
 */
#include "btree_auto.h"
#include "btree_ext.h"
#include "db_am.h"
#include "common_ext.h"
/* File: db2/include/btree_auto.h */
/* Do not edit: automatically built by dist/db_gen.sh. */
/*
 * One record-type code (DB_bam_BEGIN + n) and one argument structure per
 * btree log record.  Every structure starts with the same three members
 * (type, txnid, prev_lsn); the remaining members mirror, in order, the
 * trailing parameters of the matching __bam_<name>_log() prototype, and
 * __bam_<name>_read() hands back a pointer to the structure.
 */
#ifndef bam_AUTO_H
#define bam_AUTO_H

#define	DB_bam_pg_alloc	(DB_bam_BEGIN + 1)

typedef struct _bam_pg_alloc_args {
	u_int32_t type;
	DB_TXN *txnid;
	DB_LSN prev_lsn;
	u_int32_t	fileid;
	DB_LSN 	meta_lsn;
	DB_LSN 	page_lsn;
	db_pgno_t	pgno;
	u_int32_t	ptype;
	db_pgno_t	next;
} __bam_pg_alloc_args;


#define	DB_bam_pg_free	(DB_bam_BEGIN + 2)

typedef struct _bam_pg_free_args {
	u_int32_t type;
	DB_TXN *txnid;
	DB_LSN prev_lsn;
	u_int32_t	fileid;
	db_pgno_t	pgno;
	DB_LSN 	meta_lsn;
	DBT	header;
	db_pgno_t	next;
} __bam_pg_free_args;


#define	DB_bam_split	(DB_bam_BEGIN + 3)

typedef struct _bam_split_args {
	u_int32_t type;
	DB_TXN *txnid;
	DB_LSN prev_lsn;
	u_int32_t	fileid;
	db_pgno_t	left;
	DB_LSN 	llsn;
	db_pgno_t	right;
	DB_LSN 	rlsn;
	u_int32_t	indx;
	db_pgno_t	npgno;
	DB_LSN 	nlsn;
	DBT	pg;
} __bam_split_args;


#define	DB_bam_rsplit	(DB_bam_BEGIN + 4)

typedef struct _bam_rsplit_args {
	u_int32_t type;
	DB_TXN *txnid;
	DB_LSN prev_lsn;
	u_int32_t	fileid;
	db_pgno_t	pgno;
	DBT	pgdbt;
	DBT	rootent;
	DB_LSN 	rootlsn;
} __bam_rsplit_args;


#define	DB_bam_adj	(DB_bam_BEGIN + 5)

typedef struct _bam_adj_args {
	u_int32_t type;
	DB_TXN *txnid;
	DB_LSN prev_lsn;
	u_int32_t	fileid;
	db_pgno_t	pgno;
	DB_LSN 	lsn;
	u_int32_t	indx;
	u_int32_t	indx_copy;
	u_int32_t	is_insert;
} __bam_adj_args;


#define	DB_bam_cadjust	(DB_bam_BEGIN + 6)

typedef struct _bam_cadjust_args {
	u_int32_t type;
	DB_TXN *txnid;
	DB_LSN prev_lsn;
	u_int32_t	fileid;
	db_pgno_t	pgno;
	DB_LSN 	lsn;
	u_int32_t	indx;
	int32_t	adjust;
	int32_t	total;
} __bam_cadjust_args;


#define	DB_bam_cdel	(DB_bam_BEGIN + 7)

typedef struct _bam_cdel_args {
	u_int32_t type;
	DB_TXN *txnid;
	DB_LSN prev_lsn;
	u_int32_t	fileid;
	db_pgno_t	pgno;
	DB_LSN 	lsn;
	u_int32_t	indx;
} __bam_cdel_args;

#endif
/* File: db2/include/btree_ext.h */
/* Do not edit: automatically built by dist/distrib. */
/*
 * External prototypes for the btree (__bam_*) and recno (__ram_*) access
 * methods.  All prototypes go through __P(), so they collapse to K&R
 * declarations when __P is defined that way.  The __bam_*_args types used
 * by the *_read() prototypes come from btree_auto.h, which must be included
 * first.
 */
int __bam_close __P((DB *));
int __bam_sync __P((DB *, int));
int __bam_cmp __P((DB *, const DBT *, EPG *));
int __bam_defcmp __P((const DBT *, const DBT *));
size_t __bam_defpfx __P((const DBT *, const DBT *));
int __bam_pgin __P((db_pgno_t, void *, DBT *));
int __bam_pgout __P((db_pgno_t, void *, DBT *));
int __bam_mswap __P((PAGE *));
int __bam_cursor __P((DB *, DB_TXN *, DBC **));
int __bam_get __P((DB *, DB_TXN *, DBT *, DBT *, int));
int __bam_ovfl_chk __P((DB *, CURSOR *, u_int32_t, int));
int __bam_ca_delete __P((DB *, db_pgno_t, u_int32_t, CURSOR *));
void __bam_ca_di __P((DB *, db_pgno_t, u_int32_t, int));
void __bam_ca_dup __P((DB *,
   db_pgno_t, u_int32_t, u_int32_t, db_pgno_t, u_int32_t));
void __bam_ca_move __P((DB *, BTREE *, db_pgno_t, db_pgno_t));
void __bam_ca_replace
   __P((DB *, db_pgno_t, u_int32_t, ca_replace_arg));
void __bam_ca_split __P((DB *,
   db_pgno_t, db_pgno_t, db_pgno_t, u_int32_t, int));
int __bam_delete __P((DB *, DB_TXN *, DBT *, int));
int __ram_delete __P((DB *, DB_TXN *, DBT *, int));
int __bam_ditem __P((DB *, PAGE *, u_int32_t));
int __bam_adjindx __P((DB *, PAGE *, u_int32_t, u_int32_t, int));
int __bam_dpage __P((DB *, const DBT *));
int __bam_open __P((DB *, DBTYPE, DB_INFO *));
int __bam_bdup __P((DB *, DB *));
int __bam_new __P((DB *, u_int32_t, PAGE **));
int __bam_free __P((DB *, PAGE *));
int __bam_lget __P((DB *, int, db_pgno_t, db_lockmode_t, DB_LOCK *));
int __bam_lput __P((DB *, DB_LOCK));
int __bam_pget __P((DB *, PAGE **, db_pgno_t *, int));
int __bam_put __P((DB *, DB_TXN *, DBT *, DBT *, int));
int __bam_iitem __P((DB *,
   PAGE **, db_indx_t *, DBT *, DBT *, int, int));
/*
 * The *_recover() (and, further down, *_print()) routines all share one
 * signature, the same one as the tx_recover hook in struct __db_env.
 */
int __bam_pg_alloc_recover
  __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
int __bam_pg_free_recover
  __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
int __bam_split_recover
  __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
int __bam_rsplit_recover
  __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
int __bam_adj_recover
  __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
int __bam_cadjust_recover
  __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
int __bam_cdel_recover
  __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
int __ram_open __P((DB *, DBTYPE, DB_INFO *));
int __ram_cursor __P((DB *, DB_TXN *, DBC **));
int __ram_close __P((DB *));
void __ram_ca __P((DB *, db_recno_t, ca_recno_arg));
int __ram_getno __P((DB *, const DBT *, db_recno_t *, int));
int __ram_snapshot __P((DB *));
int __bam_rsearch __P((DB *, db_recno_t *, u_int, int, int *));
int __bam_adjust __P((DB *, BTREE *, int));
int __bam_nrecs __P((DB *, db_recno_t *));
db_recno_t __bam_total __P((PAGE *));
int __bam_search __P((DB *,
    const DBT *, u_int, int, db_recno_t *, int *));
int __bam_stkrel __P((DB *));
int __bam_stkgrow __P((BTREE *));
int __bam_split __P((DB *, void *));
int __bam_broot __P((DB *, PAGE *, PAGE *, PAGE *));
int __ram_root __P((DB *, PAGE *, PAGE *, PAGE *));
int __bam_copy __P((DB *, PAGE *, PAGE *, u_int32_t, u_int32_t));
int __bam_stat __P((DB *, void *, void *(*)(size_t), int));
void __bam_add_mstat __P((DB_BTREE_LSTAT *, DB_BTREE_LSTAT *));
/*
 * Per-record log/print/read triples; one triple for each record type
 * declared in btree_auto.h.
 */
int __bam_pg_alloc_log
    __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
    u_int32_t, DB_LSN *, DB_LSN *, db_pgno_t,
    u_int32_t, db_pgno_t));
int __bam_pg_alloc_print
   __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
int __bam_pg_alloc_read __P((void *, __bam_pg_alloc_args **));
int __bam_pg_free_log
    __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
    u_int32_t, db_pgno_t, DB_LSN *, DBT *,
    db_pgno_t));
int __bam_pg_free_print
   __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
int __bam_pg_free_read __P((void *, __bam_pg_free_args **));
int __bam_split_log
    __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
    u_int32_t, db_pgno_t, DB_LSN *, db_pgno_t,
    DB_LSN *, u_int32_t, db_pgno_t, DB_LSN *,
    DBT *));
int __bam_split_print
   __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
int __bam_split_read __P((void *, __bam_split_args **));
int __bam_rsplit_log
    __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
    u_int32_t, db_pgno_t, DBT *, DBT *,
    DB_LSN *));
int __bam_rsplit_print
   __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
int __bam_rsplit_read __P((void *, __bam_rsplit_args **));
int __bam_adj_log
    __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
    u_int32_t, db_pgno_t, DB_LSN *, u_int32_t,
    u_int32_t, u_int32_t));
int __bam_adj_print
   __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
int __bam_adj_read __P((void *, __bam_adj_args **));
int __bam_cadjust_log
    __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
    u_int32_t, db_pgno_t, DB_LSN *, u_int32_t,
    int32_t, int32_t));
int __bam_cadjust_print
   __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
int __bam_cadjust_read __P((void *, __bam_cadjust_args **));
int __bam_cdel_log
    __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
    u_int32_t, db_pgno_t, DB_LSN *, u_int32_t));
int __bam_cdel_print
   __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
int __bam_cdel_read __P((void *, __bam_cdel_args **));
int __bam_init_print __P((DB_ENV *));
int __bam_init_recover __P((DB_ENV *));
*/ +#ifdef __STDC__ +void err __P((int eval, const char *, ...)); +#else +void err(); +#endif +#ifdef __STDC__ +void errx __P((int eval, const char *, ...)); +#else +void errx(); +#endif +#ifdef __STDC__ +void warn __P((const char *, ...)); +#else +void warn(); +#endif +#ifdef __STDC__ +void warnx __P((const char *, ...)); +#else +void warnx(); +#endif +#ifndef HAVE_GETCWD +char *getcwd __P((char *, size_t)); +#endif +void get_long __P((char *, long, long, long *)); +#ifndef HAVE_GETOPT +int getopt __P((int, char * const *, const char *)); +#endif +#ifndef HAVE_MEMCMP +int memcmp __P((const void *, const void *, size_t)); +#endif +#ifndef HAVE_MEMCPY +void *memcpy __P((void *, const void *, size_t)); +#endif +#ifndef HAVE_MEMMOVE +void *memmove __P((void *, const void *, size_t)); +#endif +#ifndef HAVE_MEMCPY +void *memcpy __P((void *, const void *, size_t)); +#endif +#ifndef HAVE_MEMMOVE +void *memmove __P((void *, const void *, size_t)); +#endif +#ifndef HAVE_RAISE +int raise __P((int)); +#endif +#ifndef HAVE_SNPRINTF +#ifdef __STDC__ +int snprintf __P((char *, size_t, const char *, ...)); +#else +int snprintf(); +#endif +#endif +#ifndef HAVE_STRDUP +char *strdup __P((const char *)); +#endif +#ifndef HAVE_STRERROR +char *strerror __P((int)); +#endif +#ifndef HAVE_STRSEP +char *strsep __P((char **, const char *)); +#endif +#ifndef HAVE_VSNPRINTF +int vsnprintf(); +#endif diff --git a/db2/include/common_ext.h b/db2/include/common_ext.h new file mode 100644 index 0000000000..9840162a12 --- /dev/null +++ b/db2/include/common_ext.h @@ -0,0 +1,41 @@ +/* Do not edit: automatically built by dist/distrib. 
/* File: db2/include/common_ext.h */
/* Do not edit: automatically built by dist/distrib. */
/*
 * External prototypes for the routines shared by all DB subsystems
 * (the __db_* namespace).  As elsewhere, prototypes go through __P().
 */
int __db_appname __P((DB_ENV *,
   APPNAME, const char *, const char *, int *, char **));
int __db_apprec __P((DB_ENV *, int));
int __db_byteorder __P((DB_ENV *, int));
#ifdef __STDC__
void __db_err __P((const DB_ENV *dbenv, const char *fmt, ...));
#else
void __db_err();
#endif
int __db_panic __P((DB *));
int __db_fchk __P((DB_ENV *, const char *, int, int));
int __db_fcchk __P((DB_ENV *, const char *, int, int, int));
int __db_cdelchk __P((const DB *, int, int, int));
int __db_cgetchk __P((const DB *, DBT *, DBT *, int, int));
int __db_cputchk __P((const DB *,
   const DBT *, DBT *, int, int, int));
int __db_delchk __P((const DB *, int, int));
int __db_getchk __P((const DB *, const DBT *, DBT *, int));
int __db_putchk __P((const DB *, DBT *, const DBT *, int, int, int));
int __db_statchk __P((const DB *, int));
int __db_syncchk __P((const DB *, int));
int __db_ferr __P((const DB_ENV *, const char *, int));
u_int32_t __db_log2 __P((u_int32_t));
int __db_rcreate __P((DB_ENV *, APPNAME,
   const char *, const char *, int, size_t, int *, void *));
int __db_ropen __P((DB_ENV *,
   APPNAME, const char *, const char *, int, int *, void *));
int __db_rclose __P((DB_ENV *, int, void *));
int __db_runlink __P((DB_ENV *,
   APPNAME, const char *, const char *, int));
int __db_rgrow __P((DB_ENV *, int, size_t));
int __db_rremap __P((DB_ENV *, void *, size_t, size_t, int, void *));
void __db_shalloc_init __P((void *, size_t));
int __db_shalloc __P((void *, size_t, size_t, void *));
void __db_shalloc_free __P((void *, void *));
size_t __db_shalloc_count __P((void *));
size_t __db_shsizeof __P((void *));
void __db_shalloc_dump __P((FILE *, void *));
int __db_tablesize __P((int));
void __db_hashinit __P((void *, int));
+ * + * Copyright (c) 1997 + * Sleepycat Software. All rights reserved. + * + * @(#)cxx_int.h 10.4 (Sleepycat) 8/22/97 + */ + +#ifndef _CXX_INT_H_ +#define _CXX_INT_H_ + +// private data structures known to the implementation only + +#include <assert.h> // used by defines below + +// +// Using FooImp classes will allow the implementation to change in the +// future without any modification to user code or even to header files +// that the user includes. FooImp * is just like void * except that it +// provides a little extra protection, since you cannot randomly assign +// any old pointer to a FooImp* as you can with void *. Currently, a +// pointer to such an opaque class is always just a pointer to the +// appropriate underlying implementation struct. These are converted +// back and forth using the various overloaded wrap()/unwrap() methods. +// This is essentially a use of the "Bridge" Design Pattern. +// +// WRAPPED_CLASS implements the appropriate wrap() and unwrap() methods +// for a wrapper class that has an underlying pointer representation. +// +#define WRAPPED_CLASS(_WRAPPER_CLASS, _IMP_CLASS, _WRAPPED_TYPE) \ + \ + class _IMP_CLASS {}; \ + \ + inline _WRAPPED_TYPE unwrap(_WRAPPER_CLASS *val) \ + { \ + if (!val) return 0; \ + return (_WRAPPED_TYPE)(val->imp()); \ + } \ + \ + inline const _WRAPPED_TYPE unwrapConst(const _WRAPPER_CLASS *val) \ + { \ + if (!val) return 0; \ + return (const _WRAPPED_TYPE)(val->imp()); \ + } \ + \ + inline _IMP_CLASS *wrap(_WRAPPED_TYPE val) \ + { \ + return (_IMP_CLASS*)val; \ + } + +WRAPPED_CLASS(DbLockTab, DbLockTabImp, DB_LOCKTAB*) +WRAPPED_CLASS(DbLog, DbLogImp, DB_LOG*) +WRAPPED_CLASS(DbMpool, DbMpoolImp, DB_MPOOL*) +WRAPPED_CLASS(DbMpoolFile, DbMpoolFileImp, DB_MPOOLFILE*) +WRAPPED_CLASS(Db, DbImp, DB*) +WRAPPED_CLASS(DbTxn, DbTxnImp, DB_TXN*) +WRAPPED_CLASS(DbTxnMgr, DbTxnMgrImp, DB_TXNMGR*) + +// Macros that handle detected errors, in case we want to +// change the default behavior. 
runtime_error() throws an +// exception by default. +// +// Since it's unusual to throw an exception in a destructor, +// we have a separate macro. For now, we silently ignore such +// detected errors. +// +#define DB_ERROR(caller, ecode) \ + DbEnv::runtime_error(caller, ecode) + +#define DB_DESTRUCTOR_ERROR(caller, ecode) \ + DbEnv::runtime_error(caller, ecode, 1) + + +//////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////// +// +// These defines are for tedious flag or field set/get access methods. +// + +// Define setName() and getName() methods that twiddle +// the _flags field. +// +#define DB_FLAG_METHODS(_class, _flags, _cxx_name, _flag_name) \ + \ +void _class::set##_cxx_name(int onOrOff) \ +{ \ + if (onOrOff) \ + _flags |= _flag_name; \ + else \ + _flags &= ~(_flag_name); \ +} \ + \ +int _class::get##_cxx_name() const \ +{ \ + return (_flags & _flag_name) ? 1 : 0; \ +} + + +#define DB_RO_ACCESS(_class, _type, _cxx_name, _field) \ + \ +_type _class::get_##_cxx_name() const \ +{ \ + return _field; \ +} + +#define DB_WO_ACCESS(_class, _type, _cxx_name, _field) \ + \ +void _class::set_##_cxx_name(_type value) \ +{ \ + _field = value; \ +} \ + +#define DB_RW_ACCESS(_class, _type, _cxx_name, _field) \ + DB_RO_ACCESS(_class, _type, _cxx_name, _field) \ + DB_WO_ACCESS(_class, _type, _cxx_name, _field) + +#endif /* !_CXX_INT_H_ */ diff --git a/db2/include/db.h.src b/db2/include/db.h.src new file mode 100644 index 0000000000..f9b29fa2af --- /dev/null +++ b/db2/include/db.h.src @@ -0,0 +1,796 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 1997 + * Sleepycat Software. All rights reserved. + * + * @(#)db.h.src 10.67 (Sleepycat) 8/25/97 + */ + +#ifndef _DB_H_ +#define _DB_H_ + +#ifndef __NO_SYSTEM_INCLUDES +#include <sys/types.h> + +#include <stdio.h> +#endif + +/* + * XXX + * MacOS: ensure that Metrowerks C makes enumeration types int sized. 
/* File: db2/include/db.h.src */
/*-
 * See the file LICENSE for redistribution information.
 *
 * Copyright (c) 1996, 1997
 *	Sleepycat Software.  All rights reserved.
 *
 *	@(#)db.h.src	10.67 (Sleepycat) 8/25/97
 */

#ifndef _DB_H_
#define	_DB_H_

#ifndef __NO_SYSTEM_INCLUDES
#include <sys/types.h>

#include <stdio.h>
#endif

/*
 * XXX
 * MacOS: ensure that Metrowerks C makes enumeration types int sized.
 */
#ifdef __MWERKS__
#pragma enumsalwaysint on
#endif

/*
 * XXX
 * Handle function prototypes and the keyword "const".  This steps on name
 * space that DB doesn't control, but all of the other solutions are worse.
 */
#undef	__P
#if defined(__STDC__) || defined(__cplusplus)
#define	__P(protos)	protos		/* ANSI C prototypes */
#else
#define	const
#define	__P(protos)	()		/* K&R C preprocessor */
#endif

/*
 * !!!
 * DB needs basic information about specifically sized types.  If they're
 * not provided by the system, typedef them here.
 *
 * We protect them against multiple inclusion using __BIT_TYPES_DEFINED__,
 * as does BIND and Kerberos, since we don't know for sure what #include
 * files the user is using.
 *
 * !!!
 * We also provide the standard u_int, u_long etc., if they're not provided
 * by the system.  This isn't completely necessary, but the example programs
 * need them.
 *
 * NOTE(review): the @..._decl@ tokens below are not C; they look like
 * build-time substitution placeholders (this file is db.h.src, not db.h).
 * Confirm against the build scripts before touching them.
 */
#ifndef	__BIT_TYPES_DEFINED__
#define	__BIT_TYPES_DEFINED__
@u_int8_decl@
@int16_decl@
@u_int16_decl@
@int32_decl@
@u_int32_decl@
#endif

@u_char_decl@
@u_short_decl@
@u_int_decl@
@u_long_decl@

#define	DB_VERSION_MAJOR	2
#define	DB_VERSION_MINOR	3
#define	DB_VERSION_PATCH	4
#define	DB_VERSION_STRING	"Sleepycat Software: DB 2.3.4: (8/20/97)"

typedef	u_int32_t	db_pgno_t;	/* Page number type. */
typedef	u_int16_t	db_indx_t;	/* Page offset type. */
#define	DB_MAX_PAGES	0xffffffff	/* >= # of pages in a file */

typedef	u_int32_t	db_recno_t;	/* Record number type. */
typedef size_t		DB_LOCK;	/* Object returned by lock manager. */
#define	DB_MAX_RECORDS	0xffffffff	/* >= # of records in a tree */

#define	DB_FILE_ID_LEN		20	/* DB file ID length. */

/* Forward structure declarations, so applications get type checking. */
struct __db;		typedef struct __db DB;
#ifdef DB_DBM_HSEARCH
			typedef struct __db DBM;
#endif
struct __db_bt_stat;	typedef struct __db_bt_stat DB_BTREE_STAT;
struct __db_dbt;	typedef struct __db_dbt DBT;
struct __db_env;	typedef struct __db_env DB_ENV;
struct __db_info;	typedef struct __db_info DB_INFO;
struct __db_lockregion;	typedef struct __db_lockregion DB_LOCKREGION;
struct __db_lockreq;	typedef struct __db_lockreq DB_LOCKREQ;
struct __db_locktab;	typedef struct __db_locktab DB_LOCKTAB;
struct __db_log;	typedef struct __db_log DB_LOG;
struct __db_lsn;	typedef struct __db_lsn DB_LSN;
struct __db_mpool;	typedef struct __db_mpool DB_MPOOL;
struct __db_mpool_fstat;typedef struct __db_mpool_fstat DB_MPOOL_FSTAT;
struct __db_mpool_stat;	typedef struct __db_mpool_stat DB_MPOOL_STAT;
struct __db_mpoolfile;	typedef struct __db_mpoolfile DB_MPOOLFILE;
struct __db_txn;	typedef struct __db_txn DB_TXN;
struct __db_txn_active;	typedef struct __db_txn_active DB_TXN_ACTIVE;
struct __db_txn_stat;	typedef struct __db_txn_stat DB_TXN_STAT;
struct __db_txnmgr;	typedef struct __db_txnmgr DB_TXNMGR;
struct __db_txnregion;	typedef struct __db_txnregion DB_TXNREGION;
struct __dbc;		typedef struct __dbc DBC;

/* Key/data structure -- a Data-Base Thang. */
struct __db_dbt {
	void	 *data;			/* key/data */
	u_int32_t size;			/* key/data length */
	u_int32_t ulen;			/* RO: length of user buffer. */
	u_int32_t dlen;			/* RO: get/put record length. */
	u_int32_t doff;			/* RO: get/put record offset. */

#define	DB_DBT_INTERNAL	0x01		/* Perform any mallocs using regular
					   malloc, not the user's malloc. */
#define	DB_DBT_MALLOC	0x02		/* Return in allocated memory. */
#define	DB_DBT_PARTIAL	0x04		/* Partial put/get. */
#define	DB_DBT_USERMEM	0x08		/* Return in user's memory. */
	u_int32_t flags;		/* DB_DBT_* bits defined just above. */
};

/*
 * Database configuration and initialization.
 */
 /*
  * Flags understood by both db_open(3) and db_appinit(3).
  */
#define	DB_CREATE	      0x00001	/* O_CREAT: create file as necessary. */
#define	DB_NOMMAP	      0x00002	/* Don't mmap underlying file. */
#define	DB_THREAD	      0x00004	/* Free-thread DB package handles. */

/*
 * Flags understood by db_appinit(3).
 *
 * DB_APP_INIT and DB_MUTEXDEBUG are internal only, and not documented.
 */
/*			      0x00007	   COMMON MASK. */
#define	DB_APP_INIT	      0x00008	/* Appinit called, paths initialized. */
#define	DB_INIT_LOCK	      0x00010	/* Initialize locking. */
#define	DB_INIT_LOG	      0x00020	/* Initialize logging. */
#define	DB_INIT_MPOOL	      0x00040	/* Initialize mpool. */
#define	DB_INIT_TXN	      0x00080	/* Initialize transactions. */
#define	DB_MPOOL_PRIVATE      0x00100	/* Mpool: private memory pool. */
#define	DB_MUTEXDEBUG	      0x00200	/* Do not get/set mutexes in regions. */
#define	DB_RECOVER	      0x00400	/* Run normal recovery. */
#define	DB_RECOVER_FATAL      0x00800	/* Run catastrophic recovery. */
#define	DB_TXN_NOSYNC	      0x01000	/* Do not sync log on commit. */
#define	DB_USE_ENVIRON	      0x02000	/* Use the environment. */
#define	DB_USE_ENVIRON_ROOT   0x04000	/* Use the environment if root. */

/* CURRENTLY UNUSED LOCK FLAGS. */
#define	DB_TXN_LOCK_2PL	      0x00000	/* Two-phase locking. */
#define	DB_TXN_LOCK_OPTIMISTIC 0x00000	/* Optimistic locking. */
#define	DB_TXN_LOCK_MASK      0x00000	/* Lock flags mask. */

/* CURRENTLY UNUSED LOG FLAGS. */
#define	DB_TXN_LOG_REDO	      0x00000	/* Redo-only logging. */
#define	DB_TXN_LOG_UNDO	      0x00000	/* Undo-only logging. */
#define	DB_TXN_LOG_UNDOREDO   0x00000	/* Undo/redo write-ahead logging. */
#define	DB_TXN_LOG_MASK	      0x00000	/* Log flags mask. */

/*
 * Flags understood by db_open(3).
 *
 * DB_EXCL and DB_TEMPORARY are internal only, and not documented.
 * DB_SEQUENTIAL is currently internal, but likely to be exported some day.
 */
/*			      0x00007	   COMMON MASK. */
/*			      0x07fff	   ALREADY USED. */
#define	DB_EXCL		      0x08000	/* O_EXCL: exclusive open. */
#define	DB_RDONLY	      0x10000	/* O_RDONLY: read-only. */
#define	DB_SEQUENTIAL	      0x20000	/* Indicate sequential access. */
#define	DB_TEMPORARY	      0x40000	/* Remove on last close. */
#define	DB_TRUNCATE	      0x80000	/* O_TRUNC: replace existing DB. */
/*
 * Deadlock detector modes; used in the DB_ENV structure to configure the
 * locking subsystem.
 */
#define	DB_LOCK_NORUN		0x0
#define	DB_LOCK_DEFAULT		0x1
#define	DB_LOCK_OLDEST		0x2
#define	DB_LOCK_RANDOM		0x3
#define	DB_LOCK_YOUNGEST	0x4

/*
 * The application environment: error reporting, file-system paths and the
 * per-subsystem (lock, log, mpool, txn) handles and tunables.
 */
struct __db_env {
	int		 db_lorder;	/* Byte order. */

					/* Error message callback. */
	void (*db_errcall) __P((const char *, char *));
	FILE		*db_errfile;	/* Error message file stream. */
	const char	*db_errpfx;	/* Error message prefix. */
	int		 db_verbose;	/* Generate debugging messages. */

	/* User paths. */
	char		*db_home;	/* Database home. */
	char		*db_log_dir;	/* Database log file directory. */
	char		*db_tmp_dir;	/* Database tmp file directory. */

	char	       **db_data_dir;	/* Database data file directories. */
	int		 data_cnt;	/* Database data file slots. */
	int		 data_next;	/* Next Database data file slot. */

	/* Locking. */
	DB_LOCKTAB	*lk_info;	/* Return from lock_open(). */
	u_int8_t	*lk_conflicts;	/* Two dimensional conflict matrix. */
	int		 lk_modes;	/* Number of lock modes in table. */
	unsigned int	 lk_max;	/* Maximum number of locks. */
	u_int32_t	 lk_detect;	/* Deadlock detect on every conflict. */
	int (*db_yield) __P((void));	/* Yield function for threads. */

	/* Logging. */
	DB_LOG		*lg_info;	/* Return from log_open(). */
	u_int32_t	 lg_max;	/* Maximum file size. */

	/* Memory pool. */
	DB_MPOOL	*mp_info;	/* Return from memp_open(). */
	size_t		 mp_mmapsize;	/* Maximum file size for mmap. */
	size_t		 mp_size;	/* Bytes in the mpool cache. */

	/* Transactions. */
	DB_TXNMGR	*tx_info;	/* Return from txn_open(). */
	unsigned int	 tx_max;	/* Maximum number of transactions. */
	int (*tx_recover)		/* Dispatch function for recovery. */
	    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));

	u_int32_t	 flags;		/* Flags. */
};

/*******************************************************
 * Access methods.
 *******************************************************/
typedef enum {
	DB_BTREE=1,			/* B+tree. */
	DB_HASH,			/* Extended Linear Hashing. */
	DB_RECNO,			/* Fixed and variable-length records. */
	DB_UNKNOWN			/* Figure it out on open. */
} DBTYPE;

#define	DB_BTREEVERSION	6		/* Current btree version. */
#define	DB_BTREEOLDVER	6		/* Oldest btree version supported. */
#define	DB_BTREEMAGIC	0x053162

#define	DB_HASHVERSION	5		/* Current hash version. */
#define	DB_HASHOLDVER	4		/* Oldest hash version supported. */
#define	DB_HASHMAGIC	0x061561

#define	DB_LOGVERSION	2		/* Current log version. */
#define	DB_LOGOLDVER	2		/* Oldest log version supported. */
#define	DB_LOGMAGIC	0x040988

/*
 * Per-database open parameters, grouped by the access method that
 * reads them.
 */
struct __db_info {
	int		 db_lorder;	/* Byte order. */
	size_t		 db_cachesize;	/* Underlying cache size. */
	size_t		 db_pagesize;	/* Underlying page size. */

					/* Local heap allocation. */
	void *(*db_malloc) __P((size_t));

	/* Btree access method. */
	int		 bt_maxkey;	/* Maximum keys per page. */
	int		 bt_minkey;	/* Minimum keys per page. */
	int (*bt_compare)		/* Comparison function. */
	    __P((const DBT *, const DBT *));
	size_t (*bt_prefix)		/* Prefix function. */
	    __P((const DBT *, const DBT *));

	/* Hash access method. */
	unsigned int	 h_ffactor;	/* Fill factor. */
	unsigned int	 h_nelem;	/* Number of elements. */
	u_int32_t	(*h_hash)	/* Hash function. */
	    __P((const void *, u_int32_t));

	/* Recno access method. */
	int		 re_pad;	/* Fixed-length padding byte. */
	int		 re_delim;	/* Variable-length delimiting byte. */
	u_int32_t	 re_len;	/* Length for fixed-length records. */
	char		*re_source;	/* Source file name. */

#define	DB_DELIMITER		0x0001	/* Recno: re_delim set. */
#define	DB_DUP			0x0002	/* Btree, Hash: duplicate keys. */
#define	DB_FIXEDLEN		0x0004	/* Recno: fixed-length records. */
#define	DB_PAD			0x0008	/* Recno: re_pad set. */
#define	DB_RECNUM		0x0010	/* Btree: record numbers. */
#define	DB_RENUMBER		0x0020	/* Recno: renumber on insert/delete. */
#define	DB_SNAPSHOT		0x0040	/* Recno: snapshot the input. */
	u_int32_t	 flags;
};

/*
 * DB access method and cursor operation codes.  These are implemented as
 * bit fields for future flexibility, but currently only a single one may
 * be specified to any function.
 */
#define	DB_AFTER	0x000001	/* c_put() */
#define	DB_APPEND	0x000002	/* put() */
#define	DB_BEFORE	0x000004	/* c_put() */
#define	DB_CHECKPOINT	0x000008	/* log_put(), log_get() */
#define	DB_CURRENT	0x000010	/* c_get(), c_put(), log_get() */
#define	DB_FIRST	0x000020	/* c_get(), log_get() */
#define	DB_FLUSH	0x000040	/* log_put() */
#define	DB_GET_RECNO	0x000080	/* c_get() */
#define	DB_KEYFIRST	0x000100	/* c_put() */
#define	DB_KEYLAST	0x000200	/* c_put() */
#define	DB_LAST		0x000400	/* c_get(), log_get() */
#define	DB_NEXT		0x000800	/* c_get(), log_get() */
#define	DB_NOOVERWRITE	0x001000	/* put() */
#define	DB_NOSYNC	0x002000	/* close() */
#define	DB_PREV		0x004000	/* c_get(), log_get() */
#define	DB_RECORDCOUNT	0x008000	/* stat() */
#define	DB_SET		0x010000	/* c_get(), log_get() */
#define	DB_SET_RANGE	0x020000	/* c_get() */
#define	DB_SET_RECNO	0x040000	/* get(), c_get() */

/* DB (user visible) error return codes. */
#define	DB_INCOMPLETE		( -1)	/* Sync didn't finish. */
#define	DB_KEYEMPTY		( -2)	/* The key/data pair was deleted or
					   was never created by the user. */
#define	DB_KEYEXIST		( -3)	/* The key/data pair already exists. */
#define	DB_LOCK_DEADLOCK	( -4)	/* Locker killed to resolve deadlock. */
#define	DB_LOCK_NOTGRANTED	( -5)	/* Lock unavailable, no-wait set. */
#define	DB_LOCK_NOTHELD		( -6)	/* Lock not held by locker. */
#define	DB_NOTFOUND		( -7)	/* Key/data pair not found (EOF). */

/* DB (private) error return codes. */
#define	DB_DELETED		( -8)	/* Recovery file marked deleted.
*/ +#define DB_NEEDSPLIT ( -9) /* Page needs to be split. */ +#define DB_REGISTERED (-10) /* Entry was previously registered. */ +#define DB_SWAPBYTES (-11) /* Database needs byte swapping. */ + +struct __db_ilock { /* Internal DB access method lock. */ + db_pgno_t pgno; /* Page being locked. */ + /* File id. */ + u_int8_t fileid[DB_FILE_ID_LEN]; +}; + +/* DB access method description structure. */ +struct __db { + void *mutex; /* Synchronization for free threading */ + DBTYPE type; /* DB access method. */ + DB_ENV *dbenv; /* DB_ENV structure. */ + DB_ENV *mp_dbenv; /* DB_ENV for local mpool creation. */ + + DB *master; /* Original DB created by db_open. */ + void *internal; /* Access method private. */ + + DB_MPOOL *mp; /* The access method's mpool. */ + DB_MPOOLFILE *mpf; /* The access method's mpool file. */ + + /* + * XXX + * Explicit representations of structures in queue.h. + * + * TAILQ_HEAD(curs_queue, __dbc); + */ + struct { + struct __dbc *tqh_first; + struct __dbc **tqh_last; + } curs_queue; + + /* + * XXX + * Explicit representations of structures in queue.h. + * + * LIST_HEAD(handleq, __db); + * LIST_ENTRY(__db); + */ + struct { + struct __db *lh_first; + } handleq; /* List of handles for this DB. */ + struct { + struct __db *le_next; + struct __db **le_prev; + } links; /* Links for the handle list. */ + + u_int32_t log_fileid; /* Logging file id. */ + + DB_TXN *txn; /* Current transaction. */ + u_int32_t locker; /* Default process' locker id. */ + DBT lock_dbt; /* DBT referencing lock. */ + struct __db_ilock lock; /* Lock. */ + + size_t pgsize; /* Logical page size of file. */ + + /* Local heap allocation. */ + void *(*db_malloc) __P((size_t)); + + /* Functions. 
*/ + int (*close) __P((DB *, int)); + int (*cursor) __P((DB *, DB_TXN *, DBC **)); + int (*del) __P((DB *, DB_TXN *, DBT *, int)); + int (*fd) __P((DB *, int *)); + int (*get) __P((DB *, DB_TXN *, DBT *, DBT *, int)); + int (*put) __P((DB *, DB_TXN *, DBT *, DBT *, int)); + int (*stat) __P((DB *, void *, void *(*)(size_t), int)); + int (*sync) __P((DB *, int)); + +#define DB_AM_DUP 0x000001 /* DB_DUP (internal). */ +#define DB_AM_INMEM 0x000002 /* In-memory; no sync on close. */ +#define DB_AM_LOCKING 0x000004 /* Perform locking. */ +#define DB_AM_LOGGING 0x000008 /* Perform logging. */ +#define DB_AM_MLOCAL 0x000010 /* Database memory pool is local. */ +#define DB_AM_PGDEF 0x000020 /* Page size was defaulted. */ +#define DB_AM_RDONLY 0x000040 /* Database is readonly. */ +#define DB_AM_RECOVER 0x000080 /* In recovery (do not log or lock). */ +#define DB_AM_SWAP 0x000100 /* Pages need to be byte-swapped. */ +#define DB_AM_THREAD 0x000200 /* DB is multi-threaded. */ +#define DB_BT_RECNUM 0x000400 /* DB_RECNUM (internal) */ +#define DB_HS_DIRTYMETA 0x000800 /* Hash: Metadata page modified. */ +#define DB_RE_DELIMITER 0x001000 /* DB_DELIMITER (internal). */ +#define DB_RE_FIXEDLEN 0x002000 /* DB_FIXEDLEN (internal). */ +#define DB_RE_PAD 0x004000 /* DB_PAD (internal). */ +#define DB_RE_RENUMBER 0x008000 /* DB_RENUMBER (internal). */ +#define DB_RE_SNAPSHOT 0x010000 /* DB_SNAPSHOT (internal). */ + + u_int32_t flags; +}; + +/* Cursor description structure. */ +struct __dbc { + DB *dbp; /* Related DB access method. */ + DB_TXN *txn; /* Associated transaction. */ + + /* + * XXX + * Explicit representations of structures in queue.h. + * + * TAILQ_ENTRY(__dbc); + */ + struct { + struct __dbc *tqe_next; + struct __dbc **tqe_prev; + } links; + + void *internal; /* Access method private. 
*/ + + int (*c_close) __P((DBC *)); + int (*c_del) __P((DBC *, int)); + int (*c_get) __P((DBC *, DBT *, DBT *, int)); + int (*c_put) __P((DBC *, DBT *, DBT *, int)); +}; + +/* Btree/recno statistics structure. */ +struct __db_bt_stat { + u_int32_t bt_flags; /* Open flags. */ + u_int32_t bt_maxkey; /* Maxkey value. */ + u_int32_t bt_minkey; /* Minkey value. */ + u_int32_t bt_re_len; /* Fixed-length record length. */ + u_int32_t bt_re_pad; /* Fixed-length record pad. */ + u_int32_t bt_pagesize; /* Page size. */ + u_int32_t bt_levels; /* Tree levels. */ + u_int32_t bt_nrecs; /* Number of records. */ + u_int32_t bt_int_pg; /* Internal pages. */ + u_int32_t bt_leaf_pg; /* Leaf pages. */ + u_int32_t bt_dup_pg; /* Duplicate pages. */ + u_int32_t bt_over_pg; /* Overflow pages. */ + u_int32_t bt_free; /* Pages on the free list. */ + u_int32_t bt_freed; /* Pages freed for reuse. */ + u_int32_t bt_int_pgfree; /* Bytes free in internal pages. */ + u_int32_t bt_leaf_pgfree; /* Bytes free in leaf pages. */ + u_int32_t bt_dup_pgfree; /* Bytes free in duplicate pages. */ + u_int32_t bt_over_pgfree; /* Bytes free in overflow pages. */ + u_int32_t bt_pfxsaved; /* Bytes saved by prefix compression. */ + u_int32_t bt_split; /* Total number of splits. */ + u_int32_t bt_rootsplit; /* Root page splits. */ + u_int32_t bt_fastsplit; /* Fast splits. */ + u_int32_t bt_added; /* Items added. */ + u_int32_t bt_deleted; /* Items deleted. */ + u_int32_t bt_get; /* Items retrieved. */ + u_int32_t bt_cache_hit; /* Hits in fast-insert code. */ + u_int32_t bt_cache_miss; /* Misses in fast-insert code. 
*/ +}; + +#if defined(__cplusplus) +extern "C" { +#endif +int db_appinit __P((const char *, char * const *, DB_ENV *, int)); +int db_appexit __P((DB_ENV *)); +int db_open __P((const char *, DBTYPE, int, int, DB_ENV *, DB_INFO *, DB **)); +char *db_version __P((int *, int *, int *)); +#if defined(__cplusplus) +}; +#endif + +/******************************************************* + * Locking + *******************************************************/ +#define DB_LOCKVERSION 1 +#define DB_LOCKMAGIC 0x090193 + +/* Flag values for lock_vec(). */ +#define DB_LOCK_NOWAIT 0x01 /* Don't wait on unavailable lock. */ + +/* Flag values for lock_detect(). */ +#define DB_LOCK_CONFLICT 0x01 /* Run on any conflict. */ + +/* Request types. */ +typedef enum { + DB_LOCK_DUMP, /* Display held locks. */ + DB_LOCK_GET, /* Get the lock. */ + DB_LOCK_PUT, /* Release the lock. */ + DB_LOCK_PUT_ALL, /* Release locker's locks. */ + DB_LOCK_PUT_OBJ /* Release locker's locks on obj. */ +} db_lockop_t; + +/* Simple R/W lock modes and for multi-granularity intention locking. */ +typedef enum { + DB_LOCK_NG=0, /* Not granted. */ + DB_LOCK_READ, /* Shared/read. */ + DB_LOCK_WRITE, /* Exclusive/write. */ + DB_LOCK_IREAD, /* Intent to share/read. */ + DB_LOCK_IWRITE, /* Intent exclusive/write. */ + DB_LOCK_IWR /* Intent to read and write. */ +} db_lockmode_t; + +/* Lock request structure. */ +struct __db_lockreq { + db_lockop_t op; /* Operation. */ + db_lockmode_t mode; /* Requested mode. */ + u_int32_t locker; /* Locker identity. */ + DBT *obj; /* Object being locked. */ + DB_LOCK lock; /* Lock returned. */ +}; + +/* + * Commonly used conflict matrices. + * + * Standard Read/Write (or exclusive/shared) locks. + */ +#define DB_LOCK_RW_N 3 +extern const u_int8_t db_rw_conflicts[]; + +/* Multi-granularity locking. 
*/ +#define DB_LOCK_RIW_N 6 +extern const u_int8_t db_riw_conflicts[]; + +#if defined(__cplusplus) +extern "C" { +#endif +int lock_close __P((DB_LOCKTAB *)); +int lock_detect __P((DB_LOCKTAB *, int, u_int32_t)); +int lock_get __P((DB_LOCKTAB *, + u_int32_t, int, const DBT *, db_lockmode_t, DB_LOCK *)); +int lock_id __P((DB_LOCKTAB *, u_int32_t *)); +int lock_open __P((const char *, int, int, DB_ENV *, DB_LOCKTAB **)); +int lock_put __P((DB_LOCKTAB *, DB_LOCK)); +int lock_unlink __P((const char *, int, DB_ENV *)); +int lock_vec __P((DB_LOCKTAB *, + u_int32_t, int, DB_LOCKREQ *, int, DB_LOCKREQ **)); +#if defined(__cplusplus) +}; +#endif + +/******************************************************* + * Logging. + *******************************************************/ +/* Flag values for log_archive(). */ +#define DB_ARCH_ABS 0x001 /* Absolute pathnames. */ +#define DB_ARCH_DATA 0x002 /* Data files. */ +#define DB_ARCH_LOG 0x004 /* Log files. */ + +/* + * A DB_LSN has two parts, a fileid which identifies a specific file, and an + * offset within that file. The fileid is an unsigned 4-byte quantity that + * uniquely identifies a file within the log directory -- currently a simple + * counter inside the log. The offset is also an unsigned 4-byte value. The + * log manager guarantees the offset is never more than 4 bytes by switching + * to a new log file before the maximum length imposed by an unsigned 4-byte + * offset is reached. + */ +struct __db_lsn { + u_int32_t file; /* File ID. */ + u_int32_t offset; /* File offset. 
*/ +}; + +#if defined(__cplusplus) +extern "C" { +#endif +int log_archive __P((DB_LOG *, char **[], int, void *(*)(size_t))); +int log_close __P((DB_LOG *)); +int log_compare __P((const DB_LSN *, const DB_LSN *)); +int log_file __P((DB_LOG *, const DB_LSN *, char *, size_t)); +int log_flush __P((DB_LOG *, const DB_LSN *)); +int log_get __P((DB_LOG *, DB_LSN *, DBT *, int)); +int log_open __P((const char *, int, int, DB_ENV *, DB_LOG **)); +int log_put __P((DB_LOG *, DB_LSN *, const DBT *, int)); +int log_register __P((DB_LOG *, DB *, const char *, DBTYPE, u_int32_t *)); +int log_unlink __P((const char *, int, DB_ENV *)); +int log_unregister __P((DB_LOG *, u_int32_t)); +#if defined(__cplusplus) +}; +#endif + +/******************************************************* + * Mpool + *******************************************************/ +/* Flag values for memp_fget(). */ +#define DB_MPOOL_CREATE 0x001 /* Create a page. */ +#define DB_MPOOL_LAST 0x002 /* Return the last page. */ +#define DB_MPOOL_NEW 0x004 /* Create a new page. */ + +/* Flag values for memp_fput(), memp_fset(). */ +#define DB_MPOOL_CLEAN 0x001 /* Clear modified bit. */ +#define DB_MPOOL_DIRTY 0x002 /* Page is modified. */ +#define DB_MPOOL_DISCARD 0x004 /* Don't cache the page. */ + +/* Mpool statistics structure. */ +struct __db_mpool_stat { + size_t st_cachesize; /* Cache size. */ + unsigned long st_cache_hit; /* Pages found in the cache. */ + unsigned long st_cache_miss; /* Pages not found in the cache. */ + unsigned long st_map; /* Pages from mapped files. */ + unsigned long st_page_create; /* Pages created in the cache. */ + unsigned long st_page_in; /* Pages read in. */ + unsigned long st_page_out; /* Pages written out. */ + unsigned long st_ro_evict; /* Read-only pages evicted. */ + unsigned long st_rw_evict; /* Read-write pages evicted. */ + unsigned long st_hash_buckets; /* Number of hash buckets. */ + unsigned long st_hash_searches; /* Total hash chain searches. 
*/ + unsigned long st_hash_longest; /* Longest hash chain searched. */ + unsigned long st_hash_examined; /* Total hash entries searched. */ +}; + +/* Mpool file statistics structure. */ +struct __db_mpool_fstat { + char *file_name; /* File name. */ + size_t st_pagesize; /* Page size. */ + unsigned long st_cache_hit; /* Pages found in the cache. */ + unsigned long st_cache_miss; /* Pages not found in the cache. */ + unsigned long st_map; /* Pages from mapped files. */ + unsigned long st_page_create; /* Pages created in the cache. */ + unsigned long st_page_in; /* Pages read in. */ + unsigned long st_page_out; /* Pages written out. */ +}; + +#if defined(__cplusplus) +extern "C" { +#endif +int memp_close __P((DB_MPOOL *)); +int memp_fclose __P((DB_MPOOLFILE *)); +int memp_fget __P((DB_MPOOLFILE *, db_pgno_t *, unsigned long, void *)); +int memp_fopen __P((DB_MPOOL *, const char *, + int, int, int, size_t, int, DBT *, u_int8_t *, DB_MPOOLFILE **)); +int memp_fput __P((DB_MPOOLFILE *, void *, unsigned long)); +int memp_fset __P((DB_MPOOLFILE *, void *, unsigned long)); +int memp_fsync __P((DB_MPOOLFILE *)); +int memp_open __P((const char *, int, int, DB_ENV *, DB_MPOOL **)); +int memp_register __P((DB_MPOOL *, int, + int (*)(db_pgno_t, void *, DBT *), + int (*)(db_pgno_t, void *, DBT *))); +int memp_stat __P((DB_MPOOL *, + DB_MPOOL_STAT **, DB_MPOOL_FSTAT ***, void *(*)(size_t))); +int memp_sync __P((DB_MPOOL *, DB_LSN *)); +int memp_unlink __P((const char *, int, DB_ENV *)); +#if defined(__cplusplus) +}; +#endif + +/******************************************************* + * Transactions. + *******************************************************/ +#define DB_TXNVERSION 1 +#define DB_TXNMAGIC 0x041593 + +/* Operations values to the tx_recover() function. */ +#define DB_TXN_BACKWARD_ROLL 1 /* Read the log backwards. */ +#define DB_TXN_FORWARD_ROLL 2 /* Read the log forwards. */ +#define DB_TXN_OPENFILES 3 /* Read for open files. 
*/ +#define DB_TXN_REDO 4 /* Redo the operation. */ +#define DB_TXN_UNDO 5 /* Undo the operation. */ + +/* Internal transaction status values. */ + +/* Transaction statistics structure. */ +struct __db_txn_active { + u_int32_t txnid; /* Transaction ID */ + DB_LSN lsn; /* Lsn of the begin record */ +}; + +struct __db_txn_stat { + DB_LSN st_last_ckp; /* lsn of the last checkpoint */ + DB_LSN st_pending_ckp; /* last checkpoint did not finish */ + time_t st_time_ckp; /* time of last checkpoint */ + u_int32_t st_last_txnid; /* last transaction id given out */ + u_int32_t st_maxtxns; /* maximum number of active txns */ + u_int32_t st_naborts; /* number of aborted transactions */ + u_int32_t st_nbegins; /* number of begun transactions */ + u_int32_t st_ncommits; /* number of committed transactions */ + u_int32_t st_nactive; /* number of active transactions */ + DB_TXN_ACTIVE *st_txnarray; /* array of active transactions */ +}; + +#if defined(__cplusplus) +extern "C" { +#endif +int txn_abort __P((DB_TXN *)); +int txn_begin __P((DB_TXNMGR *, DB_TXN *, DB_TXN **)); +int txn_checkpoint __P((const DB_TXNMGR *, long, long)); +int txn_commit __P((DB_TXN *)); +int txn_close __P((DB_TXNMGR *)); +u_int32_t txn_id __P((DB_TXN *)); +int txn_open __P((const char *, int, int, DB_ENV *, DB_TXNMGR **)); +int txn_prepare __P((DB_TXN *)); +int txn_stat __P((DB_TXNMGR *, DB_TXN_STAT **, void *(*)(size_t))); +int txn_unlink __P((const char *, int, DB_ENV *)); +#if defined(__cplusplus) +}; +#endif + +#ifdef DB_DBM_HSEARCH +/******************************************************* + * Dbm/Ndbm historic interfaces. + *******************************************************/ +#define DBM_INSERT 0 /* Flags to dbm_store(). */ +#define DBM_REPLACE 1 + +/* + * The db(3) support for ndbm(3) always appends this suffix to the + * file name to avoid overwriting the user's original database. 
+ */ +#define DBM_SUFFIX ".db" + +typedef struct { + char *dptr; + int dsize; +} datum; + +#if defined(__cplusplus) +extern "C" { +#endif +int dbminit __P((char *)); +#if !defined(__cplusplus) +int delete __P((datum)); +#endif +datum fetch __P((datum)); +datum firstkey __P((void)); +datum nextkey __P((datum)); +int store __P((datum, datum)); + +/* + * !!! + * Don't prototype: + * + * dbm_clearerr(DBM *db); + * dbm_dirfno(DBM *db); + * dbm_error(DBM *db); + * dbm_pagfno(DBM *db); + * dbm_rdonly(DBM *db); + * + * they weren't documented and were historically implemented as #define's. + */ +void dbm_close __P((DBM *)); +int dbm_delete __P((DBM *, datum)); +datum dbm_fetch __P((DBM *, datum)); +datum dbm_firstkey __P((DBM *)); +long dbm_forder __P((DBM *, datum)); +datum dbm_nextkey __P((DBM *)); +DBM *dbm_open __P((const char *, int, int)); +int dbm_store __P((DBM *, datum, datum, int)); +#if defined(__cplusplus) +}; +#endif + +/******************************************************* + * Hsearch historic interface. + *******************************************************/ +typedef enum { + FIND, ENTER +} ACTION; + +typedef struct entry { + char *key; + void *data; +} ENTRY; + +#if defined(__cplusplus) +extern "C" { +#endif +int hcreate __P((unsigned int)); +void hdestroy __P((void)); +ENTRY *hsearch __P((ENTRY, ACTION)); +#if defined(__cplusplus) +}; +#endif +#endif /* DB_DBM_HSEARCH */ + +/* + * XXX + * MacOS: Reset Metrowerks C enum sizes. + */ +#ifdef __MWERKS__ +#pragma enumsalwaysint reset +#endif +#endif /* !_DB_H_ */ diff --git a/db2/include/db_185.h.src b/db2/include/db_185.h.src new file mode 100644 index 0000000000..52fb3a0da1 --- /dev/null +++ b/db2/include/db_185.h.src @@ -0,0 +1,170 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 1997 + * Sleepycat Software. All rights reserved. + */ +/* + * Copyright (c) 1990, 1993, 1994 + * The Regents of the University of California. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)db_185.h.src 8.3 (Sleepycat) 7/27/97 + */ + +#ifndef _DB_185_H_ +#define _DB_185_H_ + +#include <sys/types.h> + +#include <limits.h> + +/* + * XXX + * Handle function prototypes and the keyword "const". 
This steps on name + * space that DB doesn't control, but all of the other solutions are worse. + */ +#undef __P +#if defined(__STDC__) || defined(__cplusplus) +#define __P(protos) protos /* ANSI C prototypes */ +#else +#define const +#define __P(protos) () /* K&R C preprocessor */ +#endif + +#define RET_ERROR -1 /* Return values. */ +#define RET_SUCCESS 0 +#define RET_SPECIAL 1 + +#ifndef __BIT_TYPES_DEFINED__ +#define __BIT_TYPES_DEFINED__ +@u_int8_decl@ +@int16_decl@ +@u_int16_decl@ +@int32_decl@ +@u_int32_decl@ +#endif + +#define MAX_PAGE_NUMBER 0xffffffff /* >= # of pages in a file */ +typedef u_int32_t pgno_t; +#define MAX_PAGE_OFFSET 65535 /* >= # of bytes in a page */ +typedef u_int16_t indx_t; +#define MAX_REC_NUMBER 0xffffffff /* >= # of records in a tree */ +typedef u_int32_t recno_t; + +/* Key/data structure -- a Data-Base Thang. */ +typedef struct { + void *data; /* data */ + size_t size; /* data length */ +} DBT; + +/* Routine flags. */ +#define R_CURSOR 1 /* del, put, seq */ +#define __R_UNUSED 2 /* UNUSED */ +#define R_FIRST 3 /* seq */ +#define R_IAFTER 4 /* put (RECNO) */ +#define R_IBEFORE 5 /* put (RECNO) */ +#define R_LAST 6 /* seq (BTREE, RECNO) */ +#define R_NEXT 7 /* seq */ +#define R_NOOVERWRITE 8 /* put */ +#define R_PREV 9 /* seq (BTREE, RECNO) */ +#define R_SETCURSOR 10 /* put (RECNO) */ +#define R_RECNOSYNC 11 /* sync (RECNO) */ + +typedef enum { DB_BTREE, DB_HASH, DB_RECNO } DBTYPE; + +/* Access method description structure. */ +typedef struct __db { + DBTYPE type; /* Underlying db type. */ + int (*close) __P((struct __db *)); + int (*del) __P((const struct __db *, const DBT *, u_int)); + int (*get) __P((const struct __db *, const DBT *, DBT *, u_int)); + int (*put) __P((const struct __db *, DBT *, const DBT *, u_int)); + int (*seq) __P((const struct __db *, DBT *, DBT *, u_int)); + int (*sync) __P((const struct __db *, u_int)); + void *internal; /* Access method private. 
*/ + int (*fd) __P((const struct __db *)); +} DB; + +#define BTREEMAGIC 0x053162 +#define BTREEVERSION 3 + +/* Structure used to pass parameters to the btree routines. */ +typedef struct { +#define R_DUP 0x01 /* duplicate keys */ + u_long flags; + u_int cachesize; /* bytes to cache */ + int maxkeypage; /* maximum keys per page */ + int minkeypage; /* minimum keys per page */ + u_int psize; /* page size */ + int (*compare) /* comparison function */ + __P((const DBT *, const DBT *)); + size_t (*prefix) /* prefix function */ + __P((const DBT *, const DBT *)); + int lorder; /* byte order */ +} BTREEINFO; + +#define HASHMAGIC 0x061561 +#define HASHVERSION 2 + +/* Structure used to pass parameters to the hashing routines. */ +typedef struct { + u_int bsize; /* bucket size */ + u_int ffactor; /* fill factor */ + u_int nelem; /* number of elements */ + u_int cachesize; /* bytes to cache */ + u_int32_t /* hash function */ + (*hash) __P((const void *, size_t)); + int lorder; /* byte order */ +} HASHINFO; + +/* Structure used to pass parameters to the record routines. */ +typedef struct { +#define R_FIXEDLEN 0x01 /* fixed-length records */ +#define R_NOKEY 0x02 /* key not required */ +#define R_SNAPSHOT 0x04 /* snapshot the input */ + u_long flags; + u_int cachesize; /* bytes to cache */ + u_int psize; /* page size */ + int lorder; /* byte order */ + size_t reclen; /* record length (fixed-length records) */ + u_char bval; /* delimiting byte (variable-length records) */ + char *bfname; /* btree file name */ +} RECNOINFO; + +#if defined(__cplusplus) +extern "C" { +#endif +DB *dbopen __P((const char *, int, int, DBTYPE, const void *)); + +#if defined(__cplusplus) +}; +#endif +#endif /* !_DB_185_H_ */ diff --git a/db2/include/db_am.h b/db2/include/db_am.h new file mode 100644 index 0000000000..3289eececa --- /dev/null +++ b/db2/include/db_am.h @@ -0,0 +1,87 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 1997 + * Sleepycat Software. 
All rights reserved. + * + * @(#)db_am.h 10.5 (Sleepycat) 8/22/97 + */ +#ifndef _DB_AM_H +#define _DB_AM_H + +#define DB_ISBIG 0x01 +#define DB_ADD_DUP 0x10 +#define DB_REM_DUP 0x20 +#define DB_ADD_BIG 0x30 +#define DB_REM_BIG 0x40 +#define DB_SPLITOLD 0x50 +#define DB_SPLITNEW 0x60 + +/* + * Standard initialization and shutdown macros for all recovery functions. + * + * Requires the following local variables: + * + * DB *file_dbp, *mdbp; + * DB_MPOOLFILE *mpf; + * int ret; + */ +#define REC_INTRO(func) { \ + file_dbp = mdbp = NULL; \ + if ((ret = func(dbtp->data, &argp)) != 0) \ + goto out; \ + if (__db_fileid_to_db(logp, &mdbp, argp->fileid)) { \ + if (ret == DB_DELETED) \ + ret = 0; \ + goto out; \ + } \ + if (mdbp == NULL) \ + goto out; \ + if (F_ISSET(mdbp, DB_AM_THREAD)) { \ + if ((ret = __db_gethandle(mdbp, \ + mdbp->type == DB_HASH ? __ham_hdup : __bam_bdup, \ + &file_dbp)) != 0) \ + goto out; \ + } else \ + file_dbp = mdbp; \ + F_SET(file_dbp, DB_AM_RECOVER); \ + mpf = file_dbp->mpf; \ +} +#define REC_CLOSE { \ + if (argp != NULL) \ + free (argp); \ + if (file_dbp != NULL) { \ + F_CLR(file_dbp, DB_AM_RECOVER); \ + if (F_ISSET(file_dbp, DB_AM_THREAD)) \ + __db_puthandle(file_dbp); \ + } \ + return (ret); \ +} + +/* + * No-op versions of the same macros. + */ +#define REC_NOOP_INTRO(func) { \ + if ((ret = func(dbtp->data, &argp)) != 0) \ + return (ret); \ +} +#define REC_NOOP_CLOSE { \ + if (argp != NULL) \ + free (argp); \ + return (ret); \ +} + +/* + * Standard debugging macro for all recovery functions. + */ +#ifdef DEBUG_RECOVER +#define REC_PRINT(func) \ + (void)func(logp, dbtp, lsnp, redo, info); +#else +#define REC_PRINT(func) \ + info = info; /* XXX: Shut the compiler up. 
*/ +#endif + +#include "db_auto.h" +#include "db_ext.h" +#endif diff --git a/db2/include/db_auto.h b/db2/include/db_auto.h new file mode 100644 index 0000000000..7478173740 --- /dev/null +++ b/db2/include/db_auto.h @@ -0,0 +1,118 @@ +/* Do not edit: automatically built by dist/db_gen.sh. */ +#ifndef db_AUTO_H +#define db_AUTO_H + +#define DB_db_addrem (DB_db_BEGIN + 1) + +typedef struct _db_addrem_args { + u_int32_t type; + DB_TXN *txnid; + DB_LSN prev_lsn; + u_int32_t opcode; + u_int32_t fileid; + db_pgno_t pgno; + u_int32_t indx; + size_t nbytes; + DBT hdr; + DBT dbt; + DB_LSN pagelsn; +} __db_addrem_args; + + +#define DB_db_split (DB_db_BEGIN + 2) + +typedef struct _db_split_args { + u_int32_t type; + DB_TXN *txnid; + DB_LSN prev_lsn; + u_int32_t opcode; + u_int32_t fileid; + db_pgno_t pgno; + DBT pageimage; + DB_LSN pagelsn; +} __db_split_args; + + +#define DB_db_big (DB_db_BEGIN + 3) + +typedef struct _db_big_args { + u_int32_t type; + DB_TXN *txnid; + DB_LSN prev_lsn; + u_int32_t opcode; + u_int32_t fileid; + db_pgno_t pgno; + db_pgno_t prev_pgno; + db_pgno_t next_pgno; + DBT dbt; + DB_LSN pagelsn; + DB_LSN prevlsn; + DB_LSN nextlsn; +} __db_big_args; + + +#define DB_db_ovref (DB_db_BEGIN + 4) + +typedef struct _db_ovref_args { + u_int32_t type; + DB_TXN *txnid; + DB_LSN prev_lsn; + u_int32_t fileid; + db_pgno_t pgno; + DB_LSN lsn; +} __db_ovref_args; + + +#define DB_db_relink (DB_db_BEGIN + 5) + +typedef struct _db_relink_args { + u_int32_t type; + DB_TXN *txnid; + DB_LSN prev_lsn; + u_int32_t fileid; + db_pgno_t pgno; + DB_LSN lsn; + db_pgno_t prev; + DB_LSN lsn_prev; + db_pgno_t next; + DB_LSN lsn_next; +} __db_relink_args; + + +#define DB_db_addpage (DB_db_BEGIN + 6) + +typedef struct _db_addpage_args { + u_int32_t type; + DB_TXN *txnid; + DB_LSN prev_lsn; + u_int32_t fileid; + db_pgno_t pgno; + DB_LSN lsn; + db_pgno_t nextpgno; + DB_LSN nextlsn; +} __db_addpage_args; + + +#define DB_db_debug (DB_db_BEGIN + 7) + +typedef struct _db_debug_args { + 
u_int32_t type; + DB_TXN *txnid; + DB_LSN prev_lsn; + DBT op; + u_int32_t fileid; + DBT key; + DBT data; + u_int32_t arg_flags; +} __db_debug_args; + + +#define DB_db_noop (DB_db_BEGIN + 8) + +typedef struct _db_noop_args { + u_int32_t type; + DB_TXN *txnid; + DB_LSN prev_lsn; +} __db_noop_args; + +#endif diff --git a/db2/include/db_cxx.h b/db2/include/db_cxx.h new file mode 100644 index 0000000000..506aed845c --- /dev/null +++ b/db2/include/db_cxx.h @@ -0,0 +1,888 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1997 + * Sleepycat Software. All rights reserved. + * + * @(#)db_cxx.h 10.7 (Sleepycat) 8/22/97 + */ + +#ifndef _DB_CXX_H_ +#define _DB_CXX_H_ + +// +// C++ assumptions: +// +// To ensure portability to many platforms, both new and old, we make +// few assumptions about the C++ compiler and library. For example, +// we do not expect STL, templates or namespaces to be available. The +// "newest" C++ feature used is exceptions, which are used liberally +// to transmit error information. Even the use of exceptions can be +// disabled at runtime, see setErrorModel(). +// +// C++ naming conventions: +// +// - All top level class names start with Db. +// - All class members start with lower case letter. +// - All private data members are suffixed with underscore. +// - Use underscores to divide names into multiple words. +// - Simple data accessors are named with get_ or set_ prefix. +// - All method names are taken from names of functions in the C +// layer of db (usually by dropping a prefix like "db_"). +// These methods have the same argument types and order, +// other than dropping the explicit arg that acts as "this". +// +// As a rule, each DbFoo object has exactly one underlying DB_FOO struct +// (defined in db.h) associated with it. In many cases, we inherit directly +// from the DB_FOO structure to make this relationship explicit. 
Often, +// the underlying C layer allocates and deallocates these structures, so +// there is no easy way to add any data to the DbFoo class. When you see +// a comment about whether data is permitted to be added, this is what +// is going on. Of course, if we need to add data to such C++ classes +// in the future, we will arrange to have an indirect pointer to the +// DB_FOO struct (as some of the classes already have). +// + + +//////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////// +// +// Forward declarations +// + +#include "db.h" + +class Db; // forward +class Dbc; // forward +class DbEnv; // forward +class DbException; // forward +class DbInfo; // forward +class DbLock; // forward +class DbLockTab; // forward +class DbLog; // forward +class DbLsn; // forward +class DbMpool; // forward +class DbMpoolFile; // forward +class Dbt; // forward +class DbTxn; // forward +class DbTxnMgr; // forward + +//////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////// +// +// Mechanisms for declaring classes +// + +// +// Every class defined in this file has an _exported next to the class name. +// This is needed for WinTel machines so that the class methods can +// be exported or imported in a DLL as appropriate. Users of the DLL +// use the define DB_USE_DLL. When the DLL is built, DB_CREATE_DLL +// must be defined. +// +#if defined(_MSC_VER) + +# if defined(DB_CREATE_DLL) +# define _exported __declspec(dllexport) // creator of dll +# elif defined(DB_USE_DLL) +# define _exported __declspec(dllimport) // user of dll +# else +# define _exported // static lib creator or user +# endif + +#else + +# define _exported + +#endif + +// DEFINE_DB_CLASS defines an imp_ data member and imp() accessor. 
+// The underlying type is a pointer to an opaque *Imp class, that +// gets converted to the correct implementation class by the implementation. +// +// Since these defines use "private/public" labels, and leave the access +// being "private", we always use these by convention before any data +// members in the private section of a class. Keeping them in the +// private section also emphasizes that they are off limits to user code. +// +#define DEFINE_DB_CLASS(name) \ + public: class name##Imp* imp() { return imp_; } \ + public: const class name##Imp* imp() const { return imp_; } \ + private: class name##Imp* imp_ + + +//////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////// +// +// Turn off inappropriate compiler warnings +// + +#ifdef _MSC_VER + +// These are level 4 warnings that are explicitly disabled. +// With Visual C++, by default you do not see above level 3 unless +// you use /W4. But we like to compile with the highest level +// warnings to catch other errors. +// +// 4201: nameless struct/union +// triggered by standard include file <winnt.h> +// +// 4514: unreferenced inline function has been removed +// certain include files in MSVC define methods that are not called +// +#pragma warning(disable: 4201 4514) + +#endif + +//////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////// +// +// Exception classes +// + +// Almost any error in the DB library throws a DbException. +// Every exception should be considered an abnormality +// (e.g. bug, misuse of DB, file system error). +// +// NOTE: We would like to inherit from class exception and +// let it handle what(), but there are +// MSVC++ problems when <exception> is included. 
+// +class _exported DbException +{ +public: + virtual ~DbException(); + DbException(int err); + DbException(const char *description); + DbException(const char *prefix, int err); + DbException(const char *prefix1, const char *prefix2, int err); + const int get_errno(); + virtual const char *what() const; + + DbException(const DbException &); + DbException &operator = (const DbException &); + +private: + char *what_; + int err_; // errno +}; + + +//////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////// +// +// Lock classes +// + +class _exported DbLock +{ + friend DbLockTab; + +public: + DbLock(unsigned int); + DbLock(); + + unsigned int get_lock_id(); + void set_lock_id(unsigned int); + + int put(DbLockTab *locktab); + + DbLock(const DbLock &); + DbLock &operator = (const DbLock &); + +protected: + // We can add data to this class if needed + // since its contained class is not allocated by db. + // (see comment at top) + + DB_LOCK lock_; +}; + +class _exported DbLockTab +{ +friend DbEnv; +public: + int close(); + int detect(int atype, u_int32_t flags); + int get(u_int32_t locker, int flags, const Dbt *obj, + db_lockmode_t lock_mode, DbLock *lock); + int id(u_int32_t *idp); + int vec(u_int32_t locker, int flags, DB_LOCKREQ list[], + int nlist, DB_LOCKREQ **elistp); + + // Create or remove new locktab files + // + static int open(const char *dir, int flags, int mode, + DbEnv* dbenv, DbLockTab **regionp); + static int unlink(const char *dir, int force, DbEnv* dbenv); + +private: + // We can add data to this class if needed + // since it is implemented via a pointer. + // (see comment at top) + + // copying not allowed + // + DbLockTab(const DbLockTab &); + DbLockTab &operator = (const DbLockTab &); + + // Note: use DbLockTab::open() or DbEnv::get_lk_info() + // to get pointers to a DbLockTab, + // and call DbLockTab::close() rather than delete to release them. 
+ // + DbLockTab(); + ~DbLockTab(); + + DEFINE_DB_CLASS(DbLockTab); +}; + + +//////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////// +// +// Log classes +// + +class _exported DbLsn : protected DB_LSN +{ + friend DbLog; // friendship needed to cast to base class + friend DbMpool; +}; + +class _exported DbLog +{ +friend DbEnv; +public: + int archive(char **list[], int flags, void *(*db_malloc)(size_t)); + int close(); + static int compare(const DbLsn *lsn0, const DbLsn *lsn1); + int file(DbLsn *lsn, char *namep, int len); + int flush(const DbLsn *lsn); + int get(DbLsn *lsn, Dbt *data, int flags); + int put(DbLsn *lsn, const Dbt *data, int flags); + + // Normally these would be called register and unregister to + // parallel the C interface, but "register" is a reserved word. + // + int db_register(Db *dbp, const char *name, u_int32_t *fidp); + int db_unregister(u_int32_t fid); + + // Create or remove new log files + // + static int open(const char *dir, int flags, int mode, + DbEnv* dbenv, DbLog **regionp); + static int unlink(const char *dir, int force, DbEnv* dbenv); + +private: + // We can add data to this class if needed + // since it is implemented via a pointer. + // (see comment at top) + + // Note: use DbLog::open() or DbEnv::get_lg_info() + // to get pointers to a DbLog, + // and call DbLog::close() rather than delete to release them. 
+ // + DbLog(); + ~DbLog(); + + // no copying: copy constructor and assignment operator are private + DbLog(const DbLog &); + DbLog &operator = (const DbLog &); + + DEFINE_DB_CLASS(DbLog); +}; + + +//////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////// +// +// Memory pool classes +// + +class _exported DbMpoolFile +{ +public: + int close(); + int get(db_pgno_t *pgnoaddr, unsigned long flags, void *pagep); + int put(void *pgaddr, unsigned long flags); + int set(void *pgaddr, unsigned long flags); + int sync(); + + static int open(DbMpool *mp, const char *file, + int ftype, int flags, int mode, + size_t pagesize, int lsn_offset, + Dbt *pgcookie, u_int8_t *uid, DbMpoolFile **mpf); + +private: + // We can add data to this class if needed + // since it is implemented via a pointer. + // (see comment at top) + + // Note: use DbMpoolFile::open() + // to get pointers to a DbMpoolFile, + // and call DbMpoolFile::close() rather than delete to release them. + // + DbMpoolFile(); + + // Shut g++ up. +protected: + ~DbMpoolFile(); + +private: + // no copying: copy constructor and assignment operator are private + DbMpoolFile(const DbMpoolFile &); + DbMpoolFile &operator = (const DbMpoolFile &); + + DEFINE_DB_CLASS(DbMpoolFile); +}; + +class _exported DbMpool +{ +friend DbEnv; +public: + int close(); + + // access to low level interface + // Normally this would be called register to parallel + // the C interface, but "register" is a reserved word. + // + int db_register(int ftype, + int (*pgin)(db_pgno_t pgno, void *pgaddr, DBT *pgcookie), + int (*pgout)(db_pgno_t pgno, void *pgaddr, DBT *pgcookie)); + + int stat(DB_MPOOL_STAT **gsp, DB_MPOOL_FSTAT ***fsp, + void *(*db_malloc)(size_t)); + int sync(DbLsn *lsn); + + // Create or remove new mpool files + // + static int open(const char *dir, int flags, int mode, + DbEnv* dbenv, DbMpool **regionp); + static int unlink(const char *dir, int force, DbEnv* dbenv); + +private: + // We can add data to this class if needed + // since it is implemented via a pointer. 
+ // (see comment at top) + + // Note: use DbMpool::open() or DbEnv::get_mp_info() + // to get pointers to a DbMpool, + // and call DbMpool::close() rather than delete to release them. + // + DbMpool(); + ~DbMpool(); + + // no copying + DbMpool(const DbMpool &); + DbMpool &operator = (const DbMpool &); + + DEFINE_DB_CLASS(DbMpool); +}; + + +//////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////// +// +// Transaction classes +// + +class _exported DbTxnMgr +{ +friend DbEnv; +public: + int begin(DbTxn *pid, DbTxn **tid); + int checkpoint(long kbyte, long min) const; + int close(); + int stat(DB_TXN_STAT **statp, void *(*db_malloc)(size_t)); + + // Create or remove new txnmgr files + // + static int open(const char *dir, int flags, int mode, + DbEnv* dbenv, DbTxnMgr **regionp); + static int unlink(const char *dir, int force, DbEnv* dbenv); + +private: + // We can add data to this class if needed + // since it is implemented via a pointer. + // (see comment at top) + + // Note: use DbTxnMgr::open() or DbEnv::get_tx_info() + // to get pointers to a DbTxnMgr, + // and call DbTxnMgr::close() rather than delete to release them. + // + DbTxnMgr(); + ~DbTxnMgr(); + + // no copying: copy constructor and assignment operator are private + DbTxnMgr(const DbTxnMgr &); + DbTxnMgr &operator = (const DbTxnMgr &); + + DEFINE_DB_CLASS(DbTxnMgr); +}; + +class _exported DbTxn +{ +friend DbTxnMgr; +public: + int abort(); + int commit(); + u_int32_t id(); + int prepare(); + +private: + // We can add data to this class if needed + // since it is implemented via a pointer. + // (see comment at top) + + // Note: use DbTxnMgr::begin() to get pointers to a DbTxn, + // and call DbTxn::abort() or DbTxn::commit() rather than + // delete to release them. 
+ // + DbTxn(); + ~DbTxn(); + + // no copying: copy constructor and assignment operator are private + DbTxn(const DbTxn &); + DbTxn &operator = (const DbTxn &); + + DEFINE_DB_CLASS(DbTxn); +}; + + +//////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////// +// +// Application classes +// + +// +// A set of application options - define how this application uses +// the db library. +// +class _exported DbInfo : protected DB_INFO +{ + friend DbEnv; + friend Db; + +public: + DbInfo(); + ~DbInfo(); + + // Byte order. + int get_lorder() const; + void set_lorder(int); + + // Underlying cache size. + size_t get_cachesize() const; + void set_cachesize(size_t); + + // Underlying page size. + size_t get_pagesize() const; + void set_pagesize(size_t); + + // Local heap allocation. + typedef void *(*db_malloc_fcn)(size_t); + db_malloc_fcn get_malloc() const; + void set_malloc(db_malloc_fcn); + + //////////////////////////////////////////////////////////////// + // Btree access method. + + // Maximum keys per page. + int get_bt_maxkey() const; + void set_bt_maxkey(int); + + // Minimum keys per page. + int get_bt_minkey() const; + void set_bt_minkey(int); + + // Comparison function. + typedef int (*bt_compare_fcn)(const DBT *, const DBT *); + bt_compare_fcn get_bt_compare() const; + void set_bt_compare(bt_compare_fcn); + + // Prefix function. + typedef size_t (*bt_prefix_fcn)(const DBT *, const DBT *); + bt_prefix_fcn get_bt_prefix() const; + void set_bt_prefix(bt_prefix_fcn); + + //////////////////////////////////////////////////////////////// + // Hash access method. + + // Fill factor. + unsigned int get_h_ffactor() const; + void set_h_ffactor(unsigned int); + + // Number of elements. + unsigned int get_h_nelem() const; + void set_h_nelem(unsigned int); + + // Hash function. 
+ typedef u_int32_t (*h_hash_fcn)(const void *, u_int32_t); + h_hash_fcn get_h_hash() const; + void set_h_hash(h_hash_fcn); + + //////////////////////////////////////////////////////////////// + // Recno access method. + + // Fixed-length padding byte. + int get_re_pad() const; + void set_re_pad(int); + + // Variable-length delimiting byte. + int get_re_delim() const; + void set_re_delim(int); + + // Length for fixed-length records. + u_int32_t get_re_len() const; + void set_re_len(u_int32_t); + + // Source file name. + char *get_re_source() const; + void set_re_source(char *); + + // Note: some flags are set as side effects of calling + // above "set" methods. + // + u_int32_t get_flags() const; + void set_flags(u_int32_t); + + + // (deep) copying of this object is allowed. + // + DbInfo(const DbInfo &); + DbInfo &operator = (const DbInfo &); + +private: + // We can add data to this class if needed + // since parent class is not allocated by db. + // (see comment at top) +}; + +// +// Base application class. Provides functions for opening a database. +// User of this library can use this class as a starting point for +// developing a DB application - derive their application class from +// this one, add application control logic. +// +// Note that if you use the default constructor, you must explicitly +// call appinit() before any other db activity (e.g. opening files) +// +class _exported DbEnv : protected DB_ENV +{ +friend DbTxnMgr; +friend DbLog; +friend DbLockTab; +friend DbMpool; +friend Db; + +public: + + ~DbEnv(); + + // This constructor can be used to immediately initialize the + // application with these arguments. Do not use it if you + // need to set other parameters via the access methods. + // + DbEnv(const char *homeDir, char *const *db_config, int flags); + + // Use this constructor if you wish to *delay* the initialization + // of the db library. This is useful if you need to set + // any particular parameters via the access methods below. 
+ // Then call appinit() to complete the initialization. + // + DbEnv(); + + // Used in conjunction with the default constructor to + // complete the initialization of the db library. + // + int appinit(const char *homeDir, char *const *db_config, int flags); + + //////////////////////////////////////////////////////////////// + // simple get/set access methods + // + // If you are calling set_ methods, you need to + // use the default constructor along with appinit(). + + // Byte order. + int get_lorder() const; + void set_lorder(int); + + // Error message callback. + typedef void (*db_errcall_fcn)(const char *, char *); + db_errcall_fcn get_errcall() const; + void set_errcall(db_errcall_fcn); + + // Error message file stream. + FILE *get_errfile() const; + void set_errfile(FILE *); + + // Error message prefix. + const char *get_errpfx() const; + void set_errpfx(const char *); + + // Generate debugging messages. + int get_verbose() const; + void set_verbose(int); + + //////////////////////////////////////////////////////////////// + // User paths. + + // Database home. + char *get_home() const; + void set_home(char *); + + // Database log file directory. + char *get_log_dir() const; + void set_log_dir(char *); + + // Database tmp file directory. + char *get_tmp_dir() const; + void set_tmp_dir(char *); + + // Database data file directories. + char **get_data_dir() const; + void set_data_dir(char **); + + // Database data file slots. + int get_data_cnt() const; + void set_data_cnt(int); + + // Next Database data file slot. + int get_data_next() const; + void set_data_next(int); + + + //////////////////////////////////////////////////////////////// + // Locking. + + // Return from lock_open(). + DbLockTab *get_lk_info() const; + + // Two dimensional conflict matrix. + u_int8_t *get_lk_conflicts() const; + void set_lk_conflicts(u_int8_t *); + + // Number of lock modes in table. + int get_lk_modes() const; + void set_lk_modes(int); + + // Maximum number of locks. 
+ unsigned int get_lk_max() const; + void set_lk_max(unsigned int); + + // Deadlock detect on every conflict. + u_int32_t get_lk_detect() const; + void set_lk_detect(u_int32_t); + + // Yield function for threads. + typedef int (*db_yield_fcn) (void); + db_yield_fcn get_yield() const; + void set_yield(db_yield_fcn); + + + //////////////////////////////////////////////////////////////// + // Logging. + + // Return from log_open(). + DbLog *get_lg_info() const; + + // Maximum file size. + u_int32_t get_lg_max() const; + void set_lg_max(u_int32_t); + + + //////////////////////////////////////////////////////////////// + // Memory pool. + + // Return from memp_open(). + DbMpool *get_mp_info() const; + + // Maximum file size for mmap. + size_t get_mp_mmapsize() const; + void set_mp_mmapsize(size_t); + + // Bytes in the mpool cache. + size_t get_mp_size() const; + void set_mp_size(size_t); + + + //////////////////////////////////////////////////////////////// + // Transactions. + + // Return from txn_open(). + DbTxnMgr *get_tx_info() const; + + // Maximum number of transactions. + unsigned int get_tx_max() const; + void set_tx_max(unsigned int); + + // Dispatch function for recovery. + typedef int (*tx_recover_fcn)(DB_LOG *, DBT *, DB_LSN *, int, void *); + tx_recover_fcn get_tx_recover() const; + void set_tx_recover(tx_recover_fcn); + + // Flags. + u_int32_t get_flags() const; + void set_flags(u_int32_t); + + //////////////////////////////////////////////////////////////// + // The default error model is to throw an exception whenever + // an error occurs. This generally allows for cleaner logic + // for transaction processing, as a try block can surround a + // single transaction. Alternatively, since almost every method + // returns an error code (errno), the error model can be set to + // not throw exceptions, and instead return the appropriate code. 
+ // + enum ErrorModel { Exception, ErrorReturn }; + void set_error_model(ErrorModel); + ErrorModel get_error_model() const; + + // If an error is detected and the error call function + // or stream is set, a message is dispatched or printed. + // If a prefix is set, each message is prefixed. + // + // You can use set_errcall() or set_errfile() above to control + // error functionality using a C model. Alternatively, you can + // call set_error_stream() to force all errors to a C++ stream. + // It is unwise to mix these approaches. + // + class ostream* get_error_stream() const; + void set_error_stream(class ostream*); + + // used internally + static int runtime_error(const char *caller, int err, int in_destructor = 0); + +private: + // We can add data to this class if needed + // since parent class is not allocated by db. + // (see comment at top) + + // no copying + DbEnv(const DbEnv &); + operator = (const DbEnv &); + + ErrorModel error_model_; + static void stream_error_function(const char *, char *); + static ostream *error_stream_; +}; + +//////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////// +// +// Table access classes +// + +// +// Represents a database table = a set of keys with associated values. +// +class _exported Db +{ + friend DbEnv; + +public: + int close(int flags); + int cursor(DbTxn *txnid, Dbc **cursorp); + int del(Dbt *key, DbTxn *txnid); + int fd(int *fdp); + int get(DbTxn *txnid, Dbt *key, Dbt *data, int flags); + int put(DbTxn *txnid, Dbt *key, Dbt *data, int flags); + int stat(void *sp, void *(*db_malloc)(size_t), int flags); + int sync(int flags); + + DBTYPE get_type() const; + + static int open(const char *fname, DBTYPE type, int flags, + int mode, DbEnv *dbenv, DbInfo *info, Db **dbpp); + +private: + // We can add data to this class if needed + // since it is implemented via a pointer. 
+ // (see comment at top) + + // Note: use Db::open() to get initialized pointers to a Db, + // and call Db::close() rather than delete to release them. + Db(); + ~Db(); + + // no copying + Db(const Db &); + Db &operator = (const Db &); + + DEFINE_DB_CLASS(Db); +}; + +// +// A chunk of data, maybe a key or value. +// +class _exported Dbt : private DBT +{ + friend Dbc; + friend Db; + friend DbLog; + friend DbMpoolFile; + friend DbLockTab; + +public: + + // key/data + void *get_data() const; + void set_data(void *); + + // key/data length + u_int32_t get_size() const; + void set_size(u_int32_t); + + // RO: length of user buffer. + u_int32_t get_ulen() const; + void set_ulen(u_int32_t); + + // RO: get/put record length. + u_int32_t get_dlen() const; + void set_dlen(u_int32_t); + + // RO: get/put record offset. + u_int32_t get_doff() const; + void set_doff(u_int32_t); + + // flags + u_int32_t get_flags() const; + void set_flags(u_int32_t); + + Dbt(void *data, size_t size); + Dbt(); + ~Dbt(); + Dbt(const Dbt &); + Dbt &operator = (const Dbt &); + +private: + // We can add data to this class if needed + // since parent class is not allocated by db. + // (see comment at top) +}; + +class _exported Dbc : protected DBC +{ + friend Db; + +public: + int close(); + int del(int flags); + int get(Dbt* key, Dbt *data, int flags); + int put(Dbt* key, Dbt *data, int flags); + +private: + // No data is permitted in this class (see comment at top) + + // Note: use Db::cursor() to get pointers to a Dbc, + // and call Dbc::close() rather than delete to release them. + // + Dbc(); + ~Dbc(); + + // no copying + Dbc(const Dbc &); + Dbc &operator = (const Dbc &); +}; + +#endif /* !_DB_CXX_H_ */ diff --git a/db2/include/db_dispatch.h b/db2/include/db_dispatch.h new file mode 100644 index 0000000000..b93ec39b54 --- /dev/null +++ b/db2/include/db_dispatch.h @@ -0,0 +1,73 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 1997 + * Sleepycat Software. 
All rights reserved. + */ +/* + * Copyright (c) 1995, 1996 + * The President and Fellows of Harvard University. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)db_dispatch.h 10.1 (Sleepycat) 4/12/97 + */ + +#ifndef _DB_DISPATCH_H +#define _DB_DISPATCH_H + +/* + * Declarations and typedefs for the list of transaction IDs used during + * recovery. + */ + +typedef struct __db_txnhead { + LIST_HEAD(__db_headlink, _db_txnlist) head; + u_int32_t maxid; +} __db_txnhead; + +typedef struct _db_txnlist { + LIST_ENTRY(_db_txnlist) links; + u_int32_t txnid; +} __db_txnlist; + +#define DB_log_BEGIN 0 +#define DB_txn_BEGIN 5 +#define DB_ham_BEGIN 20 +#define DB_db_BEGIN 40 /* Base for DB_db_* values, e.g. DB_db_noop. */ +#define DB_bam_BEGIN 50 +#define DB_ram_BEGIN 100 +#define DB_user_BEGIN 150 + +#define TXN_UNDO 0 +#define TXN_REDO 1 +#define TXN_BACKWARD_ROLL (-1) /* Negative values are parenthesized so */ +#define TXN_FORWARD_ROLL (-2) /* the sign survives expansion inside a */ +#define TXN_OPENFILES (-3) /* larger expression. */ +#endif /* !_DB_DISPATCH_H */ diff --git a/db2/include/db_ext.h b/db2/include/db_ext.h new file mode 100644 index 0000000000..1cccb47617 --- /dev/null +++ b/db2/include/db_ext.h @@ -0,0 +1,114 @@ +/* Do not edit: automatically built by dist/distrib. */ +int __db_pgerr __P((DB *, db_pgno_t)); +int __db_pgfmt __P((DB *, db_pgno_t)); +int __db_addrem_log + __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t, + u_int32_t, u_int32_t, db_pgno_t, u_int32_t, + size_t, DBT *, DBT *, DB_LSN *)); +int __db_addrem_print + __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); +int __db_addrem_read __P((void *, __db_addrem_args **)); +int __db_split_log + __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t, + u_int32_t, u_int32_t, db_pgno_t, DBT *, + DB_LSN *)); +int __db_split_print + __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); +int __db_split_read __P((void *, __db_split_args **)); +int __db_big_log + __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t, + u_int32_t, u_int32_t, db_pgno_t, db_pgno_t, + db_pgno_t, DBT *, DB_LSN *, DB_LSN *, + DB_LSN *)); +int __db_big_print + __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); +int __db_big_read __P((void *, __db_big_args **)); +int __db_ovref_log + __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t, + u_int32_t, db_pgno_t, DB_LSN *)); +int __db_ovref_print + __P((DB_LOG *, 
DBT *, DB_LSN *, int, void *)); +int __db_ovref_read __P((void *, __db_ovref_args **)); +int __db_relink_log + __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t, + u_int32_t, db_pgno_t, DB_LSN *, db_pgno_t, + DB_LSN *, db_pgno_t, DB_LSN *)); +int __db_relink_print + __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); +int __db_relink_read __P((void *, __db_relink_args **)); +int __db_addpage_log + __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t, + u_int32_t, db_pgno_t, DB_LSN *, db_pgno_t, + DB_LSN *)); +int __db_addpage_print + __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); +int __db_addpage_read __P((void *, __db_addpage_args **)); +int __db_debug_log + __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t, + DBT *, u_int32_t, DBT *, DBT *, + u_int32_t)); +int __db_debug_print + __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); +int __db_debug_read __P((void *, __db_debug_args **)); +int __db_noop_log + __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t)); +int __db_noop_print + __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); +int __db_noop_read __P((void *, __db_noop_args **)); +int __db_init_print __P((DB_ENV *)); +int __db_init_recover __P((DB_ENV *)); +int __db_pgin __P((db_pgno_t, void *)); +int __db_pgout __P((db_pgno_t, void *)); +int __db_dispatch __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); +int __db_add_recovery __P((DB_ENV *, + int (*)(DB_LOG *, DBT *, DB_LSN *, int, void *), u_int32_t)); +int __db_txnlist_init __P((void *)); +int __db_txnlist_add __P((void *, u_int32_t)); +int __db_txnlist_find __P((void *, u_int32_t)); +int __db_dput __P((DB *, + DBT *, PAGE **, db_indx_t *, int (*)(DB *, u_int32_t, PAGE **))); +int __db_drem __P((DB *, + PAGE **, u_int32_t, int (*)(DB *, PAGE *))); +int __db_dend __P((DB *, db_pgno_t, PAGE **)); + int __db_ditem __P((DB *, PAGE *, int, u_int32_t)); +int __db_pitem + __P((DB *, PAGE *, u_int32_t, u_int32_t, DBT *, DBT *)); +int __db_relink __P((DB *, PAGE *, PAGE **, int)); +int __db_ddup __P((DB *, db_pgno_t, int (*)(DB *, PAGE *))); +int 
__db_goff __P((DB *, DBT *, + u_int32_t, db_pgno_t, void **, u_int32_t *)); +int __db_poff __P((DB *, const DBT *, db_pgno_t *, + int (*)(DB *, u_int32_t, PAGE **))); +int __db_ioff __P((DB *, db_pgno_t)); +int __db_doff __P((DB *, db_pgno_t, int (*)(DB *, PAGE *))); +int __db_moff __P((DB *, const DBT *, db_pgno_t)); +void __db_loadme __P((void)); +FILE *__db_prinit __P((FILE *)); +int __db_dump __P((DB *, char *, int)); +int __db_prdb __P((DB *)); +int __db_prbtree __P((DB *)); +int __db_prhash __P((DB *)); +int __db_prtree __P((DB_MPOOLFILE *, int)); +int __db_prnpage __P((DB_MPOOLFILE *, db_pgno_t)); +int __db_prpage __P((PAGE *, int)); +int __db_isbad __P((PAGE *, int)); +void __db_pr __P((u_int8_t *, u_int32_t)); +void __db_prflags __P((u_int32_t, const FN *)); +int __db_addrem_recover + __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); +int __db_split_recover __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); +int __db_big_recover __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); +int __db_ovref_recover __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); +int __db_relink_recover + __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); +int __db_addpage_recover + __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); +int __db_debug_recover __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); +int __db_noop_recover + __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); +int __db_ret __P((DB *, + PAGE *, u_int32_t, DBT *, void **, u_int32_t *)); +int __db_retcopy __P((DBT *, + void *, u_int32_t, void **, u_int32_t *, void *(*)(size_t))); +int __db_gethandle __P((DB *, int (*)(DB *, DB *), DB **)); +int __db_puthandle __P((DB *)); diff --git a/db2/include/db_int.h.src b/db2/include/db_int.h.src new file mode 100644 index 0000000000..b60e5002e5 --- /dev/null +++ b/db2/include/db_int.h.src @@ -0,0 +1,332 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 1997 + * Sleepycat Software. All rights reserved. 
+ * + * @(#)db_int.h.src 10.28 (Sleepycat) 8/20/97 + */ + +#ifndef _DB_INTERNAL_H_ +#define _DB_INTERNAL_H_ + +#include "db.h" /* Standard DB include file. */ +#include "queue.h" +#include "os_ext.h" + +/******************************************************* + * General purpose constants and macros. + *******************************************************/ +#define UINT32_T_MAX 0xffffffff /* Maximum 32 bit unsigned. */ +#define UINT16_T_MAX 0xffff /* Maximum 16 bit unsigned. */ + +#define DB_MIN_PGSIZE 0x000200 /* Minimum page size. */ +#define DB_MAX_PGSIZE 0x010000 /* Maximum page size. */ + +#define DB_MINCACHE 10 /* Minimum cached pages */ + +/* + * Aligning items to particular sizes or in pages or memory. ALIGNP is a + * separate macro, as we've had to cast the pointer to different integral + * types on different architectures. + * + * We cast pointers into unsigned longs when manipulating them because C89 + * guarantees that u_long is the largest available integral type and further, + * to never generate overflows. However, neither C89 nor C9X requires that + * any integer type be large enough to hold a pointer, although C9X created + * the intptr_t type, which is guaranteed to hold a pointer but may or may + * not exist. At some point in the future, we should test for intptr_t and + * use it where available. ALIGN's mask arithmetic rounds up correctly only + * when bound is a power of two; ALIGNP casts its whole (parenthesized) + * argument, so pointer expressions such as p + 1 are converted as a unit. + */ +#undef ALIGNTYPE +#define ALIGNTYPE u_long +#undef ALIGNP +#define ALIGNP(value, bound) ALIGN((ALIGNTYPE)(value), bound) +#undef ALIGN +#define ALIGN(value, bound) (((value) + (bound) - 1) & ~((bound) - 1)) + +/* + * There are several on-page structures that are declared to have a number of + * fields followed by a variable length array of items. The structure size + * without including the variable length array or the address of the first of + * those elements can be found using SSZ. + * + * This macro can also be used to find the offset of a structure element in a + * structure. 
This is used in various places to copy structure elements from + * unaligned memory references, e.g., pointers into a packed page. + * + * There are two versions because compilers object if you take the address of + * an array. The address is converted through ALIGNTYPE before being + * narrowed to int, so the pointer is never cast directly to a smaller type. + */ +#undef SSZ +#define SSZ(name, field) ((int)(ALIGNTYPE)&(((name *)0)->field)) + +#undef SSZA +#define SSZA(name, field) ((int)(ALIGNTYPE)&(((name *)0)->field[0])) + +/* Free and free-string macros that overwrite memory during debugging. */ +#ifdef DEBUG +#undef FREE +#define FREE(p, len) { \ + memset(p, 0xff, len); \ + free(p); \ +} +#undef FREES +#define FREES(p) { \ + FREE(p, strlen(p)); \ +} +#else +#undef FREE +#define FREE(p, len) { \ + free(p); \ +} +#undef FREES +#define FREES(p) { \ + free(p); \ +} +#endif + +/* Structure used to print flag values. */ +typedef struct __fn { + u_int32_t mask; /* Flag value. */ + char *name; /* Flag name. */ +} FN; + +/* Set, clear and test flags: F_* act on (p)->flags, LF_* on a local "flags". */ +#define F_SET(p, f) ((p)->flags |= (f)) +#define F_CLR(p, f) ((p)->flags &= ~(f)) +#define F_ISSET(p, f) ((p)->flags & (f)) +#define LF_SET(f) (flags |= (f)) +#define LF_CLR(f) (flags &= ~(f)) +#define LF_ISSET(f) (flags & (f)) + +/* Display separator string. */ +#undef DB_LINE +#define DB_LINE "=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=" + +/******************************************************* + * Files. + *******************************************************/ +#ifndef MAXPATHLEN /* Maximum path length. */ +#ifdef PATH_MAX +#define MAXPATHLEN PATH_MAX +#else +#define MAXPATHLEN 1024 +#endif +#endif + +#define PATH_DOT "." /* Current working directory. */ +#define PATH_SEPARATOR "/" /* Path separator character. */ + +#ifndef S_IRUSR /* UNIX specific file permissions. 
*/ +#define S_IRUSR 0000400 /* R for owner */ +#define S_IWUSR 0000200 /* W for owner */ +#define S_IRGRP 0000040 /* R for group */ +#define S_IWGRP 0000020 /* W for group */ +#define S_IROTH 0000004 /* R for other */ +#define S_IWOTH 0000002 /* W for other */ +#endif + +#ifndef S_ISDIR /* UNIX specific: directory test. */ +#define S_ISDIR(m) (((m) & 0170000) == 0040000) +#endif + +/******************************************************* + * Mutex support. + *******************************************************/ +@spin_line1@ +@spin_line2@ +@spin_line3@ + +/* + * !!! + * Various systems require different alignments for mutexes (the worst we've + * seen so far is 16-bytes on some HP architectures). The mutex (tsl_t) must + * be first in the db_mutex_t structure, which must itself be first in the + * region. This ensures the alignment is as returned by mmap(2), which should + * be sufficient. All other mutex users must ensure proper alignment locally. + */ +#define MUTEX_ALIGNMENT @mutex_align@ + +/* + * The byte offset of address b from address a, as an off_t. + */ +#define MUTEX_LOCK_OFFSET(a, b) ((off_t)((u_int8_t *)(b) - (u_int8_t *)(a))) + +typedef struct _db_mutex_t { +#ifdef HAVE_SPINLOCKS + tsl_t tsl_resource; /* Resource test and set. */ +#ifdef DEBUG + u_long pid; /* Lock holder: 0 or process pid. */ +#endif +#else + off_t off; /* Backing file offset. */ + u_long pid; /* Lock holder: 0 or process pid. */ +#endif +#ifdef MUTEX_STATISTICS + u_long mutex_set_wait; /* Blocking mutex: required waiting. */ + u_long mutex_set_nowait; /* Blocking mutex: without waiting. */ +#endif +} db_mutex_t; + +#include "mutex_ext.h" + +/******************************************************* + * Access methods. + *******************************************************/ +/* Lock/unlock a DB thread. */ +#define DB_THREAD_LOCK(dbp) \ + (F_ISSET(dbp, DB_AM_THREAD) ? \ + __db_mutex_lock((db_mutex_t *)(dbp)->mutex, -1, \ + (dbp)->dbenv == NULL ? 
NULL : (dbp)->dbenv->db_yield) : 0) +#define DB_THREAD_UNLOCK(dbp) \ + (F_ISSET(dbp, DB_AM_THREAD) ? \ + __db_mutex_unlock((db_mutex_t *)(dbp)->mutex, -1) : 0) + +/* Btree/recno local statistics structure. */ +struct __db_bt_lstat; typedef struct __db_bt_lstat DB_BTREE_LSTAT; +struct __db_bt_lstat { + u_int32_t bt_freed; /* Pages freed for reuse. */ + u_int32_t bt_pfxsaved; /* Bytes saved by prefix compression. */ + u_int32_t bt_split; /* Total number of splits. */ + u_int32_t bt_rootsplit; /* Root page splits. */ + u_int32_t bt_fastsplit; /* Fast splits. */ + u_int32_t bt_added; /* Items added. */ + u_int32_t bt_deleted; /* Items deleted. */ + u_int32_t bt_get; /* Items retrieved. */ + u_int32_t bt_cache_hit; /* Hits in fast-insert code. */ + u_int32_t bt_cache_miss; /* Misses in fast-insert code. */ +}; + +/******************************************************* + * Environment. + *******************************************************/ +/* Type passed to __db_appname(). */ +typedef enum { + DB_APP_NONE=0, /* No type (region). */ + DB_APP_DATA, /* Data file. */ + DB_APP_LOG, /* Log file. */ + DB_APP_TMP /* Temporary file. */ +} APPNAME; + +/******************************************************* + * Regions. + *******************************************************/ +/* + * The shared memory regions share an initial structure so that the general + * region code can handle races between the region being deleted and other + * processes waiting on the region mutex. + * + * !!! + * Note, the mutex must be the first entry in the region; see comment above. + */ +typedef struct _rlayout { + db_mutex_t lock; /* Region mutex. */ + u_int32_t refcnt; /* Region reference count. */ + size_t size; /* Region length. */ + int majver; /* Major version number. */ + int minver; /* Minor version number. */ + int patch; /* Patch version number. */ + +#define DB_R_DELETED 0x01 /* Region was deleted. 
*/ + u_int32_t flags; +} RLAYOUT; + +/******************************************************* + * Mpool. + *******************************************************/ +/* + * File types for DB access methods. Negative numbers are reserved to DB. + */ +#define DB_FTYPE_BTREE -1 /* Btree. */ +#define DB_FTYPE_HASH -2 /* Hash. */ + +/* Structure used as the DB pgin/pgout pgcookie. */ +typedef struct __dbpginfo { + size_t db_pagesize; /* Underlying page size. */ + int needswap; /* If swapping required. */ +} DB_PGINFO; + +/******************************************************* + * Log. + *******************************************************/ +/* Initialize an LSN to 'zero'. */ +#define ZERO_LSN(LSN) { \ + (LSN).file = 0; \ + (LSN).offset = 0; \ +} + +/* Return 1 if LSN is a 'zero' lsn, otherwise return 0. */ +#define IS_ZERO_LSN(LSN) ((LSN).file == 0) + +/* Test if we need to log a change. */ +#define DB_LOGGING(dbp) \ + (F_ISSET(dbp, DB_AM_LOGGING) && !F_ISSET(dbp, DB_AM_RECOVER)) + +#ifdef DEBUG +/* + * Debugging macro to log operations. + * If DEBUG_WOP is defined, log operations that modify the database. + * If DEBUG_ROP is defined, log operations that read the database. 
+ *
+ * D dbp
+ * T txn
+ * O operation (string)
+ * K key
+ * A data
+ * F flags
+ */
+#define	LOG_OP(D, T, O, K, A, F) {					\
+	DB_LSN _lsn;							\
+	DBT _op;							\
+	if (DB_LOGGING((D))) {						\
+		memset(&_op, 0, sizeof(_op));				\
+		_op.data = O;						\
+		_op.size = strlen(O) + 1;	/* Count the NUL too. */	\
+		(void)__db_debug_log((D)->dbenv->lg_info,		\
+			T, &_lsn, 0, &_op, (D)->log_fileid, K, A, F);	\
+	}								\
+}
+#ifdef DEBUG_ROP
+#define	DEBUG_LREAD(D, T, O, K, A, F)	LOG_OP(D, T, O, K, A, F)
+#else
+#define	DEBUG_LREAD(D, T, O, K, A, F)	/* Expands to nothing. */
+#endif
+#ifdef DEBUG_WOP
+#define	DEBUG_LWRITE(D, T, O, K, A, F)	LOG_OP(D, T, O, K, A, F)
+#else
+#define	DEBUG_LWRITE(D, T, O, K, A, F)	/* Expands to nothing. */
+#endif
+#else
+#define	DEBUG_LREAD(D, T, O, K, A, F)	/* Expands to nothing. */
+#define	DEBUG_LWRITE(D, T, O, K, A, F)	/* Expands to nothing. */
+#endif /* DEBUG */
+
+/*******************************************************
+ * Transactions and recovery.
+ *******************************************************/
+/*
+ * The locker id space is divided between the transaction manager and the lock
+ * manager. Lockid's start at 0 and go to MAX_LOCKER_ID. Txn Id's start at
+ * MAX_LOCKER_ID + 1 and go up to MAX_TXNID.
+ */
+#define	MAX_LOCKER_ID	0x0fffffff	/* Last id owned by the lock manager. */
+#define	MAX_TXNID	0xffffffff	/* Last id owned by the txn manager. */
+
+/*
+ * Out of band value for a lock. The locks are returned to callers as offsets
+ * into the lock regions. Since the RLAYOUT structure begins all regions, an
+ * offset of 0 is guaranteed not to be a valid lock.
+ */
+#define	LOCK_INVALID	0
+
+/* The structure allocated for every transaction. */
+struct __db_txn {
+	DB_TXNMGR	*mgrp;		/* Pointer to transaction manager. */
+	DB_TXN		*parent;	/* Pointer to transaction's parent. */
+	DB_LSN		last_lsn;	/* Lsn of last log write. */
+	u_int32_t	txnid;		/* Unique transaction id. */
+	size_t		off;		/* Detail structure within region.
*/
+	TAILQ_ENTRY(__db_txn) links;	/* TAILQ linkage to other __db_txn structures. */
+};
+#endif /* !_DB_INTERNAL_H_ */
diff --git a/db2/include/db_page.h b/db2/include/db_page.h
new file mode 100644
index 0000000000..9e78682c57
--- /dev/null
+++ b/db2/include/db_page.h
@@ -0,0 +1,535 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *	Sleepycat Software. All rights reserved.
+ *
+ *	@(#)db_page.h	10.10 (Sleepycat) 8/18/97
+ */
+
+#ifndef _DB_PAGE_H_
+#define	_DB_PAGE_H_
+
+/*
+ * DB page formats.
+ *
+ * This implementation requires that values within the following structures
+ * NOT be padded -- note, ANSI C permits random padding within structures.
+ * If your compiler pads randomly you can just forget ever making DB run on
+ * your system. In addition, no data type can require larger alignment than
+ * its own size, e.g., a 4-byte data element may not require 8-byte alignment.
+ *
+ * Note that key/data lengths are often stored in db_indx_t's -- this is
+ * not accidental, nor does it limit the key/data size. If the key/data
+ * item fits on a page, it's guaranteed to be small enough to fit into a
+ * db_indx_t, and storing it in one saves space.
+ */
+
+#define	PGNO_METADATA	0	/* Metadata page number. */
+#define	PGNO_INVALID	0	/* Same as the metadata page number, therefore illegal. */
+#define	PGNO_ROOT	1	/* Root is page #1. */
+
+/************************************************************************
+ BTREE METADATA PAGE LAYOUT
+ ************************************************************************/
+
+/*
+ * Btree metadata page layout (abbreviated; the struct lists every field):
+ *
+ *	+-----------------------------------+
+ *	|    lsn    |   pgno    |   magic   |
+ *	+-----------------------------------+
+ *	|  version  | pagesize  |   free    |
+ *	+-----------------------------------+
+ *	|   flags   |      unused ...       |
+ *	+-----------------------------------+
+ */
+typedef struct _btmeta {
+	DB_LSN	  lsn;		/* 00-07: LSN. */
+	db_pgno_t pgno;		/* 08-11: Current page number. */
+	u_int32_t magic;	/* 12-15: Magic number.
*/
+	u_int32_t version;	/* 16-19: Version. */
+	u_int32_t pagesize;	/* 20-23: Pagesize. */
+	u_int32_t maxkey;	/* 24-27: Btree: Maxkey. */
+	u_int32_t minkey;	/* 28-31: Btree: Minkey. */
+	u_int32_t free;		/* 32-35: Free list page number. */
+#define	BTM_DUP		0x001	/*	  Duplicates. */
+#define	BTM_RECNO	0x002	/*	  Recno tree. */
+#define	BTM_RECNUM	0x004	/*	  Btree: maintain record count. */
+#define	BTM_FIXEDLEN	0x008	/*	  Recno: fixed length records. */
+#define	BTM_RENUMBER	0x010	/*	  Recno: renumber on insert/delete. */
+#define	BTM_MASK	0x01f	/*	  All of the BTM_* bits above. */
+	u_int32_t flags;	/* 36-39: Flags. */
+	u_int32_t re_len;	/* 40-43: Recno: fixed-length record length. */
+	u_int32_t re_pad;	/* 44-47: Recno: fixed-length record pad. */
+				/* 48-67: Unique file ID. */
+	u_int8_t  uid[DB_FILE_ID_LEN];
+
+	u_int32_t spare[13];	/* 68-119: Save some room for growth (13 * 4 bytes). */
+
+	DB_BTREE_LSTAT stat;	/* 120-159: Statistics (assumes uid ends at 67, as noted above). */
+} BTMETA;
+
+/************************************************************************
+ HASH METADATA PAGE LAYOUT
+ ************************************************************************/
+
+/*
+ * Hash metadata page layout:
+ *
+ *	+-----------------------------------+
+ *	|    lsn    |   magic   |  version  |
+ *	+-----------------------------------+
+ *	| pagesize  | ovfl_point| last_freed|
+ *	+-----------------------------------+
+ *	| max_bucket| high_mask | low_mask  |
+ *	+-----------------------------------+
+ *	|  ffactor  |   nelem   |  charkey  |
+ *	+-----------------------------------+
+ *	| spares[32]|   flags   |  unused   |
+ *	+-----------------------------------+
+ */
+/* Hash Table Information */
+typedef struct hashhdr {	/* Disk resident portion */
+	DB_LSN	lsn;		/* 00-07: LSN of the header page */
+	db_pgno_t pgno;		/* 08-11: Page number (btree compatibility).
*/
+	u_int32_t magic;	/* 12-15: Magic NO for hash tables */
+	u_int32_t version;	/* 16-19: Version ID */
+	u_int32_t pagesize;	/* 20-23: Bucket/Page Size */
+	u_int32_t ovfl_point;	/* 24-27: Overflow page allocation location */
+	u_int32_t last_freed;	/* 28-31: Last freed overflow page pgno */
+	u_int32_t max_bucket;	/* 32-35: ID of Maximum bucket in use */
+	u_int32_t high_mask;	/* 36-39: Modulo mask into table */
+	u_int32_t low_mask;	/* 40-43: Modulo mask into table lower half */
+	u_int32_t ffactor;	/* 44-47: Fill factor */
+	u_int32_t nelem;	/* 48-51: Number of keys in hash table */
+	u_int32_t h_charkey;	/* 52-55: Value of hash(CHARKEY) */
+#define	DB_HASH_DUP	0x01	/*	  Allow duplicates. */
+	u_int32_t flags;	/* 56-59: Flags (DB_HASH_DUP). */
+#define	NCACHED	32		/* number of spare points */
+				/* 60-187: Spare pages for overflow */
+	u_int32_t spares[NCACHED];
+				/* 188-207: Unique file ID. */
+	u_int8_t  uid[DB_FILE_ID_LEN];
+
+	/*
+	 * Minimum page size is 256.
+	 */
+} HASHHDR;
+
+/************************************************************************
+ MAIN PAGE LAYOUT
+ ************************************************************************/
+
+/*
+ *	+-----------------------------------+
+ *	|    lsn    |   pgno    | prev pgno |
+ *	+-----------------------------------+
+ *	| next pgno |  entries  | hf offset |
+ *	+-----------------------------------+
+ *	|   level   |   type    |   index   |
+ *	+-----------------------------------+
+ *	|   index   |     free -->          |
+ *	+-----------+-----------------------+
+ *	|          F R E E A R E A          |
+ *	+-----------------------------------+
+ *	|              <-- free |   item    |
+ *	+-----------------------------------+
+ *	|   item    |   item    |   item    |
+ *	+-----------------------------------+
+ *
+ * The PAGE header preceding inp[] is 26 bytes (P_OVERHEAD), and the following
+ * indices are guaranteed to be two-byte aligned.
+ *
+ * For hash and btree leaf pages, index items are paired, e.g., inp[0] is the
+ * key for inp[1]'s data. All other types of pages only contain single items.
+ */ +typedef struct _db_page { + DB_LSN lsn; /* 00-07: Log sequence number. */ + db_pgno_t pgno; /* 08-11: Current page number. */ + db_pgno_t prev_pgno; /* 12-15: Previous page number. */ + db_pgno_t next_pgno; /* 16-19: Next page number. */ + db_indx_t entries; /* 20-21: Number of item pairs on the page. */ + db_indx_t hf_offset; /* 22-23: High free byte page offset. */ + + /* + * The btree levels are numbered from the leaf to the root, starting + * with 1, so the leaf is level 1, its parent is level 2, and so on. + * We maintain this level on all btree pages, but the only place that + * we actually need it is on the root page. It would not be difficult + * to hide the byte on the root page once it becomes an internal page, + * so we could get this byte back if we needed it for something else. + */ +#define LEAFLEVEL 1 +#define MAXBTREELEVEL 255 + u_int8_t level; /* 24: Btree tree level. */ + +#define P_INVALID 0 /* Invalid page type. */ +#define P_DUPLICATE 1 /* Duplicate. */ +#define P_HASH 2 /* Hash. */ +#define P_IBTREE 3 /* Btree internal. */ +#define P_IRECNO 4 /* Recno internal. */ +#define P_LBTREE 5 /* Btree leaf. */ +#define P_LRECNO 6 /* Recno leaf. */ +#define P_OVERFLOW 7 /* Overflow. */ + u_int8_t type; /* 25: Page type. */ + db_indx_t inp[1]; /* Variable length index of items. */ +} PAGE; + +/* Element macros. */ +#define LSN(p) (((PAGE *)p)->lsn) +#define PGNO(p) (((PAGE *)p)->pgno) +#define PREV_PGNO(p) (((PAGE *)p)->prev_pgno) +#define NEXT_PGNO(p) (((PAGE *)p)->next_pgno) +#define NUM_ENT(p) (((PAGE *)p)->entries) +#define HOFFSET(p) (((PAGE *)p)->hf_offset) +#define LEVEL(p) (((PAGE *)p)->level) +#define TYPE(p) (((PAGE *)p)->type) + +/* + * !!! + * The next_pgno and prev_pgno fields are not maintained for btree and recno + * internal pages. It's a minor performance improvement, and more, it's + * hard to do when deleting internal pages, and it decreases the chance of + * deadlock during deletes and splits. + * + * !!! 
+ * The btree/recno access method needs db_recno_t bytes of space on the root + * page to specify how many records are stored in the tree. (The alternative + * is to store the number of records in the meta-data page, which will create + * a second hot spot in trees being actively modified, or recalculate it from + * the BINTERNAL fields on each access.) Overload the prev_pgno field. + */ +#define RE_NREC(p) \ + (TYPE(p) == P_LBTREE ? NUM_ENT(p) / 2 : \ + TYPE(p) == P_LRECNO ? NUM_ENT(p) : PREV_PGNO(p)) +#define RE_NREC_ADJ(p, adj) \ + PREV_PGNO(p) += adj; +#define RE_NREC_SET(p, num) \ + PREV_PGNO(p) = num; + +/* + * Initialize a page. + * + * !!! + * Don't modify the page's LSN, code depends on it being unchanged after a + * P_INIT call. + */ +#define P_INIT(pg, pg_size, n, pg_prev, pg_next, btl, pg_type) do { \ + PGNO(pg) = n; \ + PREV_PGNO(pg) = pg_prev; \ + NEXT_PGNO(pg) = pg_next; \ + NUM_ENT(pg) = 0; \ + HOFFSET(pg) = pg_size; \ + LEVEL(pg) = btl; \ + TYPE(pg) = pg_type; \ +} while (0) + +/* Page header length (offset to first index). */ +#define P_OVERHEAD (SSZA(PAGE, inp)) + +/* First free byte. */ +#define LOFFSET(pg) (P_OVERHEAD + NUM_ENT(pg) * sizeof(db_indx_t)) + +/* Free space on the page. */ +#define P_FREESPACE(pg) (HOFFSET(pg) - LOFFSET(pg)) + +/* Get a pointer to the bytes at a specific index. */ +#define P_ENTRY(pg, indx) ((u_int8_t *)pg + ((PAGE *)pg)->inp[indx]) + +/************************************************************************ + OVERFLOW PAGE LAYOUT + ************************************************************************/ + +/* + * Overflow items are referenced by HOFFPAGE and BOVERFLOW structures, which + * store a page number (the first page of the overflow item) and a length + * (the total length of the overflow item). The overflow item consists of + * some number of overflow pages, linked by the next_pgno field of the page. + * A next_pgno field of PGNO_INVALID flags the end of the overflow item. 
+ * + * Overflow page overloads: + * The amount of overflow data stored on each page is stored in the + * hf_offset field. + * + * The implementation reference counts overflow items as it's possible + * for them to be promoted onto btree internal pages. The reference + * count is stored in the entries field. + */ +#define OV_LEN(p) (((PAGE *)p)->hf_offset) +#define OV_REF(p) (((PAGE *)p)->entries) + +/* Maximum number of bytes that you can put on an overflow page. */ +#define P_MAXSPACE(psize) ((psize) - P_OVERHEAD) + +/************************************************************************ + HASH PAGE LAYOUT + ************************************************************************/ + +/* Each index references a group of bytes on the page. */ +#define H_KEYDATA 1 /* Key/data item. */ +#define H_DUPLICATE 2 /* Duplicate key/data item. */ +#define H_OFFPAGE 3 /* Overflow key/data item. */ +#define H_OFFDUP 4 /* Overflow page of duplicates. */ + +/* + * The first and second types are H_KEYDATA and H_DUPLICATE, represented + * by the HKEYDATA structure: + * + * +-----------------------------------+ + * | type | key/data ... | + * +-----------------------------------+ + * + * For duplicates, the data field encodes duplicate elements in the data + * field: + * + * +---------------------------------------------------------------+ + * | type | len1 | element1 | len1 | len2 | element2 | len2 | + * +---------------------------------------------------------------+ + * + * Thus, by keeping track of the offset in the element, we can do both + * backward and forward traversal. + */ +typedef struct _hkeydata { + u_int8_t type; /* 00: Page type. */ + u_int8_t data[1]; /* Variable length key/data item. */ +} HKEYDATA; + +/* Get a HKEYDATA item for a specific index. */ +#define GET_HKEYDATA(pg, indx) \ + ((HKEYDATA *)P_ENTRY(pg, indx)) + +/* + * The length of any HKEYDATA item. Note that indx is an element index, + * not a PAIR index. 
+ */ +#define LEN_HITEM(pg, pgsize, indx) \ + (((indx) == 0 ? pgsize : pg->inp[indx - 1]) - pg->inp[indx]) + +#define LEN_HKEYDATA(pg, psize, indx) \ + (((indx) == 0 ? psize : pg->inp[indx - 1]) - \ + pg->inp[indx] - HKEYDATA_SIZE(0)) + +/* + * Page space required to add a new HKEYDATA item to the page, with and + * without the index value. + */ +#define HKEYDATA_SIZE(len) \ + ((len) + SSZA(HKEYDATA, data)) +#define HKEYDATA_PSIZE(len) \ + (HKEYDATA_SIZE(len) + sizeof(db_indx_t)) + +/* Put a HKEYDATA item at the location referenced by a page entry. */ +#define PUT_HKEYDATA(pe, kd, len, type) { \ + ((HKEYDATA *)pe)->type = type; \ + memcpy((u_int8_t *)pe + sizeof(u_int8_t), kd, len); \ +} + +/* + * Macros the describe the page layout in terms of key-data pairs. + * The use of "pindex" indicates that the argument is the index + * expressed in pairs instead of individual elements. + */ +#define H_NUMPAIRS(pg) (NUM_ENT(pg) / 2) +#define H_KEYINDEX(pindx) (2 * (pindx)) +#define H_DATAINDEX(pindx) ((2 * (pindx)) + 1) +#define H_PAIRKEY(pg, pindx) GET_HKEYDATA(pg, H_KEYINDEX(pindx)) +#define H_PAIRDATA(pg, pindx) GET_HKEYDATA(pg, H_DATAINDEX(pindx)) +#define H_PAIRSIZE(pg, psize, pindx) \ + (LEN_HITEM(pg, psize, H_KEYINDEX(pindx)) + \ + LEN_HITEM(pg, psize, H_DATAINDEX(pindx))) +#define LEN_HDATA(p, psize, pindx) LEN_HKEYDATA(p, psize, H_DATAINDEX(pindx)) +#define LEN_HKEY(p, psize, pindx) LEN_HKEYDATA(p, psize, H_KEYINDEX(pindx)) + +/* + * The third type is the H_OFFPAGE, represented by the HOFFPAGE structure: + * + * +-----------------------------------+ + * | type | pgno_t | total len | + * +-----------------------------------+ + */ +typedef struct _hoffpage { + u_int8_t type; /* 00: Page type and delete flag. */ + u_int8_t unused[3]; /* 01-03: Padding, unused. */ + db_pgno_t pgno; /* 04-07: Offpage page number. */ + u_int32_t tlen; /* 08-11: Total length of item. */ +} HOFFPAGE; + +/* Get a HOFFPAGE item for a specific index. 
*/
+#define	GET_HOFFPAGE(pg, indx)						\
+	((HOFFPAGE *)P_ENTRY(pg, indx))
+
+/*
+ * Page space required to add a new HOFFPAGE item to the page: HOFFPAGE_SIZE
+ * is the item alone, HOFFPAGE_PSIZE also counts its db_indx_t index slot.
+ */
+#define	HOFFPAGE_SIZE		(sizeof(HOFFPAGE))
+#define	HOFFPAGE_PSIZE		(HOFFPAGE_SIZE + sizeof(db_indx_t))
+
+/*
+ * The fourth type is H_OFFDUP, represented by the HOFFDUP structure:
+ *
+ *	+-----------------------+
+ *	|   type    |  pgno_t   |
+ *	+-----------------------+
+ */
+typedef struct _hoffdup {
+	u_int8_t  type;		/*    00: Page type and delete flag. */
+	u_int8_t  unused[3];	/* 01-03: Padding, unused. */
+	db_pgno_t pgno;		/* 04-07: Offpage page number. */
+} HOFFDUP;
+
+/* Get a HOFFDUP item for a specific index. */
+#define	GET_HOFFDUP(pg, indx)						\
+	((HOFFDUP *)P_ENTRY(pg, indx))
+
+/*
+ * Page space required to add a new HOFFDUP item to the page: HOFFDUP_SIZE
+ * is the item alone, HOFFDUP_PSIZE also counts its db_indx_t index slot.
+ */
+#define	HOFFDUP_SIZE		(sizeof(HOFFDUP))
+#define	HOFFDUP_PSIZE		(HOFFDUP_SIZE + sizeof(db_indx_t))
+
+/************************************************************************
+ BTREE PAGE LAYOUT
+ ************************************************************************/
+
+/* Each index references a group of bytes on the page. */
+#define	B_KEYDATA	1	/* Key/data item. */
+#define	B_DUPLICATE	2	/* Duplicate key/data item. */
+#define	B_OVERFLOW	3	/* Overflow key/data item. */
+
+/*
+ * The first type is B_KEYDATA, represented by the BKEYDATA structure:
+ *
+ *	+-----------------------------------+
+ *	|  length   |   type    | key/data  |
+ *	+-----------------------------------+
+ */
+typedef struct _bkeydata {
+	db_indx_t len;		/* 00-01: Key/data item length. */
+	u_int	  deleted :1;	/*    02: Page type and delete flag. */
+	u_int	  type	  :7;	/*	  (shares byte 02 with deleted) */
+	u_int8_t  data[1];	/* Variable length key/data item. */
+} BKEYDATA;
+
+/* Get a BKEYDATA item for a specific index.
*/
+#define	GET_BKEYDATA(pg, indx)						\
+	((BKEYDATA *)P_ENTRY(pg, indx))
+
+/*
+ * Page space required to add a new BKEYDATA item to the page: BKEYDATA_SIZE
+ * is the 4-byte aligned item, BKEYDATA_PSIZE also counts its index slot.
+ */
+#define	BKEYDATA_SIZE(len)						\
+	ALIGN((len) + SSZA(BKEYDATA, data), 4)
+#define	BKEYDATA_PSIZE(len)						\
+	(BKEYDATA_SIZE(len) + sizeof(db_indx_t))
+
+/*
+ * The second and third types are B_DUPLICATE and B_OVERFLOW, represented
+ * by the BOVERFLOW structure:
+ *
+ *	+-----------------------------------+
+ *	|  unused   |   type    |  unused   |
+ *	+-----------------------------------+
+ *	|   pgno    | total len |
+ *	+-----------------------+
+ */
+typedef struct _boverflow {
+	db_indx_t unused1;	/* 00-01: Padding, unused. */
+	u_int	  deleted :1;	/*    02: Page type and delete flag. */
+	u_int	  type	  :7;	/*	  (shares byte 02 with deleted) */
+	u_int8_t  unused2;	/*    03: Padding, unused. */
+	db_pgno_t pgno;		/* 04-07: Next page number. */
+	u_int32_t tlen;		/* 08-11: Total length of item. */
+} BOVERFLOW;
+
+/* Get a BOVERFLOW item for a specific index. */
+#define	GET_BOVERFLOW(pg, indx)						\
+	((BOVERFLOW *)P_ENTRY(pg, indx))
+
+/*
+ * Page space required to add a new BOVERFLOW item to the page: BOVERFLOW_SIZE
+ * is the 4-byte aligned item, BOVERFLOW_PSIZE also counts its index slot.
+ */
+#define	BOVERFLOW_SIZE							\
+	ALIGN(sizeof(BOVERFLOW), 4)
+#define	BOVERFLOW_PSIZE							\
+	(BOVERFLOW_SIZE + sizeof(db_indx_t))
+
+/*
+ * Btree leaf and hash page layouts group indices in sets of two, one
+ * for the key and one for the data. Everything else does it in sets
+ * of one to save space. I use the following macros so that it's real
+ * obvious what's going on...
+ */
+#define	O_INDX	1
+#define	P_INDX	2
+
+/************************************************************************
+ BTREE INTERNAL PAGE LAYOUT
+ ************************************************************************/
+
+/*
+ * Btree internal entry.
+ *
+ *	+-----------------------------------+
+ *	| leaf pgno |   type    | data ...
|
+ *	+-----------------------------------+
+ */
+typedef struct _binternal {
+	db_indx_t  len;		/* 00-01: Key/data item length. */
+	u_int	   deleted :1;	/*    02: Page type and delete flag. */
+	u_int	   type	   :7;	/*	  (shares byte 02 with deleted) */
+	u_int8_t   unused;	/*    03: Padding, unused. */
+	db_pgno_t  pgno;	/* 04-07: Page number of referenced page. */
+	db_recno_t nrecs;	/* 08-11: Subtree record count. */
+	u_int8_t   data[1];	/* Variable length key item. */
+} BINTERNAL;
+
+/* Get a BINTERNAL item for a specific index. */
+#define	GET_BINTERNAL(pg, indx)						\
+	((BINTERNAL *)P_ENTRY(pg, indx))
+
+/*
+ * Page space required to add a new BINTERNAL item to the page: BINTERNAL_SIZE
+ * is the 4-byte aligned item, BINTERNAL_PSIZE also counts its index slot.
+ */
+#define	BINTERNAL_SIZE(len)						\
+	ALIGN((len) + SSZA(BINTERNAL, data), 4)
+#define	BINTERNAL_PSIZE(len)						\
+	(BINTERNAL_SIZE(len) + sizeof(db_indx_t))
+
+/************************************************************************
+ RECNO INTERNAL PAGE LAYOUT
+ ************************************************************************/
+
+/*
+ * The recno internal entry.
+ *
+ *	+-----------------------+
+ *	| leaf pgno | # of recs |
+ *	+-----------------------+
+ *
+ * XXX
+ * Why not fold this into the db_indx_t structure, it's fixed length.
+ */
+typedef struct _rinternal {
+	db_pgno_t  pgno;	/* 00-03: Page number of referenced page. */
+	db_recno_t nrecs;	/* 04-07: Subtree record count. */
+} RINTERNAL;
+
+/* Get a RINTERNAL item for a specific index. */
+#define	GET_RINTERNAL(pg, indx)						\
+	((RINTERNAL *)P_ENTRY(pg, indx))
+
+/*
+ * Page space required to add a new RINTERNAL item to the page: RINTERNAL_SIZE
+ * is the 4-byte aligned item, RINTERNAL_PSIZE also counts its index slot.
+ */
+#define	RINTERNAL_SIZE							\
+	ALIGN(sizeof(RINTERNAL), 4)
+#define	RINTERNAL_PSIZE							\
+	(RINTERNAL_SIZE + sizeof(db_indx_t))
+#endif /* _DB_PAGE_H_ */
diff --git a/db2/include/db_shash.h b/db2/include/db_shash.h
new file mode 100644
index 0000000000..f695a2bafa
--- /dev/null
+++ b/db2/include/db_shash.h
@@ -0,0 +1,106 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ * + * Copyright (c) 1996, 1997 + * Sleepycat Software. All rights reserved. + * + * @(#)db_shash.h 10.1 (Sleepycat) 4/12/97 + */ + +/* Hash Headers */ +typedef SH_TAILQ_HEAD(hash_head) DB_HASHTAB; + +/* + * __db_hashlookup -- + * + * Look up something in a shared memory hash table. The "elt" argument + * should be a key, and cmp_func must know how to compare a key to whatever + * structure it is that appears in the hash table. The comparison function + * cmp_func is called as: cmp_func(lookup_elt, table_elt); + * begin: address of the beginning of the hash table. + * type: the structure type of the elements that are linked in each bucket. + * field: the name of the field by which the "type" structures are linked. + * elt: the item for which we are searching in the hash table. + * result: the variable into which we'll store the element if we find it. + * nelems: the number of buckets in the hash table. + * hash_func: the hash function that operates on elements of the type of elt + * cmp_func: compare elements of the type of elt with those in the table (of + * type "type"). + * + * If the element is not in the hash table, this macro exits with result + * set to NULL. + */ +#define __db_hashlookup(begin, type, field, elt, r, n, hash, cmp) do { \ + DB_HASHTAB *__bucket; \ + u_int32_t __ndx; \ + \ + __ndx = hash(elt) % (n); \ + __bucket = &begin[__ndx]; \ + for (r = SH_TAILQ_FIRST(__bucket, type); \ + r != NULL; r = SH_TAILQ_NEXT(r, field, type)) \ + if (cmp(elt, r)) \ + break; \ +} while(0) + +/* + * __db_hashinsert -- + * + * Insert a new entry into the hash table. This assumes that lookup has + * failed; don't call it if you haven't already called __db_hashlookup. + * begin: the beginning address of the hash table. + * type: the structure type of the elements that are linked in each bucket. + * field: the name of the field by which the "type" structures are linked. + * elt: the item to be inserted. + * nelems: the number of buckets in the hash table. 
+ * hash_func: the hash function that operates on elements of the type of elt + */ +#define __db_hashinsert(begin, type, field, elt, n, hash) do { \ + u_int32_t __ndx; \ + DB_HASHTAB *__bucket; \ + \ + __ndx = hash(elt) % (n); \ + __bucket = &begin[__ndx]; \ + SH_TAILQ_INSERT_HEAD(__bucket, elt, field, type); \ +} while(0) + +/* + * __db_hashremove -- + * Remove the entry with a key == elt. + * begin: address of the beginning of the hash table. + * type: the structure type of the elements that are linked in each bucket. + * field: the name of the field by which the "type" structures are linked. + * elt: the item to be deleted. + * nelems: the number of buckets in the hash table. + * hash_func: the hash function that operates on elements of the type of elt + * cmp_func: compare elements of the type of elt with those in the table (of + * type "type"). + */ +#define __db_hashremove(begin, type, field, elt, n, hash, cmp) { \ + u_int32_t __ndx; \ + DB_HASHTAB *__bucket; \ + SH_TAILQ_ENTRY *__entp; \ + \ + __ndx = hash(elt) % (n); \ + __bucket = &begin[__ndx]; \ + __db_hashlookup(begin, type, field, elt, __entp, n, hash, cmp); \ + SH_TAILQ_REMOVE(__bucket, __entp, field, type); \ +} + +/* + * __db_hashremove_el -- + * Given the object "obj" in the table, remove it. + * begin: address of the beginning of the hash table. + * type: the structure type of the elements that are linked in each bucket. + * field: the name of the field by which the "type" structures are linked. + * obj: the object in the table that we with to delete. + * nelems: the number of buckets in the hash table. 
+ * hash_func: the hash function that operates on elements of the type of elt + */ +#define __db_hashremove_el(begin, type, field, obj, n, hash) { \ + u_int32_t __ndx; \ + DB_HASHTAB *__bucket; \ + \ + __ndx = hash(obj) % (n); \ + __bucket = &begin[__ndx]; \ + SH_TAILQ_REMOVE(__bucket, obj, field, type); \ +} diff --git a/db2/include/db_swap.h b/db2/include/db_swap.h new file mode 100644 index 0000000000..278282f5e4 --- /dev/null +++ b/db2/include/db_swap.h @@ -0,0 +1,105 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 1997 + * Sleepycat Software. All rights reserved. + */ +/* + * Copyright (c) 1990, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)db_swap.h 10.3 (Sleepycat) 6/10/97 + */ + +#ifndef _DB_SWAP_H_ +#define _DB_SWAP_H_ + +/* + * Little endian <==> big endian 32-bit swap macros. + * M_32_SWAP swap a memory location + * P_32_COPY copy potentially unaligned 4 byte quantities + * P_32_SWAP swap a referenced memory location + */ +#define M_32_SWAP(a) { \ + u_int32_t _tmp; \ + _tmp = a; \ + ((u_int8_t *)&a)[0] = ((u_int8_t *)&_tmp)[3]; \ + ((u_int8_t *)&a)[1] = ((u_int8_t *)&_tmp)[2]; \ + ((u_int8_t *)&a)[2] = ((u_int8_t *)&_tmp)[1]; \ + ((u_int8_t *)&a)[3] = ((u_int8_t *)&_tmp)[0]; \ +} +#define P_32_COPY(a, b) { \ + ((u_int8_t *)b)[0] = ((u_int8_t *)a)[0]; \ + ((u_int8_t *)b)[1] = ((u_int8_t *)a)[1]; \ + ((u_int8_t *)b)[2] = ((u_int8_t *)a)[2]; \ + ((u_int8_t *)b)[3] = ((u_int8_t *)a)[3]; \ +} +#define P_32_SWAP(a) { \ + u_int32_t _tmp; \ + P_32_COPY(a, &_tmp); \ + ((u_int8_t *)a)[0] = ((u_int8_t *)&_tmp)[3]; \ + ((u_int8_t *)a)[1] = ((u_int8_t *)&_tmp)[2]; \ + ((u_int8_t *)a)[2] = ((u_int8_t *)&_tmp)[1]; \ + ((u_int8_t *)a)[3] = ((u_int8_t *)&_tmp)[0]; \ +} + +/* + * Little endian <==> big endian 16-bit swap macros. 
+ * M_16_SWAP swap a memory location + * P_16_COPY copy potentially unaligned from one location to another + * P_16_SWAP swap a referenced memory location + */ +#define M_16_SWAP(a) { \ + u_int16_t _tmp; \ + _tmp = (u_int16_t)a; \ + ((u_int8_t *)&a)[0] = ((u_int8_t *)&_tmp)[1]; \ + ((u_int8_t *)&a)[1] = ((u_int8_t *)&_tmp)[0]; \ +} +#define P_16_COPY(a, b) { \ + ((u_int8_t *)b)[0] = ((u_int8_t *)a)[0]; \ + ((u_int8_t *)b)[1] = ((u_int8_t *)a)[1]; \ +} +#define P_16_SWAP(a) { \ + u_int16_t _tmp; \ + P_16_COPY(a, &_tmp); \ + ((u_int8_t *)a)[0] = ((u_int8_t *)&_tmp)[1]; \ + ((u_int8_t *)a)[1] = ((u_int8_t *)&_tmp)[0]; \ +} + +#define SWAP32(p) { \ + P_32_SWAP(p); \ + (p) += sizeof(u_int32_t); \ +} +#define SWAP16(p) { \ + P_16_SWAP(p); \ + (p) += sizeof(u_int16_t); \ +} +#endif /* !_DB_SWAP_H_ */ diff --git a/db2/include/hash.h b/db2/include/hash.h new file mode 100644 index 0000000000..cb8ea350f5 --- /dev/null +++ b/db2/include/hash.h @@ -0,0 +1,211 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 1997 + * Sleepycat Software. All rights reserved. + */ +/* + * Copyright (c) 1990, 1993, 1994 + * Margo Seltzer. All rights reserved. + */ +/* + * Copyright (c) 1990, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Margo Seltzer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)hash.h 10.6 (Sleepycat) 8/18/97 + */ + +/* Cursor structure definitions. */ +typedef struct cursor_t { + DBC *db_cursor; + db_pgno_t bucket; /* Bucket we are traversing. */ + DB_LOCK lock; /* Lock held on the current bucket. */ + PAGE *pagep; /* The current page. */ + db_pgno_t pgno; /* Current page number. */ + db_indx_t bndx; /* Index within the current page. */ + PAGE *dpagep; /* Duplicate page pointer. */ + db_pgno_t dpgno; /* Duplicate page number. */ + db_indx_t dndx; /* Index within a duplicate set. */ + db_indx_t dup_off; /* Offset within a duplicate set. */ + db_indx_t dup_len; /* Length of current duplicate. */ + db_indx_t dup_tlen; /* Total length of duplicate entry. */ + u_int32_t seek_size; /* Number of bytes we need for add. 
*/ + db_pgno_t seek_found_page;/* Page on which we can insert. */ + u_int32_t big_keylen; /* Length of big_key buffer. */ + void *big_key; /* Temporary buffer for big keys. */ + u_int32_t big_datalen; /* Length of big_data buffer. */ + void *big_data; /* Temporary buffer for big data. */ +#define H_OK 0x0001 +#define H_NOMORE 0x0002 +#define H_DELETED 0x0004 +#define H_ISDUP 0x0008 +#define H_EXPAND 0x0020 + u_int32_t flags; /* Cursor state: H_* flags above (e.g. H_ISDUP). */ +} HASH_CURSOR; + +#define IS_VALID(C) ((C)->bucket != BUCKET_INVALID) + + +typedef struct htab { /* Memory resident data structure. */ + DB *dbp; /* Pointer to parent db structure. */ + DB_LOCK hlock; /* Metadata page lock. */ + HASHHDR *hdr; /* Pointer to meta-data page. */ + u_int32_t (*hash) __P((const void *, u_int32_t)); /* Hash Function */ + PAGE *split_buf; /* Temporary buffer for splits. */ + int local_errno; /* Error Number -- for DBM compatibility */ + u_long hash_accesses; /* Number of accesses to this table. */ + u_long hash_collisions; /* Number of collisions on search. */ + u_long hash_expansions; /* Number of times we added a bucket. */ + u_long hash_overflows; /* Number of overflow pages. */ + u_long hash_bigpages; /* Number of big key/data pages. */ +} HTAB; + +/* + * Macro used for interface functions to set the txnid in the DBP. + */ +#define SET_LOCKER(D, T) ((D)->txn = (T)) + +/* + * More interface macros used to get/release the meta data page. + */ +#define GET_META(D, H) { \ + int _r; \ + if (F_ISSET(D, DB_AM_LOCKING) && !F_ISSET(D, DB_AM_RECOVER)) { \ + (D)->lock.pgno = BUCKET_INVALID; \ + if ((_r = lock_get((D)->dbenv->lk_info, \ + (D)->txn == NULL ? (D)->locker : (D)->txn->txnid, \ + 0, &(D)->lock_dbt, DB_LOCK_READ, \ + &(H)->hlock)) != 0) \ + return (_r < 0 ? 
EAGAIN : _r); \ + } \ + if ((_r = __ham_get_page(D, 0, (PAGE **)&((H)->hdr))) != 0) { \ + if ((H)->hlock) { \ + (void)lock_put((D)->dbenv->lk_info, (H)->hlock);\ + (H)->hlock = 0; \ + } \ + return (_r); \ + } \ +} + +#define RELEASE_META(D, H) { \ + if (!F_ISSET(D, DB_AM_RECOVER) && \ + (D)->txn == NULL && (H)->hlock) \ + (void)lock_put((H)->dbp->dbenv->lk_info, (H)->hlock); \ + (H)->hlock = 0; \ + if ((H)->hdr) \ + (void)__ham_put_page(D, (PAGE *)(H)->hdr, \ + F_ISSET(D, DB_HS_DIRTYMETA) ? 1 : 0); \ + (H)->hdr = NULL; \ + F_CLR(D, DB_HS_DIRTYMETA); \ +} + +#define DIRTY_META(H, R) { \ + if (F_ISSET((H)->dbp, DB_AM_LOCKING) && \ + !F_ISSET((H)->dbp, DB_AM_RECOVER)) { \ + DB_LOCK _tmp; \ + (H)->dbp->lock.pgno = BUCKET_INVALID; \ + if (((R) = lock_get((H)->dbp->dbenv->lk_info, \ + (H)->dbp->txn ? (H)->dbp->txn->txnid : \ + (H)->dbp->locker, 0, &(H)->dbp->lock_dbt, \ + DB_LOCK_WRITE, &_tmp)) == 0) { \ + (R) = lock_put((H)->dbp->dbenv->lk_info, \ + (H)->hlock); \ + (H)->hlock = _tmp; /* Replace hlock only if the write lock was obtained. */ \ + } else if ((R) < 0) \ + (R) = EAGAIN; \ + } \ + F_SET((H)->dbp, DB_HS_DIRTYMETA); \ +} + +/* Allocate and discard thread structures. */ +#define H_GETHANDLE(dbp, dbpp, ret) \ + if (F_ISSET(dbp, DB_AM_THREAD)) \ + ret = __db_gethandle(dbp, __ham_hdup, dbpp); \ + else { \ + ret = 0; \ + *(dbpp) = (dbp); \ + } + +#define H_PUTHANDLE(dbp) { \ + if (F_ISSET(dbp, DB_AM_THREAD)) \ + __db_puthandle(dbp); \ +} + +/* Test string. */ +#define CHARKEY "%$sniglet^&" + +/* Overflow management */ +/* + * Overflow page numbers are allocated per split point. At each doubling of + * the table, we can allocate extra pages. We keep track of how many pages + * we've allocated at each point to calculate bucket to page number mapping. + */ +#define BUCKET_TO_PAGE(H, B) \ + ((B) + 1 + ((B) ? 
(H)->hdr->spares[__db_log2((B)+1)-1] : 0)) + +#define PGNO_OF(H, S, O) (BUCKET_TO_PAGE((H), (1 << (S)) - 1) + (O)) + +/* Constraints about number of pages and how much data goes on a page. 
*/ + +#define MAX_PAGES(H) UINT32_T_MAX +#define MINFILL 0.25 +#define ISBIG(H, N) (((N) > ((H)->hdr->pagesize * MINFILL)) ? 1 : 0) + +/* Shorthands for accessing structure */ +#define NDX_INVALID 0xFFFF +#define BUCKET_INVALID 0xFFFFFFFF + +/* On page duplicates are stored as a string of size-data-size triples. */ +#define DUP_SIZE(len) ((len) + 2 * sizeof(db_indx_t)) + +/* Log message types (these are subtypes within a record type): the low two bits flag a big key/data item, the rest is the opcode. */ +#define PAIR_KEYMASK 0x1 +#define PAIR_DATAMASK 0x2 +#define PAIR_ISKEYBIG(N) ((N) & PAIR_KEYMASK) +#define PAIR_ISDATABIG(N) ((N) & PAIR_DATAMASK) +#define OPCODE_OF(N) ((N) & ~(PAIR_KEYMASK | PAIR_DATAMASK)) + +#define PUTPAIR 0x20 +#define DELPAIR 0x30 +#define PUTOVFL 0x40 +#define DELOVFL 0x50 +#define ALLOCPGNO 0x60 +#define DELPGNO 0x70 +#define SPLITOLD 0x80 +#define SPLITNEW 0x90 + +#include "hash_auto.h" +#include "hash_ext.h" +#include "db_am.h" +#include "common_ext.h" diff --git a/db2/include/hash_auto.h b/db2/include/hash_auto.h new file mode 100644 index 0000000000..5ff1229115 --- /dev/null +++ b/db2/include/hash_auto.h @@ -0,0 +1,114 @@ +/* Do not edit: automatically built by dist/db_gen.sh. 
*/ +#ifndef ham_AUTO_H +#define ham_AUTO_H + +#define DB_ham_insdel (DB_ham_BEGIN + 1) + +typedef struct _ham_insdel_args { + u_int32_t type; + DB_TXN *txnid; + DB_LSN prev_lsn; + u_int32_t opcode; + u_int32_t fileid; + db_pgno_t pgno; + u_int32_t ndx; + DB_LSN pagelsn; + DBT key; + DBT data; +} __ham_insdel_args; + + +#define DB_ham_newpage (DB_ham_BEGIN + 2) + +typedef struct _ham_newpage_args { + u_int32_t type; + DB_TXN *txnid; + DB_LSN prev_lsn; + u_int32_t opcode; + u_int32_t fileid; + db_pgno_t prev_pgno; + DB_LSN prevlsn; + db_pgno_t new_pgno; + DB_LSN pagelsn; + db_pgno_t next_pgno; + DB_LSN nextlsn; +} __ham_newpage_args; + + +#define DB_ham_splitmeta (DB_ham_BEGIN + 3) + +typedef struct _ham_splitmeta_args { + u_int32_t type; + DB_TXN *txnid; + DB_LSN prev_lsn; + u_int32_t fileid; + u_int32_t bucket; + u_int32_t ovflpoint; + u_int32_t spares; + DB_LSN metalsn; +} __ham_splitmeta_args; + + +#define DB_ham_splitdata (DB_ham_BEGIN + 4) + +typedef struct _ham_splitdata_args { + u_int32_t type; + DB_TXN *txnid; + DB_LSN prev_lsn; + u_int32_t fileid; + u_int32_t opcode; + db_pgno_t pgno; + DBT pageimage; + DB_LSN pagelsn; +} __ham_splitdata_args; + + +#define DB_ham_replace (DB_ham_BEGIN + 5) + +typedef struct _ham_replace_args { + u_int32_t type; + DB_TXN *txnid; + DB_LSN prev_lsn; + u_int32_t fileid; + db_pgno_t pgno; + u_int32_t ndx; + DB_LSN pagelsn; + int32_t off; + DBT olditem; + DBT newitem; + u_int32_t makedup; +} __ham_replace_args; + + +#define DB_ham_newpgno (DB_ham_BEGIN + 6) + +typedef struct _ham_newpgno_args { + u_int32_t type; + DB_TXN *txnid; + DB_LSN prev_lsn; + u_int32_t opcode; + u_int32_t fileid; + db_pgno_t pgno; + db_pgno_t free_pgno; + u_int32_t old_type; + db_pgno_t old_pgno; + u_int32_t new_type; + DB_LSN pagelsn; + DB_LSN metalsn; +} __ham_newpgno_args; + + +#define DB_ham_ovfl (DB_ham_BEGIN + 7) + +typedef struct _ham_ovfl_args { + u_int32_t type; + DB_TXN *txnid; + DB_LSN prev_lsn; + u_int32_t fileid; + db_pgno_t start_pgno; + 
u_int32_t npages; + db_pgno_t free_pgno; + DB_LSN metalsn; +} __ham_ovfl_args; + +#endif diff --git a/db2/include/hash_ext.h b/db2/include/hash_ext.h new file mode 100644 index 0000000000..5ae63dc6ad --- /dev/null +++ b/db2/include/hash_ext.h @@ -0,0 +1,120 @@ +/* Do not edit: automatically built by dist/distrib. */ +int __ham_open __P((DB *, DB_INFO *)); +int __ham_close __P((DB *)); +int __ham_expand_table __P((HTAB *)); +u_int32_t __ham_call_hash __P((HTAB *, u_int8_t *, int32_t)); +int __ham_init_dbt __P((DBT *, u_int32_t, void **, u_int32_t *)); +void __ham_c_update __P((HTAB *, + HASH_CURSOR *, db_pgno_t, u_int32_t, int, int)); +int __ham_hdup __P((DB *, DB *)); +int __ham_insdel_log + __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t, + u_int32_t, u_int32_t, db_pgno_t, u_int32_t, + DB_LSN *, DBT *, DBT *)); +int __ham_insdel_print + __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); +int __ham_insdel_read __P((void *, __ham_insdel_args **)); +int __ham_newpage_log + __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t, + u_int32_t, u_int32_t, db_pgno_t, DB_LSN *, + db_pgno_t, DB_LSN *, db_pgno_t, DB_LSN *)); +int __ham_newpage_print + __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); +int __ham_newpage_read __P((void *, __ham_newpage_args **)); +int __ham_splitmeta_log + __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t, + u_int32_t, u_int32_t, u_int32_t, u_int32_t, + DB_LSN *)); +int __ham_splitmeta_print + __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); +int __ham_splitmeta_read __P((void *, __ham_splitmeta_args **)); +int __ham_splitdata_log + __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t, + u_int32_t, u_int32_t, db_pgno_t, DBT *, + DB_LSN *)); +int __ham_splitdata_print + __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); +int __ham_splitdata_read __P((void *, __ham_splitdata_args **)); +int __ham_replace_log + __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t, + u_int32_t, db_pgno_t, u_int32_t, DB_LSN *, + int32_t, DBT *, DBT *, u_int32_t)); +int __ham_replace_print + __P((DB_LOG *, DBT 
*, DB_LSN *, int, void *)); +int __ham_replace_read __P((void *, __ham_replace_args **)); +int __ham_newpgno_log + __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t, + u_int32_t, u_int32_t, db_pgno_t, db_pgno_t, + u_int32_t, db_pgno_t, u_int32_t, DB_LSN *, + DB_LSN *)); +int __ham_newpgno_print + __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); +int __ham_newpgno_read __P((void *, __ham_newpgno_args **)); +int __ham_ovfl_log + __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t, + u_int32_t, db_pgno_t, u_int32_t, db_pgno_t, + DB_LSN *)); +int __ham_ovfl_print + __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); +int __ham_ovfl_read __P((void *, __ham_ovfl_args **)); +int __ham_init_print __P((DB_ENV *)); +int __ham_init_recover __P((DB_ENV *)); +int __ham_pgin __P((db_pgno_t, void *, DBT *)); +int __ham_pgout __P((db_pgno_t, void *, DBT *)); +int __ham_mswap __P((void *)); +#ifdef DEBUG +void __ham_dump_bucket __P((HTAB *, u_int32_t)); +#endif +int __ham_add_dup __P((HTAB *, HASH_CURSOR *, DBT *, int)); +void __ham_move_offpage __P((HTAB *, PAGE *, u_int32_t, db_pgno_t)); +u_int32_t __ham_func2 __P((const void *, u_int32_t)); +u_int32_t __ham_func3 __P((const void *, u_int32_t)); +u_int32_t __ham_func4 __P((const void *, u_int32_t)); +u_int32_t __ham_func5 __P((const void *, u_int32_t)); +int __ham_item __P((HTAB *, HASH_CURSOR *, db_lockmode_t)); +int __ham_item_reset __P((HTAB *, HASH_CURSOR *)); +void __ham_item_init __P((HASH_CURSOR *)); +int __ham_item_done __P((HTAB *, HASH_CURSOR *, int)); +int __ham_item_last __P((HTAB *, HASH_CURSOR *, db_lockmode_t)); +int __ham_item_first __P((HTAB *, HASH_CURSOR *, db_lockmode_t)); +int __ham_item_prev __P((HTAB *, HASH_CURSOR *, db_lockmode_t)); +int __ham_item_next __P((HTAB *, HASH_CURSOR *, db_lockmode_t)); +void __ham_putitem __P((PAGE *p, const DBT *, int)); +int __ham_del_pair __P((HTAB *, HASH_CURSOR *)); +int __ham_replpair __P((HTAB *, HASH_CURSOR *, DBT *, u_int32_t)); +void __ham_onpage_replace __P((PAGE *, size_t, u_int32_t, 
int32_t, + int32_t, DBT *)); +int __ham_split_page __P((HTAB *, u_int32_t, u_int32_t)); +int __ham_add_el __P((HTAB *, HASH_CURSOR *, const DBT *, const DBT *, + int)); +void __ham_copy_item __P((HTAB *, PAGE *, int, PAGE *)); +int __ham_add_ovflpage __P((HTAB *, PAGE *, int, PAGE **)); +int __ham_new_page __P((HTAB *, u_int32_t, u_int32_t, PAGE **)); +int __ham_del_page __P((DB *, PAGE *)); +int __ham_put_page __P((DB *, PAGE *, int32_t)); +int __ham_dirty_page __P((HTAB *, PAGE *)); +int __ham_get_page __P((DB *, db_pgno_t, PAGE **)); +int __ham_overflow_page __P((DB *, u_int32_t, PAGE **)); +#ifdef DEBUG +int bucket_to_page __P((HTAB *, int)); +#endif +void __ham_init_ovflpages __P((HTAB *)); +int __ham_get_cpage __P((HTAB *, HASH_CURSOR *, db_lockmode_t)); +int __ham_next_cpage __P((HTAB *, HASH_CURSOR *, db_pgno_t, + int, int)); +void __ham_dpair __P((DB *, PAGE *, u_int32_t)); +int __ham_insdel_recover + __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); +int __ham_newpage_recover + __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); +int __ham_replace_recover + __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); +int __ham_newpgno_recover + __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); +int __ham_splitmeta_recover + __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); +int __ham_splitdata_recover + __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); +int __ham_ovfl_recover + __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); +int __ham_stat __P((DB *, FILE *)); diff --git a/db2/include/lock.h b/db2/include/lock.h new file mode 100644 index 0000000000..18d29e8740 --- /dev/null +++ b/db2/include/lock.h @@ -0,0 +1,194 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 1997 + * Sleepycat Software. All rights reserved. 
+ * + * @(#)lock.h 10.7 (Sleepycat) 7/29/97 + */ + +typedef struct __db_lockobj DB_LOCKOBJ; + +#define DB_DEFAULT_LOCK_FILE "__db_lock.share" +#define DB_LOCK_DEFAULT_N 5000 +#define DB_LOCK_MAXID 0x7fffffff + +/* + * The lock region consists of: + * The DB_LOCKREGION structure (sizeof(DB_LOCKREGION)). + * The conflict matrix of nmodes * nmodes bytes (nmodes * nmodes). + * The hash table for object lookup (hashsize * sizeof(DB_OBJ *)). + * The locks themselves (maxlocks * sizeof(struct __db_lock). + * The objects being locked (maxlocks * sizeof(DB_OBJ)). + * String space to represent the DBTs that are the objects being locked. + */ +struct __db_lockregion { + RLAYOUT hdr; /* Shared region header. */ + u_int32_t magic; /* lock magic number */ + u_int32_t version; /* version number */ + u_int32_t id; /* unique id generator */ + u_int32_t need_dd; /* flag for deadlock detector */ + u_int32_t detect; /* run dd on every conflict */ + SH_TAILQ_HEAD(lock_header) free_locks; /* free lock header */ + SH_TAILQ_HEAD(obj_header) free_objs; /* free obj header */ + u_int32_t maxlocks; /* maximum number of locks in table */ + u_int32_t table_size; /* size of hash table */ + u_int32_t nmodes; /* number of lock modes */ + u_int32_t numobjs; /* number of objects */ + u_int32_t nlockers; /* number of lockers */ + size_t increment; /* how much to grow region */ + size_t hash_off; /* offset of hash table */ + size_t mem_off; /* offset of memory region */ + size_t mem_bytes; /* number of bytes in memory region */ + u_int32_t nconflicts; /* number of lock conflicts */ + u_int32_t nrequests; /* number of lock gets */ + u_int32_t nreleases; /* number of lock puts */ + u_int32_t ndeadlocks; /* number of deadlocks */ +}; + +/* Macros to lock/unlock the region. */ +#define LOCK_LOCKREGION(lt) \ + (void)__db_mutex_lock(&(lt)->region->hdr.lock,(lt)->fd, \ + (lt)->dbenv == NULL ? 
NULL : (lt)->dbenv->db_yield) +#define UNLOCK_LOCKREGION(lt) \ + (void)__db_mutex_unlock(&(lt)->region->hdr.lock, (lt)->fd) + +/* + * Since we will be keeping DBTs in shared memory, we need the equivalent + * of a DBT that will work in shared memory. + */ +typedef struct __sh_dbt { + u_int32_t size; + ssize_t off; +} SH_DBT; + +#define SH_DBT_PTR(p) ((void *)(((u_int8_t *)(p)) + (p)->off)) + +/* + * The lock table is the per-process cookie returned from a lock_open call. + */ +struct __db_lockobj { + SH_DBT lockobj; /* Identifies object locked. */ + SH_TAILQ_ENTRY links; /* Links for free list. */ + union { + SH_TAILQ_HEAD(_wait) _waiters; /* List of waiting locks. */ + u_int32_t _dd_id; /* Deadlock detector id. */ + } wlinks; + union { + SH_LIST_HEAD(_held) _heldby; /* Locks held by this locker. */ + SH_TAILQ_HEAD(_hold) _holders; /* List of held locks. */ + } dlinks; +#define DB_LOCK_OBJTYPE 1 +#define DB_LOCK_LOCKER 2 + u_int8_t type; /* Real object or locker id. */ +}; + + +#define dd_id wlinks._dd_id +#define waiters wlinks._waiters +#define holders dlinks._holders +#define heldby dlinks._heldby + +struct __db_locktab { + DB_ENV *dbenv; /* Environment. */ + int fd; /* mapped file descriptor */ + DB_LOCKREGION *region; /* address of shared memory region */ + DB_HASHTAB *hashtab; /* Beginning of hash table. */ + size_t reg_size; /* last known size of lock region */ + void *mem; /* Beginning of string space. */ + u_int8_t *conflicts; /* Pointer to conflict matrix. */ +}; + +/* Test for conflicts. */ +#define CONFLICTS(T, HELD, WANTED) \ + T->conflicts[HELD * T->region->nmodes + WANTED] + +/* + * Status of a lock. + */ +typedef enum { + DB_LSTAT_ABORTED, /* Lock belongs to an aborted txn. */ + DB_LSTAT_ERR, /* Lock is bad. */ + DB_LSTAT_FREE, /* Lock is unallocated. */ + DB_LSTAT_HELD, /* Lock is currently held. */ + DB_LSTAT_NOGRANT, /* Lock was not granted. 
*/ + DB_LSTAT_PENDING, /* Lock was waiting and has been + * promoted; waiting for the owner + * to run and upgrade it to held. */ + DB_LSTAT_WAITING /* Lock is on the wait queue. */ +} db_status_t; + +/* + * Resources in the lock region. Used to indicate which resource + * is running low when we need to grow the region. + */ +typedef enum { + DB_LOCK_MEM, DB_LOCK_OBJ, DB_LOCK_LOCK +} db_resource_t; + +struct __db_lock { + /* + * Wait on mutex to wait on lock. You reference your own mutex with + * ID 0 and others reference your mutex with ID 1. + */ + db_mutex_t mutex; + + u_int32_t holder; /* Who holds this lock. */ + SH_TAILQ_ENTRY links; /* Free or holder/waiter list. */ + SH_LIST_ENTRY locker_links; /* List of locks held by a locker. */ + u_int32_t refcount; /* Reference count the lock. */ + db_lockmode_t mode; /* What sort of lock. */ + ssize_t obj; /* Relative offset of object struct. */ + db_status_t status; /* Status of this lock. */ +}; + +/* + * We cannot return pointers to the user (else we cannot easily grow regions), + * so we return offsets in the region. These must be converted to and from + * regular pointers. Always use the macros below. + */ +#define OFFSET_TO_LOCK(lt, off) \ + ((struct __db_lock *)((u_int8_t *)((lt)->region) + (off))) +#define LOCK_TO_OFFSET(lt, lock) \ + ((size_t)((u_int8_t *)(lock) - (u_int8_t *)lt->region)) +#define OFFSET_TO_OBJ(lt, off) \ + ((DB_LOCKOBJ *)((u_int8_t *)((lt)->region) + (off))) +#define OBJ_TO_OFFSET(lt, obj) \ + ((size_t)((u_int8_t *)(obj) - (u_int8_t *)lt->region)) + +/* + * The lock header contains the region structure and the conflict matrix. + * Aligned to a large boundary because we don't know what the underlying + * type of the hash table elements are. 
+ */ +#define LOCK_HASH_ALIGN 8 +#define LOCK_HEADER_SIZE(M) \ + ((size_t)(sizeof(DB_LOCKREGION) + ALIGN(((M) * (M)), LOCK_HASH_ALIGN))) + +/* + * For the full region, we need to add the locks, the objects, the hash table + * and the string space (which is 16 bytes per lock). M is the number of lock modes, N the number of locks, H the number of hash table entries. + */ +#define STRING_SIZE(N) (16 * (N)) + +#define LOCK_REGION_SIZE(M, N, H) \ + (ALIGN(LOCK_HEADER_SIZE(M) + \ + (H) * sizeof(DB_HASHTAB), MUTEX_ALIGNMENT) + \ + (N) * ALIGN(sizeof(struct __db_lock), MUTEX_ALIGNMENT) + \ + ALIGN((N) * sizeof(DB_LOCKOBJ), sizeof(size_t)) + \ + ALIGN(STRING_SIZE(N), sizeof(size_t))) + +#ifdef DEBUG +#define LOCK_DEBUG_LOCKERS 0x0001 +#define LOCK_DEBUG_LOCK 0x0002 +#define LOCK_DEBUG_OBJ 0x0004 +#define LOCK_DEBUG_CONF 0x0008 +#define LOCK_DEBUG_MEM 0x0010 +#define LOCK_DEBUG_BUCKET 0x0020 +#define LOCK_DEBUG_OBJECTS 0x0040 +#define LOCK_DEBUG_ALL 0xFFFF + +#define LOCK_DEBUG_NOMUTEX 0x0100 +#endif + +#include "lock_ext.h" diff --git a/db2/include/lock_ext.h b/db2/include/lock_ext.h new file mode 100644 index 0000000000..59d5072bc4 --- /dev/null +++ b/db2/include/lock_ext.h @@ -0,0 +1,8 @@ +/* Do not edit: automatically built by dist/distrib. */ +int __lock_getobj __P((DB_LOCKTAB *, + u_int32_t, DBT *, u_int32_t type, DB_LOCKOBJ **)); +int __lock_cmp __P((DBT *, DB_LOCKOBJ *)); +int __lock_locker_cmp __P((u_int32_t, DB_LOCKOBJ *)); +int __lock_ohash __P((DBT *)); +u_int32_t __lock_locker_hash __P((u_int32_t)); +u_int32_t __lock_lhash __P((DB_LOCKOBJ *)); diff --git a/db2/include/log.h b/db2/include/log.h new file mode 100644 index 0000000000..970dfd153a --- /dev/null +++ b/db2/include/log.h @@ -0,0 +1,157 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 1997 + * Sleepycat Software. All rights reserved. 
+ * + * @(#)log.h 10.8 (Sleepycat) 8/18/97 + */ + +#ifndef _LOG_H_ +#define _LOG_H_ + +struct __fname; typedef struct __fname FNAME; +struct __hdr; typedef struct __hdr HDR; +struct __log; typedef struct __log LOG; +struct __log_persist; typedef struct __log_persist LOGP; + +#define MAXLFNAME 99999 /* Maximum log file name. */ +#define LFNAME "log.%05d" /* Log file name template. */ + + /* Default log name. */ +#define DB_DEFAULT_LOG_FILE "__db_log.share" + +#define DEFAULT_MAX (10 * 1048576) /* 10 Mb. */ + +/* Macros to return per-process address, offsets. */ +#define ADDR(base, offset) ((void *)((u_int8_t *)((base)->addr) + offset)) +#define OFFSET(base, p) ((u_int8_t *)(p) - (u_int8_t *)(base)->addr) + +/* Macros to lock/unlock the region and threads. */ +#define LOCK_LOGTHREAD(dblp) \ + if (F_ISSET(dblp, DB_AM_THREAD)) \ + (void)__db_mutex_lock(&(dblp)->mutex, -1, \ + (dblp)->dbenv == NULL ? NULL : (dblp)->dbenv->db_yield) +#define UNLOCK_LOGTHREAD(dblp) \ + if (F_ISSET(dblp, DB_AM_THREAD)) \ + (void)__db_mutex_unlock(&(dblp)->mutex, -1); +#define LOCK_LOGREGION(dblp) \ + (void)__db_mutex_lock(&((RLAYOUT *)(dblp)->lp)->lock, \ + (dblp)->fd, (dblp)->dbenv == NULL ? NULL : (dblp)->dbenv->db_yield) +#define UNLOCK_LOGREGION(dblp) \ + (void)__db_mutex_unlock(&((RLAYOUT *)(dblp)->lp)->lock, (dblp)->fd) + +/* + * The per-process table that maps log file-id's to DB structures. + */ +typedef struct __db_entry { + DB *dbp; /* Associated DB structure. */ + int refcount; /* Reference counted. */ + int deleted; /* File was not found during open. */ +} DB_ENTRY; + +/* + * DB_LOG + * Per-process log structure. + */ +struct __db_log { +/* These fields need to be protected for multi-threaded support. */ + db_mutex_t mutex; /* Mutex for thread protection. */ + + DB_ENTRY *dbentry; /* Recovery file-id mapping. */ +#define DB_GROW_SIZE 64 + u_int32_t dbentry_cnt; /* Entries. Grows by DB_GROW_SIZE. 
*/ + +/* + * These fields are always accessed while the region lock is held, so they do + * not have to be protected by the thread lock as well OR, they are only used + * when threads are not being used, i.e. most cursor operations are disallowed + * on threaded logs. + */ + u_int32_t lfname; /* Log file "name". */ + int lfd; /* Log file descriptor. */ + + DB_LSN c_lsn; /* Cursor: current LSN. */ + DBT c_dbt; /* Cursor: return DBT structure. */ + int c_fd; /* Cursor: file descriptor. */ + u_int32_t c_off; /* Cursor: previous record offset. */ + u_int32_t c_len; /* Cursor: current record length. */ + +/* These fields are not protected. */ + LOG *lp; /* Address of the shared LOG. */ + + DB_ENV *dbenv; /* Reference to error information. */ + + void *maddr; /* Address of mmap'd region. */ + void *addr; /* Address of shalloc() region. */ + int fd; /* Region file descriptor. */ + + u_int32_t flags; /* Support the DB_AM_XXX flags. */ +}; + +/* + * HDR -- + * Log record header. + */ +struct __hdr { + u_int32_t prev; /* Previous offset. */ + u_int32_t cksum; /* Current checksum. */ + u_int32_t len; /* Current length. */ +}; + +struct __log_persist { + u_int32_t magic; /* DB_LOGMAGIC */ + u_int32_t version; /* DB_LOGVERSION */ + + u_int32_t lg_max; /* Maximum file size. */ + int mode; /* Log file mode. */ +}; + +/* + * LOG -- + * Shared log region. One of these is allocated in shared memory, + * and describes the log. + */ +struct __log { + RLAYOUT rlayout; /* General region information. */ + + LOGP persist; /* Persistent information. */ + + SH_TAILQ_HEAD(__fq) fq; /* List of file names. */ + + DB_LSN lsn; /* LSN at current file offset. */ + DB_LSN c_lsn; /* LSN of the last checkpoint. */ + DB_LSN s_lsn; /* LSN of the last sync. */ + DB_LSN span_lsn; /* LSN spanning buffer write. */ + + u_int32_t len; /* Length of the last record. */ + + size_t b_off; /* Current offset in the buffer. */ + u_int32_t w_off; /* Current write offset in the file. 
*/ + + time_t chkpt; /* Time of the last checkpoint. */ + u_int32_t written; /* Bytes written since checkpoint. */ + + u_int8_t buf[4 * 1024]; /* Log buffer. */ +}; + +/* + * FNAME -- + * File name and id. + */ +struct __fname { + SH_TAILQ_ENTRY q; /* File name queue. */ + + u_int16_t ref; /* Reference count. */ + + u_int32_t id; /* Logging file id. */ + DBTYPE s_type; /* Saved DB type. */ + + u_int32_t fileid_off; /* Unique file id offset. */ + + size_t name_off; /* Name offset. */ +}; + +#include "log_auto.h" +#include "log_ext.h" +#endif /* _LOG_H_ */ diff --git a/db2/include/log_auto.h b/db2/include/log_auto.h new file mode 100644 index 0000000000..820aac6acf --- /dev/null +++ b/db2/include/log_auto.h @@ -0,0 +1,27 @@ +/* Do not edit: automatically built by dist/db_gen.sh. */ +#ifndef log_AUTO_H +#define log_AUTO_H + +#define DB_log_register (DB_log_BEGIN + 1) + +typedef struct _log_register_args { + u_int32_t type; + DB_TXN *txnid; + DB_LSN prev_lsn; + DBT name; + DBT uid; + u_int32_t id; + DBTYPE ftype; +} __log_register_args; + + +#define DB_log_unregister (DB_log_BEGIN + 2) + +typedef struct _log_unregister_args { + u_int32_t type; + DB_TXN *txnid; + DB_LSN prev_lsn; + u_int32_t id; +} __log_unregister_args; + +#endif diff --git a/db2/include/log_ext.h b/db2/include/log_ext.h new file mode 100644 index 0000000000..d5c9dd6e72 --- /dev/null +++ b/db2/include/log_ext.h @@ -0,0 +1,29 @@ +/* Do not edit: automatically built by dist/distrib. 
*/ +int __log_find __P((DB_ENV *, LOG *, int *)); +int __log_valid __P((DB_ENV *, LOG *, int)); +int __log_register_log + __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t, + DBT *, DBT *, u_int32_t, DBTYPE)); +int __log_register_print + __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); +int __log_register_read __P((void *, __log_register_args **)); +int __log_unregister_log + __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t, + u_int32_t)); +int __log_unregister_print + __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); +int __log_unregister_read __P((void *, __log_unregister_args **)); +int __log_init_print __P((DB_ENV *)); +int __log_init_recover __P((DB_ENV *)); +int __log_findckp __P((DB_LOG *, DB_LSN *)); +int __log_get __P((DB_LOG *, DB_LSN *, DBT *, int, int)); +int __log_put __P((DB_LOG *, DB_LSN *, const DBT *, int)); +int __log_name __P((DB_ENV *, int, char **)); +int __log_register_recover + __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); +int __log_unregister_recover + __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); +int __log_add_logid __P((DB_LOG *, DB *, u_int32_t)); +int __db_fileid_to_db __P((DB_LOG *, DB **, u_int32_t)); +void __log_close_files __P((DB_LOG *)); +void __log_rem_logid __P((DB_LOG *, u_int32_t)); diff --git a/db2/include/mp.h b/db2/include/mp.h new file mode 100644 index 0000000000..4872596f83 --- /dev/null +++ b/db2/include/mp.h @@ -0,0 +1,266 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 1997 + * Sleepycat Software. All rights reserved. + * + * @(#)mp.h 10.14 (Sleepycat) 8/18/97 + */ + +struct __bh; typedef struct __bh BH; +struct __db_mpreg; typedef struct __db_mpreg DB_MPREG; +struct __mpool; typedef struct __mpool MPOOL; +struct __mpoolfile; typedef struct __mpoolfile MPOOLFILE; + + /* Default mpool name. */ +#define DB_DEFAULT_MPOOL_FILE "__db_mpool.share" + +/* + * We default to 128K (16 8K pages) if the user doesn't specify, and + * require a minimum of 20K. 
+ */ +#define DB_CACHESIZE_DEF (128 * 1024) +#define DB_CACHESIZE_MIN ( 20 * 1024) + +/* Macro to return per-process address, offsets. */ +#define ADDR(base, offset) ((void *)((u_int8_t *)((base)->addr) + offset)) +#define OFFSET(base, p) ((u_int8_t *)(p) - (u_int8_t *)(base)->addr) + +#define INVALID 0 /* Invalid shared memory offset. */ +#define TEMPORARY "<tmp>" /* Temporary file name. */ + +/* + * There are two kinds of locks in the mpool code. The first is the region + * lock, used to serialize modifications to all data structures. The second + * is a per-buffer header lock. The locking order is as follows: + * + * Process searching for a buffer: + * Acquire the region lock. + * Find the buffer header. + * Increment the reference count (guarantee the buffer stays). + * If the BH_LOCKED flag is set: + * Release the region lock. + * Acquire the buffer lock. + * Release the buffer lock. + * Acquire the region lock. + * Return the buffer. + * + * Process reading/writing a buffer: + * Acquire the region lock. + * Find/create the buffer header. + * If reading, increment the reference count (guarantee the buffer stays). + * Set the BH_LOCKED flag. + * Acquire the buffer lock (guaranteed not to block). + * Release the region lock. + * Do the I/O and/or initialize buffer contents. + * Acquire the region lock. + * Clear the BH_LOCKED flag. + * Release the region lock. + * Release the buffer lock. + * If reading, return the buffer. + * + * Pointers to DB_MPOOL, MPOOL, DB_MPOOLFILE and MPOOLFILE structures are not + * reacquired when a region lock is reacquired because they couldn't have been + * closed/discarded and because they never move in memory. + */ +#define LOCKINIT(dbmp, mutexp) \ + if (F_ISSET(dbmp, MP_LOCKHANDLE | MP_LOCKREGION)) \ + (void)__db_mutex_init(mutexp, (dbmp)->fd) + +#define LOCKHANDLE(dbmp, mutexp) \ + if (F_ISSET(dbmp, MP_LOCKHANDLE)) \ + (void)__db_mutex_lock(mutexp, (dbmp)->fd, \ + (dbmp)->dbenv == NULL ? 
NULL : (dbmp)->dbenv->db_yield) +#define UNLOCKHANDLE(dbmp, mutexp) \ + if (F_ISSET(dbmp, MP_LOCKHANDLE)) \ + (void)__db_mutex_unlock(mutexp, (dbmp)->fd) + +#define LOCKREGION(dbmp) \ + if (F_ISSET(dbmp, MP_LOCKREGION)) \ + (void)__db_mutex_lock(&((RLAYOUT *)(dbmp)->mp)->lock, \ + (dbmp)->fd, \ + (dbmp)->dbenv == NULL ? NULL : (dbmp)->dbenv->db_yield) +#define UNLOCKREGION(dbmp) \ + if (F_ISSET(dbmp, MP_LOCKREGION)) \ + (void)__db_mutex_unlock(&((RLAYOUT *)(dbmp)->mp)->lock, \ + (dbmp)->fd) + +#define LOCKBUFFER(dbmp, bhp) \ + if (F_ISSET(dbmp, MP_LOCKREGION)) \ + (void)__db_mutex_lock(&(bhp)->mutex, (dbmp)->fd, \ + (dbmp)->dbenv == NULL ? NULL : (dbmp)->dbenv->db_yield) +#define UNLOCKBUFFER(dbmp, bhp) \ + if (F_ISSET(dbmp, MP_LOCKREGION)) \ + (void)__db_mutex_unlock(&(bhp)->mutex, (dbmp)->fd) + +/* + * DB_MPOOL -- + * Per-process memory pool structure. + */ +struct __db_mpool { +/* These fields need to be protected for multi-threaded support. */ + db_mutex_t mutex; /* Structure lock. */ + + /* List of pgin/pgout routines. */ + LIST_HEAD(__db_mpregh, __db_mpreg) dbregq; + + /* List of DB_MPOOLFILE's. */ + TAILQ_HEAD(__db_mpoolfileh, __db_mpoolfile) dbmfq; + +/* These fields are not protected. */ + DB_ENV *dbenv; /* Reference to error information. */ + + MPOOL *mp; /* Address of the shared MPOOL. */ + + void *maddr; /* Address of mmap'd region. */ + void *addr; /* Address of shalloc() region. */ + + DB_HASHTAB *htab; /* Hash table of bucket headers. */ + + int fd; /* Underlying mmap'd fd. */ + + +#define MP_ISPRIVATE 0x01 /* Private, so local memory. */ +#define MP_LOCKHANDLE 0x02 /* Threaded, lock handles and region. */ +#define MP_LOCKREGION 0x04 /* Concurrent access, lock region. */ + u_int32_t flags; +}; + +/* + * DB_MPREG -- + * DB_MPOOL registry of pgin/pgout functions. + */ +struct __db_mpreg { + LIST_ENTRY(__db_mpreg) q; /* Linked list. */ + + int ftype; /* File type. */ + /* Pgin, pgout routines. 
*/ + int (*pgin) __P((db_pgno_t, void *, DBT *)); + int (*pgout) __P((db_pgno_t, void *, DBT *)); +}; + +/* + * DB_MPOOLFILE -- + * Per-process DB_MPOOLFILE information. + */ +struct __db_mpoolfile { +/* These fields need to be protected for multi-threaded support. */ + db_mutex_t mutex; /* Structure lock. */ + + int fd; /* Underlying file descriptor. */ + + u_int32_t pinref; /* Pinned block reference count. */ + +/* These fields are not protected. */ + TAILQ_ENTRY(__db_mpoolfile) q; /* Linked list of DB_MPOOLFILE's. */ + + char *path; /* Initial file path. */ + DB_MPOOL *dbmp; /* Overlying DB_MPOOL. */ + MPOOLFILE *mfp; /* Underlying MPOOLFILE. */ + + void *addr; /* Address of mmap'd region. */ + size_t len; /* Length of mmap'd region. */ + +#define MP_PATH_ALLOC 0x01 /* Path is allocated memory. */ +#define MP_PATH_TEMP 0x02 /* Backing file is a temporary. */ +#define MP_READONLY 0x04 /* File is readonly. */ + u_int32_t flags; +}; + +/* + * MPOOL -- + * Shared memory pool region. One of these is allocated in shared + * memory, and describes the pool. + */ +struct __mpool { + RLAYOUT rlayout; /* General region information. */ + + SH_TAILQ_HEAD(__bhq) bhq; /* LRU list of buckets. */ + SH_TAILQ_HEAD(__bhfq) bhfq; /* Free buckets. */ + SH_TAILQ_HEAD(__mpfq) mpfq; /* List of MPOOLFILEs. */ + + /* + * We make the assumption that the early pages of the file are far + * more likely to be retrieved than the later pages, which means + * that the top bits are more interesting for hashing since they're + * less likely to collide. On the other hand, since 512 4K pages + * represents a 2MB file, only the bottom 9 bits of the page number + * are likely to be set. We XOR in the offset in the MPOOL of the + * MPOOLFILE that backs this particular page, since that should also + * be unique for the page. + */ +#define BUCKET(mp, mf_offset, pgno) \ + (((pgno) ^ ((mf_offset) << 9)) % (mp)->htab_buckets) + + size_t htab; /* Hash table offset. 
*/ + size_t htab_buckets; /* Number of hash table entries. */ + + DB_LSN lsn; /* Maximum checkpoint LSN. */ + int lsn_cnt; /* Checkpoint buffers left to write. */ + + DB_MPOOL_STAT stat; /* Global mpool statistics. */ + +#define MP_LSN_RETRY 0x01 /* Retry all BH_WRITE buffers. */ + u_int32_t flags; +}; + +/* + * MPOOLFILE -- + * Shared DB_MPOOLFILE information. + */ +struct __mpoolfile { + SH_TAILQ_ENTRY q; /* List of MPOOLFILEs */ + + u_int32_t ref; /* Reference count. */ + + int ftype; /* File type. */ + int can_mmap; /* If the file can be mmap'd. */ + + int lsn_off; /* Page's LSN offset. */ + + size_t path_off; /* File name location. */ + + size_t fileid_off; /* File identification location. */ + + size_t pgcookie_len; /* Pgin/pgout cookie length. */ + size_t pgcookie_off; /* Pgin/pgout cookie location. */ + + int lsn_cnt; /* Checkpoint buffers left to write. */ + + DB_MPOOL_FSTAT stat; /* Per-file mpool statistics. */ +}; + +/* + * BH -- + * Buffer header. + */ +struct __bh { + db_mutex_t mutex; /* Structure lock. */ + + u_int16_t ref; /* Reference count. */ + +#define BH_CALLPGIN 0x001 /* Page needs to be reworked... */ +#define BH_DIRTY 0x002 /* Page was modified. */ +#define BH_DISCARD 0x004 /* Page is useless. */ +#define BH_LOCKED 0x008 /* Page is locked (I/O in progress). */ +#define BH_TRASH 0x010 /* Page is garbage. */ +#define BH_WRITE 0x020 /* Page scheduled for writing. */ + u_int16_t flags; + + SH_TAILQ_ENTRY q; /* LRU list of bucket headers. */ + SH_TAILQ_ENTRY mq; /* MPOOLFILE list of bucket headers. */ + + db_pgno_t pgno; /* Underlying MPOOLFILE page number. */ + size_t mf_offset; /* Associated MPOOLFILE offset. */ + + /* + * !!! + * This array must be size_t aligned -- the DB access methods put PAGE + * and other structures into it, and expect to be able to access them + * directly. (We guarantee size_t alignment in the db_mpool(3) manual + * page as well.) + */ + u_int8_t buf[1]; /* Variable length data. 
*/ +}; + +#include "mp_ext.h" diff --git a/db2/include/mp_ext.h b/db2/include/mp_ext.h new file mode 100644 index 0000000000..3934c130a8 --- /dev/null +++ b/db2/include/mp_ext.h @@ -0,0 +1,14 @@ +/* Do not edit: automatically built by dist/distrib. */ +int __memp_bhwrite + __P((DB_MPOOL *, MPOOLFILE *, BH *, int *, int *)); +int __memp_pgread __P((DB_MPOOLFILE *, BH *, int)); +int __memp_pgwrite __P((DB_MPOOLFILE *, BH *, int *, int *)); +int __memp_pg __P((DB_MPOOLFILE *, BH *, int)); +void __memp_bhfree __P((DB_MPOOL *, MPOOLFILE *, BH *, int)); +int __memp_fopen __P((DB_MPOOL *, const char *, int, int, + int, size_t, int, DBT *, u_int8_t *, int, DB_MPOOLFILE **)); +void __memp_debug __P((DB_MPOOL *, FILE *, int)); +int __memp_ralloc __P((DB_MPOOL *, size_t, size_t *, void *)); +int __memp_ropen + __P((DB_MPOOL *, const char *, size_t, int, int)); +int __memp_rclose __P((DB_MPOOL *)); diff --git a/db2/include/mutex_ext.h b/db2/include/mutex_ext.h new file mode 100644 index 0000000000..ff46b6a404 --- /dev/null +++ b/db2/include/mutex_ext.h @@ -0,0 +1,4 @@ +/* Do not edit: automatically built by dist/distrib. */ +void __db_mutex_init __P((db_mutex_t *, off_t)); +int __db_mutex_lock __P((db_mutex_t *, int, int (*)(void))); +int __db_mutex_unlock __P((db_mutex_t *, int)); diff --git a/db2/include/os_ext.h b/db2/include/os_ext.h new file mode 100644 index 0000000000..59d72acf12 --- /dev/null +++ b/db2/include/os_ext.h @@ -0,0 +1,19 @@ +/* Do not edit: automatically built by dist/distrib. 
*/ +int __db_abspath __P((const char *)); +char *__db_rpath __P((const char *)); +int __db_dir __P((DB_ENV *, const char *, char ***, int *)); +void __db_dirf __P((DB_ENV *, char **, int)); +int __db_fileid __P((DB_ENV *, const char *, int, u_int8_t *)); +int __db_lseek __P((int, size_t, db_pgno_t, u_long, int)); +int __db_mmap __P((int, size_t, int, int, void *)); +int __db_munmap __P((void *, size_t)); +int __db_oflags __P((int)); +int __db_fdopen __P((const char *, int, int, int, int *)); +int __db_fsync __P((int)); +int __db_close __P((int)); +int __db_read __P((int, void *, size_t, ssize_t *)); +int __db_write __P((int, void *, size_t, ssize_t *)); +int __db_sleep __P((u_long, u_long)); +int __db_exists __P((const char *, int *)); +int __db_stat __P((DB_ENV *, const char *, int, off_t *, off_t *)); +int __db_unlink __P((const char *)); diff --git a/db2/include/queue.h b/db2/include/queue.h new file mode 100644 index 0000000000..0909c86c60 --- /dev/null +++ b/db2/include/queue.h @@ -0,0 +1,275 @@ +/* BSDI $Id$ */ + +/* + * Copyright (c) 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. 
Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)queue.h 8.5 (Berkeley) 8/20/94 + */ + +#ifndef _SYS_QUEUE_H_ +#define _SYS_QUEUE_H_ + +/* + * This file defines three types of data structures: lists, tail queues, + * and circular queues. + * + * A list is headed by a single forward pointer (or an array of forward + * pointers for a hash table header). The elements are doubly linked + * so that an arbitrary element can be removed without a need to + * traverse the list. New elements can be added to the list before + * or after an existing element or at the head of the list. A list + * may only be traversed in the forward direction. + * + * A tail queue is headed by a pair of pointers, one to the head of the + * list and the other to the tail of the list. The elements are doubly + * linked so that an arbitrary element can be removed without a need to + * traverse the list. New elements can be added to the list before or + * after an existing element, at the head of the list, or at the end of + * the list. 
A tail queue may only be traversed in the forward direction.
+ *
+ * A circle queue is headed by a pair of pointers, one to the head of the
+ * list and the other to the tail of the list. The elements are doubly
+ * linked so that an arbitrary element can be removed without a need to
+ * traverse the list. New elements can be added to the list before or after
+ * an existing element, at the head of the list, or at the end of the list.
+ * A circle queue may be traversed in either direction, but has a more
+ * complex end of list detection.
+ *
+ * For details on the use of these macros, see the queue(3) manual page.
+ */
+
+/*
+ * List definitions: head and link types, then the read-only accessors.
+ */
+#define LIST_HEAD(name, type) \
+struct name { \
+	struct type *lh_first; /* first element */ \
+}
+
+#define LIST_ENTRY(type) \
+struct { \
+	struct type *le_next; /* next element */ \
+	struct type **le_prev; /* address of previous next element */ \
+}
+
+#define LIST_FIRST(head) ((head)->lh_first) /* the head's first-element pointer */
+#define LIST_NEXT(elm, field) ((elm)->field.le_next) /* elm's successor pointer */
+#define LIST_END(head) NULL /* end-of-list marker; head is not evaluated */
+
+/*
+ * List functions.
+ */ +#define LIST_INIT(head) { \ + (head)->lh_first = NULL; \ +} + +#define LIST_INSERT_AFTER(listelm, elm, field) do { \ + if (((elm)->field.le_next = (listelm)->field.le_next) != NULL) \ + (listelm)->field.le_next->field.le_prev = \ + &(elm)->field.le_next; \ + (listelm)->field.le_next = (elm); \ + (elm)->field.le_prev = &(listelm)->field.le_next; \ +} while (0) + +#define LIST_INSERT_BEFORE(listelm, elm, field) do { \ + (elm)->field.le_prev = (listelm)->field.le_prev; \ + (elm)->field.le_next = (listelm); \ + *(listelm)->field.le_prev = (elm); \ + (listelm)->field.le_prev = &(elm)->field.le_next; \ +} while (0) + +#define LIST_INSERT_HEAD(head, elm, field) do { \ + if (((elm)->field.le_next = (head)->lh_first) != NULL) \ + (head)->lh_first->field.le_prev = &(elm)->field.le_next;\ + (head)->lh_first = (elm); \ + (elm)->field.le_prev = &(head)->lh_first; \ +} while (0) + +#define LIST_REMOVE(elm, field) do { \ + if ((elm)->field.le_next != NULL) \ + (elm)->field.le_next->field.le_prev = \ + (elm)->field.le_prev; \ + *(elm)->field.le_prev = (elm)->field.le_next; \ +} while (0) + +/* + * Tail queue definitions. + */ +#define TAILQ_HEAD(name, type) \ +struct name { \ + struct type *tqh_first; /* first element */ \ + struct type **tqh_last; /* addr of last next element */ \ +} + +#define TAILQ_ENTRY(type) \ +struct { \ + struct type *tqe_next; /* next element */ \ + struct type **tqe_prev; /* address of previous next element */ \ +} + +#define TAILQ_FIRST(head) ((head)->tqh_first) +#define TAILQ_NEXT(elm, field) ((elm)->field.tqe_next) +#define TAILQ_END(head) NULL + +/* + * Tail queue functions. 
+ */
+#define TAILQ_INIT(head) do { /* empty queue: tqh_last points at tqh_first */ \
+	(head)->tqh_first = NULL; \
+	(head)->tqh_last = &(head)->tqh_first; \
+} while (0)
+
+#define TAILQ_INSERT_HEAD(head, elm, field) do { /* elm becomes the first element */ \
+	if (((elm)->field.tqe_next = (head)->tqh_first) != NULL) \
+		(head)->tqh_first->field.tqe_prev = \
+		    &(elm)->field.tqe_next; \
+	else /* queue was empty, so elm is also the last element */ \
+		(head)->tqh_last = &(elm)->field.tqe_next; \
+	(head)->tqh_first = (elm); \
+	(elm)->field.tqe_prev = &(head)->tqh_first; \
+} while (0)
+
+#define TAILQ_INSERT_TAIL(head, elm, field) do { /* elm becomes the last element */ \
+	(elm)->field.tqe_next = NULL; \
+	(elm)->field.tqe_prev = (head)->tqh_last; \
+	*(head)->tqh_last = (elm); \
+	(head)->tqh_last = &(elm)->field.tqe_next; \
+} while (0)
+
+#define TAILQ_INSERT_AFTER(head, listelm, elm, field) do { /* link elm after listelm */ \
+	if (((elm)->field.tqe_next = (listelm)->field.tqe_next) != NULL) \
+		(elm)->field.tqe_next->field.tqe_prev = \
+		    &(elm)->field.tqe_next; \
+	else /* listelm was last, so elm is the new last element */ \
+		(head)->tqh_last = &(elm)->field.tqe_next; \
+	(listelm)->field.tqe_next = (elm); \
+	(elm)->field.tqe_prev = &(listelm)->field.tqe_next; \
+} while (0)
+
+#define TAILQ_INSERT_BEFORE(listelm, elm, field) do { /* link elm before listelm */ \
+	(elm)->field.tqe_prev = (listelm)->field.tqe_prev; \
+	(elm)->field.tqe_next = (listelm); \
+	*(listelm)->field.tqe_prev = (elm); \
+	(listelm)->field.tqe_prev = &(elm)->field.tqe_next; \
+} while (0)
+
+#define TAILQ_REMOVE(head, elm, field) do { /* unlink elm; elm's own links are not reset */ \
+	if (((elm)->field.tqe_next) != NULL) \
+		(elm)->field.tqe_next->field.tqe_prev = \
+		    (elm)->field.tqe_prev; \
+	else /* elm was last: the tail moves back to elm's predecessor link */ \
+		(head)->tqh_last = (elm)->field.tqe_prev; \
+	*(elm)->field.tqe_prev = (elm)->field.tqe_next; \
+} while (0)
+
+/*
+ * Circular queue definitions.
+ */
+#define CIRCLEQ_HEAD(name, type) \
+struct name { \
+	struct type *cqh_first; /* first element */ \
+	struct type *cqh_last; /* last element */ \
+}
+
+#define CIRCLEQ_ENTRY(type) \
+struct { \
+	struct type *cqe_next; /* next element */ \
+	struct type *cqe_prev; /* previous element */ \
+}
+
+#define CIRCLEQ_FIRST(head) ((head)->cqh_first)
+#define CIRCLEQ_LAST(head) ((head)->cqh_last)
+#define CIRCLEQ_END(head) ((void *)(head)) /* the head itself marks both ends */
+#define CIRCLEQ_NEXT(elm, field) ((elm)->field.cqe_next)
+#define CIRCLEQ_PREV(elm, field) ((elm)->field.cqe_prev)
+
+/*
+ * Circular queue functions. The head address stands in for "no element".
+ */
+#define CIRCLEQ_INIT(head) do { /* empty queue: both ends refer to the head */ \
+	(head)->cqh_first = (void *)(head); \
+	(head)->cqh_last = (void *)(head); \
+} while (0)
+
+#define CIRCLEQ_INSERT_AFTER(head, listelm, elm, field) do { /* link elm after listelm */ \
+	(elm)->field.cqe_next = (listelm)->field.cqe_next; \
+	(elm)->field.cqe_prev = (listelm); \
+	if ((listelm)->field.cqe_next == (void *)(head)) \
+		(head)->cqh_last = (elm); \
+	else \
+		(listelm)->field.cqe_next->field.cqe_prev = (elm); \
+	(listelm)->field.cqe_next = (elm); \
+} while (0)
+
+#define CIRCLEQ_INSERT_BEFORE(head, listelm, elm, field) do { /* link elm before listelm */ \
+	(elm)->field.cqe_next = (listelm); \
+	(elm)->field.cqe_prev = (listelm)->field.cqe_prev; \
+	if ((listelm)->field.cqe_prev == (void *)(head)) \
+		(head)->cqh_first = (elm); \
+	else \
+		(listelm)->field.cqe_prev->field.cqe_next = (elm); \
+	(listelm)->field.cqe_prev = (elm); \
+} while (0)
+
+#define CIRCLEQ_INSERT_HEAD(head, elm, field) do { /* elm becomes the first element */ \
+	(elm)->field.cqe_next = (head)->cqh_first; \
+	(elm)->field.cqe_prev = (void *)(head); \
+	if ((head)->cqh_last == (void *)(head)) \
+		(head)->cqh_last = (elm); \
+	else \
+		(head)->cqh_first->field.cqe_prev = (elm); \
+	(head)->cqh_first = (elm); \
+} while (0)
+
+#define CIRCLEQ_INSERT_TAIL(head, elm, field) do { /* elm becomes the last element */ \
+	(elm)->field.cqe_next = (void *)(head); \
+	(elm)->field.cqe_prev = (head)->cqh_last; \
+	if ((head)->cqh_first == (void *)(head)) \
+		(head)->cqh_first = (elm); \
+	else \
+		(head)->cqh_last->field.cqe_next = (elm); \
+	(head)->cqh_last = (elm); \
+} while (0)
+
+#define CIRCLEQ_REMOVE(head, elm, field) do { /* unlink elm; elm's own links are not reset */ \
+	if ((elm)->field.cqe_next == (void *)(head)) \
+		(head)->cqh_last = (elm)->field.cqe_prev; \
+	else \
+		(elm)->field.cqe_next->field.cqe_prev = \
+		    (elm)->field.cqe_prev; \
+	if ((elm)->field.cqe_prev == (void *)(head)) \
+		(head)->cqh_first = (elm)->field.cqe_next; \
+	else \
+		(elm)->field.cqe_prev->field.cqe_next = \
+		    (elm)->field.cqe_next; \
+} while (0)
+#endif /* !_SYS_QUEUE_H_ */
diff --git a/db2/include/shqueue.h b/db2/include/shqueue.h
new file mode 100644
index 0000000000..c3e2f4aecc
--- /dev/null
+++ b/db2/include/shqueue.h
@@ -0,0 +1,361 @@
+/*
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)shqueue.h 8.11 (Sleepycat) 7/27/97
+ */
+
+#ifndef _SYS_SHQUEUE_H_
+#define _SYS_SHQUEUE_H_
+
+/*
+ * This file defines three types of data structures: lists, tail queues, and
+ * circular queues, similarly to the include file <sys/queue.h>.
+ *
+ * The difference is that this set of macros can be used for structures that
+ * reside in shared memory that may be mapped at different addresses in each
+ * process. In most cases, the macros for shared structures exactly mirror
+ * the normal macros, although the macro calls require an additional type
+ * parameter, only used by the HEAD and ENTRY macros of the standard macros.
+ *
+ * For details on the use of these macros, see the queue(3) manual page.
+ */
+
+/*
+ * Shared list definitions. Links are byte offsets, not pointers.
+ */
+#define SH_LIST_HEAD(name) \
+struct name { \
+	ssize_t slh_first; /* offset from the head to the first element */ \
+}
+
+#define SH_LIST_ENTRY \
+struct { \
+	ssize_t sle_next; /* relative offset of the next element */ \
+	ssize_t sle_prev; /* relative offset of the link referring here */ \
+}
+
+/*
+ * Shared list functions. Since we use relative offsets for pointers,
+ * 0 is a valid offset. Therefore, we use -1 to indicate end of list.
+ * The macros ending in "P" return pointers without checking for end
+ * of list, the others check for end of list and evaluate to either a
+ * pointer or NULL.
+ */
+
+#define SH_LIST_FIRSTP(head, type) \
+	((struct type *)(((u_int8_t *)(head)) + (head)->slh_first))
+
+#define SH_LIST_FIRST(head, type) \
+	((head)->slh_first == -1 ? NULL : \
+	((struct type *)(((u_int8_t *)(head)) + (head)->slh_first)))
+
+#define SH_LIST_NEXTP(elm, field, type) \
+	((struct type *)(((u_int8_t *)(elm)) + (elm)->field.sle_next))
+
+#define SH_LIST_NEXT(elm, field, type) \
+	((elm)->field.sle_next == -1 ? NULL : \
+	((struct type *)(((u_int8_t *)(elm)) + (elm)->field.sle_next)))
+/* Address of the ssize_t link located sle_prev bytes from elm. */
+#define SH_LIST_PREV(elm, field) \
+	((ssize_t *)(((u_int8_t *)(elm)) + (elm)->field.sle_prev))
+/* Signed distance in bytes from src to dest. */
+#define SH_PTR_TO_OFF(src, dest) \
+	((ssize_t)(((u_int8_t *)(dest)) - ((u_int8_t *)(src))))
+
+#define SH_LIST_END(head) NULL /* end-of-list marker; head is not evaluated */
+
+/*
+ * Take the element's next pointer and calculate what the corresponding
+ * Prev pointer should be -- basically it is the negation plus the offset
+ * of the next field in the structure.
+ */ +#define SH_LIST_NEXT_TO_PREV(elm, field) \ + (-(elm)->field.sle_next + SH_PTR_TO_OFF(elm, &(elm)->field.sle_next)) + +#define SH_LIST_INIT(head) (head)->slh_first = -1 + +#define SH_LIST_INSERT_AFTER(listelm, elm, field, type) do { \ + if ((listelm)->field.sle_next != -1) { \ + (elm)->field.sle_next = SH_PTR_TO_OFF(elm, \ + SH_LIST_NEXTP(listelm, field, type)); \ + SH_LIST_NEXTP(listelm, field, type)->field.sle_prev = \ + SH_LIST_NEXT_TO_PREV(elm, field); \ + } else \ + (elm)->field.sle_next = -1; \ + (listelm)->field.sle_next = SH_PTR_TO_OFF(listelm, elm); \ + (elm)->field.sle_prev = SH_LIST_NEXT_TO_PREV(listelm, field); \ +} while (0) + +#define SH_LIST_INSERT_HEAD(head, elm, field, type) do { \ + if ((head)->slh_first != -1) { \ + (elm)->field.sle_next = \ + (head)->slh_first - SH_PTR_TO_OFF(head, elm); \ + SH_LIST_FIRSTP(head, type)->field.sle_prev = \ + SH_LIST_NEXT_TO_PREV(elm, field); \ + } else \ + (elm)->field.sle_next = -1; \ + (head)->slh_first = SH_PTR_TO_OFF(head, elm); \ + (elm)->field.sle_prev = SH_PTR_TO_OFF(elm, &(head)->slh_first); \ +} while (0) + +#define SH_LIST_REMOVE(elm, field, type) do { \ + if ((elm)->field.sle_next != -1) { \ + SH_LIST_NEXTP(elm, field, type)->field.sle_prev = \ + (elm)->field.sle_prev - (elm)->field.sle_next; \ + *SH_LIST_PREV(elm, field) += (elm)->field.sle_next; \ + } else \ + *SH_LIST_PREV(elm, field) = -1; \ +} while (0) + +/* + * Shared tail queue definitions. + */ +#define SH_TAILQ_HEAD(name) \ +struct name { \ + ssize_t stqh_first; /* relative offset of first element */ \ + ssize_t stqh_last; /* relative offset of last's next */ \ +} + +#define SH_TAILQ_ENTRY \ +struct { \ + ssize_t stqe_next; /* relative offset of next element */ \ + ssize_t stqe_prev; /* relative offset of prev's next */ \ +} + +/* + * Shared tail queue functions. + */ +#define SH_TAILQ_FIRSTP(head, type) \ + ((struct type *)((u_int8_t *)(head) + (head)->stqh_first)) + +#define SH_TAILQ_FIRST(head, type) \ + ((head)->stqh_first == -1 ? 
NULL : SH_TAILQ_FIRSTP(head, type)) + +#define SH_TAILQ_NEXTP(elm, field, type) \ + ((struct type *)((u_int8_t *)(elm) + (elm)->field.stqe_next)) + +#define SH_TAILQ_NEXT(elm, field, type) \ + ((elm)->field.stqe_next == -1 ? NULL : SH_TAILQ_NEXTP(elm, field, type)) + +#define SH_TAILQ_PREVP(elm, field) \ + ((ssize_t *)((u_int8_t *)(elm) + (elm)->field.stqe_prev)) + +#define SH_TAILQ_LAST(head) \ + ((ssize_t *)(((u_int8_t *)(head)) + (head)->stqh_last)) + +#define SH_TAILQ_NEXT_TO_PREV(elm, field) \ + (-(elm)->field.stqe_next + SH_PTR_TO_OFF(elm, &(elm)->field.stqe_next)) + +#define SH_TAILQ_END(head) NULL + +#define SH_TAILQ_INIT(head) { \ + (head)->stqh_first = -1; \ + (head)->stqh_last = SH_PTR_TO_OFF(head, &(head)->stqh_first); \ +} + +#define SH_TAILQ_INSERT_HEAD(head, elm, field, type) do { \ + if ((head)->stqh_first != -1) { \ + (elm)->field.stqe_next = \ + (head)->stqh_first - SH_PTR_TO_OFF(head, elm); \ + SH_TAILQ_FIRSTP(head, type)->field.stqe_prev = \ + SH_TAILQ_NEXT_TO_PREV(elm, field); \ + } else { \ + (elm)->field.stqe_next = -1; \ + (head)->stqh_last = \ + SH_PTR_TO_OFF(head, &(elm)->field.stqe_next); \ + } \ + (head)->stqh_first = SH_PTR_TO_OFF(head, elm); \ + (elm)->field.stqe_prev = \ + SH_PTR_TO_OFF(elm, &(head)->stqh_first); \ +} while (0) + +#define SH_TAILQ_INSERT_TAIL(head, elm, field) do { \ + (elm)->field.stqe_next = -1; \ + (elm)->field.stqe_prev = \ + -SH_PTR_TO_OFF(head, elm) + (head)->stqh_last; \ + if ((head)->stqh_last == \ + SH_PTR_TO_OFF((head), &(head)->stqh_first)) \ + (head)->stqh_first = SH_PTR_TO_OFF(head, elm); \ + else \ + *SH_TAILQ_LAST(head) = -(head)->stqh_last + \ + SH_PTR_TO_OFF((elm), &(elm)->field.stqe_next) + \ + SH_PTR_TO_OFF(head, elm); \ + (head)->stqh_last = \ + SH_PTR_TO_OFF(head, &((elm)->field.stqe_next)); \ +} while (0) + +#define SH_TAILQ_INSERT_AFTER(head, listelm, elm, field, type) do { \ + if ((listelm)->field.stqe_next != -1) { \ + (elm)->field.stqe_next = (listelm)->field.stqe_next - \ + 
SH_PTR_TO_OFF(listelm, elm); \ + SH_TAILQ_NEXTP(listelm, field, type)->field.stqe_prev = \ + SH_TAILQ_NEXT_TO_PREV(elm, field); \ + } else { \ + (elm)->field.stqe_next = -1; \ + (head)->stqh_last = \ + SH_PTR_TO_OFF(head, &elm->field.stqe_next); \ + } \ + (listelm)->field.stqe_next = SH_PTR_TO_OFF(listelm, elm); \ + (elm)->field.stqe_prev = SH_TAILQ_NEXT_TO_PREV(listelm, field); \ +} while (0) + +#define SH_TAILQ_REMOVE(head, elm, field, type) do { \ + if ((elm)->field.stqe_next != -1) { \ + SH_TAILQ_NEXTP(elm, field, type)->field.stqe_prev = \ + (elm)->field.stqe_prev + \ + SH_PTR_TO_OFF(SH_TAILQ_NEXTP(elm, \ + field, type), elm); \ + *SH_TAILQ_PREVP(elm, field) += elm->field.stqe_next; \ + } else { \ + (head)->stqh_last = (elm)->field.stqe_prev + \ + SH_PTR_TO_OFF(head, elm); \ + *SH_TAILQ_PREVP(elm, field) = -1; \ + } \ +} while (0) + +/* + * Shared circular queue definitions. + */ +#define SH_CIRCLEQ_HEAD(name) \ +struct name { \ + ssize_t scqh_first; /* first element */ \ + ssize_t scqh_last; /* last element */ \ +} + +#define SH_CIRCLEQ_ENTRY \ +struct { \ + ssize_t scqe_next; /* next element */ \ + ssize_t scqe_prev; /* previous element */ \ +} + +/* + * Shared circular queue functions. + */ +#define SH_CIRCLEQ_FIRSTP(head, type) \ + ((struct type *)(((u_int8_t *)(head)) + (head)->scqh_first)) + +#define SH_CIRCLEQ_FIRST(head, type) \ + ((head)->scqh_first == -1 ? \ + (void *)head : SH_CIRCLEQ_FIRSTP(head, type)) + +#define SH_CIRCLEQ_LASTP(head, type) \ + ((struct type *)(((u_int8_t *)(head)) + (head)->scqh_last)) + +#define SH_CIRCLEQ_LAST(head, type) \ + ((head)->scqh_last == -1 ? (void *)head : SH_CIRCLEQ_LASTP(head, type)) + +#define SH_CIRCLEQ_NEXTP(elm, field, type) \ + ((struct type *)(((u_int8_t *)(elm)) + (elm)->field.scqe_next)) + +#define SH_CIRCLEQ_NEXT(head, elm, field, type) \ + ((elm)->field.scqe_next == SH_PTR_TO_OFF(elm, head) ? 
\ + (void *)head : SH_CIRCLEQ_NEXTP(elm, field, type)) + +#define SH_CIRCLEQ_PREVP(elm, field, type) \ + ((struct type *)(((u_int8_t *)(elm)) + (elm)->field.scqe_prev)) + +#define SH_CIRCLEQ_PREV(head, elm, field, type) \ + ((elm)->field.scqe_prev == SH_PTR_TO_OFF(elm, head) ? \ + (void *)head : SH_CIRCLEQ_PREVP(elm, field, type)) + +#define SH_CIRCLEQ_END(head) ((void *)(head)) + +#define SH_CIRCLEQ_INIT(head) { \ + (head)->scqh_first = 0; \ + (head)->scqh_last = 0; \ +} + +#define SH_CIRCLEQ_INSERT_AFTER(head, listelm, elm, field, type) do { \ + (elm)->field.scqe_prev = SH_PTR_TO_OFF(elm, listelm); \ + (elm)->field.scqe_next = (listelm)->field.scqe_next + \ + (elm)->field.scqe_prev; \ + if (SH_CIRCLEQ_NEXTP(listelm, field, type) == (void *)head) \ + (head)->scqh_last = SH_PTR_TO_OFF(head, elm); \ + else \ + SH_CIRCLEQ_NEXTP(listelm, \ + field, type)->field.scqe_prev = \ + SH_PTR_TO_OFF(SH_CIRCLEQ_NEXTP(listelm, \ + field, type), elm); \ + (listelm)->field.scqe_next = -(elm)->field.scqe_prev; \ +} while (0) + +#define SH_CIRCLEQ_INSERT_BEFORE(head, listelm, elm, field, type) do { \ + (elm)->field.scqe_next = SH_PTR_TO_OFF(elm, listelm); \ + (elm)->field.scqe_prev = (elm)->field.scqe_next - \ + SH_CIRCLEQ_PREVP(listelm, field, type)->field.scqe_next;\ + if (SH_CIRCLEQ_PREVP(listelm, field, type) == (void *)(head)) \ + (head)->scqh_first = SH_PTR_TO_OFF(head, elm); \ + else \ + SH_CIRCLEQ_PREVP(listelm, \ + field, type)->field.scqe_next = \ + SH_PTR_TO_OFF(SH_CIRCLEQ_PREVP(listelm, \ + field, type), elm); \ + (listelm)->field.scqe_prev = -(elm)->field.scqe_next; \ +} while (0) + +#define SH_CIRCLEQ_INSERT_HEAD(head, elm, field, type) do { \ + (elm)->field.scqe_prev = SH_PTR_TO_OFF(elm, head); \ + (elm)->field.scqe_next = (head)->scqh_first + \ + (elm)->field.scqe_prev; \ + if ((head)->scqh_last == 0) \ + (head)->scqh_last = -(elm)->field.scqe_prev; \ + else \ + SH_CIRCLEQ_FIRSTP(head, type)->field.scqe_prev = \ + SH_PTR_TO_OFF(SH_CIRCLEQ_FIRSTP(head, type), elm); \ 
+ (head)->scqh_first = -(elm)->field.scqe_prev; \ +} while (0) + +#define SH_CIRCLEQ_INSERT_TAIL(head, elm, field, type) do { \ + (elm)->field.scqe_next = SH_PTR_TO_OFF(elm, head); \ + (elm)->field.scqe_prev = (head)->scqh_last + \ + (elm)->field.scqe_next; \ + if ((head)->scqh_first == 0) \ + (head)->scqh_first = -(elm)->field.scqe_next; \ + else \ + SH_CIRCLEQ_LASTP(head, type)->field.scqe_next = \ + SH_PTR_TO_OFF(SH_CIRCLEQ_LASTP(head, type), elm); \ + (head)->scqh_last = -(elm)->field.scqe_next; \ +} while (0) + +#define SH_CIRCLEQ_REMOVE(head, elm, field, type) do { \ + if (SH_CIRCLEQ_NEXTP(elm, field, type) == (void *)(head)) \ + (head)->scqh_last += (elm)->field.scqe_prev; \ + else \ + SH_CIRCLEQ_NEXTP(elm, field, type)->field.scqe_prev += \ + (elm)->field.scqe_prev; \ + if (SH_CIRCLEQ_PREVP(elm, field, type) == (void *)(head)) \ + (head)->scqh_first += (elm)->field.scqe_next; \ + else \ + SH_CIRCLEQ_PREVP(elm, field, type)->field.scqe_next += \ + (elm)->field.scqe_next; \ +} while (0) +#endif /* !_SYS_SHQUEUE_H_ */ diff --git a/db2/include/txn.h b/db2/include/txn.h new file mode 100644 index 0000000000..f4e0999b36 --- /dev/null +++ b/db2/include/txn.h @@ -0,0 +1,112 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 1997 + * Sleepycat Software. All rights reserved. + * + * @(#)txn.h 10.6 (Sleepycat) 7/29/97 + */ +#ifndef _TXN_H_ +#define _TXN_H_ + +/* + * The name of the transaction shared memory region is DEFAULT_TXN_FILE and + * the region is always created group RW of the group owning the directory. + */ +#define DEFAULT_TXN_FILE "__db_txn.share" +#define TXN_INVALID 0xffffffff /* Maximum number of txn ids. */ +#define TXN_MINIMUM 0x80000000 /* First transaction id */ + +/* + * Transaction type declarations. + */ + +/* + * Internal data maintained in shared memory for each transaction. 
+ */ +typedef struct __txn_detail { + u_int32_t txnid; /* current transaction id + used to link free list also */ + DB_LSN last_lsn; /* last lsn written for this txn */ + DB_LSN begin_lsn; /* lsn of begin record */ + size_t last_lock; /* offset in lock region of last lock + for this transaction. */ +#define TXN_UNALLOC 0 +#define TXN_RUNNING 1 +#define TXN_ABORTED 2 +#define TXN_PREPARED 3 + u_int32_t status; /* status of the transaction */ +} TXN_DETAIL; + +/* + * The transaction manager encapsulates the transaction system. It contains + * references to the log and lock managers as well as the state that keeps + * track of the shared memory region. + */ +struct __db_txnmgr { +/* These fields need to be protected for multi-threaded support. */ + db_mutex_t mutex; /* Synchronization. */ + /* list of active transactions */ + TAILQ_HEAD(_chain, __db_txn) txn_chain; + +/* These fields are not protected. */ + DB_ENV *dbenv; /* Environment. */ + int (*recover) /* Recovery dispatch routine */ + __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); + int fd; /* mapped file descriptor */ + u_int flags; /* DB_TXN_NOSYNC, DB_THREAD */ + size_t reg_size; /* how large we think the region is */ + DB_TXNREGION *region; /* address of shared memory region */ +}; + +/* + * Layout of the shared memory region. + * + */ +struct __db_txnregion { + RLAYOUT hdr; /* Shared memory region header. 
*/ + u_int32_t magic; /* transaction magic number */ + u_int32_t version; /* version number */ + u_int32_t maxtxns; /* maximum number of active txns */ + u_int32_t last_txnid; /* last transaction id given out */ + u_int32_t free_txn; /* head of transaction free list */ + DB_LSN pending_ckp; /* last checkpoint did not finish */ + DB_LSN last_ckp; /* lsn of the last checkpoint */ + time_t time_ckp; /* time of last checkpoint */ + u_int32_t logtype; /* type of logging */ + u_int32_t locktype; /* lock type */ + u_int32_t naborts; /* number of aborted transactions */ + u_int32_t ncommits; /* number of committed transactions */ + u_int32_t nbegins; /* number of begun transactions */ + TXN_DETAIL table[1]; /* array of TXN structures */ +}; + +#define TXN_REGION_SIZE(N) \ + (sizeof(DB_TXNREGION) + N * sizeof(DB_TXN)) + +/* Macros to lock/unlock the region and threads. */ +#define LOCK_TXNTHREAD(tmgrp) \ + if (F_ISSET(tmgrp, DB_THREAD)) \ + (void)__db_mutex_lock(&(tmgrp)->mutex, -1, \ + (tmgrp)->dbenv == NULL ? NULL : (tmgrp)->dbenv->db_yield) +#define UNLOCK_TXNTHREAD(tmgrp) \ + if (F_ISSET(tmgrp, DB_THREAD)) \ + (void)__db_mutex_unlock(&(tmgrp)->mutex, -1) + +#define LOCK_TXNREGION(tmgrp) \ + (void)__db_mutex_lock(&(tmgrp)->region->hdr.lock,(tmgrp)->fd, \ + (tmgrp)->dbenv == NULL ? NULL : (tmgrp)->dbenv->db_yield) +#define UNLOCK_TXNREGION(tmgrp) \ + (void)__db_mutex_unlock(&(tmgrp)->region->hdr.lock, (tmgrp)->fd) + +/* + * Log record types. + */ +#define TXN_BEGIN 1 +#define TXN_COMMIT 2 +#define TXN_PREPARE 3 +#define TXN_CHECKPOINT 4 + +#include "txn_auto.h" +#include "txn_ext.h" +#endif /* !_TXN_H_ */ diff --git a/db2/include/txn_auto.h b/db2/include/txn_auto.h new file mode 100644 index 0000000000..fd5a456115 --- /dev/null +++ b/db2/include/txn_auto.h @@ -0,0 +1,25 @@ +/* Do not edit: automatically built by dist/db_gen.sh. 
*/ +#ifndef txn_AUTO_H +#define txn_AUTO_H + +#define DB_txn_regop (DB_txn_BEGIN + 1) + +typedef struct _txn_regop_args { + u_int32_t type; + DB_TXN *txnid; + DB_LSN prev_lsn; + u_int32_t opcode; +} __txn_regop_args; + + +#define DB_txn_ckp (DB_txn_BEGIN + 2) + +typedef struct _txn_ckp_args { + u_int32_t type; + DB_TXN *txnid; + DB_LSN prev_lsn; + DB_LSN ckp_lsn; + DB_LSN last_ckp; +} __txn_ckp_args; + +#endif diff --git a/db2/include/txn_ext.h b/db2/include/txn_ext.h new file mode 100644 index 0000000000..8ba0b0c44e --- /dev/null +++ b/db2/include/txn_ext.h @@ -0,0 +1,18 @@ +/* Do not edit: automatically built by dist/distrib. */ +int __txn_regop_log + __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t, + u_int32_t)); +int __txn_regop_print + __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); +int __txn_regop_read __P((void *, __txn_regop_args **)); +int __txn_ckp_log + __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t, + DB_LSN *, DB_LSN *)); +int __txn_ckp_print + __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); +int __txn_ckp_read __P((void *, __txn_ckp_args **)); +int __txn_init_print __P((DB_ENV *)); +int __txn_init_recover __P((DB_ENV *)); +int __txn_regop_recover + __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); +int __txn_ckp_recover __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); |