diff options
author | Ulrich Drepper <drepper@redhat.com> | 1998-06-09 15:16:55 +0000 |
---|---|---|
committer | Ulrich Drepper <drepper@redhat.com> | 1998-06-09 15:16:55 +0000 |
commit | bf7997b65c7887d2acda95f5201d818a19d81711 (patch) | |
tree | da3583de3a0b5892f90a4b1eb773a87b554ae37e /db2 | |
parent | 7646e67e6cc4c738a7b402c60fed39d52db0433b (diff) | |
download | glibc-bf7997b65c7887d2acda95f5201d818a19d81711.tar.gz glibc-bf7997b65c7887d2acda95f5201d818a19d81711.tar.xz glibc-bf7997b65c7887d2acda95f5201d818a19d81711.zip |
Update.
1998-06-09 Ulrich Drepper <drepper@cygnus.com> * sysdeps/unix/sysv/linux/netinet/ip.h (struct ip_options): Define __data member only for gcc. Reported by ak@muc.de. * misc/mntent.h: Undo last patch. * sysdeps/unix/sysv/linux/fstatvfs.c (fstatvfs): Undo last patch. * misc/tst/mntent.c: Adjust code for this change. * io/fts.c: Updated from a slightly more recent BSD version. * io/fts.h: Likewise. * libc.map: Add __libc_stack_end. * db2/Makefile (routines): Add lock_region. * db2/config.h: Update from db-2.4.14. * db2/db.h: Likewise. * db2/db_185.h: Likewise. * db2/db_int.h: Likewise. * db2/bt_close.c: Likewise. * db2/bt_compare.c: Likewise. * db2/bt_conv.c: Likewise. * db2/bt_cursor.c: Likewise. * db2/bt_delete.c: Likewise. * db2/bt_open.c: Likewise. * db2/bt_page.c: Likewise. * db2/bt_put.c: Likewise. * db2/bt_rec.c: Likewise. * db2/bt_recno.c: Likewise. * db2/bt_rsearch.c: Likewise. * db2/bt_search.c: Likewise. * db2/bt_split.c: Likewise. * db2/bt_stat.c: Likewise. * db2/btree.src: Likewise. * db2/btree_auto.c: Likewise. * db2/getlong.c: Likewise. * db2/db_appinit.c: Likewise. * db2/db_apprec.c: Likewise. * db2/db_byteorder.c: Likewise. * db2/db_err.c: Likewise. * db2/db_log2.c: Likewise. * db2/db_region.c: Likewise. * db2/db_salloc.c: Likewise. * db2/db_shash.c: Likewise. * db2/db.c: Likewise. * db2/db.src: Likewise. * db2/db_auto.c: Likewise. * db2/db_conv.c: Likewise. * db2/db_dispatch.c: Likewise. * db2/db_dup.c: Likewise. * db2/db_overflow.c: Likewise. * db2/db_pr.c: Likewise. * db2/db_rec.c: Likewise. * db2/db_ret.c: Likewise. * db2/db_thread.c: Likewise. * db2/db185.c: Likewise. * db2/db185_int.h: Likewise. * db2/dbm.c: Likewise. * db2/hash.c: Likewise. * db2/hash.src: Likewise. * db2/hash_auto.c: Likewise. * db2/hash_conv.c: Likewise. * db2/hash_debug.c: Likewise. * db2/hash_dup.c: Likewise. * db2/hash_func.c: Likewise. * db2/hash_page.c: Likewise. * db2/hash_rec.c: Likewise. * db2/hash_stat.c: Likewise. * db2/btree.h: Likewise. * db2/btree_ext.h: Likewise. 
* db2/clib_ext.h: Likewise. * db2/common_ext.h: Likewise. * db2/cxx_int.h: Likewise. * db2/db.h.src: Likewise. * db2/db_185.h.src: Likewise. * db2/db_am.h: Likewise. * db2/db_auto.h: Likewise. * db2/db_cxx.h: Likewise. * db2/db_dispatch.h: Likewise. * db2/db_ext.h: Likewise. * db2/db_int.h.src: Likewise. * db2/db_page.h: Likewise. * db2/db_shash.h: Likewise. * db2/db_swap.h: Likewise. * db2/hash.h: Likewise. * db2/hash_ext.h: Likewise. * db2/lock.h: Likewise. * db2/lock_ext.h: Likewise. * db2/log.h: Likewise. * db2/log_ext.h: Likewise. * db2/mp.h: Likewise. * db2/mp_ext.h: Likewise. * db2/mutex_ext.h: Likewise. * db2/os_ext.h: Likewise. * db2/os_func.h: Likewise. * db2/queue.h: Likewise. * db2/shqueue.h: Likewise. * db2/txn.h: Likewise. * db2/lock.c: Likewise. * db2/lock_conflict.c: Likewise. * db2/lock_deadlock.c: Likewise. * db2/lock_region.c: Likewise. * db2/lock_util.c: Likewise. * db2/log.c: Likewise. * db2/log.src: Likewise. * db2/log_archive.c: Likewise. * db2/log_auto.c: Likewise. * db2/log_compare.c: Likewise. * db2/log_findckp.c: Likewise. * db2/log_get.c: Likewise. * db2/log_put.c: Likewise. * db2/log_rec.c: Likewise. * db2/log_register.c: Likewise. * db2/mp_bh.c: Likewise. * db2/mp_fget.c: Likewise. * db2/mp_fopen.c: Likewise. * db2/mp_fput.c: Likewise. * db2/mp_fset.c: Likewise. * db2/mp_open.c: Likewise. * db2/mp_pr.c: Likewise. * db2/mp_region.c: Likewise. * db2/mp_sync.c: Likewise. * db2/68020.gcc: Likewise. * db2/mutex.c: Likewise. * db2/parisc.gcc: Likewise. * db2/parisc.hp: Likewise. * db2/sco.cc: Likewise. * db2/os_abs.c: Likewise. * db2/os_alloc.c: Likewise. * db2/os_config.c: Likewise. * db2/os_dir.c: Likewise. * db2/os_fid.c: Likewise. * db2/os_fsync.c: Likewise. * db2/os_map.c: Likewise. * db2/os_oflags.c: Likewise. * db2/os_open.c: Likewise. * db2/os_rpath.c: Likewise. * db2/os_rw.c: Likewise. * db2/os_seek.c: Likewise. * db2/os_sleep.c: Likewise. * db2/os_spin.c: Likewise. * db2/os_stat.c: Likewise. * db2/os_unlink.c: Likewise. 
* db2/db_archive.c: Likewise. * db2/db_checkpoint.c: Likewise. * db2/db_deadlock.c: Likewise. * db2/db_dump.c: Likewise. * db2/db_dump185.c: Likewise. * db2/db_load.c: Likewise. * db2/db_printlog.c: Likewise. * db2/db_recover.c: Likewise. * db2/db_stat.c: Likewise. * db2/txn.c: Likewise. * db2/txn.src: Likewise. * db2/txn_auto.c: Likewise. * db2/txn_rec.c: Likewise. * elf/rtld.c: Move definition of __libc_stack_end to ... * sysdeps/generic/dl-sysdep.h: ...here. * sysdeps/unix/sysv/linux/fstatvfs.c: Handle nodiratime option. * sysdeps/unix/sysv/linux/bits/statvfs.h: Define ST_NODIRATIME. * sysdeps/unix/sysv/linux/sys/mount.h: Define MS_NODIRATIME. 1998-06-08 21:44 Ulrich Drepper <drepper@cygnus.com> * sysdeps/unix/sysv/linux/fstatvfs.c: Handle constant option string from mntent correctly. 1998-06-06 Andreas Jaeger <aj@arthur.rhein-neckar.de> * sunrpc/Makefile (generated): Correct typo. 1998-06-04 Philip Blundell <philb@gnu.org> * elf/elf.h (EM_ARM, et al.): New definitions. * sysdeps/arm/dl-machine.h: Update for new draft ARM ELF ABI.
Diffstat (limited to 'db2')
142 files changed, 6504 insertions, 4570 deletions
diff --git a/db2/Makefile b/db2/Makefile index 35c67dadfe..cc530a8a71 100644 --- a/db2/Makefile +++ b/db2/Makefile @@ -65,7 +65,7 @@ libdb-routines := bt_close bt_compare bt_conv bt_cursor bt_delete \ os_spin db_overflow db_pr db_rec db_region db_ret db_salloc \ db_shash db_thread hash hash_auto hash_conv hash_debug \ hash_dup hash_func hash_page hash_rec hash_stat lock \ - lock_conflict lock_deadlock lock_util log log_archive \ + lock_conflict lock_deadlock lock_region lock_util log log_archive \ log_auto log_compare log_findckp log_get log_put log_rec \ log_register mp_bh mp_fget mp_fopen mp_fput mp_fset \ mp_open mp_pr mp_region mp_sync mutex txn txn_auto \ diff --git a/db2/btree/bt_close.c b/db2/btree/bt_close.c index ecccc9fe08..9df5c717e6 100644 --- a/db2/btree/bt_close.c +++ b/db2/btree/bt_close.c @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. */ /* @@ -47,18 +47,13 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)bt_close.c 10.25 (Sleepycat) 1/6/98"; +static const char sccsid[] = "@(#)bt_close.c 10.32 (Sleepycat) 5/6/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES #include <sys/types.h> -#include <sys/mman.h> -#include <errno.h> -#include <stdio.h> -#include <stdlib.h> #include <string.h> -#include <unistd.h> #endif #include "db_int.h" @@ -104,12 +99,12 @@ __bam_close(dbp) * __bam_sync -- * Sync the btree to disk. 
* - * PUBLIC: int __bam_sync __P((DB *, int)); + * PUBLIC: int __bam_sync __P((DB *, u_int32_t)); */ int __bam_sync(argdbp, flags) DB *argdbp; - int flags; + u_int32_t flags; { DB *dbp; int ret; @@ -146,7 +141,7 @@ __bam_upstat(dbp) BTMETA *meta; DB_LOCK metalock; db_pgno_t pgno; - int flags, ret; + u_int32_t flags; /* * We use a no-op log call to log the update of the statistics onto the @@ -166,8 +161,8 @@ __bam_upstat(dbp) if (__bam_pget(dbp, (PAGE **)&meta, &pgno, 0) == 0) { /* Log the change. */ if (DB_LOGGING(dbp) && - (ret = __db_noop_log(dbp->dbenv->lg_info, dbp->txn, - &LSN(meta), 0)) == 0) + __db_noop_log(dbp->dbenv->lg_info, dbp->txn, &LSN(meta), 0, + dbp->log_fileid, PGNO_METADATA, &LSN(meta)) != 0) goto err; /* Update the statistics. */ diff --git a/db2/btree/bt_compare.c b/db2/btree/bt_compare.c index a68b1fa891..5c6d1e38ca 100644 --- a/db2/btree/bt_compare.c +++ b/db2/btree/bt_compare.c @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. */ /* @@ -47,14 +47,12 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)bt_compare.c 10.4 (Sleepycat) 9/3/97"; +static const char sccsid[] = "@(#)bt_compare.c 10.9 (Sleepycat) 5/6/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES #include <sys/types.h> -#include <stdio.h> -#include <stdlib.h> #include <string.h> #endif @@ -106,7 +104,6 @@ __bam_cmp(dbp, k1, e) if (B_TYPE(bk->type) == B_OVERFLOW) bo = (BOVERFLOW *)bk; else { - memset(&k2, 0, sizeof(k2)); k2.data = bk->data; k2.size = bk->len; } @@ -115,7 +112,6 @@ __bam_cmp(dbp, k1, e) if (B_TYPE(bi->type) == B_OVERFLOW) bo = (BOVERFLOW *)(bi->data); else { - memset(&k2, 0, sizeof(k2)); k2.data = bi->data; k2.size = bi->len; } @@ -139,10 +135,21 @@ __bam_cmp(dbp, k1, e) * Otherwise, we need a contiguous record so we can hand it * to the user's routine. 
*/ + memset(&k2, 0, sizeof(k2)); if (__db_goff(dbp, &k2, bo->tlen, - bo->pgno, &t->bt_rdata.data, &t->bt_rdata.ulen) != 0) - abort(); + bo->pgno, &t->bt_rdata.data, &t->bt_rdata.ulen) != 0) { + (void)__db_panic(dbp); + return (0); + } } + + /* + * XXX + * Note, we have not cleared the k2 DBT in this path. This should + * be okay, because the user's comparison routine had better not be + * looking at any fields other than the data/size. We don't clear + * it because we go through this path a lot and it's expensive. + */ return ((*t->bt_compare)(k1, &k2)); } diff --git a/db2/btree/bt_conv.c b/db2/btree/bt_conv.c index c89493cbed..3da4507723 100644 --- a/db2/btree/bt_conv.c +++ b/db2/btree/bt_conv.c @@ -1,14 +1,14 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. */ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)bt_conv.c 10.5 (Sleepycat) 9/15/97"; +static const char sccsid[] = "@(#)bt_conv.c 10.6 (Sleepycat) 4/10/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES diff --git a/db2/btree/bt_cursor.c b/db2/btree/bt_cursor.c index f526c965e5..cfa388741e 100644 --- a/db2/btree/bt_cursor.c +++ b/db2/btree/bt_cursor.c @@ -1,22 +1,20 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. 
*/ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)bt_cursor.c 10.41 (Sleepycat) 1/8/98"; +static const char sccsid[] = "@(#)bt_cursor.c 10.53 (Sleepycat) 5/25/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES #include <sys/types.h> #include <errno.h> -#include <stdio.h> -#include <stdlib.h> #include <string.h> #endif @@ -25,24 +23,30 @@ static const char sccsid[] = "@(#)bt_cursor.c 10.41 (Sleepycat) 1/8/98"; #include "btree.h" static int __bam_c_close __P((DBC *)); -static int __bam_c_del __P((DBC *, int)); +static int __bam_c_del __P((DBC *, u_int32_t)); static int __bam_c_first __P((DB *, CURSOR *)); -static int __bam_c_get __P((DBC *, DBT *, DBT *, int)); +static int __bam_c_get __P((DBC *, DBT *, DBT *, u_int32_t)); +static int __bam_c_getstack __P((DB *, CURSOR *)); static int __bam_c_last __P((DB *, CURSOR *)); static int __bam_c_next __P((DB *, CURSOR *, int)); static int __bam_c_physdel __P((DB *, CURSOR *, PAGE *)); static int __bam_c_prev __P((DB *, CURSOR *)); -static int __bam_c_put __P((DBC *, DBT *, DBT *, int)); -static int __bam_c_rget __P((DB *, CURSOR *, DBT *, int)); -static int __bam_c_search __P((DB *, CURSOR *, const DBT *, u_int, int, int *)); +static int __bam_c_put __P((DBC *, DBT *, DBT *, u_int32_t)); +static int __bam_c_rget __P((DB *, CURSOR *, DBT *, u_int32_t)); +static int __bam_c_search + __P((DB *, CURSOR *, const DBT *, u_int32_t, int, int *)); /* Discard the current page/lock held by a cursor. */ #undef DISCARD #define DISCARD(dbp, cp) { \ - (void)memp_fput(dbp->mpf, (cp)->page, 0); \ - (cp)->page = NULL; \ - (void)__BT_TLPUT((dbp), (cp)->lock); \ - (cp)->lock = LOCK_INVALID; \ + if ((cp)->page != NULL) { \ + (void)memp_fput(dbp->mpf, (cp)->page, 0); \ + (cp)->page = NULL; \ + } \ + if ((cp)->lock != LOCK_INVALID) { \ + (void)__BT_TLPUT((dbp), (cp)->lock); \ + (cp)->lock = LOCK_INVALID; \ + } \ } /* @@ -85,9 +89,9 @@ __bam_cursor(dbp, txn, dbcp) * All cursors are queued from the master DB structure. 
Add the * cursor to that queue. */ - DB_THREAD_LOCK(dbp); + CURSOR_SETUP(dbp); TAILQ_INSERT_HEAD(&dbp->curs_queue, dbc, links); - DB_THREAD_UNLOCK(dbp); + CURSOR_TEARDOWN(dbp); *dbcp = dbc; return (0); @@ -128,13 +132,6 @@ __bam_c_iclose(dbp, dbc) CURSOR *cp; int ret; - /* - * All cursors are queued from the master DB structure. For - * now, discard the DB handle which triggered this call, and - * replace it with the cursor's reference. - */ - dbp = dbc->dbp; - /* If a cursor key was deleted, perform the actual deletion. */ cp = dbc->internal; ret = F_ISSET(cp, C_DELETED) ? __bam_c_physdel(dbp, cp, NULL) : 0; @@ -144,9 +141,9 @@ __bam_c_iclose(dbp, dbc) (void)__BT_TLPUT(dbp, cp->lock); /* Remove the cursor from the queue. */ - DB_THREAD_LOCK(dbp); + CURSOR_SETUP(dbp); TAILQ_REMOVE(&dbp->curs_queue, dbc, links); - DB_THREAD_UNLOCK(dbp); + CURSOR_TEARDOWN(dbp); /* Discard the structures. */ FREE(dbc->internal, sizeof(CURSOR)); @@ -162,8 +159,9 @@ __bam_c_iclose(dbp, dbc) static int __bam_c_del(dbc, flags) DBC *dbc; - int flags; + u_int32_t flags; { + BTREE *t; CURSOR *cp; DB *dbp; DB_LOCK lock; @@ -175,6 +173,7 @@ __bam_c_del(dbc, flags) DEBUG_LWRITE(dbc->dbp, dbc->txn, "bam_c_del", NULL, NULL, flags); cp = dbc->internal; + h = NULL; /* Check for invalid flags. */ if ((ret = __db_cdelchk(dbc->dbp, flags, @@ -186,6 +185,7 @@ __bam_c_del(dbc, flags) return (DB_KEYEMPTY); GETHANDLE(dbc->dbp, dbc->txn, &dbp, ret); + t = dbp->internal; /* * We don't physically delete the record until the cursor moves, @@ -235,8 +235,21 @@ __bam_c_del(dbc, flags) (void)__bam_ca_delete(dbp, pgno, indx, NULL, 0); ret = memp_fput(dbp->mpf, h, DB_MPOOL_DIRTY); + h = NULL; + + /* + * If it's a btree with record numbers, we have to adjust the + * counts. 
+ */ + if (F_ISSET(dbp, DB_BT_RECNUM) && + (ret = __bam_c_getstack(dbp, cp)) == 0) { + ret = __bam_adjust(dbp, t, -1); + (void)__bam_stkrel(dbp); + } -err: PUTHANDLE(dbp); +err: if (h != NULL) + (void)memp_fput(dbp->mpf, h, 0); + PUTHANDLE(dbp); return (ret); } @@ -244,14 +257,14 @@ err: PUTHANDLE(dbp); * __bam_get -- * Retrieve a key/data pair from the tree. * - * PUBLIC: int __bam_get __P((DB *, DB_TXN *, DBT *, DBT *, int)); + * PUBLIC: int __bam_get __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t)); */ int __bam_get(argdbp, txn, key, data, flags) DB *argdbp; DB_TXN *txn; DBT *key, *data; - int flags; + u_int32_t flags; { DBC dbc; CURSOR cp; @@ -289,7 +302,7 @@ static int __bam_c_get(dbc, key, data, flags) DBC *dbc; DBT *key, *data; - int flags; + u_int32_t flags; { BTREE *t; CURSOR *cp, copy; @@ -448,7 +461,7 @@ __bam_c_rget(dbp, cp, data, flags) DB *dbp; CURSOR *cp; DBT *data; - int flags; + u_int32_t flags; { BTREE *t; DBT dbt; @@ -491,7 +504,7 @@ static int __bam_c_put(dbc, key, data, flags) DBC *dbc; DBT *key, *data; - int flags; + u_int32_t flags; { BTREE *t; CURSOR *cp, copy; @@ -499,7 +512,8 @@ __bam_c_put(dbc, key, data, flags) DBT dbt; db_indx_t indx; db_pgno_t pgno; - int exact, needkey, ret; + u_int32_t iiflags; + int exact, needkey, ret, stack; void *arg; DEBUG_LWRITE(dbc->dbp, dbc->txn, "bam_c_put", @@ -524,29 +538,34 @@ __bam_c_put(dbc, key, data, flags) * To split, we need a valid key for the page. Since it's a cursor, * we have to build one. */ + stack = 0; if (0) { -split: if (needkey) { +split: /* Acquire a copy of a key from the page. */ + if (needkey) { memset(&dbt, 0, sizeof(DBT)); - ret = __db_ret(dbp, cp->page, indx, - &dbt, &t->bt_rkey.data, &t->bt_rkey.ulen); - - DISCARD(dbp, cp); - - if (ret) + if ((ret = __db_ret(dbp, cp->page, indx, + &dbt, &t->bt_rkey.data, &t->bt_rkey.ulen)) != 0) goto err; arg = &dbt; - } else { - (void)__bam_stkrel(dbp); + } else arg = key; - } + + /* Discard any pinned pages. 
*/ + if (stack) { + (void)__bam_stkrel(dbp); + stack = 0; + } else + DISCARD(dbp, cp); + if ((ret = __bam_split(dbp, arg)) != 0) goto err; } - /* If there's no key supplied, use the cursor. */ - if (flags == DB_KEYFIRST || flags == DB_KEYLAST) - needkey = 0; - else { + ret = 0; + switch (flags) { + case DB_AFTER: + case DB_BEFORE: + case DB_CURRENT: needkey = 1; if (cp->dpgno == PGNO_INVALID) { pgno = cp->pgno; @@ -555,41 +574,53 @@ split: if (needkey) { pgno = cp->dpgno; indx = cp->dindx; } - /* Acquire the current page. */ - if ((ret = __bam_lget(dbp, - 0, cp->pgno, DB_LOCK_WRITE, &cp->lock)) != 0) - goto err; - if ((ret = __bam_pget(dbp, &cp->page, &pgno, 0)) != 0) - goto err; - } + /* + * XXX + * This test is right -- we don't currently support duplicates + * in the presence of record numbers, so we don't worry about + * them if DB_BT_RECNUM is set. + */ + if (F_ISSET(dbp, DB_BT_RECNUM) && + (flags != DB_CURRENT || F_ISSET(cp, C_DELETED))) { + /* Acquire a complete stack. */ + if ((ret = __bam_c_getstack(dbp, cp)) != 0) + goto err; + cp->page = t->bt_csp->page; - ret = 0; - switch (flags) { - case DB_AFTER: - case DB_BEFORE: - case DB_CURRENT: + stack = 1; + iiflags = BI_DOINCR; + } else { + /* Acquire the current page. */ + if ((ret = __bam_lget(dbp, + 0, cp->pgno, DB_LOCK_WRITE, &cp->lock)) == 0) + ret = __bam_pget(dbp, &cp->page, &pgno, 0); + if (ret != 0) + goto err; + + iiflags = 0; + } if ((ret = __bam_iitem(dbp, &cp->page, - &indx, key, data, flags, 0)) == DB_NEEDSPLIT) + &indx, key, data, flags, iiflags)) == DB_NEEDSPLIT) goto split; break; case DB_KEYFIRST: - exact = 0; + exact = needkey = 0; if ((ret = __bam_c_search(dbp, cp, key, S_KEYFIRST, 0, &exact)) != 0) goto err; + stack = 1; indx = cp->dpgno == PGNO_INVALID ? cp->indx : cp->dindx; if ((ret = __bam_iitem(dbp, &cp->page, &indx, key, data, DB_BEFORE, exact ? 
0 : BI_NEWKEY)) == DB_NEEDSPLIT) goto split; - if (ret) - goto err; break; case DB_KEYLAST: - exact = 0; + exact = needkey = 0; if ((ret = __bam_c_search(dbp, cp, key, S_KEYLAST, 0, &exact)) != 0) goto err; + stack = 1; indx = cp->dpgno == PGNO_INVALID ? cp->indx : cp->dindx; if ((ret = __bam_iitem(dbp, &cp->page, &indx, key, @@ -623,13 +654,27 @@ split: if (needkey) { if (copy.lock != LOCK_INVALID) (void)__BT_TLPUT(dbp, copy.lock); - /* Discard the pinned page. */ - ret = memp_fput(dbp->mpf, cp->page, 0); + /* + * Discard any pages pinned in the tree and their locks, except for + * the leaf page, for which we only discard the pin, not the lock. + * + * Note, the leaf page participated in the stack we acquired, and so + * we have to adjust the stack as necessary. If there was only a + * single page on the stack, we don't have to free further stack pages. + */ + + if (stack && BT_STK_POP(t) != NULL) + (void)__bam_stkrel(dbp); + + if ((ret = memp_fput(dbp->mpf, cp->page, 0)) != 0) + goto err; + if (0) { -err: if (cp->page != NULL) - (void)memp_fput(dbp->mpf, cp->page, 0); - if (cp->lock != LOCK_INVALID) - (void)__BT_TLPUT(dbp, cp->lock); +err: /* Discard any pinned pages. */ + if (stack) + (void)__bam_stkrel(dbp); + else + DISCARD(dbp, cp); *cp = copy; } @@ -976,7 +1021,7 @@ __bam_c_search(dbp, cp, key, flags, isrecno, exactp) DB *dbp; CURSOR *cp; const DBT *key; - u_int flags; + u_int32_t flags; int isrecno, *exactp; { BTREE *t; @@ -1032,6 +1077,18 @@ __bam_c_search(dbp, cp, key, flags, isrecno, exactp) } else if ((ret = __bam_c_next(dbp, cp, 0)) != 0) return (ret); + /* + * If we don't specify an exact match (the DB_KEYFIRST/DB_KEYLAST or + * DB_SET_RANGE flags were set) __bam_search() may return a deleted + * item. For DB_KEYFIRST/DB_KEYLAST, we don't care since we're only + * using it for a tree position. For DB_SET_RANGE, we're returning + * the key, so we have to adjust it. 
+ */ + if (LF_ISSET(S_DELNO) && cp->dpgno == PGNO_INVALID && + B_DISSET(GET_BKEYDATA(cp->page, cp->indx + O_INDX)->type)) + if ((ret = __bam_c_next(dbp, cp, 0)) != 0) + return (ret); + return (0); } @@ -1101,7 +1158,7 @@ __bam_cprint(dbp) CURSOR *cp; DBC *dbc; - DB_THREAD_LOCK(dbp); + CURSOR_SETUP(dbp); for (dbc = TAILQ_FIRST(&dbp->curs_queue); dbc != NULL; dbc = TAILQ_NEXT(dbc, links)) { cp = (CURSOR *)dbc->internal; @@ -1113,7 +1170,8 @@ __bam_cprint(dbp) fprintf(stderr, "(deleted)"); fprintf(stderr, "\n"); } - DB_THREAD_UNLOCK(dbp); + CURSOR_TEARDOWN(dbp); + return (0); } #endif /* DEBUG */ @@ -1135,7 +1193,7 @@ __bam_ca_delete(dbp, pgno, indx, curs, key_delete) { DBC *dbc; CURSOR *cp; - int count; + int count; /* !!!: Has to contain max number of cursors. */ /* * Adjust the cursors. We don't have to review the cursors for any @@ -1148,8 +1206,7 @@ __bam_ca_delete(dbp, pgno, indx, curs, key_delete) * locks on the same page, but, cursors within a thread must be single * threaded, so all we're locking here is the cursor linked list. */ - DB_THREAD_LOCK(dbp); - + CURSOR_SETUP(dbp); for (count = 0, dbc = TAILQ_FIRST(&dbp->curs_queue); dbc != NULL; dbc = TAILQ_NEXT(dbc, links)) { cp = (CURSOR *)dbc->internal; @@ -1180,8 +1237,8 @@ __bam_ca_delete(dbp, pgno, indx, curs, key_delete) F_SET(cp, C_DELETED); } } + CURSOR_TEARDOWN(dbp); - DB_THREAD_UNLOCK(dbp); return (count); } @@ -1192,11 +1249,11 @@ __bam_ca_delete(dbp, pgno, indx, curs, key_delete) * PUBLIC: void __bam_ca_di __P((DB *, db_pgno_t, u_int32_t, int)); */ void -__bam_ca_di(dbp, pgno, indx, value) +__bam_ca_di(dbp, pgno, indx, adjust) DB *dbp; db_pgno_t pgno; u_int32_t indx; - int value; + int adjust; { CURSOR *cp; DBC *dbc; @@ -1208,16 +1265,16 @@ __bam_ca_di(dbp, pgno, indx, value) /* * Adjust the cursors. See the comment in __bam_ca_delete(). 
*/ - DB_THREAD_LOCK(dbp); + CURSOR_SETUP(dbp); for (dbc = TAILQ_FIRST(&dbp->curs_queue); dbc != NULL; dbc = TAILQ_NEXT(dbc, links)) { cp = (CURSOR *)dbc->internal; if (cp->pgno == pgno && cp->indx >= indx) - cp->indx += value; + cp->indx += adjust; if (cp->dpgno == pgno && cp->dindx >= indx) - cp->dindx += value; + cp->dindx += adjust; } - DB_THREAD_UNLOCK(dbp); + CURSOR_TEARDOWN(dbp); } /* @@ -1242,7 +1299,7 @@ __bam_ca_dup(dbp, fpgno, first, fi, tpgno, ti) * No need to test duplicates, this only gets called when moving * leaf page data items onto a duplicates page. */ - DB_THREAD_LOCK(dbp); + CURSOR_SETUP(dbp); for (dbc = TAILQ_FIRST(&dbp->curs_queue); dbc != NULL; dbc = TAILQ_NEXT(dbc, links)) { cp = (CURSOR *)dbc->internal; @@ -1258,7 +1315,7 @@ __bam_ca_dup(dbp, fpgno, first, fi, tpgno, ti) cp->dindx = ti; } } - DB_THREAD_UNLOCK(dbp); + CURSOR_TEARDOWN(dbp); } /* @@ -1285,14 +1342,14 @@ __bam_ca_move(dbp, fpgno, tpgno) * No need to test duplicates, this only gets called when copying * over the root page with a leaf or internal page. */ - DB_THREAD_LOCK(dbp); + CURSOR_SETUP(dbp); for (dbc = TAILQ_FIRST(&dbp->curs_queue); dbc != NULL; dbc = TAILQ_NEXT(dbc, links)) { cp = (CURSOR *)dbc->internal; if (cp->pgno == fpgno) cp->pgno = tpgno; } - DB_THREAD_UNLOCK(dbp); + CURSOR_TEARDOWN(dbp); } /* @@ -1333,7 +1390,7 @@ __bam_ca_replace(dbp, pgno, indx, pass) * for the cursor as it may have been changed by other cursor update * routines as the item was deleted/inserted. */ - DB_THREAD_LOCK(dbp); + CURSOR_SETUP(dbp); switch (pass) { case REPLACE_SETUP: /* Setup. */ for (dbc = TAILQ_FIRST(&dbp->curs_queue); @@ -1372,7 +1429,7 @@ __bam_ca_replace(dbp, pgno, indx, pass) } break; } - DB_THREAD_UNLOCK(dbp); + CURSOR_TEARDOWN(dbp); } /* @@ -1406,7 +1463,7 @@ __bam_ca_split(dbp, ppgno, lpgno, rpgno, split_indx, cleft) * the cursor is on the right page, it is decremented by the number of * records split to the left page. 
*/ - DB_THREAD_LOCK(dbp); + CURSOR_SETUP(dbp); for (dbc = TAILQ_FIRST(&dbp->curs_queue); dbc != NULL; dbc = TAILQ_NEXT(dbc, links)) { cp = (CURSOR *)dbc->internal; @@ -1427,7 +1484,7 @@ __bam_ca_split(dbp, ppgno, lpgno, rpgno, split_indx, cleft) cp->dindx -= split_indx; } } - DB_THREAD_UNLOCK(dbp); + CURSOR_TEARDOWN(dbp); } /* @@ -1440,16 +1497,17 @@ __bam_c_physdel(dbp, cp, h) CURSOR *cp; PAGE *h; { + enum { DELETE_ITEM, DELETE_PAGE, NOTHING_FURTHER } cmd; BOVERFLOW bo; BTREE *t; DBT dbt; DB_LOCK lock; db_indx_t indx; db_pgno_t pgno, next_pgno, prev_pgno; - int local, normal, ret; + int delete_page, local_page, ret; t = dbp->internal; - ret = 0; + delete_page = ret = 0; /* Figure out what we're deleting. */ if (cp->dpgno == PGNO_INVALID) { @@ -1476,9 +1534,9 @@ __bam_c_physdel(dbp, cp, h) return (ret); if ((ret = __bam_pget(dbp, &h, &pgno, 0)) != 0) return (ret); - local = 1; + local_page = 1; } else - local = 0; + local_page = 0; /* * If we're deleting a duplicate entry and there are other duplicate @@ -1515,9 +1573,9 @@ __bam_c_physdel(dbp, cp, h) if (NUM_ENT(h) == 1 && prev_pgno == PGNO_INVALID && next_pgno == PGNO_INVALID) - normal = 1; + cmd = DELETE_PAGE; else { - normal = 0; + cmd = DELETE_ITEM; /* Delete the duplicate. */ if ((ret = __db_drem(dbp, &h, indx, __bam_free)) != 0) @@ -1536,18 +1594,27 @@ __bam_c_physdel(dbp, cp, h) */ if ((h != NULL && pgno == h->pgno) || prev_pgno != PGNO_INVALID) - goto done; + cmd = NOTHING_FURTHER; } - /* Release any page we're holding and its lock. */ - if (local) { + /* + * Release any page we're holding and its lock. + * + * !!! + * If there is no subsequent page in the duplicate chain, then + * __db_drem will have put page "h" and set it to NULL. + */ + if (local_page) { if (h != NULL) (void)memp_fput(dbp->mpf, h, 0); (void)__BT_TLPUT(dbp, lock); - local = 0; + local_page = 0; } - /* Acquire the parent page. */ + if (cmd == NOTHING_FURTHER) + goto done; + + /* Acquire the parent page and switch the index to its entry. 
*/ if ((ret = __bam_lget(dbp, 0, cp->pgno, DB_LOCK_WRITE, &lock)) != 0) goto err; @@ -1555,11 +1622,10 @@ __bam_c_physdel(dbp, cp, h) (void)__BT_TLPUT(dbp, lock); goto err; } - local = 1; - - /* Switch to the parent page's entry. */ + local_page = 1; indx = cp->indx; - if (normal) + + if (cmd == DELETE_PAGE) goto btd; /* @@ -1582,47 +1648,60 @@ __bam_c_physdel(dbp, cp, h) goto done; } - /* Otherwise, do a normal btree delete. */ -btd: if ((ret = __bam_ditem(dbp, h, indx)) != 0) - goto err; - if ((ret = __bam_ditem(dbp, h, indx)) != 0) - goto err; - - /* - * If the page is empty, delete it. To delete a leaf page we need a - * copy of a key from the page. We use the first one that was there, - * since it's the last key that the page held. We malloc the page - * information instead of using the return key/data memory because - * we've already set them -- the reason that we've already set them - * is because we're (potentially) about to do a reverse split, which - * would make our saved page information useless. +btd: /* + * If the page is going to be emptied, delete it. To delete a leaf + * page we need a copy of a key from the page. We use the 0th page + * index since it's the last key that the page held. + * + * We malloc the page information instead of using the return key/data + * memory because we've already set them -- the reason we've already + * set them is because we're (potentially) about to do a reverse split, + * which would make our saved page information useless. * * XXX * The following operations to delete a page might deadlock. I think * that's OK. The problem is if we're deleting an item because we're * closing cursors because we've already deadlocked and want to call - * txn_abort(). If we fail due to deadlock, we'll leave an locked - * empty page in the tree, which won't be empty long because we're - * going to undo the delete. + * txn_abort(). 
If we fail due to deadlock, we leave a locked empty + * page in the tree, which won't be empty long because we're going to + * undo the delete. */ - if (NUM_ENT(h) == 0 && h->pgno != PGNO_ROOT) { + if (NUM_ENT(h) == 2 && h->pgno != PGNO_ROOT) { memset(&dbt, 0, sizeof(DBT)); dbt.flags = DB_DBT_MALLOC | DB_DBT_INTERNAL; if ((ret = __db_ret(dbp, h, 0, &dbt, NULL, NULL)) != 0) goto err; + delete_page = 1; + } - if (local) { - (void)memp_fput(dbp->mpf, h, 0); - (void)__BT_TLPUT(dbp, lock); - local = 0; - } + /* + * Do a normal btree delete. + * + * XXX + * Delete the key item first, otherwise the duplicate checks in + * __bam_ditem() won't work! + */ + if ((ret = __bam_ditem(dbp, h, indx)) != 0) + goto err; + if ((ret = __bam_ditem(dbp, h, indx)) != 0) + goto err; - ret = __bam_dpage(dbp, &dbt); - __db_free(dbt.data); + /* Discard any remaining locks/pages. */ + if (local_page) { + (void)memp_fput(dbp->mpf, h, 0); + (void)__BT_TLPUT(dbp, lock); + local_page = 0; } + /* Delete the page if it was emptied. */ + if (delete_page) + ret = __bam_dpage(dbp, &dbt); + err: -done: if (local) { +done: if (delete_page) + __db_free(dbt.data); + + if (local_page) { (void)memp_fput(dbp->mpf, h, 0); (void)__BT_TLPUT(dbp, lock); } @@ -1631,3 +1710,43 @@ done: if (local) { ++t->lstat.bt_deleted; return (ret); } + +/* + * __bam_c_getstack -- + * Acquire a full stack for a cursor. + */ +static int +__bam_c_getstack(dbp, cp) + DB *dbp; + CURSOR *cp; +{ + DBT dbt; + PAGE *h; + db_pgno_t pgno; + int exact, ret; + + ret = 0; + h = NULL; + memset(&dbt, 0, sizeof(DBT)); + + /* Get the page with the current item on it. */ + pgno = cp->pgno; + if ((ret = __bam_pget(dbp, &h, &pgno, 0)) != 0) + return (ret); + + /* Get a copy of a key from the page. */ + dbt.flags = DB_DBT_MALLOC | DB_DBT_INTERNAL; + if ((ret = __db_ret(dbp, h, 0, &dbt, NULL, NULL)) != 0) + goto err; + + /* Get a write-locked stack for that page. 
*/ + exact = 0; + ret = __bam_search(dbp, &dbt, S_KEYFIRST, 1, NULL, &exact); + + /* We no longer need the key or the page. */ +err: if (h != NULL) + (void)memp_fput(dbp->mpf, h, 0); + if (dbt.data != NULL) + __db_free(dbt.data); + return (ret); +} diff --git a/db2/btree/bt_delete.c b/db2/btree/bt_delete.c index baa8a25401..7e71037e46 100644 --- a/db2/btree/bt_delete.c +++ b/db2/btree/bt_delete.c @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. */ /* @@ -47,13 +47,12 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)bt_delete.c 10.25 (Sleepycat) 1/8/98"; +static const char sccsid[] = "@(#)bt_delete.c 10.31 (Sleepycat) 5/6/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES #include <sys/types.h> -#include <stdio.h> #include <string.h> #endif @@ -67,14 +66,14 @@ static int __bam_dpages __P((DB *, BTREE *)); * __bam_delete -- * Delete the items referenced by a key. * - * PUBLIC: int __bam_delete __P((DB *, DB_TXN *, DBT *, int)); + * PUBLIC: int __bam_delete __P((DB *, DB_TXN *, DBT *, u_int32_t)); */ int __bam_delete(argdbp, txn, key, flags) DB *argdbp; DB_TXN *txn; DBT *key; - int flags; + u_int32_t flags; { BTREE *t; DB *dbp; @@ -87,8 +86,8 @@ __bam_delete(argdbp, txn, key, flags) stack = 0; /* Check for invalid flags. */ - if ((ret = - __db_delchk(argdbp, flags, F_ISSET(argdbp, DB_AM_RDONLY))) != 0) + if ((ret = __db_delchk(argdbp, + key, flags, F_ISSET(argdbp, DB_AM_RDONLY))) != 0) return (ret); GETHANDLE(argdbp, txn, &dbp, ret); @@ -107,6 +106,11 @@ __bam_delete(argdbp, txn, key, flags) break; for (; cnt > 0; --cnt, ++t->lstat.bt_deleted) if (__bam_ca_delete(dbp, h->pgno, indx, NULL, 1) == 0) { + /* + * XXX + * Delete the key item first, otherwise the duplicate + * checks in __bam_ditem() won't work! 
+ */ if ((ret = __bam_ditem(dbp, h, indx)) != 0) goto err; if ((ret = __bam_ditem(dbp, h, indx)) != 0) @@ -138,14 +142,14 @@ err: if (stack) * __ram_delete -- * Delete the items referenced by a key. * - * PUBLIC: int __ram_delete __P((DB *, DB_TXN *, DBT *, int)); + * PUBLIC: int __ram_delete __P((DB *, DB_TXN *, DBT *, u_int32_t)); */ int __ram_delete(argdbp, txn, key, flags) DB *argdbp; DB_TXN *txn; DBT *key; - int flags; + u_int32_t flags; { BKEYDATA bk; BTREE *t; @@ -159,8 +163,8 @@ __ram_delete(argdbp, txn, key, flags) stack = 0; /* Check for invalid flags. */ - if ((ret = - __db_delchk(argdbp, flags, F_ISSET(argdbp, DB_AM_RDONLY))) != 0) + if ((ret = __db_delchk(argdbp, + key, flags, F_ISSET(argdbp, DB_AM_RDONLY))) != 0) return (ret); GETHANDLE(argdbp, txn, &dbp, ret); @@ -284,19 +288,32 @@ __bam_ditem(dbp, h, indx) case P_LBTREE: /* * If it's a duplicate key, discard the index and don't touch - * the actual page item. This works because no data item can - * have an index that matches any other index so even if the - * data item is in an index "slot", it won't match any other - * index. + * the actual page item. + * + * XXX + * This works because no data item can have an index matching + * any other index so even if the data item is in a key "slot", + * it won't match any other index. */ - if (!(indx % 2)) { - if (indx > 0 && h->inp[indx] == h->inp[indx - P_INDX]) - return (__bam_adjindx(dbp, - h, indx, indx - P_INDX, 0)); + if ((indx % 2) == 0) { + /* + * Check for a duplicate after us on the page. NOTE: + * we have to delete the key item before deleting the + * data item, otherwise the "indx + P_INDX" calculation + * won't work! + */ if (indx + P_INDX < (u_int32_t)NUM_ENT(h) && h->inp[indx] == h->inp[indx + P_INDX]) return (__bam_adjindx(dbp, h, indx, indx + O_INDX, 0)); + /* + * Check for a duplicate before us on the page. It + * doesn't matter if we delete the key item before or + * after the data item for the purposes of this one. 
+ */ + if (indx > 0 && h->inp[indx] == h->inp[indx - P_INDX]) + return (__bam_adjindx(dbp, + h, indx, indx - P_INDX, 0)); } /* FALLTHROUGH */ case P_LRECNO: @@ -396,7 +413,8 @@ __bam_dpage(dbp, key) DB_LOCK lock; PAGE *h; db_pgno_t pgno; - int exact, level, ret; + int level; /* !!!: has to hold number of tree levels. */ + int exact, ret; ret = 0; t = dbp->internal; @@ -527,13 +545,14 @@ __bam_dpages(dbp, t) goto release; /* - * If we deleted the next-to-last item from the root page, the tree - * can collapse a level. Try and write lock the remaining root + 1 - * page and copy it onto the root page. If we can't get the lock, - * that's okay, the tree just stays a level deeper than we'd like. + * If we just deleted the last or next-to-last item from the root page, + * the tree can collapse a level. Write lock the last page referenced + * by the root page and copy it over the root page. If we can't get a + * write lock, that's okay, the tree just remains a level deeper than + * we'd like. */ h = epg->page; - if (h->pgno == PGNO_ROOT && NUM_ENT(h) == 1) { + if (h->pgno == PGNO_ROOT && NUM_ENT(h) <= 1) { pgno = TYPE(epg->page) == P_IBTREE ? GET_BINTERNAL(epg->page, 0)->pgno : GET_RINTERNAL(epg->page, 0)->pgno; @@ -573,13 +592,21 @@ __bam_dpages(dbp, t) (void)memp_fset(dbp->mpf, epg->page, DB_MPOOL_DIRTY); /* - * Free the last page in that level of the btree and discard - * the lock. (The call to __bam_free discards our reference + * Free the page copied onto the root page and discard its + * lock. (The call to __bam_free() discards our reference * to the page.) + * + * It's possible that the reverse split we're doing involves + * pages from the stack of pages we're deleting. Don't free + * the page twice. */ - (void)__bam_free(dbp, h); + if (h->pgno == (epg + 1)->page->pgno) + (void)memp_fput(dbp->mpf, h, 0); + else { + (void)__bam_free(dbp, h); + ++t->lstat.bt_freed; + } (void)__BT_TLPUT(dbp, lock); - ++t->lstat.bt_freed; /* Adjust the cursors. 
*/ __bam_ca_move(dbp, h->pgno, PGNO_ROOT); @@ -596,12 +623,17 @@ __bam_dpages(dbp, t) * Don't bother checking for errors. We've unlinked the subtree from * the tree, and there's no possibility of recovery. */ - for (; ++epg <= t->bt_csp; ++t->lstat.bt_freed) { + while (++epg <= t->bt_csp) { + /* + * XXX + * Why do we need to do this? Isn't the page already empty? + */ if (NUM_ENT(epg->page) != 0) (void)__bam_ditem(dbp, epg->page, epg->indx); (void)__bam_free(dbp, epg->page); (void)__BT_TLPUT(dbp, epg->lock); + ++t->lstat.bt_freed; } return (0); diff --git a/db2/btree/bt_open.c b/db2/btree/bt_open.c index dd9f10927a..f5974ec61e 100644 --- a/db2/btree/bt_open.c +++ b/db2/btree/bt_open.c @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. */ /* @@ -47,7 +47,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)bt_open.c 10.22 (Sleepycat) 1/6/98"; +static const char sccsid[] = "@(#)bt_open.c 10.27 (Sleepycat) 5/6/98"; #endif /* not lint */ /* @@ -60,21 +60,15 @@ static const char sccsid[] = "@(#)bt_open.c 10.22 (Sleepycat) 1/6/98"; #ifndef NO_SYSTEM_INCLUDES #include <sys/types.h> -#include <sys/stat.h> #include <errno.h> -#include <fcntl.h> #include <limits.h> -#include <stdio.h> -#include <stdlib.h> #include <string.h> -#include <unistd.h> #endif #include "db_int.h" #include "db_page.h" #include "btree.h" -#include "common_ext.h" static int __bam_keyalloc __P((BTREE *)); static int __bam_setmeta __P((DB *, BTREE *)); @@ -295,6 +289,7 @@ __bam_setmeta(dbp, t) } /* Initialize the tree structure metadata information. 
*/ + memset(meta, 0, sizeof(BTMETA)); ZERO_LSN(meta->lsn); meta->pgno = PGNO_METADATA; meta->magic = DB_BTREEMAGIC; @@ -303,7 +298,6 @@ __bam_setmeta(dbp, t) meta->maxkey = t->bt_maxkey; meta->minkey = t->bt_minkey; meta->free = PGNO_INVALID; - meta->flags = 0; if (dbp->type == DB_RECNO) F_SET(meta, BTM_RECNO); if (F_ISSET(dbp, DB_AM_DUP)) @@ -314,8 +308,6 @@ __bam_setmeta(dbp, t) F_SET(meta, BTM_RECNUM); if (F_ISSET(dbp, DB_RE_RENUMBER)) F_SET(meta, BTM_RENUMBER); - meta->re_len = 0; - meta->re_pad = 0; memcpy(meta->uid, dbp->lock.fileid, DB_FILE_ID_LEN); /* Create and initialize a root page. */ diff --git a/db2/btree/bt_page.c b/db2/btree/bt_page.c index 853317e835..87f2811398 100644 --- a/db2/btree/bt_page.c +++ b/db2/btree/bt_page.c @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. */ /* @@ -47,14 +47,13 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)bt_page.c 10.7 (Sleepycat) 1/7/98"; +static const char sccsid[] = "@(#)bt_page.c 10.12 (Sleepycat) 5/6/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES #include <sys/types.h> #include <errno.h> -#include <stdio.h> #include <string.h> #endif @@ -142,7 +141,8 @@ __bam_free(dbp, h) DBT ldbt; DB_LOCK metalock; db_pgno_t pgno; - int is_dirty, ret, t_ret; + u_int32_t dirty_flag; + int ret, t_ret; /* * Retrieve the metadata page and insert the page at the head of @@ -150,7 +150,7 @@ __bam_free(dbp, h) * fail, then we need to put the page with which we were called * back because our caller assumes we take care of it. */ - is_dirty = 0; + dirty_flag = 0; pgno = PGNO_METADATA; if ((ret = __bam_lget(dbp, 0, pgno, DB_LOCK_WRITE, &metalock)) != 0) goto err; @@ -178,7 +178,7 @@ __bam_free(dbp, h) * The page should have nothing interesting on it, re-initialize it, * leaving only the page number and the LSN. 
*/ -#ifdef DEBUG +#ifdef DIAGNOSTIC { db_pgno_t __pgno; DB_LSN __lsn; __pgno = h->pgno; __lsn = h->lsn; @@ -198,8 +198,8 @@ __bam_free(dbp, h) ret = t_ret; /* Discard the caller's page reference. */ - is_dirty = DB_MPOOL_DIRTY; -err: if ((t_ret = memp_fput(dbp->mpf, h, is_dirty)) != 0 && ret == 0) + dirty_flag = DB_MPOOL_DIRTY; +err: if ((t_ret = memp_fput(dbp->mpf, h, dirty_flag)) != 0 && ret == 0) ret = t_ret; /* @@ -248,8 +248,10 @@ __bam_lget(dbp, do_couple, pgno, mode, lockp) u_int32_t locker; int ret; - if (!F_ISSET(dbp, DB_AM_LOCKING)) + if (!F_ISSET(dbp, DB_AM_LOCKING)) { + *lockp = LOCK_INVALID; return (0); + } locker = dbp->txn == NULL ? dbp->locker : dbp->txn->txnid; dbp->lock.pgno = pgno; @@ -300,15 +302,15 @@ __bam_lput(dbp, lock) * __bam_pget -- * The standard page get call. * - * PUBLIC: int __bam_pget __P((DB *, PAGE **, db_pgno_t *, int)); + * PUBLIC: int __bam_pget __P((DB *, PAGE **, db_pgno_t *, u_int32_t)); */ int -__bam_pget(dbp, hp, pgnop, mflags) +__bam_pget(dbp, hp, pgnop, mpool_flags) DB *dbp; PAGE **hp; db_pgno_t *pgnop; - int mflags; + u_int32_t mpool_flags; { return (memp_fget((dbp)->mpf, - pgnop, mflags, hp) == 0 ? 0 : __db_pgerr(dbp, *pgnop)); + pgnop, mpool_flags, hp) == 0 ? 0 : __db_pgerr(dbp, *pgnop)); } diff --git a/db2/btree/bt_put.c b/db2/btree/bt_put.c index 87f3fd9aff..a93faac98c 100644 --- a/db2/btree/bt_put.c +++ b/db2/btree/bt_put.c @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. 
*/ /* @@ -47,15 +47,13 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)bt_put.c 10.38 (Sleepycat) 1/8/98"; +static const char sccsid[] = "@(#)bt_put.c 10.45 (Sleepycat) 5/25/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES #include <sys/types.h> #include <errno.h> -#include <stdio.h> -#include <stdlib.h> #include <string.h> #endif @@ -75,21 +73,22 @@ static u_int32_t __bam_partsize __P((DBT *, PAGE *, u_int32_t)); * __bam_put -- * Add a new key/data pair or replace an existing pair (btree). * - * PUBLIC: int __bam_put __P((DB *, DB_TXN *, DBT *, DBT *, int)); + * PUBLIC: int __bam_put __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t)); */ int __bam_put(argdbp, txn, key, data, flags) DB *argdbp; DB_TXN *txn; DBT *key, *data; - int flags; + u_int32_t flags; { BTREE *t; CURSOR c; DB *dbp; PAGE *h; db_indx_t indx; - int exact, iflags, isdeleted, newkey, replace, ret, stack; + u_int32_t iitem_flags, insert_flags; + int exact, isdeleted, newkey, ret, stack; DEBUG_LWRITE(argdbp, txn, "bam_put", key, data, flags); @@ -121,14 +120,13 @@ retry: /* * been marked for deletion, we do a replace, otherwise, it has to be * a set of duplicates, and we simply append a new one to the set. */ - isdeleted = replace = 0; + isdeleted = 0; if (exact) { if ((ret = __bam_isdeleted(dbp, h, indx, &isdeleted)) != 0) goto err; - if (isdeleted) { - replace = 1; + if (isdeleted) __bam_ca_replace(dbp, h->pgno, indx, REPLACE_SETUP); - } else + else if (flags == DB_NOOVERWRITE) { ret = DB_KEYEXIST; goto err; @@ -179,42 +177,38 @@ retry: /* t->bt_csp->page = h = c.page; indx = c.dindx; } - iflags = DB_AFTER; + insert_flags = DB_AFTER; } else - iflags = DB_CURRENT; + insert_flags = DB_CURRENT; } else - iflags = DB_BEFORE; + insert_flags = DB_BEFORE; /* * The pages we're using may be modified by __bam_iitem(), so make * sure we reset the stack. */ - ret = __bam_iitem(dbp, - &h, &indx, key, data, iflags, newkey ? 
BI_NEWKEY : 0); + iitem_flags = 0; + if (newkey) + iitem_flags |= BI_NEWKEY; + if (isdeleted) + iitem_flags |= BI_DOINCR; + ret = __bam_iitem(dbp, &h, &indx, key, data, insert_flags, iitem_flags); t->bt_csp->page = h; t->bt_csp->indx = indx; switch (ret) { case 0: - /* - * Done. Clean up the cursor, and, if we're doing record - * numbers, adjust the internal page counts. - */ - if (replace) + /* Done. Clean up the cursor. */ + if (isdeleted) __bam_ca_replace(dbp, h->pgno, indx, REPLACE_SUCCESS); - - if (!replace && F_ISSET(dbp, DB_BT_RECNUM)) - ret = __bam_adjust(dbp, t, 1); break; case DB_NEEDSPLIT: /* * We have to split the page. Back out the cursor setup, * discard the stack of pages, and do the split. */ - if (replace) { - replace = 0; + if (isdeleted) __bam_ca_replace(dbp, h->pgno, indx, REPLACE_FAILED); - } (void)__bam_stkrel(dbp); stack = 0; @@ -225,7 +219,7 @@ retry: /* goto retry; /* NOTREACHED */ default: - if (replace) + if (isdeleted) __bam_ca_replace(dbp, h->pgno, indx, REPLACE_FAILED); break; } @@ -393,7 +387,8 @@ __bam_lookup(dbp, key, exactp) for (indx = 0; indx < (db_indx_t)(NUM_ENT(h) - P_INDX) && h->inp[indx] == h->inp[indx + P_INDX]; - indx += P_INDX); + indx += P_INDX) + ; e.indx = indx; } goto fast; @@ -427,7 +422,7 @@ slow: return (__bam_search(dbp, key, S_INSERT, 1, NULL, exactp)); * Insert an item into the tree. 
* * PUBLIC: int __bam_iitem __P((DB *, - * PUBLIC: PAGE **, db_indx_t *, DBT *, DBT *, int, int)); + * PUBLIC: PAGE **, db_indx_t *, DBT *, DBT *, u_int32_t, u_int32_t)); */ int __bam_iitem(dbp, hp, indxp, key, data, op, flags) @@ -435,13 +430,13 @@ __bam_iitem(dbp, hp, indxp, key, data, op, flags) PAGE **hp; db_indx_t *indxp; DBT *key, *data; - int op, flags; + u_int32_t op, flags; { BTREE *t; BKEYDATA *bk; DBT tdbt; PAGE *h; - db_indx_t indx; + db_indx_t indx, nbytes; u_int32_t data_size, have_bytes, need_bytes, needed; int bigkey, bigdata, dupadjust, replace, ret; @@ -466,12 +461,27 @@ __bam_iitem(dbp, hp, indxp, key, data, op, flags) ++*indxp; /* Remove the current item if it's a DB_CURRENT op. */ - if (op == DB_CURRENT && (ret = __db_ditem(dbp, *hp, *indxp, - BKEYDATA_SIZE(GET_BKEYDATA(*hp, *indxp)->len))) != 0) - return (ret); + if (op == DB_CURRENT) { + bk = GET_BKEYDATA(*hp, *indxp); + switch (B_TYPE(bk->type)) { + case B_KEYDATA: + nbytes = BKEYDATA_SIZE(bk->len); + break; + case B_OVERFLOW: + nbytes = BOVERFLOW_SIZE; + break; + default: + return (__db_pgfmt(dbp, h->pgno)); + } + if ((ret = __db_ditem(dbp, *hp, *indxp, nbytes)) != 0) + return (ret); + } /* Put the new/replacement item onto the page. */ - return (__db_dput(dbp, data, hp, indxp, __bam_new)); + if ((ret = __db_dput(dbp, data, hp, indxp, __bam_new)) != 0) + return (ret); + + goto done; } /* Handle fixed-length records: build the real record. */ @@ -568,7 +578,7 @@ __bam_iitem(dbp, hp, indxp, key, data, op, flags) case DB_BEFORE: /* 2. Insert a new key/data pair. */ break; default: - abort(); + return (EINVAL); } /* Add the key. 
*/ @@ -638,7 +648,7 @@ __bam_iitem(dbp, hp, indxp, key, data, op, flags) replace = 1; break; default: - abort(); + return (EINVAL); } } @@ -666,9 +676,8 @@ __bam_iitem(dbp, hp, indxp, key, data, op, flags) return (ret); } - ++t->lstat.bt_added; - - ret = memp_fset(dbp->mpf, h, DB_MPOOL_DIRTY); + if ((ret = memp_fset(dbp->mpf, h, DB_MPOOL_DIRTY)) != 0) + return (ret); /* * If the page is at least 50% full, and we added a duplicate, see if @@ -681,9 +690,25 @@ __bam_iitem(dbp, hp, indxp, key, data, op, flags) return (ret); } + /* + * If we've changed the record count, update the tree. Record counts + * need to be updated in recno databases and in btree databases where + * we are supporting records. In both cases, adjust the count if the + * operation wasn't performed on the current record or when the caller + * overrides and wants the adjustment made regardless. + */ +done: if (LF_ISSET(BI_DOINCR) || + (op != DB_CURRENT && + (F_ISSET(dbp, DB_BT_RECNUM) || dbp->type == DB_RECNO))) + if ((ret = __bam_adjust(dbp, t, 1)) != 0) + return (ret); + + /* If we've modified a recno file, set the flag */ if (t->bt_recno != NULL) F_SET(t->bt_recno, RECNO_MODIFIED); + ++t->lstat.bt_added; + return (ret); } @@ -1036,8 +1061,8 @@ __bam_partial(dbp, dbt, h, indx, nbytes) BOVERFLOW *bo; DBT copy; u_int32_t len, tlen; - int ret; u_int8_t *p; + int ret; COMPQUIET(bo, NULL); @@ -1065,59 +1090,62 @@ __bam_partial(dbp, dbt, h, indx, nbytes) bk->len = 0; } - /* We use nul bytes for extending the record, get it over with. */ + /* + * We use nul bytes for any part of the record that isn't specified, + * get it over with. + */ memset(t->bt_rdata.data, 0, nbytes); - tlen = 0; if (B_TYPE(bk->type) == B_OVERFLOW) { - /* Take up to doff bytes from the record. */ + /* + * In the case of an overflow record, we shift things around + * in the current record rather than allocate a separate copy. 
+ */ memset(&copy, 0, sizeof(copy)); if ((ret = __db_goff(dbp, &copy, bo->tlen, bo->pgno, &t->bt_rdata.data, &t->bt_rdata.ulen)) != 0) return (ret); - tlen += dbt->doff; + + /* Skip any leading data from the original record. */ + tlen = dbt->doff; + p = (u_int8_t *)t->bt_rdata.data + dbt->doff; /* - * If the original record was larger than the offset: - * If dlen > size, shift the remaining data down. - * If dlen < size, shift the remaining data up. + * Copy in any trailing data from the original record. + * + * If the original record was larger than the original offset + * plus the bytes being deleted, there is trailing data in the + * original record we need to preserve. If we aren't deleting + * the same number of bytes as we're inserting, copy it up or + * down, into place. + * * Use memmove(), the regions may overlap. */ - p = t->bt_rdata.data; - if (bo->tlen > dbt->doff) - if (dbt->dlen > dbt->size) { - tlen += len = bo->tlen - - dbt->doff - (dbt->dlen - dbt->size); - memmove(p + dbt->doff + dbt->size, - p + dbt->doff + dbt->dlen, len); - } else if (dbt->dlen < dbt->size) { - tlen += len = bo->tlen - - dbt->doff - (dbt->size - dbt->dlen); - memmove(p + dbt->doff + dbt->dlen, - p + dbt->doff + dbt->size, len); - } else - tlen += bo->tlen - dbt->doff; + if (bo->tlen > dbt->doff + dbt->dlen) { + len = bo->tlen - (dbt->doff + dbt->dlen); + if (dbt->dlen != dbt->size) + memmove(p + dbt->size, p + dbt->dlen, len); + tlen += len; + } - /* Copy in the user's data. */ - memcpy((u_int8_t *)t->bt_rdata.data + dbt->doff, - dbt->data, dbt->size); + /* Copy in the application provided data. */ + memcpy(p, dbt->data, dbt->size); tlen += dbt->size; } else { - /* Take up to doff bytes from the record. */ + /* Copy in any leading data from the original record. */ memcpy(t->bt_rdata.data, bk->data, dbt->doff > bk->len ? bk->len : dbt->doff); - tlen += dbt->doff; + tlen = dbt->doff; + p = (u_int8_t *)t->bt_rdata.data + dbt->doff; - /* Copy in the user's data.
*/ - memcpy((u_int8_t *)t->bt_rdata.data + - dbt->doff, dbt->data, dbt->size); + /* Copy in the application provided data. */ + memcpy(p, dbt->data, dbt->size); tlen += dbt->size; - /* Copy in any remaining data. */ + /* Copy in any trailing data from the original record. */ len = dbt->doff + dbt->dlen; if (bk->len > len) { - memcpy((u_int8_t *)t->bt_rdata.data + dbt->doff + - dbt->size, bk->data + len, bk->len - len); + memcpy(p + dbt->size, bk->data + len, bk->len - len); tlen += bk->len - len; } } diff --git a/db2/btree/bt_rec.c b/db2/btree/bt_rec.c index 90ee13764e..fe33825ec4 100644 --- a/db2/btree/bt_rec.c +++ b/db2/btree/bt_rec.c @@ -1,23 +1,20 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. */ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)bt_rec.c 10.18 (Sleepycat) 12/15/97"; +static const char sccsid[] = "@(#)bt_rec.c 10.21 (Sleepycat) 4/28/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES #include <sys/types.h> -#include <ctype.h> #include <errno.h> -#include <stddef.h> -#include <stdlib.h> #include <string.h> #endif @@ -27,7 +24,6 @@ static const char sccsid[] = "@(#)bt_rec.c 10.18 (Sleepycat) 12/15/97"; #include "hash.h" #include "btree.h" #include "log.h" -#include "db_dispatch.h" #include "common_ext.h" /* @@ -51,7 +47,7 @@ __bam_pg_alloc_recover(logp, dbtp, lsnp, redo, info) PAGE *pagep; DB *file_dbp, *mdbp; db_pgno_t pgno; - int cmp_n, cmp_p, created, modified, ret; + int cmp_n, cmp_p, modified, ret; REC_PRINT(__bam_pg_alloc_print); REC_INTRO(__bam_pg_alloc_read); @@ -86,18 +82,17 @@ __bam_pg_alloc_recover(logp, dbtp, lsnp, redo, info) } /* Fix up the allocated page. 
*/ - created = IS_ZERO_LSN(LSN(pagep)); modified = 0; cmp_n = log_compare(lsnp, &LSN(pagep)); cmp_p = log_compare(&LSN(pagep), &argp->page_lsn); - if ((created || cmp_p == 0) && redo) { + if (cmp_p == 0 && redo) { /* Need to redo update described. */ P_INIT(pagep, file_dbp->pgsize, argp->pgno, PGNO_INVALID, PGNO_INVALID, 0, argp->ptype); pagep->lsn = *lsnp; modified = 1; - } else if ((created || cmp_n == 0) && !redo) { + } else if (cmp_n == 0 && !redo) { /* Need to undo update described. */ P_INIT(pagep, file_dbp->pgsize, argp->pgno, PGNO_INVALID, meta->free, 0, P_INVALID); diff --git a/db2/btree/bt_recno.c b/db2/btree/bt_recno.c index 70ab63b8d4..38dbbd1c55 100644 --- a/db2/btree/bt_recno.c +++ b/db2/btree/bt_recno.c @@ -1,14 +1,14 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1997 + * Copyright (c) 1997, 1998 * Sleepycat Software. All rights reserved. */ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)bt_recno.c 10.26 (Sleepycat) 1/8/98"; +static const char sccsid[] = "@(#)bt_recno.c 10.37 (Sleepycat) 5/23/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -16,8 +16,6 @@ static const char sccsid[] = "@(#)bt_recno.c 10.26 (Sleepycat) 1/8/98"; #include <errno.h> #include <limits.h> -#include <stdio.h> -#include <stdlib.h> #include <string.h> #endif @@ -25,16 +23,17 @@ static const char sccsid[] = "@(#)bt_recno.c 10.26 (Sleepycat) 1/8/98"; #include "db_page.h" #include "btree.h" -static int __ram_add __P((DB *, db_recno_t *, DBT *, int, int)); +static int __ram_add __P((DB *, db_recno_t *, DBT *, u_int32_t, u_int32_t)); static int __ram_c_close __P((DBC *)); -static int __ram_c_del __P((DBC *, int)); -static int __ram_c_get __P((DBC *, DBT *, DBT *, int)); -static int __ram_c_put __P((DBC *, DBT *, DBT *, int)); +static int __ram_c_del __P((DBC *, u_int32_t)); +static int __ram_c_get __P((DBC *, DBT *, DBT *, u_int32_t)); +static int __ram_c_put __P((DBC *, DBT *, DBT *, u_int32_t)); static int __ram_fmap 
__P((DB *, db_recno_t)); -static int __ram_get __P((DB *, DB_TXN *, DBT *, DBT *, int)); -static int __ram_put __P((DB *, DB_TXN *, DBT *, DBT *, int)); +static int __ram_get __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t)); +static int __ram_iget __P((DB *, DBT *, DBT *)); +static int __ram_put __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t)); static int __ram_source __P((DB *, RECNO *, const char *)); -static int __ram_sync __P((DB *, int)); +static int __ram_sync __P((DB *, u_int32_t)); static int __ram_update __P((DB *, db_recno_t, int)); static int __ram_vmap __P((DB *, db_recno_t)); static int __ram_writeback __P((DB *)); @@ -142,7 +141,7 @@ __ram_open(dbp, type, dbinfo) err: /* If we mmap'd a source file, discard it. */ if (rp->re_smap != NULL) - (void)__db_unmap(rp->re_smap, rp->re_msize); + (void)__db_unmapfile(rp->re_smap, rp->re_msize); /* If we opened a source file, discard it. */ if (rp->re_fd != -1) @@ -199,9 +198,9 @@ __ram_cursor(dbp, txn, dbcp) * All cursors are queued from the master DB structure. Add the * cursor to that queue. */ - DB_THREAD_LOCK(dbp); + CURSOR_SETUP(dbp); TAILQ_INSERT_HEAD(&dbp->curs_queue, dbc, links); - DB_THREAD_UNLOCK(dbp); + CURSOR_TEARDOWN(dbp); *dbcp = dbc; return (0); @@ -216,16 +215,10 @@ __ram_get(argdbp, txn, key, data, flags) DB *argdbp; DB_TXN *txn; DBT *key, *data; - int flags; + u_int32_t flags; { - BTREE *t; DB *dbp; - PAGE *h; - db_indx_t indx; - db_recno_t recno; - int exact, ret, stack; - - stack = 0; + int ret; DEBUG_LWRITE(argdbp, txn, "ram_get", key, NULL, flags); @@ -234,6 +227,30 @@ __ram_get(argdbp, txn, key, data, flags) return (ret); GETHANDLE(argdbp, txn, &dbp, ret); + + ret = __ram_iget(dbp, key, data); + + PUTHANDLE(dbp); + return (ret); +} + +/* + * __ram_iget -- + * Internal ram get function, called for both standard and cursor + * get after the flags have been checked. 
+ */ +static int +__ram_iget(dbp, key, data) + DB *dbp; + DBT *key, *data; +{ + BTREE *t; + PAGE *h; + db_indx_t indx; + db_recno_t recno; + int exact, ret, stack; + + stack = 0; t = dbp->internal; /* Check the user's record number and fill in as necessary. */ @@ -265,7 +282,6 @@ done: /* Discard the stack. */ if (stack) __bam_stkrel(dbp); - PUTHANDLE(dbp); return (ret); } @@ -278,7 +294,7 @@ __ram_put(argdbp, txn, key, data, flags) DB *argdbp; DB_TXN *txn; DBT *key, *data; - int flags; + u_int32_t flags; { BTREE *t; DB *dbp; @@ -324,7 +340,7 @@ __ram_put(argdbp, txn, key, data, flags) static int __ram_sync(argdbp, flags) DB *argdbp; - int flags; + u_int32_t flags; { DB *dbp; int ret; @@ -361,7 +377,7 @@ __ram_close(argdbp) /* Close any underlying mmap region. */ if (rp->re_smap != NULL) - (void)__db_unmap(rp->re_smap, rp->re_msize); + (void)__db_unmapfile(rp->re_smap, rp->re_msize); /* Close any backing source file descriptor. */ if (rp->re_fd != -1) @@ -403,17 +419,10 @@ __ram_c_iclose(dbp, dbc) DB *dbp; DBC *dbc; { - /* - * All cursors are queued from the master DB structure. For - * now, discard the DB handle which triggered this call, and - * replace it with the cursor's reference. - */ - dbp = dbc->dbp; - /* Remove the cursor from the queue. */ - DB_THREAD_LOCK(dbp); + CURSOR_SETUP(dbp); TAILQ_REMOVE(&dbp->curs_queue, dbc, links); - DB_THREAD_UNLOCK(dbp); + CURSOR_TEARDOWN(dbp); /* Discard the structures. */ FREE(dbc->internal, sizeof(RCURSOR)); @@ -429,7 +438,7 @@ __ram_c_iclose(dbp, dbc) static int __ram_c_del(dbc, flags) DBC *dbc; - int flags; + u_int32_t flags; { DBT key; RCURSOR *cp; @@ -466,7 +475,7 @@ static int __ram_c_get(dbc, key, data, flags) DBC *dbc; DBT *key, *data; - int flags; + u_int32_t flags; { BTREE *t; DB *dbp; @@ -537,7 +546,7 @@ retry: /* Update the record number. */ /* * Return the key if the user didn't give us one, and then pass it - * into __ram_get(). + * into __ram_iget(). 
*/ if (flags != DB_SET && flags != DB_SET_RANGE && (ret = __db_retcopy(key, &cp->recno, sizeof(cp->recno), @@ -555,7 +564,7 @@ retry: /* Update the record number. */ * * Skip any keys that don't really exist. */ - if ((ret = __ram_get(dbp, dbc->txn, key, data, 0)) != 0) + if ((ret = __ram_iget(dbp, key, data)) != 0) if (ret == DB_KEYEMPTY && (flags == DB_NEXT || flags == DB_PREV)) goto retry; @@ -575,7 +584,7 @@ static int __ram_c_put(dbc, key, data, flags) DBC *dbc; DBT *key, *data; - int flags; + u_int32_t flags; { BTREE *t; RCURSOR *cp, copy; @@ -624,28 +633,21 @@ split: arg = &cp->recno; if ((ret = __bam_stkrel(dbp)) != 0) goto err; - if (flags != DB_CURRENT) { - /* Adjust the counts. */ - if ((ret = __bam_adjust(dbp, t, 1)) != 0) - goto err; - - switch (flags) { - case DB_AFTER: - /* Adjust the cursors. */ - __ram_ca(dbp, cp->recno, CA_IAFTER); - - /* Set this cursor to reference the new record. */ - cp->recno = copy.recno + 1; - break; - case DB_BEFORE: - /* Adjust the cursors. */ - __ram_ca(dbp, cp->recno, CA_IBEFORE); + switch (flags) { + case DB_AFTER: + /* Adjust the cursors. */ + __ram_ca(dbp, cp->recno, CA_IAFTER); - /* Set this cursor to reference the new record. */ - cp->recno = copy.recno; - break; - } + /* Set this cursor to reference the new record. */ + cp->recno = copy.recno + 1; + break; + case DB_BEFORE: + /* Adjust the cursors. */ + __ram_ca(dbp, cp->recno, CA_IBEFORE); + /* Set this cursor to reference the new record. */ + cp->recno = copy.recno; + break; } /* @@ -679,7 +681,7 @@ __ram_ca(dbp, recno, op) /* * Adjust the cursors. See the comment in __bam_ca_delete(). 
*/ - DB_THREAD_LOCK(dbp); + CURSOR_SETUP(dbp); for (dbc = TAILQ_FIRST(&dbp->curs_queue); dbc != NULL; dbc = TAILQ_NEXT(dbc, links)) { cp = (RCURSOR *)dbc->internal; @@ -698,7 +700,7 @@ __ram_ca(dbp, recno, op) break; } } - DB_THREAD_UNLOCK(dbp); + CURSOR_TEARDOWN(dbp); } #ifdef DEBUG @@ -715,14 +717,15 @@ __ram_cprint(dbp) DBC *dbc; RCURSOR *cp; - DB_THREAD_LOCK(dbp); + CURSOR_SETUP(dbp); for (dbc = TAILQ_FIRST(&dbp->curs_queue); dbc != NULL; dbc = TAILQ_NEXT(dbc, links)) { cp = (RCURSOR *)dbc->internal; fprintf(stderr, "%#0x: recno: %lu\n", (u_int)cp, (u_long)cp->recno); } - DB_THREAD_UNLOCK(dbp); + CURSOR_TEARDOWN(dbp); + return (0); } #endif /* DEBUG */ @@ -853,11 +856,11 @@ __ram_source(dbp, rp, fname) const char *fname; { size_t size; - u_int32_t mbytes, bytes; - int oflags, ret; + u_int32_t bytes, mbytes, oflags; + int ret; if ((ret = __db_appname(dbp->dbenv, - DB_APP_DATA, NULL, fname, NULL, &rp->re_source)) != 0) + DB_APP_DATA, NULL, fname, 0, NULL, &rp->re_source)) != 0) return (ret); oflags = F_ISSET(dbp, DB_AM_RDONLY) ? DB_RDONLY : 0; @@ -886,7 +889,8 @@ __ram_source(dbp, rp, fname) } size = mbytes * MEGABYTE + bytes; - if ((ret = __db_map(rp->re_fd, (size_t)size, 1, 1, &rp->re_smap)) != 0) + if ((ret = __db_mapfile(rp->re_source, + rp->re_fd, (size_t)size, 1, &rp->re_smap)) != 0) goto err; rp->re_cmap = rp->re_smap; rp->re_emap = (u_int8_t *)rp->re_smap + (rp->re_msize = size); @@ -952,7 +956,7 @@ __ram_writeback(dbp) * open will fail. 
*/ if (rp->re_smap != NULL) { - (void)__db_unmap(rp->re_smap, rp->re_msize); + (void)__db_unmapfile(rp->re_smap, rp->re_msize); rp->re_smap = NULL; } @@ -1078,19 +1082,22 @@ __ram_fmap(dbp, top) sp = (u_int8_t *)rp->re_cmap; ep = (u_int8_t *)rp->re_emap; - while (recno <= top) { + while (recno < top) { if (sp >= ep) { F_SET(rp, RECNO_EOF); return (DB_NOTFOUND); } len = rp->re_len; for (p = t->bt_rdata.data; - sp < ep && len > 0; *p++ = *sp++, --len); + sp < ep && len > 0; *p++ = *sp++, --len) + ; /* - * Another process may have read some portion of the input - * file already, in which case we just want to discard the - * new record. + * Another process may have read this record from the input + * file and stored it into the database already, in which + * case we don't need to repeat that operation. We detect + * this by checking if the last record we've read is greater + * or equal to the number of records in the database. * * XXX * We should just do a seek, since the records are fixed @@ -1138,17 +1145,20 @@ __ram_vmap(dbp, top) sp = (u_int8_t *)rp->re_cmap; ep = (u_int8_t *)rp->re_emap; - while (recno <= top) { + while (recno < top) { if (sp >= ep) { F_SET(rp, RECNO_EOF); return (DB_NOTFOUND); } - for (data.data = sp; sp < ep && *sp != delim; ++sp); + for (data.data = sp; sp < ep && *sp != delim; ++sp) + ; /* - * Another process may have read some portion of the input - * file already, in which case we just want to discard the - * new record. + * Another process may have read this record from the input + * file and stored it into the database already, in which + * case we don't need to repeat that operation. We detect + * this by checking if the last record we've read is greater + * or equal to the number of records in the database. 
*/ if (rp->re_last >= recno) { data.size = sp - (u_int8_t *)data.data; @@ -1172,12 +1182,13 @@ __ram_add(dbp, recnop, data, flags, bi_flags) DB *dbp; db_recno_t *recnop; DBT *data; - int flags, bi_flags; + u_int32_t flags, bi_flags; { + BKEYDATA *bk; BTREE *t; PAGE *h; db_indx_t indx; - int exact, ret, stack; + int exact, isdeleted, ret, stack; t = dbp->internal; @@ -1190,34 +1201,63 @@ retry: /* Find the slot for insertion. */ stack = 1; /* - * The recno access method doesn't currently support duplicates, so - * if an identical key is already in the tree we're either overwriting - * it or an error is returned. + * If DB_NOOVERWRITE is set and the item already exists in the tree, + * return an error unless the item has been marked for deletion. */ - if (exact && LF_ISSET(DB_NOOVERWRITE)) { - ret = DB_KEYEXIST; - goto err; + isdeleted = 0; + if (exact) { + bk = GET_BKEYDATA(h, indx); + if (B_DISSET(bk->type)) { + isdeleted = 1; + __bam_ca_replace(dbp, h->pgno, indx, REPLACE_SETUP); + } else + if (LF_ISSET(DB_NOOVERWRITE)) { + ret = DB_KEYEXIST; + goto err; + } } /* * Select the arguments for __bam_iitem() and do the insert. If the * key is an exact match, or we're replacing the data item with a - * new data item. If the key isn't an exact match, we're inserting - * a new key/data pair, before the search location. + * new data item, replace the current item. If the key isn't an exact + * match, we're inserting a new key/data pair, before the search + * location. */ - if ((ret = __bam_iitem(dbp, &h, &indx, NULL, - data, exact ? DB_CURRENT : DB_BEFORE, bi_flags)) == DB_NEEDSPLIT) { + switch (ret = __bam_iitem(dbp, + &h, &indx, NULL, data, exact ? DB_CURRENT : DB_BEFORE, bi_flags)) { + case 0: + /* + * Done. Clean up the cursor and adjust the internal page + * counts. + */ + if (isdeleted) + __bam_ca_replace(dbp, h->pgno, indx, REPLACE_SUCCESS); + break; + case DB_NEEDSPLIT: + /* + * We have to split the page. 
Back out the cursor setup, + * discard the stack of pages, and do the split. + */ + if (isdeleted) + __bam_ca_replace(dbp, h->pgno, indx, REPLACE_FAILED); + (void)__bam_stkrel(dbp); stack = 0; + if ((ret = __bam_split(dbp, recnop)) != 0) - goto err; + break; + goto retry; + /* NOTREACHED */ + default: + if (isdeleted) + __bam_ca_replace(dbp, h->pgno, indx, REPLACE_FAILED); + break; } - if (!exact && ret == 0) - __bam_adjust(dbp, t, 1); - err: if (stack) __bam_stkrel(dbp); + return (ret); } diff --git a/db2/btree/bt_rsearch.c b/db2/btree/bt_rsearch.c index ee26221e25..caa6b3515e 100644 --- a/db2/btree/bt_rsearch.c +++ b/db2/btree/bt_rsearch.c @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. */ /* @@ -44,14 +44,11 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)bt_rsearch.c 10.8 (Sleepycat) 8/24/97"; +static const char sccsid[] = "@(#)bt_rsearch.c 10.15 (Sleepycat) 5/6/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES #include <sys/types.h> - -#include <stdio.h> -#include <stdlib.h> #endif #include "db_int.h" @@ -62,13 +59,13 @@ static const char sccsid[] = "@(#)bt_rsearch.c 10.8 (Sleepycat) 8/24/97"; * __bam_rsearch -- * Search a btree for a record number. 
* - * PUBLIC: int __bam_rsearch __P((DB *, db_recno_t *, u_int, int, int *)); + * PUBLIC: int __bam_rsearch __P((DB *, db_recno_t *, u_int32_t, int, int *)); */ int __bam_rsearch(dbp, recnop, flags, stop, exactp) DB *dbp; db_recno_t *recnop; - u_int flags; + u_int32_t flags; int stop, *exactp; { BINTERNAL *bi; @@ -78,7 +75,7 @@ __bam_rsearch(dbp, recnop, flags, stop, exactp) RINTERNAL *ri; db_indx_t indx, top; db_pgno_t pg; - db_recno_t recno, total; + db_recno_t i, recno, total; int isappend, ret, stack; t = dbp->internal; @@ -136,8 +133,7 @@ __bam_rsearch(dbp, recnop, flags, stop, exactp) *exactp = 1; else { *exactp = 0; - if (flags == S_DELETE || - flags == S_FIND || recno > total + 1) { + if (!PAST_END_OK(flags) || recno > total + 1) { (void)memp_fput(dbp->mpf, h, 0); (void)__BT_LPUT(dbp, lock); return (DB_NOTFOUND); @@ -164,30 +160,65 @@ __bam_rsearch(dbp, recnop, flags, stop, exactp) stack = 1; } - /* Records in the tree are 0-based, and record numbers are 1-based. */ - --recno; - + /* + * !!! + * Record numbers in the tree are 0-based, but the recno is + * 1-based. All of the calculations below have to take this + * into account. + */ for (total = 0;;) { switch (TYPE(h)) { case P_LBTREE: - BT_STK_ENTER(t, h, (recno - total) * P_INDX, lock, ret); + recno -= total; + + /* + * There may be logically deleted records on the page, + * walk the page correcting for them. The record may + * not exist if there are enough deleted records in the + * page. + */ + if (recno <= NUM_ENT(h)) + for (i = recno - 1;; --i) { + if (B_DISSET(GET_BKEYDATA(h, + i * P_INDX + O_INDX)->type)) + ++recno; + if (i == 0) + break; + } + if (recno > NUM_ENT(h)) { + *exactp = 0; + if (!PAST_END_OK(flags) || + recno > (db_recno_t)(NUM_ENT(h) + 1)) { + ret = DB_NOTFOUND; + goto err; + } + + } + + /* Correct from 1-based to 0-based for a page offset. 
*/ + --recno; + BT_STK_ENTER(t, h, recno * P_INDX, lock, ret); return (ret); case P_IBTREE: for (indx = 0, top = NUM_ENT(h);;) { bi = GET_BINTERNAL(h, indx); - if (++indx == top || total + bi->nrecs > recno) + if (++indx == top || total + bi->nrecs >= recno) break; total += bi->nrecs; } pg = bi->pgno; break; case P_LRECNO: - BT_STK_ENTER(t, h, recno - total, lock, ret); + recno -= total; + + /* Correct from 1-based to 0-based for a page offset. */ + --recno; + BT_STK_ENTER(t, h, recno, lock, ret); return (ret); case P_IRECNO: for (indx = 0, top = NUM_ENT(h);;) { ri = GET_RINTERNAL(h, indx); - if (++indx == top || total + ri->nrecs > recno) + if (++indx == top || total + ri->nrecs >= recno) break; total += ri->nrecs; } @@ -244,13 +275,13 @@ err: BT_STK_POP(t); * __bam_adjust -- * Adjust the tree after adding or deleting a record. * - * PUBLIC: int __bam_adjust __P((DB *, BTREE *, int)); + * PUBLIC: int __bam_adjust __P((DB *, BTREE *, int32_t)); */ int __bam_adjust(dbp, t, adjust) DB *dbp; BTREE *t; - int adjust; + int32_t adjust; { EPG *epg; PAGE *h; @@ -264,7 +295,7 @@ __bam_adjust(dbp, t, adjust) (ret = __bam_cadjust_log(dbp->dbenv->lg_info, dbp->txn, &LSN(h), 0, dbp->log_fileid, PGNO(h), &LSN(h), (u_int32_t)epg->indx, - (int32_t)adjust, 1)) != 0) + adjust, 1)) != 0) return (ret); if (TYPE(h) == P_IBTREE) @@ -322,26 +353,31 @@ db_recno_t __bam_total(h) PAGE *h; { - db_recno_t recs; - db_indx_t nxt, top; + db_recno_t nrecs; + db_indx_t indx, top; + + nrecs = 0; + top = NUM_ENT(h); switch (TYPE(h)) { case P_LBTREE: - recs = NUM_ENT(h) / 2; + /* Check for logically deleted records. 
*/ + for (indx = 0; indx < top; indx += P_INDX) + if (!B_DISSET(GET_BKEYDATA(h, indx + O_INDX)->type)) + ++nrecs; break; case P_IBTREE: - for (recs = 0, nxt = 0, top = NUM_ENT(h); nxt < top; ++nxt) - recs += GET_BINTERNAL(h, nxt)->nrecs; + for (indx = 0; indx < top; indx += O_INDX) + nrecs += GET_BINTERNAL(h, indx)->nrecs; break; case P_LRECNO: - recs = NUM_ENT(h); + nrecs = NUM_ENT(h); break; case P_IRECNO: - for (recs = 0, nxt = 0, top = NUM_ENT(h); nxt < top; ++nxt) - recs += GET_RINTERNAL(h, nxt)->nrecs; + for (indx = 0; indx < top; indx += O_INDX) + nrecs += GET_RINTERNAL(h, indx)->nrecs; break; - default: - abort(); } - return (recs); + + return (nrecs); } diff --git a/db2/btree/bt_search.c b/db2/btree/bt_search.c index c39c9af322..09ce46d90a 100644 --- a/db2/btree/bt_search.c +++ b/db2/btree/bt_search.c @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. */ /* @@ -47,15 +47,13 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)bt_search.c 10.9 (Sleepycat) 11/18/97"; +static const char sccsid[] = "@(#)bt_search.c 10.15 (Sleepycat) 5/6/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES #include <sys/types.h> #include <errno.h> -#include <stdio.h> -#include <stdlib.h> #include <string.h> #endif @@ -68,13 +66,13 @@ static const char sccsid[] = "@(#)bt_search.c 10.9 (Sleepycat) 11/18/97"; * Search a btree for a key. * * PUBLIC: int __bam_search __P((DB *, - * PUBLIC: const DBT *, u_int, int, db_recno_t *, int *)); + * PUBLIC: const DBT *, u_int32_t, int, db_recno_t *, int *)); */ int __bam_search(dbp, key, flags, stop, recnop, exactp) DB *dbp; const DBT *key; - u_int flags; + u_int32_t flags; int stop, *exactp; db_recno_t *recnop; { @@ -109,8 +107,7 @@ __bam_search(dbp, key, flags, stop, recnop, exactp) * Retrieve the root page. 
*/ pg = PGNO_ROOT; - stack = F_ISSET(dbp, DB_BT_RECNUM) && - (flags == S_INSERT || flags == S_DELETE); + stack = F_ISSET(dbp, DB_BT_RECNUM) && LF_ISSET(S_STACK); if ((ret = __bam_lget(dbp, 0, pg, stack ? DB_LOCK_WRITE : DB_LOCK_READ, &lock)) != 0) return (ret); @@ -179,6 +176,14 @@ __bam_search(dbp, key, flags, stop, recnop, exactp) if (LF_ISSET(S_EXACT)) goto notfound; + /* + * !!! + * Possibly returning a deleted record -- DB_SET_RANGE, + * DB_KEYFIRST and DB_KEYLAST don't require an exact + * match, and we don't want to walk multiple pages here + * to find an undeleted record. This is handled in the + * __bam_c_search() routine. + */ BT_STK_ENTER(t, h, base, lock, ret); return (ret); } @@ -249,7 +254,10 @@ match: *exactp = 1; /* * If we got here, we know that we have a btree leaf page. * - * If there are duplicates, go to the first/last one. + * If there are duplicates, go to the first/last one. This is + * safe because we know that we're not going to leave the page, + * all duplicate sets that are not on overflow pages exist on a + * single leaf page. */ if (LF_ISSET(S_DUPLAST)) while (indx < (db_indx_t)(NUM_ENT(h) - P_INDX) && @@ -261,8 +269,8 @@ match: *exactp = 1; indx -= P_INDX; /* - * Now check if we are allowed to return deleted item; if not - * find/last the first non-deleted item. + * Now check if we are allowed to return deleted items; if not + * find the next (or previous) non-deleted item. */ if (LF_ISSET(S_DELNO)) { if (LF_ISSET(S_DUPLAST)) diff --git a/db2/btree/bt_split.c b/db2/btree/bt_split.c index 219d486dc5..da9417c781 100644 --- a/db2/btree/bt_split.c +++ b/db2/btree/bt_split.c @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. 
*/ /* @@ -44,7 +44,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)bt_split.c 10.18 (Sleepycat) 11/23/97"; +static const char sccsid[] = "@(#)bt_split.c 10.23 (Sleepycat) 5/23/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -52,8 +52,6 @@ static const char sccsid[] = "@(#)bt_split.c 10.18 (Sleepycat) 11/23/97"; #include <errno.h> #include <limits.h> -#include <stdio.h> -#include <stdlib.h> #include <string.h> #endif @@ -168,8 +166,10 @@ __bam_root(dbp, cp) t = dbp->internal; /* Yeah, right. */ - if (cp->page->level >= MAXBTREELEVEL) - return (ENOSPC); + if (cp->page->level >= MAXBTREELEVEL) { + ret = ENOSPC; + goto err; + } /* Create new left and right pages for the split. */ lp = rp = NULL; @@ -237,18 +237,16 @@ __bam_page(dbp, pp, cp) DB *dbp; EPG *pp, *cp; { - BTREE *t; DB_LOCK tplock; PAGE *lp, *rp, *tp; int ret; - t = dbp->internal; lp = rp = tp = NULL; ret = -1; /* Create new right page for the split. */ if ((ret = __bam_new(dbp, TYPE(cp->page), &rp)) != 0) - return (ret); + goto err; P_INIT(rp, dbp->pgsize, rp->pgno, ISINTERNAL(cp->page) ? PGNO_INVALID : cp->page->pgno, ISINTERNAL(cp->page) ? PGNO_INVALID : cp->page->next_pgno, @@ -259,7 +257,7 @@ __bam_page(dbp, pp, cp) ret = ENOMEM; goto err; } -#ifdef DEBUG +#ifdef DIAGNOSTIC memset(lp, 0xff, dbp->pgsize); #endif P_INIT(lp, dbp->pgsize, cp->page->pgno, @@ -906,13 +904,13 @@ __bam_copy(dbp, pp, cp, nxt, stop) PAGE *pp, *cp; u_int32_t nxt, stop; { - db_indx_t dup, nbytes, off; + db_indx_t nbytes, off; /* * Copy the rest of the data to the right page. Nxt is the next * offset placed on the target page. 
*/ - for (dup = off = 0; nxt < stop; ++nxt, ++NUM_ENT(cp), ++off) { + for (off = 0; nxt < stop; ++nxt, ++NUM_ENT(cp), ++off) { switch (TYPE(pp)) { case P_IBTREE: if (B_TYPE(GET_BINTERNAL(pp, nxt)->type) == B_KEYDATA) diff --git a/db2/btree/bt_stat.c b/db2/btree/bt_stat.c index e88b5dac2d..2236434b38 100644 --- a/db2/btree/bt_stat.c +++ b/db2/btree/bt_stat.c @@ -1,21 +1,20 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. */ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)bt_stat.c 10.14 (Sleepycat) 10/25/97"; +static const char sccsid[] = "@(#)bt_stat.c 10.17 (Sleepycat) 4/26/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES #include <sys/types.h> #include <errno.h> -#include <stdlib.h> #include <string.h> #endif @@ -29,14 +28,14 @@ static void __bam_add_rstat __P((DB_BTREE_LSTAT *, DB_BTREE_STAT *)); * __bam_stat -- * Gather/print the btree statistics * - * PUBLIC: int __bam_stat __P((DB *, void *, void *(*)(size_t), int)); + * PUBLIC: int __bam_stat __P((DB *, void *, void *(*)(size_t), u_int32_t)); */ int __bam_stat(argdbp, spp, db_malloc, flags) DB *argdbp; void *spp; void *(*db_malloc) __P((size_t)); - int flags; + u_int32_t flags; { BTMETA *meta; BTREE *t; diff --git a/db2/btree/btree.src b/db2/btree/btree.src index 6145696d28..928dce2196 100644 --- a/db2/btree/btree.src +++ b/db2/btree/btree.src @@ -1,16 +1,12 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. 
+ * + * @(#)btree.src 10.8 (Sleepycat) 4/10/98 */ -#include "config.h" - -#ifndef lint -static const char sccsid[] = "@(#)btree.src 10.6 (Sleepycat) 11/2/97"; -#endif /* not lint */ - PREFIX bam /* diff --git a/db2/btree/btree_auto.c b/db2/btree/btree_auto.c index 18bbd5db37..75eadb1d62 100644 --- a/db2/btree/btree_auto.c +++ b/db2/btree/btree_auto.c @@ -15,8 +15,6 @@ #include "db_dispatch.h" #include "btree.h" #include "db_am.h" -#include "common_ext.h" - /* * PUBLIC: int __bam_pg_alloc_log * PUBLIC: __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t, @@ -85,7 +83,7 @@ int __bam_pg_alloc_log(logp, txnid, ret_lsnp, flags, bp += sizeof(ptype); memcpy(bp, &next, sizeof(next)); bp += sizeof(next); -#ifdef DEBUG +#ifdef DIAGNOSTIC if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size) fprintf(stderr, "Error in log record length"); #endif @@ -101,22 +99,23 @@ int __bam_pg_alloc_log(logp, txnid, ret_lsnp, flags, * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); */ int -__bam_pg_alloc_print(notused1, dbtp, lsnp, notused3, notused4) +__bam_pg_alloc_print(notused1, dbtp, lsnp, notused2, notused3) DB_LOG *notused1; DBT *dbtp; DB_LSN *lsnp; - int notused3; - void *notused4; + int notused2; + void *notused3; { __bam_pg_alloc_args *argp; u_int32_t i; - int c, ret; + u_int ch; + int ret; i = 0; - c = 0; + ch = 0; notused1 = NULL; - notused3 = 0; - notused4 = NULL; + notused2 = 0; + notused3 = NULL; if ((ret = __bam_pg_alloc_read(dbtp->data, &argp)) != 0) return (ret); @@ -249,7 +248,7 @@ int __bam_pg_free_log(logp, txnid, ret_lsnp, flags, } memcpy(bp, &next, sizeof(next)); bp += sizeof(next); -#ifdef DEBUG +#ifdef DIAGNOSTIC if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size) fprintf(stderr, "Error in log record length"); #endif @@ -265,22 +264,23 @@ int __bam_pg_free_log(logp, txnid, ret_lsnp, flags, * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); */ int -__bam_pg_free_print(notused1, dbtp, lsnp, notused3, notused4) +__bam_pg_free_print(notused1, 
dbtp, lsnp, notused2, notused3) DB_LOG *notused1; DBT *dbtp; DB_LSN *lsnp; - int notused3; - void *notused4; + int notused2; + void *notused3; { __bam_pg_free_args *argp; u_int32_t i; - int c, ret; + u_int ch; + int ret; i = 0; - c = 0; + ch = 0; notused1 = NULL; - notused3 = 0; - notused4 = NULL; + notused2 = 0; + notused3 = NULL; if ((ret = __bam_pg_free_read(dbtp->data, &argp)) != 0) return (ret); @@ -297,11 +297,11 @@ __bam_pg_free_print(notused1, dbtp, lsnp, notused3, notused4) (u_long)argp->meta_lsn.file, (u_long)argp->meta_lsn.offset); printf("\theader: "); for (i = 0; i < argp->header.size; i++) { - c = ((char *)argp->header.data)[i]; - if (isprint(c) || c == 0xa) - putchar(c); + ch = ((u_int8_t *)argp->header.data)[i]; + if (isprint(ch) || ch == 0xa) + putchar(ch); else - printf("%#x ", c); + printf("%#x ", ch); } printf("\n"); printf("\tnext: %lu\n", (u_long)argp->next); @@ -443,7 +443,7 @@ int __bam_split_log(logp, txnid, ret_lsnp, flags, memcpy(bp, pg->data, pg->size); bp += pg->size; } -#ifdef DEBUG +#ifdef DIAGNOSTIC if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size) fprintf(stderr, "Error in log record length"); #endif @@ -459,22 +459,23 @@ int __bam_split_log(logp, txnid, ret_lsnp, flags, * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); */ int -__bam_split_print(notused1, dbtp, lsnp, notused3, notused4) +__bam_split_print(notused1, dbtp, lsnp, notused2, notused3) DB_LOG *notused1; DBT *dbtp; DB_LSN *lsnp; - int notused3; - void *notused4; + int notused2; + void *notused3; { __bam_split_args *argp; u_int32_t i; - int c, ret; + u_int ch; + int ret; i = 0; - c = 0; + ch = 0; notused1 = NULL; - notused3 = 0; - notused4 = NULL; + notused2 = 0; + notused3 = NULL; if ((ret = __bam_split_read(dbtp->data, &argp)) != 0) return (ret); @@ -498,11 +499,11 @@ __bam_split_print(notused1, dbtp, lsnp, notused3, notused4) (u_long)argp->nlsn.file, (u_long)argp->nlsn.offset); printf("\tpg: "); for (i = 0; i < argp->pg.size; i++) { - c = ((char 
*)argp->pg.data)[i]; - if (isprint(c) || c == 0xa) - putchar(c); + ch = ((u_int8_t *)argp->pg.data)[i]; + if (isprint(ch) || ch == 0xa) + putchar(ch); else - printf("%#x ", c); + printf("%#x ", ch); } printf("\n"); printf("\n"); @@ -639,7 +640,7 @@ int __bam_rsplit_log(logp, txnid, ret_lsnp, flags, else memset(bp, 0, sizeof(*rootlsn)); bp += sizeof(*rootlsn); -#ifdef DEBUG +#ifdef DIAGNOSTIC if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size) fprintf(stderr, "Error in log record length"); #endif @@ -655,22 +656,23 @@ int __bam_rsplit_log(logp, txnid, ret_lsnp, flags, * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); */ int -__bam_rsplit_print(notused1, dbtp, lsnp, notused3, notused4) +__bam_rsplit_print(notused1, dbtp, lsnp, notused2, notused3) DB_LOG *notused1; DBT *dbtp; DB_LSN *lsnp; - int notused3; - void *notused4; + int notused2; + void *notused3; { __bam_rsplit_args *argp; u_int32_t i; - int c, ret; + u_int ch; + int ret; i = 0; - c = 0; + ch = 0; notused1 = NULL; - notused3 = 0; - notused4 = NULL; + notused2 = 0; + notused3 = NULL; if ((ret = __bam_rsplit_read(dbtp->data, &argp)) != 0) return (ret); @@ -685,21 +687,21 @@ __bam_rsplit_print(notused1, dbtp, lsnp, notused3, notused4) printf("\tpgno: %lu\n", (u_long)argp->pgno); printf("\tpgdbt: "); for (i = 0; i < argp->pgdbt.size; i++) { - c = ((char *)argp->pgdbt.data)[i]; - if (isprint(c) || c == 0xa) - putchar(c); + ch = ((u_int8_t *)argp->pgdbt.data)[i]; + if (isprint(ch) || ch == 0xa) + putchar(ch); else - printf("%#x ", c); + printf("%#x ", ch); } printf("\n"); printf("\tnrec: %lu\n", (u_long)argp->nrec); printf("\trootent: "); for (i = 0; i < argp->rootent.size; i++) { - c = ((char *)argp->rootent.data)[i]; - if (isprint(c) || c == 0xa) - putchar(c); + ch = ((u_int8_t *)argp->rootent.data)[i]; + if (isprint(ch) || ch == 0xa) + putchar(ch); else - printf("%#x ", c); + printf("%#x ", ch); } printf("\n"); printf("\trootlsn: [%lu][%lu]\n", @@ -817,7 +819,7 @@ int __bam_adj_log(logp, txnid, 
ret_lsnp, flags, bp += sizeof(indx_copy); memcpy(bp, &is_insert, sizeof(is_insert)); bp += sizeof(is_insert); -#ifdef DEBUG +#ifdef DIAGNOSTIC if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size) fprintf(stderr, "Error in log record length"); #endif @@ -833,22 +835,23 @@ int __bam_adj_log(logp, txnid, ret_lsnp, flags, * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); */ int -__bam_adj_print(notused1, dbtp, lsnp, notused3, notused4) +__bam_adj_print(notused1, dbtp, lsnp, notused2, notused3) DB_LOG *notused1; DBT *dbtp; DB_LSN *lsnp; - int notused3; - void *notused4; + int notused2; + void *notused3; { __bam_adj_args *argp; u_int32_t i; - int c, ret; + u_int ch; + int ret; i = 0; - c = 0; + ch = 0; notused1 = NULL; - notused3 = 0; - notused4 = NULL; + notused2 = 0; + notused3 = NULL; if ((ret = __bam_adj_read(dbtp->data, &argp)) != 0) return (ret); @@ -975,7 +978,7 @@ int __bam_cadjust_log(logp, txnid, ret_lsnp, flags, bp += sizeof(adjust); memcpy(bp, &total, sizeof(total)); bp += sizeof(total); -#ifdef DEBUG +#ifdef DIAGNOSTIC if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size) fprintf(stderr, "Error in log record length"); #endif @@ -991,22 +994,23 @@ int __bam_cadjust_log(logp, txnid, ret_lsnp, flags, * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); */ int -__bam_cadjust_print(notused1, dbtp, lsnp, notused3, notused4) +__bam_cadjust_print(notused1, dbtp, lsnp, notused2, notused3) DB_LOG *notused1; DBT *dbtp; DB_LSN *lsnp; - int notused3; - void *notused4; + int notused2; + void *notused3; { __bam_cadjust_args *argp; u_int32_t i; - int c, ret; + u_int ch; + int ret; i = 0; - c = 0; + ch = 0; notused1 = NULL; - notused3 = 0; - notused4 = NULL; + notused2 = 0; + notused3 = NULL; if ((ret = __bam_cadjust_read(dbtp->data, &argp)) != 0) return (ret); @@ -1124,7 +1128,7 @@ int __bam_cdel_log(logp, txnid, ret_lsnp, flags, bp += sizeof(*lsn); memcpy(bp, &indx, sizeof(indx)); bp += sizeof(indx); -#ifdef DEBUG +#ifdef DIAGNOSTIC if 
((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size) fprintf(stderr, "Error in log record length"); #endif @@ -1140,22 +1144,23 @@ int __bam_cdel_log(logp, txnid, ret_lsnp, flags, * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); */ int -__bam_cdel_print(notused1, dbtp, lsnp, notused3, notused4) +__bam_cdel_print(notused1, dbtp, lsnp, notused2, notused3) DB_LOG *notused1; DBT *dbtp; DB_LSN *lsnp; - int notused3; - void *notused4; + int notused2; + void *notused3; { __bam_cdel_args *argp; u_int32_t i; - int c, ret; + u_int ch; + int ret; i = 0; - c = 0; + ch = 0; notused1 = NULL; - notused3 = 0; - notused4 = NULL; + notused2 = 0; + notused3 = NULL; if ((ret = __bam_cdel_read(dbtp->data, &argp)) != 0) return (ret); @@ -1307,7 +1312,7 @@ int __bam_repl_log(logp, txnid, ret_lsnp, flags, bp += sizeof(prefix); memcpy(bp, &suffix, sizeof(suffix)); bp += sizeof(suffix); -#ifdef DEBUG +#ifdef DIAGNOSTIC if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size) fprintf(stderr, "Error in log record length"); #endif @@ -1323,22 +1328,23 @@ int __bam_repl_log(logp, txnid, ret_lsnp, flags, * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); */ int -__bam_repl_print(notused1, dbtp, lsnp, notused3, notused4) +__bam_repl_print(notused1, dbtp, lsnp, notused2, notused3) DB_LOG *notused1; DBT *dbtp; DB_LSN *lsnp; - int notused3; - void *notused4; + int notused2; + void *notused3; { __bam_repl_args *argp; u_int32_t i; - int c, ret; + u_int ch; + int ret; i = 0; - c = 0; + ch = 0; notused1 = NULL; - notused3 = 0; - notused4 = NULL; + notused2 = 0; + notused3 = NULL; if ((ret = __bam_repl_read(dbtp->data, &argp)) != 0) return (ret); @@ -1357,20 +1363,20 @@ __bam_repl_print(notused1, dbtp, lsnp, notused3, notused4) printf("\tisdeleted: %lu\n", (u_long)argp->isdeleted); printf("\torig: "); for (i = 0; i < argp->orig.size; i++) { - c = ((char *)argp->orig.data)[i]; - if (isprint(c) || c == 0xa) - putchar(c); + ch = ((u_int8_t *)argp->orig.data)[i]; + if (isprint(ch) || 
ch == 0xa) + putchar(ch); else - printf("%#x ", c); + printf("%#x ", ch); } printf("\n"); printf("\trepl: "); for (i = 0; i < argp->repl.size; i++) { - c = ((char *)argp->repl.data)[i]; - if (isprint(c) || c == 0xa) - putchar(c); + ch = ((u_int8_t *)argp->repl.data)[i]; + if (isprint(ch) || ch == 0xa) + putchar(ch); else - printf("%#x ", c); + printf("%#x ", ch); } printf("\n"); printf("\tprefix: %lu\n", (u_long)argp->prefix); diff --git a/db2/clib/getlong.c b/db2/clib/getlong.c index 85f4e8c9e2..4e144b14dc 100644 --- a/db2/clib/getlong.c +++ b/db2/clib/getlong.c @@ -1,14 +1,14 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. */ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)getlong.c 10.2 (Sleepycat) 5/1/97"; +static const char sccsid[] = "@(#)getlong.c 10.3 (Sleepycat) 4/10/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES diff --git a/db2/common/db_appinit.c b/db2/common/db_appinit.c index 4ee9e4f40c..6ec007be0a 100644 --- a/db2/common/db_appinit.c +++ b/db2/common/db_appinit.c @@ -1,23 +1,21 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. 
*/ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)db_appinit.c 10.38 (Sleepycat) 1/7/98"; +static const char sccsid[] = "@(#)db_appinit.c 10.52 (Sleepycat) 6/2/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES -#include <sys/param.h> -#include <sys/stat.h> +#include <sys/types.h> #include <ctype.h> #include <errno.h> -#include <fcntl.h> #include <signal.h> #include <stdlib.h> #include <string.h> @@ -34,14 +32,14 @@ static const char sccsid[] = "@(#)db_appinit.c 10.38 (Sleepycat) 1/7/98"; #include "clib_ext.h" #include "common_ext.h" -static int __db_home __P((DB_ENV *, const char *, int)); +static int __db_home __P((DB_ENV *, const char *, u_int32_t)); static int __db_parse __P((DB_ENV *, char *)); -static int __db_tmp_dir __P((DB_ENV *, int)); -static int __db_tmp_open __P((DB_ENV *, char *, int *)); +static int __db_tmp_dir __P((DB_ENV *, u_int32_t)); +static int __db_tmp_open __P((DB_ENV *, u_int32_t, char *, int *)); /* * db_version -- - * Return verision information. + * Return version information. */ char * db_version(majverp, minverp, patchp) @@ -65,16 +63,18 @@ db_appinit(db_home, db_config, dbenv, flags) const char *db_home; char * const *db_config; DB_ENV *dbenv; - int flags; + u_int32_t flags; { FILE *fp; - int ret; + int mode, ret; char * const *p; char *lp, buf[MAXPATHLEN * 2]; /* Validate arguments. */ if (dbenv == NULL) return (EINVAL); + + #ifdef HAVE_SPINLOCKS #define OKFLAGS \ (DB_CREATE | DB_NOMMAP | DB_THREAD | DB_INIT_LOCK | DB_INIT_LOG | \ @@ -89,10 +89,9 @@ db_appinit(db_home, db_config, dbenv, flags) if ((ret = __db_fchk(dbenv, "db_appinit", flags, OKFLAGS)) != 0) return (ret); -#define RECOVERY_FLAGS (DB_CREATE | DB_INIT_TXN | DB_INIT_LOG) - if (LF_ISSET(DB_RECOVER | DB_RECOVER_FATAL) && - LF_ISSET(RECOVERY_FLAGS) != RECOVERY_FLAGS) - return (__db_ferr(dbenv, "db_appinit", 1)); + /* Transactions imply logging. */ + if (LF_ISSET(DB_INIT_TXN)) + LF_SET(DB_INIT_LOG); /* Convert the db_appinit(3) flags. 
*/ if (LF_ISSET(DB_THREAD)) @@ -147,47 +146,48 @@ db_appinit(db_home, db_config, dbenv, flags) F_SET(dbenv, DB_ENV_APPINIT); /* - * If we are doing recovery, remove all the regions. + * If we are doing recovery, remove all the old shared memory + * regions. */ if (LF_ISSET(DB_RECOVER | DB_RECOVER_FATAL)) { - /* Remove all the old shared memory regions. */ - if ((ret = log_unlink(NULL, 1 /* force */, dbenv)) != 0) + if ((ret = log_unlink(NULL, 1, dbenv)) != 0) goto err; - if ((ret = memp_unlink(NULL, 1 /* force */, dbenv)) != 0) + if ((ret = memp_unlink(NULL, 1, dbenv)) != 0) goto err; - if ((ret = lock_unlink(NULL, 1 /* force */, dbenv)) != 0) + if ((ret = lock_unlink(NULL, 1, dbenv)) != 0) goto err; - if ((ret = txn_unlink(NULL, 1 /* force */, dbenv)) != 0) + if ((ret = txn_unlink(NULL, 1, dbenv)) != 0) goto err; } - /* Transactions imply logging. */ - if (LF_ISSET(DB_INIT_TXN)) - LF_SET(DB_INIT_LOG); - - /* Default permissions are 0660. */ -#undef DB_DEFPERM -#define DB_DEFPERM (S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP) - - /* Initialize the subsystems. */ + /* + * Create the new shared regions. + * + * Default permissions are read-write for both owner and group. 
+ */ + mode = __db_omode("rwrw--"); if (LF_ISSET(DB_INIT_LOCK) && (ret = lock_open(NULL, LF_ISSET(DB_CREATE | DB_THREAD), - DB_DEFPERM, dbenv, &dbenv->lk_info)) != 0) + mode, dbenv, &dbenv->lk_info)) != 0) goto err; if (LF_ISSET(DB_INIT_LOG) && (ret = log_open(NULL, LF_ISSET(DB_CREATE | DB_THREAD), - DB_DEFPERM, dbenv, &dbenv->lg_info)) != 0) + mode, dbenv, &dbenv->lg_info)) != 0) goto err; if (LF_ISSET(DB_INIT_MPOOL) && (ret = memp_open(NULL, LF_ISSET(DB_CREATE | DB_MPOOL_PRIVATE | DB_NOMMAP | DB_THREAD), - DB_DEFPERM, dbenv, &dbenv->mp_info)) != 0) + mode, dbenv, &dbenv->mp_info)) != 0) goto err; if (LF_ISSET(DB_INIT_TXN) && (ret = txn_open(NULL, LF_ISSET(DB_CREATE | DB_THREAD | DB_TXN_NOSYNC), - DB_DEFPERM, dbenv, &dbenv->tx_info)) != 0) + mode, dbenv, &dbenv->tx_info)) != 0) goto err; - /* Initialize recovery. */ + /* + * If the application is running with transactions, initialize the + * function tables. Once that's done, do recovery for any previous + * run. + */ if (LF_ISSET(DB_INIT_TXN)) { if ((ret = __bam_init_recover(dbenv)) != 0) goto err; @@ -199,12 +199,12 @@ db_appinit(db_home, db_config, dbenv, flags) goto err; if ((ret = __txn_init_recover(dbenv)) != 0) goto err; - } - /* Run recovery if necessary. */ - if (LF_ISSET(DB_RECOVER | DB_RECOVER_FATAL) && (ret = - __db_apprec(dbenv, LF_ISSET(DB_RECOVER | DB_RECOVER_FATAL))) != 0) - goto err; + if (LF_ISSET(DB_RECOVER | DB_RECOVER_FATAL) && + (ret = __db_apprec(dbenv, + LF_ISSET(DB_RECOVER | DB_RECOVER_FATAL))) != 0) + goto err; + } return (ret); @@ -282,21 +282,21 @@ db_appexit(dbenv) * it in allocated space. 
* * PUBLIC: int __db_appname __P((DB_ENV *, - * PUBLIC: APPNAME, const char *, const char *, int *, char **)); + * PUBLIC: APPNAME, const char *, const char *, u_int32_t, int *, char **)); */ int -__db_appname(dbenv, appname, dir, file, fdp, namep) +__db_appname(dbenv, appname, dir, file, tmp_oflags, fdp, namep) DB_ENV *dbenv; APPNAME appname; const char *dir, *file; + u_int32_t tmp_oflags; int *fdp; char **namep; { DB_ENV etmp; size_t len; - int ret, slash, tmp_create, tmp_free; + int data_entry, ret, slash, tmp_create, tmp_free; const char *a, *b, *c; - int data_entry; char *p, *start; a = b = c = NULL; @@ -349,8 +349,8 @@ __db_appname(dbenv, appname, dir, file, fdp, namep) * * DB_ENV APPNAME RESULT * ------------------------------------------- - * null DB_APP_TMP <tmp>/<create> - * set DB_APP_TMP DB_HOME/DB_TMP_DIR/<create> + * null DB_APP_TMP* <tmp>/<create> + * set DB_APP_TMP* DB_HOME/DB_TMP_DIR/<create> */ retry: switch (appname) { case DB_APP_NONE: @@ -431,7 +431,14 @@ done: len = (c == NULL ? 0 : strlen(c) + 1) + (file == NULL ? 0 : strlen(file) + 1); - if ((start = (char *)__db_malloc(len)) == NULL) { + /* + * Allocate space to hold the current path information, as well as any + * temporary space that we're going to need to create a temporary file + * name. + */ +#define DB_TRAIL "XXXXXX" + if ((start = + (char *)__db_malloc(len + sizeof(DB_TRAIL) + 10)) == NULL) { __db_err(dbenv, "%s", strerror(ENOMEM)); if (tmp_free) FREES(etmp.db_tmp_dir); @@ -460,14 +467,15 @@ done: len = FREES(etmp.db_tmp_dir); /* Create the file if so requested. 
*/ - if (tmp_create) { - ret = __db_tmp_open(dbenv, start, fdp); + if (tmp_create && + (ret = __db_tmp_open(dbenv, tmp_oflags, start, fdp)) != 0) { FREES(start); - } else { - *namep = start; - ret = 0; + return (ret); } - return (ret); + + if (namep != NULL) + *namep = start; + return (0); } /* @@ -478,7 +486,7 @@ static int __db_home(dbenv, db_home, flags) DB_ENV *dbenv; const char *db_home; - int flags; + u_int32_t flags; { const char *p; @@ -532,10 +540,12 @@ __db_parse(dbenv, s) return (ENOMEM); tp = local_s; - while ((name = strsep(&tp, " \t")) != NULL && *name == '\0'); + while ((name = strsep(&tp, " \t")) != NULL && *name == '\0') + ; if (name == NULL) goto illegal; - while ((value = strsep(&tp, " \t")) != NULL && *value == '\0'); + while ((value = strsep(&tp, " \t")) != NULL && *value == '\0') + ; if (value == NULL) { illegal: ret = EINVAL; __db_err(dbenv, "illegal name-value pair: %s", s); @@ -591,7 +601,7 @@ static char *sTempFolder; static int __db_tmp_dir(dbenv, flags) DB_ENV *dbenv; - int flags; + u_int32_t flags; { static const char * list[] = { /* Ordered: see db_appinit(3). */ "/var/tmp", @@ -671,49 +681,45 @@ __db_tmp_dir(dbenv, flags) * Create a temporary file. */ static int -__db_tmp_open(dbenv, dir, fdp) +__db_tmp_open(dbenv, flags, path, fdp) DB_ENV *dbenv; - char *dir; + u_int32_t flags; + char *path; int *fdp; { #ifdef HAVE_SIGFILLSET sigset_t set, oset; #endif u_long pid; - size_t len; - int isdir, ret; - char *trv, buf[MAXPATHLEN]; + int mode, isdir, ret; + const char *p; + char *trv; /* * Check the target directory; if you have six X's and it doesn't * exist, this runs for a *very* long time. 
*/ - if ((ret = __db_exists(dir, &isdir)) != 0) { - __db_err(dbenv, "%s: %s", dir, strerror(ret)); + if ((ret = __db_exists(path, &isdir)) != 0) { + __db_err(dbenv, "%s: %s", path, strerror(ret)); return (ret); } if (!isdir) { - __db_err(dbenv, "%s: %s", dir, strerror(EINVAL)); + __db_err(dbenv, "%s: %s", path, strerror(EINVAL)); return (EINVAL); } /* Build the path. */ -#define DB_TRAIL "/XXXXXX" - if ((len = strlen(dir)) + sizeof(DB_TRAIL) > sizeof(buf)) { - __db_err(dbenv, - "tmp_open: %s: %s", buf, strerror(ENAMETOOLONG)); - return (ENAMETOOLONG); - } - (void)strcpy(buf, dir); - (void)strcpy(buf + len, DB_TRAIL); - buf[len] = PATH_SEPARATOR[0]; /* WIN32 */ + for (trv = path; *trv != '\0'; ++trv) + ; + *trv = PATH_SEPARATOR[0]; + for (p = DB_TRAIL; (*++trv = *p) != '\0'; ++p) + ; /* * Replace the X's with the process ID. Pid should be a pid_t, * but we use unsigned long for portability. */ - for (pid = getpid(), - trv = buf + len + sizeof(DB_TRAIL) - 1; *--trv == 'X'; pid /= 10) + for (pid = getpid(); *--trv == 'X'; pid /= 10) switch (pid % 10) { case 0: *trv = '0'; break; case 1: *trv = '1'; break; @@ -728,30 +734,33 @@ __db_tmp_open(dbenv, dir, fdp) } ++trv; + /* Set up open flags and mode. */ + LF_SET(DB_CREATE | DB_EXCL); + mode = __db_omode("rw----"); + /* - * Try and open a file. We block every signal we can get our hands + * Try to open a file. We block every signal we can get our hands * on so that, if we're interrupted at the wrong time, the temporary * file isn't left around -- of course, if we drop core in-between * the calls we'll hang forever, but that's probably okay. 
;-} */ #ifdef HAVE_SIGFILLSET - (void)sigfillset(&set); + if (LF_ISSET(DB_TEMPORARY)) + (void)sigfillset(&set); #endif for (;;) { #ifdef HAVE_SIGFILLSET - (void)sigprocmask(SIG_BLOCK, &set, &oset); + if (LF_ISSET(DB_TEMPORARY)) + (void)sigprocmask(SIG_BLOCK, &set, &oset); #endif -#define DB_TEMPOPEN DB_CREATE | DB_EXCL | DB_TEMPORARY - if ((ret = __db_open(buf, - DB_TEMPOPEN, DB_TEMPOPEN, S_IRUSR | S_IWUSR, fdp)) == 0) { + ret = __db_open(path, flags, flags, mode, fdp); #ifdef HAVE_SIGFILLSET + if (LF_ISSET(DB_TEMPORARY)) (void)sigprocmask(SIG_SETMASK, &oset, NULL); #endif + if (ret == 0) return (0); - } -#ifdef HAVE_SIGFILLSET - (void)sigprocmask(SIG_SETMASK, &oset, NULL); -#endif + /* * XXX: * If we don't get an EEXIST error, then there's something @@ -761,7 +770,7 @@ __db_tmp_open(dbenv, dir, fdp) */ if (ret != EEXIST) { __db_err(dbenv, - "tmp_open: %s: %s", buf, strerror(ret)); + "tmp_open: %s: %s", path, strerror(ret)); return (ret); } diff --git a/db2/common/db_apprec.c b/db2/common/db_apprec.c index 7a42e13317..df707eafef 100644 --- a/db2/common/db_apprec.c +++ b/db2/common/db_apprec.c @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. */ @@ -9,18 +9,17 @@ #ifndef lint static const char copyright[] = -"@(#) Copyright (c) 1997\n\ +"@(#) Copyright (c) 1996, 1997, 1998\n\ Sleepycat Software Inc. All rights reserved.\n"; -static const char sccsid[] = "@(#)db_apprec.c 10.23 (Sleepycat) 1/17/98"; +static const char sccsid[] = "@(#)db_apprec.c 10.30 (Sleepycat) 5/3/98"; #endif #ifndef NO_SYSTEM_INCLUDES #include <sys/types.h> #include <errno.h> -#include <time.h> #include <string.h> -#include <stdlib.h> +#include <time.h> #endif #include "db_int.h" @@ -36,18 +35,19 @@ static const char sccsid[] = "@(#)db_apprec.c 10.23 (Sleepycat) 1/17/98"; * __db_apprec -- * Perform recovery. 
* - * PUBLIC: int __db_apprec __P((DB_ENV *, int)); + * PUBLIC: int __db_apprec __P((DB_ENV *, u_int32_t)); */ int __db_apprec(dbenv, flags) DB_ENV *dbenv; - int flags; + u_int32_t flags; { DBT data; DB_LOG *lp; DB_LSN ckp_lsn, first_lsn, lsn; time_t now; - int is_thread, ret; + u_int32_t is_thread; + int ret; void *txninfo; lp = dbenv->lg_info; @@ -91,14 +91,14 @@ __db_apprec(dbenv, flags) if ((ret = log_get(lp, &ckp_lsn, &data, DB_CHECKPOINT)) != 0) { /* * If we don't find a checkpoint, start from the beginning. - * If that fails, we're done. Note, we require that there - * be log records if we're performing recovery, and fail if - * there aren't. + * If that fails, we're done. Note, we do not require that + * there be log records if we're performing recovery. */ if ((ret = log_get(lp, &ckp_lsn, &data, DB_FIRST)) != 0) { - __db_err(dbenv, "First log record not found"); if (ret == DB_NOTFOUND) - ret = EINVAL; + ret = 0; + else + __db_err(dbenv, "First log record not found"); goto out; } } @@ -134,14 +134,17 @@ __db_apprec(dbenv, flags) } else if ((ret = __log_findckp(lp, &first_lsn)) == DB_NOTFOUND) { /* - * If recovery was specified, there must be log files. - * If we don't find one, it's an error. (This should - * have been caught above, when a log_get() of DB_FIRST - * or DB_CHECKPOINT succeeded, but paranoia is good.) + * We don't require that log files exist if recovery + * was specified. */ - ret = EINVAL; + ret = 0; goto out; } + + if (dbenv->db_verbose) + __db_err(lp->dbenv, "Recovery starting from [%lu][%lu]", + (u_long)first_lsn.file, (u_long)first_lsn.offset); + for (ret = log_get(lp, &lsn, &data, DB_LAST); ret == 0 && log_compare(&lsn, &first_lsn) > 0; ret = log_get(lp, &lsn, &data, DB_PREV)) { @@ -175,21 +178,21 @@ __db_apprec(dbenv, flags) __log_close_files(lp); /* - * Now set the maximum transaction id, set the last checkpoint lsn, - * and the current time. Then take a checkpoint. 
+ * Now set the last checkpoint lsn and the current time, + * take a checkpoint, and reset the txnid. */ (void)time(&now); - dbenv->tx_info->region->last_txnid = ((__db_txnhead *)txninfo)->maxid; dbenv->tx_info->region->last_ckp = ckp_lsn; dbenv->tx_info->region->time_ckp = (u_int32_t)now; if ((ret = txn_checkpoint(dbenv->tx_info, 0, 0)) != 0) goto out; + dbenv->tx_info->region->last_txnid = TXN_MINIMUM; if (dbenv->db_verbose) { __db_err(lp->dbenv, "Recovery complete at %.24s", ctime(&now)); - __db_err(lp->dbenv, "%s %lu %s [%lu][%lu]", + __db_err(lp->dbenv, "%s %lx %s [%lu][%lu]", "Maximum transaction id", - (u_long)dbenv->tx_info->region->last_txnid, + ((DB_TXNHEAD *)txninfo)->maxid, "Recovery checkpoint", (u_long)dbenv->tx_info->region->last_ckp.file, (u_long)dbenv->tx_info->region->last_ckp.offset); diff --git a/db2/common/db_byteorder.c b/db2/common/db_byteorder.c index e486132073..cadf742851 100644 --- a/db2/common/db_byteorder.c +++ b/db2/common/db_byteorder.c @@ -1,14 +1,14 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. */ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)db_byteorder.c 10.4 (Sleepycat) 9/4/97"; +static const char sccsid[] = "@(#)db_byteorder.c 10.5 (Sleepycat) 4/10/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES diff --git a/db2/common/db_err.c b/db2/common/db_err.c index fc59aadbaf..98a414279e 100644 --- a/db2/common/db_err.c +++ b/db2/common/db_err.c @@ -1,14 +1,14 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. 
*/ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)db_err.c 10.21 (Sleepycat) 1/13/98"; +static const char sccsid[] = "@(#)db_err.c 10.25 (Sleepycat) 5/2/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -26,6 +26,7 @@ static const char sccsid[] = "@(#)db_err.c 10.21 (Sleepycat) 1/13/98"; #include "db_int.h" #include "common_ext.h" +static int __db_keyempty __P((const DB_ENV *)); static int __db_rdonly __P((const DB_ENV *, const char *)); /* @@ -81,11 +82,11 @@ __db_err(dbenv, fmt, va_alist) * appears before the assignment in the __db__panic() call. */ static int __db_ecursor __P((DB *, DB_TXN *, DBC **)); -static int __db_edel __P((DB *, DB_TXN *, DBT *, int)); +static int __db_edel __P((DB *, DB_TXN *, DBT *, u_int32_t)); static int __db_efd __P((DB *, int *)); -static int __db_egp __P((DB *, DB_TXN *, DBT *, DBT *, int)); -static int __db_estat __P((DB *, void *, void *(*)(size_t), int)); -static int __db_esync __P((DB *, int)); +static int __db_egp __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t)); +static int __db_estat __P((DB *, void *, void *(*)(size_t), u_int32_t)); +static int __db_esync __P((DB *, u_int32_t)); /* * __db_ecursor -- @@ -113,7 +114,7 @@ __db_edel(a, b, c, d) DB *a; DB_TXN *b; DBT *c; - int d; + u_int32_t d; { COMPQUIET(a, NULL); COMPQUIET(b, NULL); @@ -147,7 +148,7 @@ __db_egp(a, b, c, d, e) DB *a; DB_TXN *b; DBT *c, *d; - int e; + u_int32_t e; { COMPQUIET(a, NULL); COMPQUIET(b, NULL); @@ -167,7 +168,7 @@ __db_estat(a, b, c, d) DB *a; void *b; void *(*c) __P((size_t)); - int d; + u_int32_t d; { COMPQUIET(a, NULL); COMPQUIET(b, NULL); @@ -184,7 +185,7 @@ __db_estat(a, b, c, d) static int __db_esync(a, b) DB *a; - int b; + u_int32_t b; { COMPQUIET(a, NULL); COMPQUIET(b, 0); @@ -208,6 +209,10 @@ __db_panic(dbp) * * We should call mpool and have it shut down the file, so we get * other processes sharing this file as well. + * + * Chaos reigns within. + * Reflect, repent, and reboot. + * Order shall return. 
*/ dbp->cursor = __db_ecursor; dbp->del = __db_edel; @@ -235,13 +240,13 @@ __db_panic(dbp) * __db_fchk -- * General flags checking routine. * - * PUBLIC: int __db_fchk __P((DB_ENV *, const char *, int, int)); + * PUBLIC: int __db_fchk __P((DB_ENV *, const char *, u_int32_t, u_int32_t)); */ int __db_fchk(dbenv, name, flags, ok_flags) DB_ENV *dbenv; const char *name; - int flags, ok_flags; + u_int32_t flags, ok_flags; { DB_CHECK_FLAGS(dbenv, name, flags, ok_flags); return (0); @@ -251,13 +256,14 @@ __db_fchk(dbenv, name, flags, ok_flags) * __db_fcchk -- * General combination flags checking routine. * - * PUBLIC: int __db_fcchk __P((DB_ENV *, const char *, int, int, int)); + * PUBLIC: int __db_fcchk + * PUBLIC: __P((DB_ENV *, const char *, u_int32_t, u_int32_t, u_int32_t)); */ int __db_fcchk(dbenv, name, flags, flag1, flag2) DB_ENV *dbenv; const char *name; - int flags, flag1, flag2; + u_int32_t flags, flag1, flag2; { DB_CHECK_FCOMBO(dbenv, name, flags, flag1, flag2); return (0); @@ -267,12 +273,13 @@ __db_fcchk(dbenv, name, flags, flag1, flag2) * __db_cdelchk -- * Common cursor delete argument checking routine. * - * PUBLIC: int __db_cdelchk __P((const DB *, int, int, int)); + * PUBLIC: int __db_cdelchk __P((const DB *, u_int32_t, int, int)); */ int __db_cdelchk(dbp, flags, isrdonly, isvalid) const DB *dbp; - int flags, isrdonly, isvalid; + u_int32_t flags; + int isrdonly, isvalid; { /* Check for changes to a read-only tree. */ if (isrdonly) @@ -292,17 +299,18 @@ __db_cdelchk(dbp, flags, isrdonly, isvalid) * __db_cgetchk -- * Common cursor get argument checking routine. 
* - * PUBLIC: int __db_cgetchk __P((const DB *, DBT *, DBT *, int, int)); + * PUBLIC: int __db_cgetchk __P((const DB *, DBT *, DBT *, u_int32_t, int)); */ int __db_cgetchk(dbp, key, data, flags, isvalid) const DB *dbp; DBT *key, *data; - int flags, isvalid; + u_int32_t flags; + int isvalid; { - int check_key; + int key_einval, key_flags; - check_key = 0; + key_flags = key_einval = 0; /* Check for invalid dbc->c_get() function flags. */ switch (flags) { @@ -311,10 +319,13 @@ __db_cgetchk(dbp, key, data, flags, isvalid) case DB_LAST: case DB_NEXT: case DB_PREV: + key_flags = 1; + break; case DB_SET_RANGE: - check_key = 1; + key_einval = key_flags = 1; break; case DB_SET: + key_einval = 1; break; case DB_GET_RECNO: if (!F_ISSET(dbp, DB_BT_RECNUM)) @@ -323,14 +334,14 @@ __db_cgetchk(dbp, key, data, flags, isvalid) case DB_SET_RECNO: if (!F_ISSET(dbp, DB_BT_RECNUM)) goto err; - check_key = 1; + key_einval = key_flags = 1; break; default: err: return (__db_ferr(dbp->dbenv, "c_get", 0)); } /* Check for invalid key/data flags. */ - if (check_key) + if (key_flags) DB_CHECK_FLAGS(dbp->dbenv, "key", key->flags, DB_DBT_MALLOC | DB_DBT_USERMEM | DB_DBT_PARTIAL); DB_CHECK_FLAGS(dbp->dbenv, "data", data->flags, @@ -340,11 +351,15 @@ err: return (__db_ferr(dbp->dbenv, "c_get", 0)); if (F_ISSET(dbp, DB_AM_THREAD)) { if (!F_ISSET(data, DB_DBT_USERMEM | DB_DBT_MALLOC)) return (__db_ferr(dbp->dbenv, "threaded data", 1)); - if (check_key && + if (key_flags && !F_ISSET(key, DB_DBT_USERMEM | DB_DBT_MALLOC)) return (__db_ferr(dbp->dbenv, "threaded key", 1)); } + /* Check for missing keys. */ + if (key_einval && (key->data == NULL || key->size == 0)) + return (__db_keyempty(dbp->dbenv)); + /* * The cursor must be initialized for DB_CURRENT, return -1 for an * invalid cursor, otherwise 0. @@ -357,23 +372,24 @@ err: return (__db_ferr(dbp->dbenv, "c_get", 0)); * Common cursor put argument checking routine. 
* * PUBLIC: int __db_cputchk __P((const DB *, - * PUBLIC: const DBT *, DBT *, int, int, int)); + * PUBLIC: const DBT *, DBT *, u_int32_t, int, int)); */ int __db_cputchk(dbp, key, data, flags, isrdonly, isvalid) const DB *dbp; const DBT *key; DBT *data; - int flags, isrdonly, isvalid; + u_int32_t flags; + int isrdonly, isvalid; { - int check_key; + int key_einval, key_flags; /* Check for changes to a read-only tree. */ if (isrdonly) return (__db_rdonly(dbp->dbenv, "c_put")); /* Check for invalid dbc->c_put() function flags. */ - check_key = 0; + key_einval = key_flags = 0; switch (flags) { case DB_AFTER: case DB_BEFORE: @@ -388,19 +404,23 @@ __db_cputchk(dbp, key, data, flags, isrdonly, isvalid) case DB_KEYLAST: if (dbp->type == DB_RECNO) goto err; - check_key = 1; + key_einval = key_flags = 1; break; default: err: return (__db_ferr(dbp->dbenv, "c_put", 0)); } /* Check for invalid key/data flags. */ - if (check_key) + if (key_flags) DB_CHECK_FLAGS(dbp->dbenv, "key", key->flags, DB_DBT_MALLOC | DB_DBT_USERMEM | DB_DBT_PARTIAL); DB_CHECK_FLAGS(dbp->dbenv, "data", data->flags, DB_DBT_MALLOC | DB_DBT_USERMEM | DB_DBT_PARTIAL); + /* Check for missing keys. */ + if (key_einval && (key->data == NULL || key->size == 0)) + return (__db_keyempty(dbp->dbenv)); + /* * The cursor must be initialized for anything other than DB_KEYFIRST * and DB_KEYLAST, return -1 for an invalid cursor, otherwise 0. @@ -413,12 +433,14 @@ err: return (__db_ferr(dbp->dbenv, "c_put", 0)); * __db_delchk -- * Common delete argument checking routine. * - * PUBLIC: int __db_delchk __P((const DB *, int, int)); + * PUBLIC: int __db_delchk __P((const DB *, DBT *, u_int32_t, int)); */ int -__db_delchk(dbp, flags, isrdonly) +__db_delchk(dbp, key, flags, isrdonly) const DB *dbp; - int flags, isrdonly; + DBT *key; + u_int32_t flags; + int isrdonly; { /* Check for changes to a read-only tree. */ if (isrdonly) @@ -427,6 +449,10 @@ __db_delchk(dbp, flags, isrdonly) /* Check for invalid db->del() function flags. 
*/ DB_CHECK_FLAGS(dbp->dbenv, "delete", flags, 0); + /* Check for missing keys. */ + if (key->data == NULL || key->size == 0) + return (__db_keyempty(dbp->dbenv)); + return (0); } @@ -434,14 +460,14 @@ __db_delchk(dbp, flags, isrdonly) * __db_getchk -- * Common get argument checking routine. * - * PUBLIC: int __db_getchk __P((const DB *, const DBT *, DBT *, int)); + * PUBLIC: int __db_getchk __P((const DB *, const DBT *, DBT *, u_int32_t)); */ int __db_getchk(dbp, key, data, flags) const DB *dbp; const DBT *key; DBT *data; - int flags; + u_int32_t flags; { /* Check for invalid db->get() function flags. */ DB_CHECK_FLAGS(dbp->dbenv, @@ -457,6 +483,10 @@ __db_getchk(dbp, key, data, flags) !F_ISSET(data, DB_DBT_MALLOC | DB_DBT_USERMEM)) return (__db_ferr(dbp->dbenv, "threaded data", 1)); + /* Check for missing keys. */ + if (key->data == NULL || key->size == 0) + return (__db_keyempty(dbp->dbenv)); + return (0); } @@ -464,14 +494,16 @@ __db_getchk(dbp, key, data, flags) * __db_putchk -- * Common put argument checking routine. * - * PUBLIC: int __db_putchk __P((const DB *, DBT *, const DBT *, int, int, int)); + * PUBLIC: int __db_putchk + * PUBLIC: __P((const DB *, DBT *, const DBT *, u_int32_t, int, int)); */ int __db_putchk(dbp, key, data, flags, isrdonly, isdup) const DB *dbp; DBT *key; const DBT *data; - int flags, isrdonly, isdup; + u_int32_t flags; + int isrdonly, isdup; { /* Check for changes to a read-only tree. */ if (isrdonly) @@ -488,12 +520,17 @@ __db_putchk(dbp, key, data, flags, isrdonly, isdup) DB_CHECK_FCOMBO(dbp->dbenv, "data", data->flags, DB_DBT_MALLOC, DB_DBT_USERMEM); + /* Check for missing keys. */ + if (key->data == NULL || key->size == 0) + return (__db_keyempty(dbp->dbenv)); + /* Check for partial puts in the presence of duplicates. 
*/ if (isdup && F_ISSET(data, DB_DBT_PARTIAL)) { __db_err(dbp->dbenv, "a partial put in the presence of duplicates requires a cursor operation"); return (EINVAL); } + return (0); } @@ -501,12 +538,12 @@ __db_putchk(dbp, key, data, flags, isrdonly, isdup) * __db_statchk -- * Common stat argument checking routine. * - * PUBLIC: int __db_statchk __P((const DB *, int)); + * PUBLIC: int __db_statchk __P((const DB *, u_int32_t)); */ int __db_statchk(dbp, flags) const DB *dbp; - int flags; + u_int32_t flags; { /* Check for invalid db->stat() function flags. */ DB_CHECK_FLAGS(dbp->dbenv, "stat", flags, DB_RECORDCOUNT); @@ -522,12 +559,12 @@ __db_statchk(dbp, flags) * __db_syncchk -- * Common sync argument checking routine. * - * PUBLIC: int __db_syncchk __P((const DB *, int)); + * PUBLIC: int __db_syncchk __P((const DB *, u_int32_t)); */ int __db_syncchk(dbp, flags) const DB *dbp; - int flags; + u_int32_t flags; { /* Check for invalid db->sync() function flags. */ DB_CHECK_FLAGS(dbp->dbenv, "sync", flags, 0); @@ -542,13 +579,13 @@ __db_syncchk(dbp, flags) * PUBLIC: int __db_ferr __P((const DB_ENV *, const char *, int)); */ int -__db_ferr(dbenv, name, combo) +__db_ferr(dbenv, name, iscombo) const DB_ENV *dbenv; const char *name; - int combo; + int iscombo; { __db_err(dbenv, "illegal flag %sspecified to %s", - combo ? "combination " : "", name); + iscombo ? "combination " : "", name); return (EINVAL); } @@ -564,3 +601,15 @@ __db_rdonly(dbenv, name) __db_err(dbenv, "%s: attempt to modify a read-only tree", name); return (EACCES); } + +/* + * __db_keyempty -- + * Common missing or empty key value message. + */ +static int +__db_keyempty(dbenv) + const DB_ENV *dbenv; +{ + __db_err(dbenv, "missing or empty key value specified"); + return (EINVAL); +} diff --git a/db2/common/db_log2.c b/db2/common/db_log2.c index 9af01116f6..d6b14f540b 100644 --- a/db2/common/db_log2.c +++ b/db2/common/db_log2.c @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. 
* - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. */ /* @@ -43,7 +43,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)db_log2.c 10.3 (Sleepycat) 6/21/97"; +static const char sccsid[] = "@(#)db_log2.c 10.5 (Sleepycat) 4/26/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -63,6 +63,7 @@ __db_log2(num) u_int32_t i, limit; limit = 1; - for (i = 0; limit < num; limit = limit << 1, i++); + for (i = 0; limit < num; limit = limit << 1, i++) + ; return (i); } diff --git a/db2/common/db_region.c b/db2/common/db_region.c index 02d939e3e6..6d15f7f092 100644 --- a/db2/common/db_region.c +++ b/db2/common/db_region.c @@ -1,59 +1,20 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. */ -/* - * Copyright (c) 1995, 1996 - * The President and Fellows of Harvard University. All rights reserved. - * - * This code is derived from software contributed to Harvard by - * Margo Seltzer. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. 
Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)db_region.c 10.21 (Sleepycat) 1/16/98"; +static const char sccsid[] = "@(#)db_region.c 10.46 (Sleepycat) 5/26/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES #include <sys/types.h> -#include <sys/stat.h> #include <errno.h> -#include <fcntl.h> -#include <stdio.h> -#include <stdlib.h> #include <string.h> #include <unistd.h> #endif @@ -61,548 +22,840 @@ static const char sccsid[] = "@(#)db_region.c 10.21 (Sleepycat) 1/16/98"; #include "db_int.h" #include "common_ext.h" -static int __db_rmap __P((DB_ENV *, int, size_t, void *)); +static int __db_growregion __P((REGINFO *, size_t)); /* - * __db_rcreate -- - * - * Common interface for creating a shared region. Handles synchronization - * across multiple processes. - * - * The dbenv contains the environment for this process, including naming - * information. The path argument represents the parameters passed to - * the open routines and may be either a file or a directory. 
If it is - * a directory, it must exist. If it is a file, then the file parameter - * must be NULL, otherwise, file is the name to be created inside the - * directory path. - * - * The function returns a pointer to the shared region that has been mapped - * into memory, NULL on error. + * __db_rattach -- + * Optionally create and attach to a shared memory region. * - * PUBLIC: int __db_rcreate __P((DB_ENV *, APPNAME, - * PUBLIC: const char *, const char *, int, size_t, int, int *, void *)); + * PUBLIC: int __db_rattach __P((REGINFO *)); */ int -__db_rcreate(dbenv, appname, path, file, mode, size, oflags, fdp, retp) - DB_ENV *dbenv; - APPNAME appname; - const char *path, *file; - int mode, oflags, *fdp; - size_t size; - void *retp; +__db_rattach(infop) + REGINFO *infop; { - RLAYOUT *rp; - int fd, ret; - char *name; + RLAYOUT *rlp, rl; + size_t grow_region, size; + ssize_t nr, nw; + u_int32_t flags, mbytes, bytes; + u_int8_t *p; + int malloc_possible, ret, retry_cnt; + + grow_region = 0; + malloc_possible = 1; + ret = retry_cnt = 0; + + /* Round off the requested size to the next page boundary. */ + DB_ROUNDOFF(infop->size); + + /* Some architectures have hard limits on the maximum region size. */ +#ifdef DB_REGIONSIZE_MAX + if (infop->size > DB_REGIONSIZE_MAX) { + __db_err(infop->dbenv, "__db_rattach: cache size too large"); + return (EINVAL); + } +#endif - fd = -1; - rp = NULL; + /* Intialize the return information in the REGINFO structure. */ +loop: infop->addr = NULL; + infop->fd = -1; + infop->segid = INVALID_SEGID; + if (infop->name != NULL) { + FREES(infop->name); + infop->name = NULL; + } + F_CLR(infop, REGION_CANGROW | REGION_CREATED); +#ifndef HAVE_SPINLOCKS /* - * Get the filename -- note, if it's a temporary file, it will - * be created by the underlying temporary file creation code, - * so we have to check the file descriptor to be sure it's an - * error. 
+ * XXX + * Lacking spinlocks, we must have a file descriptor for fcntl(2) + * locking, which implies using mmap(2) to map in a regular file. + * (Theoretically, we could probably get a file descriptor to lock + * other types of shared regions, but I don't see any reason to + * bother.) */ - if ((ret = __db_appname(dbenv, appname, path, file, &fd, &name)) != 0) - return (ret); + malloc_possible = 0; +#endif +#ifdef __hppa /* - * Now open the file. We need to make sure that multiple processes - * that attempt to create the region at the same time are properly - * ordered, so we open it DB_EXCL and DB_CREATE so two simultaneous - * attempts to create the region will return failure in one of the - * attempts. + * XXX + * HP-UX won't permit mutexes to live in anything but shared memory. + * Instantiate a shared region file on that architecture, regardless. */ - oflags |= DB_CREATE | DB_EXCL; - if (fd == -1 && - (ret = __db_open(name, oflags, oflags, mode, &fd)) != 0) { - if (ret != EEXIST) - __db_err(dbenv, - "region create: %s: %s", name, strerror(ret)); - goto err; + malloc_possible = 0; +#endif + /* + * If a region is truly private, malloc the memory. That's faster + * than either anonymous memory or a shared file. + */ + if (malloc_possible && F_ISSET(infop, REGION_PRIVATE)) { + if ((infop->addr = __db_malloc(infop->size)) == NULL) + return (ENOMEM); + + /* + * It's sometimes significantly faster to page-fault in all + * of the region's pages before we run the application, as + * we can see fairly nasty side-effects when we page-fault + * while holding various locks, i.e., the lock takes a long + * time, and other threads convoy behind the lock holder. + */ + if (DB_GLOBAL(db_region_init)) + for (p = infop->addr; + p < (u_int8_t *)infop->addr + infop->size; + p += DB_VMPAGESIZE) + p[0] = '\0'; + + F_SET(infop, REGION_CREATED | REGION_MALLOC); + goto region_init; } - *fdp = fd; - /* Grow the region to the correct size. 
*/ - if ((ret = __db_rgrow(dbenv, fd, size)) != 0) - goto err; + /* + * Get the name of the region (creating the file if a temporary file + * is being used). The dbenv contains the current DB environment, + * including naming information. The path argument may be a file or + * a directory. If path is a directory, it must exist and file is the + * file name to be created inside the directory. If path is a file, + * then file must be NULL. + */ + if ((ret = __db_appname(infop->dbenv, infop->appname, infop->path, + infop->file, infop->dbflags, &infop->fd, &infop->name)) != 0) + return (ret); + if (infop->fd != -1) + F_SET(infop, REGION_CREATED); - /* Map the region in. */ - if ((ret = __db_rmap(dbenv, fd, size, &rp)) != 0) - goto err; + /* + * Try to create the file, if we have authority. We have to make sure + * that multiple threads/processes attempting to simultaneously create + * the region are properly ordered, so we open it using DB_CREATE and + * DB_EXCL, so two attempts to create the region will return failure in + * one. + */ + if (infop->fd == -1 && infop->dbflags & DB_CREATE) { + flags = infop->dbflags; + LF_SET(DB_EXCL); + if ((ret = __db_open(infop->name, + flags, flags, infop->mode, &infop->fd)) == 0) + F_SET(infop, REGION_CREATED); + else + if (ret != EEXIST) + goto errmsg; + } - /* Initialize the region. */ - if ((ret = __db_rinit(dbenv, rp, fd, size, 1)) != 0) - goto err; + /* If we couldn't create the file, try and open it. */ + if (infop->fd == -1) { + flags = infop->dbflags; + LF_CLR(DB_CREATE | DB_EXCL); + if ((ret = __db_open(infop->name, + flags, flags, infop->mode, &infop->fd)) != 0) + goto errmsg; + } - if (name != NULL) - FREES(name); + /* + * There are three cases we support: + * 1. Named anonymous memory (shmget(2)). + * 2. Unnamed anonymous memory (mmap(2): MAP_ANON/MAP_ANONYMOUS). + * 3. Memory backed by a regular file (mmap(2)). 
+ * + * We instantiate a backing file in all cases, which contains at least + * the RLAYOUT structure, and in case #4, contains the actual region. + * This is necessary for a couple of reasons: + * + * First, the mpool region uses temporary files to name regions, and + * since you may have multiple regions in the same directory, we need + * a filesystem name to ensure that they don't collide. + * + * Second, applications are allowed to forcibly remove regions, even + * if they don't know anything about them other than the name. If a + * region is backed by anonymous memory, there has to be some way for + * the application to find out that information, and, in some cases, + * determine ID information for the anonymous memory. + */ + if (F_ISSET(infop, REGION_CREATED)) { + /* + * If we're using anonymous memory to back this region, set + * the flag. + */ + if (DB_GLOBAL(db_region_anon)) + F_SET(infop, REGION_ANONYMOUS); - *(void **)retp = rp; - return (0); + /* + * If we're using a regular file to back a region we created, + * grow it to the specified size. + */ + if (!DB_GLOBAL(db_region_anon) && + (ret = __db_growregion(infop, infop->size)) != 0) + goto err; + } else { + /* + * If we're joining a region, figure out what it looks like. + * + * XXX + * We have to figure out if the file is a regular file backing + * a region that we want to map into our address space, or a + * file with the information we need to find a shared anonymous + * region that we want to map into our address space. + * + * All this noise is because some systems don't have a coherent + * VM and buffer cache, and worse, if you mix operations on the + * VM and buffer cache, half the time you hang the system. + * + * There are two possibilities. If the file is the size of an + * RLAYOUT structure, then we know that the real region is in + * shared memory, because otherwise it would be bigger. 
(As + * the RLAYOUT structure size is smaller than a disk sector, + * the only way it can be this size is if deliberately written + * that way.) In which case, retrieve the information we need + * from the RLAYOUT structure and use it to acquire the shared + * memory. + * + * If the structure is larger than an RLAYOUT structure, then + * the file is backing the shared memory region, and we use + * the current size of the file without reading any information + * from the file itself so that we don't confuse the VM. + * + * And yes, this makes me want to take somebody and kill them, + * but I can't think of any other solution. + */ + if ((ret = __db_ioinfo(infop->name, + infop->fd, &mbytes, &bytes, NULL)) != 0) + goto errmsg; + size = mbytes * MEGABYTE + bytes; + + if (size <= sizeof(RLAYOUT)) { + /* + * If the size is too small, the read fails or the + * valid flag is incorrect, assume it's because the + * RLAYOUT information hasn't been written out yet, + * and retry. + */ + if (size < sizeof(RLAYOUT)) + goto retry; + if ((ret = + __db_read(infop->fd, &rl, sizeof(rl), &nr)) != 0) + goto retry; + if (rl.valid != DB_REGIONMAGIC) + goto retry; + + /* Copy the size, memory id and characteristics. */ + size = rl.size; + infop->segid = rl.segid; + if (F_ISSET(&rl, REGION_ANONYMOUS)) + F_SET(infop, REGION_ANONYMOUS); + } -err: if (fd != -1) { - if (rp != NULL) - (void)__db_unmap(rp, rp->size); - (void)__db_unlink(name); - (void)__db_close(fd); + /* + * If the region is larger than we think, that's okay, use the + * current size. If it's smaller than we think, and we were + * just using the default size, that's okay, use the current + * size. If it's smaller than we think and we really care, + * save the size and we'll catch that further down -- we can't + * correct it here because we have to have a lock to grow the + * region. 
+ */ + if (infop->size > size && !F_ISSET(infop, REGION_SIZEDEF)) + grow_region = infop->size; + infop->size = size; } - if (name != NULL) - FREES(name); - return (ret); -} - -/* - * __db_rinit -- - * Initialize the region. - * - * PUBLIC: int __db_rinit __P((DB_ENV *, RLAYOUT *, int, size_t, int)); - */ -int -__db_rinit(dbenv, rp, fd, size, lock_region) - DB_ENV *dbenv; - RLAYOUT *rp; - size_t size; - int fd, lock_region; -{ - int ret; - COMPQUIET(dbenv, NULL); + /* + * Map the region into our address space. If we're creating it, the + * underlying routines will make it the right size. + * + * There are at least two cases where we can "reasonably" fail when + * we attempt to map in the region. On Windows/95, closing the last + * reference to a region causes it to be zeroed out. On UNIX, when + * using the shmget(2) interfaces, the region will no longer exist + * if the system was rebooted. In these cases, the underlying map call + * returns EAGAIN, and we *remove* our file and try again. There are + * obvious races in doing this, but it should eventually settle down + * to a winner and then things should proceed normally. + */ + if ((ret = __db_mapregion(infop->name, infop)) != 0) + if (ret == EAGAIN) { + /* + * Pretend we created the region even if we didn't so + * that our error processing unlinks it. + */ + F_SET(infop, REGION_CREATED); + ret = 0; + goto retry; + } else + goto err; +region_init: /* - * Initialize the common information. + * Initialize the common region information. * * !!! * We have to order the region creates so that two processes don't try - * to simultaneously create the region and so that processes that are - * joining the region never see inconsistent data. We'd like to play - * file permissions games, but we can't because WNT filesystems won't - * open a file mode 0. - * - * If the lock_region flag is set, the process creating the region - * acquires the lock before the setting the version number. 
Any - * process joining the region checks the version number before - * attempting to acquire the lock. (The lock_region flag may not be - * set -- the mpool code sometimes malloc's private regions but still - * needs to initialize them, specifically, the mutex for threads.) + * to simultaneously create the region. This is handled by using the + * DB_CREATE and DB_EXCL flags when we create the "backing" region file. * - * We have to check the version number first, because if the version - * number has not been written, it's possible that the mutex has not - * been initialized in which case an attempt to get it could lead to - * random behavior. If the version number isn't there (the file size - * is too small) or it's 0, we know that the region is being created. - * - * We also make sure to check the return of __db_mutex_lock() here, - * even though we don't usually check elsewhere. This is the first - * lock we attempt to acquire, and if it fails we have to know. (It - * can fail -- SunOS, using fcntl(2) for locking, with an in-memory - * filesystem specified as the database home.) + * We also have to order region joins so that processes joining regions + * never see inconsistent data. We'd like to play permissions games + * with the backing file, but we can't because WNT filesystems won't + * open a file mode 0. */ - __db_mutex_init(&rp->lock, MUTEX_LOCK_OFFSET(rp, &rp->lock)); - if (lock_region && (ret = __db_mutex_lock(&rp->lock, fd)) != 0) - return (ret); - - rp->refcnt = 1; - rp->size = size; - rp->flags = 0; - db_version(&rp->majver, &rp->minver, &rp->patch); + rlp = (RLAYOUT *)infop->addr; + if (F_ISSET(infop, REGION_CREATED)) { + /* + * The process creating the region acquires a lock before it + * sets the valid flag. Any processes joining the region will + * check the valid flag before acquiring the lock. + * + * Check the return of __db_mutex_init() and __db_mutex_lock(), + * even though we don't usually check elsewhere. 
This is the + * first lock we initialize and acquire, and we have to know if + * it fails. (It CAN fail, e.g., SunOS, when using fcntl(2) + * for locking, with an in-memory filesystem specified as the + * database home.) + */ + if ((ret = __db_mutex_init(&rlp->lock, + MUTEX_LOCK_OFFSET(rlp, &rlp->lock))) != 0 || + (ret = __db_mutex_lock(&rlp->lock, infop->fd)) != 0) + goto err; - return (0); -} + /* Initialize the remaining region information. */ + rlp->refcnt = 1; + rlp->size = infop->size; + db_version(&rlp->majver, &rlp->minver, &rlp->patch); + rlp->segid = infop->segid; + rlp->flags = 0; + if (F_ISSET(infop, REGION_ANONYMOUS)) + F_SET(rlp, REGION_ANONYMOUS); -/* - * __db_ropen -- - * Construct the name of a file, open it and map it in. - * - * PUBLIC: int __db_ropen __P((DB_ENV *, - * PUBLIC: APPNAME, const char *, const char *, int, int *, void *)); - */ -int -__db_ropen(dbenv, appname, path, file, flags, fdp, retp) - DB_ENV *dbenv; - APPNAME appname; - const char *path, *file; - int flags, *fdp; - void *retp; -{ - RLAYOUT *rp; - size_t size; - u_int32_t mbytes, bytes; - int fd, ret; - char *name; + /* + * Fill in the valid field last -- use a magic number, memory + * may not be zero-filled, and we want to minimize the chance + * for collision. + */ + rlp->valid = DB_REGIONMAGIC; - fd = -1; - rp = NULL; + /* + * If the region is anonymous, write the RLAYOUT information + * into the backing file so that future region join and unlink + * calls can find it. + * + * XXX + * We MUST do the seek before we do the write. On Win95, while + * closing the last reference to an anonymous shared region + * doesn't discard the region, it does zero it out. So, the + * REGION_CREATED may be set, but the file may have already + * been written and the file descriptor may be at the end of + * the file. 
+ */ + if (F_ISSET(infop, REGION_ANONYMOUS)) { + if ((ret = __db_seek(infop->fd, 0, 0, 0, 0, 0)) != 0) + goto err; + if ((ret = + __db_write(infop->fd, rlp, sizeof(*rlp), &nw)) != 0) + goto err; + } + } else { + /* + * Check the valid flag to ensure the region is initialized. + * If the valid flag has not been set, the mutex may not have + * been initialized, and an attempt to get it could lead to + * random behavior. + */ + if (rlp->valid != DB_REGIONMAGIC) + goto retry; - /* Get the filename. */ - if ((ret = __db_appname(dbenv, appname, path, file, NULL, &name)) != 0) - return (ret); + /* Get the region lock. */ + (void)__db_mutex_lock(&rlp->lock, infop->fd); - /* Open the file. */ - if ((ret = __db_open(name, flags, DB_MUTEXDEBUG, 0, &fd)) != 0) { - __db_err(dbenv, "region open: %s: %s", name, strerror(ret)); - goto err2; - } + /* + * We now own the region. There are a couple of things that + * may have gone wrong, however. + * + * Problem #1: while we were waiting for the lock, the region + * was deleted. Detected by re-checking the valid flag, since + * it's cleared by the delete region routines. + */ + if (rlp->valid != DB_REGIONMAGIC) { + (void)__db_mutex_unlock(&rlp->lock, infop->fd); + goto retry; + } - *fdp = fd; + /* + * Problem #2: We want a bigger region than has previously been + * created. Detected by checking if the region is smaller than + * our caller requested. If it is, we grow the region, (which + * does the detach and re-attach for us). + */ + if (grow_region != 0 && + (ret = __db_rgrow(infop, grow_region)) != 0) { + (void)__db_mutex_unlock(&rlp->lock, infop->fd); + goto err; + } - /* - * Map the file in. We have to do things in a strange order so that - * we don't get into a situation where the file was just created and - * isn't yet initialized. See the comment in __db_rcreate() above. - * - * XXX - * We'd like to test to see if the file is too big to mmap. 
Since we - * don't know what size or type off_t's or size_t's are, or the largest - * unsigned integral type is, or what random insanity the local C - * compiler will perpetrate, doing the comparison in a portable way is - * flatly impossible. Hope that mmap fails if the file is too large. - * - */ - if ((ret = __db_ioinfo(name, fd, &mbytes, &bytes, NULL)) != 0) { - __db_err(dbenv, "%s: %s", name, strerror(ret)); - goto err2; - } - size = mbytes * MEGABYTE + bytes; + /* + * Problem #3: when we checked the size of the file, it was + * still growing as part of creation. Detected by the fact + * that infop->size isn't the same size as the region. + */ + if (infop->size != rlp->size) { + (void)__db_mutex_unlock(&rlp->lock, infop->fd); + goto retry; + } - /* Check to make sure the first block has been written. */ - if (size < sizeof(RLAYOUT)) { - ret = EAGAIN; - goto err2; + /* Increment the reference count. */ + ++rlp->refcnt; } - /* Map in whatever is there. */ - if ((ret = __db_rmap(dbenv, fd, size, &rp)) != 0) - goto err2; + /* Return the region in a locked condition. */ - /* - * Check to make sure the region has been initialized. We can't just - * grab the lock because the lock may not have been initialized yet. - */ - if (rp->majver == 0) { - ret = EAGAIN; - goto err2; - } - - /* Get the region lock. */ - if (!LF_ISSET(DB_MUTEXDEBUG)) - (void)__db_mutex_lock(&rp->lock, fd); + if (0) { +errmsg: __db_err(infop->dbenv, "%s: %s", infop->name, strerror(ret)); - /* - * The file may have been half-written if we were descheduled between - * getting the size of the file and checking the major version. Check - * to make sure we got the entire file. - */ - if ((ret = __db_ioinfo(name, fd, &mbytes, &bytes, NULL)) != 0) { - __db_err(dbenv, "%s: %s", name, strerror(ret)); - goto err1; - } - if (size != mbytes * MEGABYTE + bytes) { - ret = EAGAIN; - goto err1; - } +err: +retry: /* Discard the region. 
*/ + if (infop->addr != NULL) { + (void)__db_unmapregion(infop); + infop->addr = NULL; + } - /* The file may have just been deleted. */ - if (F_ISSET(rp, DB_R_DELETED)) { - ret = EAGAIN; - goto err1; - } + /* Discard the backing file. */ + if (infop->fd != -1) { + (void)__db_close(infop->fd); + infop->fd = -1; - /* Increment the reference count. */ - ++rp->refcnt; + if (F_ISSET(infop, REGION_CREATED)) + (void)__db_unlink(infop->name); + } - /* Release the lock. */ - if (!LF_ISSET(DB_MUTEXDEBUG)) - (void)__db_mutex_unlock(&rp->lock, fd); + /* Discard the name. */ + if (infop->name != NULL) { + FREES(infop->name); + infop->name = NULL; + } - FREES(name); + /* + * If we had a temporary error, wait a few seconds and + * try again. + */ + if (ret == 0) { + if (++retry_cnt <= 3) { + __db_sleep(retry_cnt * 2, 0); + goto loop; + } + ret = EAGAIN; + } + } - *(void **)retp = rp; - return (0); + /* + * XXX + * HP-UX won't permit mutexes to live in anything but shared memory. + * Instantiate a shared region file on that architecture, regardless. + * + * XXX + * There's a problem in cleaning this up on application exit, or on + * application failure. If an application opens a database without + * an environment, we create a temporary backing mpool region for it. + * That region is marked REGION_PRIVATE, but as HP-UX won't permit + * mutexes to live in anything but shared memory, we instantiate a + * real file plus a memory region of some form. If the application + * crashes, the necessary information to delete the backing file and + * any system region (e.g., the shmget(2) segment ID) is no longer + * available. We can't completely fix the problem, but we try. + * + * The underlying UNIX __db_mapregion() code preferentially uses the + * mmap(2) interface with the MAP_ANON/MAP_ANONYMOUS flags for regions + * that are marked REGION_PRIVATE. This means that we normally aren't + * holding any system resources when we get here, in which case we can + * delete the backing file. 
This results in a short race, from the + * __db_open() call above to here. + * + * If, for some reason, we are holding system resources when we get + * here, we don't have any choice -- we can't delete the backing file + * because we may need it to detach from the resources. Set the + * REGION_LASTDETACH flag, so that we do all necessary cleanup when + * the application closes the region. + */ + if (F_ISSET(infop, REGION_PRIVATE) && !F_ISSET(infop, REGION_MALLOC)) + if (F_ISSET(infop, REGION_HOLDINGSYS)) + F_SET(infop, REGION_LASTDETACH); + else { + F_SET(infop, REGION_REMOVED); + F_CLR(infop, REGION_CANGROW); + + (void)__db_close(infop->fd); + (void)__db_unlink(infop->name); + } -err1: if (!LF_ISSET(DB_MUTEXDEBUG)) - (void)__db_mutex_unlock(&rp->lock, fd); -err2: if (rp != NULL) - (void)__db_unmap(rp, rp->size); - if (fd != -1) - (void)__db_close(fd); - FREES(name); return (ret); } /* - * __db_rclose -- - * Close a shared memory region. + * __db_rdetach -- + * De-attach from a shared memory region. * - * PUBLIC: int __db_rclose __P((DB_ENV *, int, void *)); + * PUBLIC: int __db_rdetach __P((REGINFO *)); */ int -__db_rclose(dbenv, fd, ptr) - DB_ENV *dbenv; - int fd; - void *ptr; +__db_rdetach(infop) + REGINFO *infop; { - RLAYOUT *rp; - int ret, t_ret; - const char *fail; + RLAYOUT *rlp; + int detach, ret, t_ret; - rp = ptr; - fail = NULL; + ret = 0; - /* Get the lock. */ - if ((ret = __db_mutex_lock(&rp->lock, fd)) != 0) { - fail = "lock get"; - goto err; + /* + * If the region was removed when it was created, no further action + * is required. + */ + if (F_ISSET(infop, REGION_REMOVED)) + goto done; + /* + * If the region was created in memory returned by malloc, the only + * action required is freeing the memory. + */ + if (F_ISSET(infop, REGION_MALLOC)) { + __db_free(infop->addr); + goto done; } + /* Otherwise, attach to the region and optionally delete it. */ + rlp = infop->addr; + + /* Get the lock. 
*/ + (void)__db_mutex_lock(&rlp->lock, infop->fd); + /* Decrement the reference count. */ - --rp->refcnt; + if (rlp->refcnt == 0) + __db_err(infop->dbenv, + "region rdetach: reference count went to zero!"); + else + --rlp->refcnt; + + /* + * If we're going to remove the region, clear the valid flag so + * that any region join that's blocked waiting for us will know + * what happened. + */ + detach = 0; + if (F_ISSET(infop, REGION_LASTDETACH)) + if (rlp->refcnt == 0) { + detach = 1; + rlp->valid = 0; + } else + ret = EBUSY; /* Release the lock. */ - if ((t_ret = __db_mutex_unlock(&rp->lock, fd)) != 0 && fail == NULL) { - ret = t_ret; - fail = "lock release"; - } + (void)__db_mutex_unlock(&rlp->lock, infop->fd); - /* Discard the region. */ - if ((t_ret = __db_unmap(ptr, rp->size)) != 0 && fail == NULL) { - ret = t_ret; - fail = "munmap"; - } + /* Close the backing file descriptor. */ + (void)__db_close(infop->fd); + infop->fd = -1; - if ((t_ret = __db_close(fd)) != 0 && fail == NULL) { + /* Discard our mapping of the region. */ + if ((t_ret = __db_unmapregion(infop)) != 0 && ret == 0) ret = t_ret; - fail = "close"; + + /* Discard the region itself. */ + if (detach) { + if ((t_ret = + __db_unlinkregion(infop->name, infop) != 0) && ret == 0) + ret = t_ret; + if ((t_ret = __db_unlink(infop->name) != 0) && ret == 0) + ret = t_ret; } - if (fail == NULL) - return (0); +done: /* Discard the name. */ + if (infop->name != NULL) { + FREES(infop->name); + infop->name = NULL; + } -err: __db_err(dbenv, "region detach: %s: %s", fail, strerror(ret)); return (ret); } /* * __db_runlink -- - * Remove a shared memory region. + * Remove a region. 
* - * PUBLIC: int __db_runlink __P((DB_ENV *, - * PUBLIC: APPNAME, const char *, const char *, int)); + * PUBLIC: int __db_runlink __P((REGINFO *, int)); */ int -__db_runlink(dbenv, appname, path, file, force) - DB_ENV *dbenv; - APPNAME appname; - const char *path, *file; +__db_runlink(infop, force) + REGINFO *infop; int force; { - RLAYOUT *rp; - int cnt, fd, ret, t_ret; + RLAYOUT rl, *rlp; + size_t size; + ssize_t nr; + u_int32_t mbytes, bytes; + int fd, ret, t_ret; char *name; - rp = NULL; + /* + * XXX + * We assume that we've created a new REGINFO structure for this + * call, not used one that was already initialized. Regardless, + * if anyone is planning to use it after we're done, they're going + * to be sorely disappointed. + * + * If force isn't set, we attach to the region, set a flag to delete + * the region on last close, and let the region delete code do the + * work. + */ + if (!force) { + if ((ret = __db_rattach(infop)) != 0) + return (ret); - /* Get the filename. */ - if ((ret = __db_appname(dbenv, appname, path, file, NULL, &name)) != 0) - return (ret); + rlp = (RLAYOUT *)infop->addr; + (void)__db_mutex_unlock(&rlp->lock, infop->fd); - /* If the file doesn't exist, we're done. */ - if (__db_exists(name, NULL)) - goto done; + F_SET(infop, REGION_LASTDETACH); + + return (__db_rdetach(infop)); + } /* - * If we're called with a force flag, try and unlink the file. This - * may not succeed if the file is currently open, but there's nothing - * we can do about that. There is a race condition between the check - * for existence above and the actual unlink. If someone else snuck - * in and removed it before we do the remove, then we might get an - * ENOENT error. If we get the ENOENT, we treat it as success, just - * as we do above. + * Otherwise, we don't want to attach to the region. We may have been + * called to clean up if a process died leaving a region locked and/or + * corrupted, which could cause the attach to hang. 
*/ - if (force) { - if ((ret = __db_unlink(name)) != 0 && ret != ENOENT) - goto err1; - goto done; + if ((ret = __db_appname(infop->dbenv, infop->appname, + infop->path, infop->file, infop->dbflags, NULL, &name)) != 0) + return (ret); + + /* + * An underlying file is created for all regions other than private + * (REGION_PRIVATE) ones, regardless of whether or not it's used to + * back the region. If that file doesn't exist, we're done. + */ + if (__db_exists(name, NULL) != 0) { + FREES(name); + return (0); } - /* Open and lock the region. */ - if ((ret = __db_ropen(dbenv, appname, path, file, 0, &fd, &rp)) != 0) - goto err1; - (void)__db_mutex_lock(&rp->lock, fd); + /* + * See the comments in __db_rattach -- figure out if this is a regular + * file backing a region or if it's a regular file with information + * about a region. + */ + if ((ret = __db_open(name, DB_RDONLY, DB_RDONLY, 0, &fd)) != 0) + goto errmsg; + if ((ret = __db_ioinfo(name, fd, &mbytes, &bytes, NULL)) != 0) + goto errmsg; + size = mbytes * MEGABYTE + bytes; - /* If the region is currently being deleted, fail. */ - if (F_ISSET(rp, DB_R_DELETED)) { - ret = ENOENT; /* XXX: ENOENT? */ - goto err2; - } + if (size <= sizeof(RLAYOUT)) { + if ((ret = __db_read(fd, &rl, sizeof(rl), &nr)) != 0) + goto errmsg; + if (rl.valid != DB_REGIONMAGIC) { + __db_err(infop->dbenv, + "%s: illegal region magic number", name); + ret = EINVAL; + goto err; + } - /* If the region is currently in use by someone else, fail. */ - if (rp->refcnt > 1) { - ret = EBUSY; - goto err2; + /* Set the size, memory id and characteristics. */ + infop->size = rl.size; + infop->segid = rl.segid; + if (F_ISSET(&rl, REGION_ANONYMOUS)) + F_SET(infop, REGION_ANONYMOUS); + } else { + infop->size = size; + infop->segid = INVALID_SEGID; } - /* Set the delete flag. */ - F_SET(rp, DB_R_DELETED); - - /* Release the lock and close the region. 
*/ - (void)__db_mutex_unlock(&rp->lock, fd); - if ((t_ret = __db_rclose(dbenv, fd, rp)) != 0 && ret == 0) - goto err1; + /* Remove the underlying region. */ + ret = __db_unlinkregion(name, infop); /* - * Unlink the region. There's a race here -- other threads or - * processes might be opening the region while we're trying to - * remove it. They'll fail, because we've set the DELETED flag, - * but they could still stop us from succeeding in the unlink. + * Unlink the backing file. Close the open file descriptor first, + * because some architectures (e.g., Win32) won't unlink a file if + * open file descriptors remain. */ - for (cnt = 5; cnt > 0; --cnt) { - if ((ret = __db_unlink(name)) == 0) - break; - (void)__db_sleep(0, 250000); - } - if (ret == 0) { -done: FREES(name); - return (0); - } - - /* Not a clue. Try to clear the DB_R_DELETED flag. */ - if ((ret = __db_ropen(dbenv, appname, path, file, 0, &fd, &rp)) != 0) - goto err1; - (void)__db_mutex_lock(&rp->lock, fd); - F_CLR(rp, DB_R_DELETED); - /* FALLTHROUGH */ + (void)__db_close(fd); + if ((t_ret = __db_unlink(name)) != 0 && ret == 0) + ret = t_ret; -err2: (void)__db_mutex_unlock(&rp->lock, fd); - (void)__db_rclose(dbenv, fd, rp); -err1: __db_err(dbenv, "region unlink: %s: %s", name, strerror(ret)); + if (0) { +errmsg: __db_err(infop->dbenv, "%s: %s", name, strerror(ret)); +err: (void)__db_close(fd); + } FREES(name); return (ret); } /* - * DB creates all regions on 4K boundaries so that we don't make the - * underlying VM unhappy. - */ -#define __DB_VMPAGESIZE (4 * 1024) - -/* * __db_rgrow -- - * Extend a region by a specified amount. + * Extend a region. * - * PUBLIC: int __db_rgrow __P((DB_ENV *, int, size_t)); + * PUBLIC: int __db_rgrow __P((REGINFO *, size_t)); */ int -__db_rgrow(dbenv, fd, incr) - DB_ENV *dbenv; - int fd; - size_t incr; +__db_rgrow(infop, new_size) + REGINFO *infop; + size_t new_size; +{ + RLAYOUT *rlp; + size_t increment; + int ret; + + /* + * !!! 
+ * This routine MUST be called with the region already locked. + */ + + /* The underlying routines have flagged if this region can grow. */ + if (!F_ISSET(infop, REGION_CANGROW)) + return (EINVAL); + + /* + * Round off the requested size to the next page boundary, and + * determine the additional space required. + */ + rlp = (RLAYOUT *)infop->addr; + DB_ROUNDOFF(new_size); + increment = new_size - rlp->size; + + if ((ret = __db_growregion(infop, increment)) != 0) + return (ret); + + /* Update the on-disk region size. */ + rlp->size = new_size; + + /* Detach from and reattach to the region. */ + return (__db_rreattach(infop, new_size)); +} + +/* + * __db_growregion -- + * Grow a shared memory region. + */ +static int +__db_growregion(infop, increment) + REGINFO *infop; + size_t increment; { + db_pgno_t pages; size_t i; - ssize_t nw; - int mmap_init_needed, ret; - char buf[__DB_VMPAGESIZE]; + ssize_t nr, nw; + u_int32_t relative; + int ret; + char buf[DB_VMPAGESIZE]; /* Seek to the end of the region. */ - if ((ret = __db_seek(fd, 0, 0, 0, SEEK_END)) != 0) + if ((ret = __db_seek(infop->fd, 0, 0, 0, 0, SEEK_END)) != 0) goto err; /* Write nuls to the new bytes. */ memset(buf, 0, sizeof(buf)); /* - * Historically, some systems required that all of the bytes of the - * region be written before it could be mmapped and accessed randomly. - * - * Windows/95 doesn't have that problem, but it leaves file contents - * uninitialized. Win/NT apparently initializes them. + * Some systems require that all of the bytes of the region be + * written before it can be mapped and accessed randomly, and + * other systems don't zero out the pages. */ -#ifdef MMAP_INIT_NEEDED - mmap_init_needed = 1; -#else - mmap_init_needed = __os_oldwin(); -#endif - if (mmap_init_needed) + if (__db_mapinit()) /* Extend the region by writing each new page. 
*/ - for (i = 0; i < incr; i += __DB_VMPAGESIZE) { - if ((ret = __db_write(fd, buf, sizeof(buf), &nw)) != 0) + for (i = 0; i < increment; i += DB_VMPAGESIZE) { + if ((ret = + __db_write(infop->fd, buf, sizeof(buf), &nw)) != 0) goto err; if (nw != sizeof(buf)) goto eio; } else { /* - * Extend the region by writing the last page. - * - * Round off the increment to the next page boundary. + * Extend the region by writing the last page. If the region + * is >4Gb, increment may be larger than the maximum possible + * seek "relative" argument, as it's an unsigned 32-bit value. + * Break the offset into pages of 1MB each so that we don't + * overflow (2^20 + 2^32 is bigger than any memory I expect + * to see for awhile). */ - incr += __DB_VMPAGESIZE - 1; - incr -= incr % __DB_VMPAGESIZE; - - /* Write the last page, not the page after the last. */ - if ((ret = - __db_seek(fd, 0, 0, incr - __DB_VMPAGESIZE, SEEK_CUR)) != 0) + pages = (increment - DB_VMPAGESIZE) / MEGABYTE; + relative = (increment - DB_VMPAGESIZE) % MEGABYTE; + if ((ret = __db_seek(infop->fd, + MEGABYTE, pages, relative, 0, SEEK_CUR)) != 0) goto err; - if ((ret = __db_write(fd, buf, sizeof(buf), &nw)) != 0) + if ((ret = __db_write(infop->fd, buf, sizeof(buf), &nw)) != 0) goto err; if (nw != sizeof(buf)) goto eio; + + /* + * It's sometimes significantly faster to page-fault in all + * of the region's pages before we run the application, as + * we can see fairly nasty side-effects when we page-fault + * while holding various locks, i.e., the lock takes a long + * time, and other threads convoy behind the lock holder. + */ + if (DB_GLOBAL(db_region_init)) { + pages = increment / MEGABYTE; + relative = increment % MEGABYTE; + if ((ret = __db_seek(infop->fd, + MEGABYTE, pages, relative, 1, SEEK_END)) != 0) + goto err; + + /* Read a byte from each page. 
*/ + for (i = 0; i < increment; i += DB_VMPAGESIZE) { + if ((ret = + __db_read(infop->fd, buf, 1, &nr)) != 0) + goto err; + if (nr != 1) + goto eio; + if ((ret = __db_seek(infop->fd, + 0, 0, DB_VMPAGESIZE - 1, 0, SEEK_CUR)) != 0) + goto err; + } + } } return (0); eio: ret = EIO; -err: __db_err(dbenv, "region grow: %s", strerror(ret)); +err: __db_err(infop->dbenv, "region grow: %s", strerror(ret)); return (ret); } /* - * __db_rremap -- - * Unmap the old region and map in a new region of a new size. If - * either call fails, returns NULL, else returns the address of the - * new region. + * __db_rreattach -- + * Detach from and reattach to a region. * - * PUBLIC: int __db_rremap __P((DB_ENV *, void *, size_t, size_t, int, void *)); + * PUBLIC: int __db_rreattach __P((REGINFO *, size_t)); */ int -__db_rremap(dbenv, ptr, oldsize, newsize, fd, retp) - DB_ENV *dbenv; - void *ptr, *retp; - size_t oldsize, newsize; - int fd; +__db_rreattach(infop, new_size) + REGINFO *infop; + size_t new_size; { int ret; - if ((ret = __db_unmap(ptr, oldsize)) != 0) { - __db_err(dbenv, "region remap: munmap: %s", strerror(ret)); - return (ret); +#ifdef DIAGNOSTIC + if (infop->name == NULL) { + __db_err(infop->dbenv, "__db_rreattach: name was NULL"); + return (EINVAL); } +#endif + /* + * If we're growing an already mapped region, we have to unmap it + * and get it back. We have it locked, so nobody else can get in, + * which makes it fairly straight-forward to do, as everybody else + * is going to block while we do the unmap/remap. NB: if we fail + * to get it back, the pooch is genuinely screwed, because we can + * never release the lock we're holding. + * + * Detach from the region. We have to do this first so architectures + * that don't permit a file to be mapped into different places in the + * address space simultaneously, e.g., HP's PaRisc, will work. 
+ */ + if ((ret = __db_unmapregion(infop)) != 0) + return (ret); - return (__db_rmap(dbenv, fd, newsize, retp)); -} - -/* - * __db_rmap -- - * Attach to a shared memory region. - */ -static int -__db_rmap(dbenv, fd, size, retp) - DB_ENV *dbenv; - int fd; - size_t size; - void *retp; -{ - RLAYOUT *rp; - int ret; + /* Update the caller's REGINFO size to the new map size. */ + infop->size = new_size; - if ((ret = __db_map(fd, size, 0, 0, (void **)&rp)) != 0) { - __db_err(dbenv, "region map: mmap %s", strerror(ret)); - return (ret); - } - if (rp->size < size) - rp->size = size; + /* Attach to the region. */ + ret = __db_mapregion(infop->name, infop); - *(void **)retp = rp; - return (0); + return (ret); } diff --git a/db2/common/db_salloc.c b/db2/common/db_salloc.c index f0202ddb90..0fa696bf7e 100644 --- a/db2/common/db_salloc.c +++ b/db2/common/db_salloc.c @@ -1,21 +1,21 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. */ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)db_salloc.c 10.6 (Sleepycat) 7/5/97"; +static const char sccsid[] = "@(#)db_salloc.c 10.13 (Sleepycat) 5/10/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES #include <sys/types.h> #include <errno.h> -#include <stdio.h> +#include <string.h> #endif #include "db_int.h" @@ -109,11 +109,13 @@ __db_shalloc(p, len, align, retp) *(void **)retp = rp; +#define SHALLOC_FRAGMENT 32 /* - * If there are at least 32 bytes of additional memory, divide - * the chunk into two chunks. + * If there are at least SHALLOC_FRAGMENT additional bytes of + * memory, divide the chunk into two chunks. 
*/ - if ((u_int8_t *)rp >= (u_int8_t *)&elp->links + 32) { + if ((u_int8_t *)rp >= + (u_int8_t *)&elp->links + SHALLOC_FRAGMENT) { sp = rp; *--sp = elp->len - ((u_int8_t *)rp - (u_int8_t *)&elp->links); @@ -136,7 +138,7 @@ __db_shalloc(p, len, align, retp) return (0); } - /* Nothing found large enough; need to figure out how to grow region. */ + /* Nothing found large enough; need to grow the region. */ return (ENOMEM); } @@ -159,12 +161,18 @@ __db_shalloc_free(regionp, ptr) * Step back over flagged length fields to find the beginning of * the object and its real size. */ - for (sp = (size_t *)ptr; sp[-1] == ILLEGAL_SIZE; --sp); + for (sp = (size_t *)ptr; sp[-1] == ILLEGAL_SIZE; --sp) + ; ptr = sp; newp = (struct __data *)((u_int8_t *)ptr - sizeof(size_t)); free_size = newp->len; + /* Trash the returned memory. */ +#ifdef DIAGNOSTIC + memset(ptr, 0xff, free_size); +#endif + /* * Walk the list, looking for where this entry goes. * @@ -177,7 +185,8 @@ __db_shalloc_free(regionp, ptr) hp = (struct __head *)regionp; for (elp = SH_LIST_FIRST(hp, __data), lastp = NULL; elp != NULL && (void *)elp < (void *)ptr; - lastp = elp, elp = SH_LIST_NEXT(elp, links, __data)); + lastp = elp, elp = SH_LIST_NEXT(elp, links, __data)) + ; /* * Elp is either NULL (we reached the end of the list), or the slot @@ -259,32 +268,34 @@ __db_shsizeof(ptr) * Step back over flagged length fields to find the beginning of * the object and its real size. */ - for (sp = (size_t *)ptr; sp[-1] == ILLEGAL_SIZE; --sp); + for (sp = (size_t *)ptr; sp[-1] == ILLEGAL_SIZE; --sp) + ; elp = (struct __data *)((u_int8_t *)sp - sizeof(size_t)); return (elp->len); } -#ifdef DEBUG /* * __db_shalloc_dump -- * - * PUBLIC: void __db_shalloc_dump __P((FILE *, void *)); + * PUBLIC: void __db_shalloc_dump __P((void *, FILE *)); */ void -__db_shalloc_dump(fp, addr) - FILE *fp; +__db_shalloc_dump(addr, fp) void *addr; + FILE *fp; { struct __data *elp; + /* Make it easy to call from the debugger. 
*/ if (fp == NULL) fp = stderr; + fprintf(fp, "%s\nMemory free list\n", DB_LINE); + for (elp = SH_LIST_FIRST((struct __head *)addr, __data); elp != NULL; elp = SH_LIST_NEXT(elp, links, __data)) fprintf(fp, "%#lx: %lu\t", (u_long)elp, (u_long)elp->len); fprintf(fp, "\n"); } -#endif diff --git a/db2/common/db_shash.c b/db2/common/db_shash.c index ab188f564f..3f48a55907 100644 --- a/db2/common/db_shash.c +++ b/db2/common/db_shash.c @@ -1,14 +1,14 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. */ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)db_shash.c 10.4 (Sleepycat) 1/8/98"; +static const char sccsid[] = "@(#)db_shash.c 10.9 (Sleepycat) 4/10/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -19,39 +19,75 @@ static const char sccsid[] = "@(#)db_shash.c 10.4 (Sleepycat) 1/8/98"; #include "shqueue.h" #include "common_ext.h" -/* Powers-of-2 and close-by prime number pairs. */ +/* + * Table of good hash values. Up to ~250,000 buckets, we use powers of 2. + * After that, we slow the rate of increase by half. For each choice, we + * then use a nearby prime number as the hash value. + * + * If a terabyte is the maximum cache we'll see, and we assume there are + * 10 1K buckets on each hash chain, then 107374182 is the maximum number + * of buckets we'll ever need. 
+ */ static const struct { - u_int power; - u_int prime; + u_int32_t power; + u_int32_t prime; } list[] = { - { 64, 67}, - { 128, 131}, - { 256, 257}, - { 512, 521}, - {1024, 1031}, - {2048, 2053}, - {4096, 4099}, - {8192, 8191}, - {0, 0} + { 64, 67}, /* 2^6 */ + { 128, 131}, /* 2^7 */ + { 256, 257}, /* 2^8 */ + { 512, 521}, /* 2^9 */ + { 1024, 1031}, /* 2^10 */ + { 2048, 2053}, /* 2^11 */ + { 4096, 4099}, /* 2^12 */ + { 8192, 8191}, /* 2^13 */ + { 16384, 16381}, /* 2^14 */ + { 32768, 32771}, /* 2^15 */ + { 65536, 65537}, /* 2^16 */ + { 131072, 131071}, /* 2^17 */ + { 262144, 262147}, /* 2^18 */ + { 393216, 393209}, /* 2^18 + 2^18/2 */ + { 524288, 524287}, /* 2^19 */ + { 786432, 786431}, /* 2^19 + 2^19/2 */ + { 1048576, 1048573}, /* 2^20 */ + { 1572864, 1572869}, /* 2^20 + 2^20/2 */ + { 2097152, 2097169}, /* 2^21 */ + { 3145728, 3145721}, /* 2^21 + 2^21/2 */ + { 4194304, 4194301}, /* 2^22 */ + { 6291456, 6291449}, /* 2^22 + 2^22/2 */ + { 8388608, 8388617}, /* 2^23 */ + { 12582912, 12582917}, /* 2^23 + 2^23/2 */ + { 16777216, 16777213}, /* 2^24 */ + { 25165824, 25165813}, /* 2^24 + 2^24/2 */ + { 33554432, 33554393}, /* 2^25 */ + { 50331648, 50331653}, /* 2^25 + 2^25/2 */ + { 67108864, 67108859}, /* 2^26 */ + { 100663296, 100663291}, /* 2^26 + 2^26/2 */ + { 134217728, 134217757}, /* 2^27 */ + { 201326592, 201326611}, /* 2^27 + 2^27/2 */ + { 268435456, 268435459}, /* 2^28 */ + { 402653184, 402653189}, /* 2^28 + 2^28/2 */ + { 536870912, 536870909}, /* 2^29 */ + { 805306368, 805306357}, /* 2^29 + 2^29/2 */ + {1073741824, 1073741827}, /* 2^30 */ + {0, 0} }; /* * __db_tablesize -- * Choose a size for the hash table. * - * PUBLIC: int __db_tablesize __P((u_int)); + * PUBLIC: int __db_tablesize __P((u_int32_t)); */ int __db_tablesize(n_buckets) - u_int n_buckets; + u_int32_t n_buckets; { int i; /* - * We try to be clever about how big we make the hash tables. Pick - * a prime number close to the "suggested" number of elements that - * will be in the hash table. 
We shoot for minimum collisions (i.e. - * one element in each bucket). We use 64 as the minimum table size. + * We try to be clever about how big we make the hash tables. Use a + * prime number close to the "suggested" number of elements that will + * be in the hash table. Use 64 as the minimum hash table size. * * Ref: Sedgewick, Algorithms in C, "Hash Functions" */ @@ -73,14 +109,14 @@ __db_tablesize(n_buckets) * __db_hashinit -- * Initialize a hash table that resides in shared memory. * - * PUBLIC: void __db_hashinit __P((void *, int)); + * PUBLIC: void __db_hashinit __P((void *, u_int32_t)); */ void __db_hashinit(begin, nelements) void *begin; - int nelements; + u_int32_t nelements; { - int i; + u_int32_t i; SH_TAILQ_HEAD(hash_head) *headp; headp = (struct hash_head *)begin; diff --git a/db2/config.h b/db2/config.h index 7f784a0d9b..e5e105830e 100644 --- a/db2/config.h +++ b/db2/config.h @@ -24,6 +24,9 @@ /* Define to `unsigned' if <sys/types.h> doesn't define. */ /* #undef size_t */ +/* Define if the `S_IS*' macros in <sys/stat.h> do not work properly. */ +/* #undef STAT_MACROS_BROKEN */ + /* Define if you have the ANSI C header files. */ #define STDC_HEADERS 1 @@ -36,14 +39,17 @@ /* Define if you want a debugging version. */ /* #undef DEBUG */ +/* Define if you want a version with run-time diagnostic checking. */ +/* #undef DIAGNOSTIC */ + /* Define if you have sigfillset (and sigprocmask). */ #define HAVE_SIGFILLSET 1 -/* Define if seeking to 64-bit file offsets requires the _llseek() call. */ -/* #undef HAVE_LLSEEK */ - -/* Define if seeking to 64-bit file offsets requires the _lseeki64() call. */ -/* #undef HAVE_LSEEKI */ +/* Define if building on AIX, HP, Solaris to get big-file environment. */ +/* #undef HAVE_FILE_OFFSET_BITS */ +#ifdef HAVE_FILE_OFFSET_BITS +#define _FILE_OFFSET_BITS 64 +#endif /* Define if you have spinlocks. */ /* #undef HAVE_SPINLOCKS */ @@ -51,6 +57,12 @@ /* Define if you want to use mc68020/gcc assembly spinlocks. 
*/ /* #undef HAVE_ASSEM_MC68020_GCC */ +/* Define if you want to use parisc/gcc assembly spinlocks. */ +/* #undef HAVE_ASSEM_PARISC_GCC */ + +/* Define if you want to use sco/cc assembly spinlocks. */ +/* #undef HAVE_ASSEM_SCO_CC */ + /* Define if you want to use sparc/gcc assembly spinlocks. */ /* #undef HAVE_ASSEM_SPARC_GCC */ @@ -69,6 +81,9 @@ /* Define if you have the SGI abilock_t spinlocks. */ /* #undef HAVE_FUNC_SGI */ +/* Define if you have the ReliantUNIX spinlock_t spinlocks. */ +/* #undef HAVE_FUNC_RELIANT */ + /* Define if you have the Solaris mutex_t spinlocks. */ /* #undef HAVE_FUNC_SOLARIS */ @@ -102,12 +117,12 @@ /* Define if you have the select function. */ #define HAVE_SELECT 1 +/* Define if you have the shmget function. */ +#define HAVE_SHMGET 1 + /* Define if you have the snprintf function. */ #define HAVE_SNPRINTF 1 -/* Define if you have the strdup function. */ -#define HAVE_STRDUP 1 - /* Define if you have the strerror function. */ #define HAVE_STRERROR 1 diff --git a/db2/db.h b/db2/db.h index 6a75bcd33d..e1f5c72044 100644 --- a/db2/db.h +++ b/db2/db.h @@ -1,10 +1,10 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. * - * @(#)db.h.src 10.102 (Sleepycat) 1/18/98 + * @(#)db.h.src 10.131 (Sleepycat) 6/2/98 */ #ifndef _DB_H_ @@ -54,8 +54,7 @@ * * !!! * We also provide the standard u_int, u_long etc., if they're not provided - * by the system. This isn't completely necessary, but the example programs - * need them. + * by the system. 
*/ #ifndef __BIT_TYPES_DEFINED__ #define __BIT_TYPES_DEFINED__ @@ -72,9 +71,9 @@ #define DB_VERSION_MAJOR 2 -#define DB_VERSION_MINOR 3 -#define DB_VERSION_PATCH 16 -#define DB_VERSION_STRING "Sleepycat Software: DB 2.3.16: (1/19/98)" +#define DB_VERSION_MINOR 4 +#define DB_VERSION_PATCH 14 +#define DB_VERSION_STRING "Sleepycat Software: DB 2.4.14: (6/2/98)" typedef u_int32_t db_pgno_t; /* Page number type. */ typedef u_int16_t db_indx_t; /* Page offset type. */ @@ -95,6 +94,7 @@ struct __db_bt_stat; typedef struct __db_bt_stat DB_BTREE_STAT; struct __db_dbt; typedef struct __db_dbt DBT; struct __db_env; typedef struct __db_env DB_ENV; struct __db_info; typedef struct __db_info DB_INFO; +struct __db_lock_stat; typedef struct __db_lock_stat DB_LOCK_STAT; struct __db_lockregion; typedef struct __db_lockregion DB_LOCKREGION; struct __db_lockreq; typedef struct __db_lockreq DB_LOCKREQ; struct __db_locktab; typedef struct __db_locktab DB_LOCKTAB; @@ -102,6 +102,7 @@ struct __db_log; typedef struct __db_log DB_LOG; struct __db_log_stat; typedef struct __db_log_stat DB_LOG_STAT; struct __db_lsn; typedef struct __db_lsn DB_LSN; struct __db_mpool; typedef struct __db_mpool DB_MPOOL; +struct __db_mpool_finfo;typedef struct __db_mpool_finfo DB_MPOOL_FINFO; struct __db_mpool_fstat;typedef struct __db_mpool_fstat DB_MPOOL_FSTAT; struct __db_mpool_stat; typedef struct __db_mpool_stat DB_MPOOL_STAT; struct __db_mpoolfile; typedef struct __db_mpoolfile DB_MPOOLFILE; @@ -134,7 +135,7 @@ struct __db_dbt { * There are a set of functions that the application can replace with its * own versions, and some other knobs which can be turned at run-time. */ -#define DB_FUNC_CALLOC 1 /* ANSI C calloc. */ +#define DB_FUNC_CALLOC 1 /* DELETED: ANSI C calloc. */ #define DB_FUNC_CLOSE 2 /* POSIX 1003.1 close. */ #define DB_FUNC_DIRFREE 3 /* DB: free directory list. */ #define DB_FUNC_DIRLIST 4 /* DB: create directory list. 
*/ @@ -149,12 +150,18 @@ struct __db_dbt { #define DB_FUNC_REALLOC 13 /* ANSI C realloc. */ #define DB_FUNC_SEEK 14 /* POSIX 1003.1 lseek. */ #define DB_FUNC_SLEEP 15 /* DB: sleep secs/usecs. */ -#define DB_FUNC_STRDUP 16 /* DB: strdup(3). */ +#define DB_FUNC_STRDUP 16 /* DELETED: DB: strdup(3). */ #define DB_FUNC_UNLINK 17 /* POSIX 1003.1 unlink. */ #define DB_FUNC_UNMAP 18 /* DB: unmap shared memory file. */ #define DB_FUNC_WRITE 19 /* POSIX 1003.1 write. */ #define DB_FUNC_YIELD 20 /* DB: yield thread to scheduler. */ #define DB_TSL_SPINS 21 /* DB: initialize spin count. */ +#define DB_FUNC_RUNLINK 22 /* DB: remove a shared region. */ +#define DB_REGION_ANON 23 /* DB: anonymous, unnamed regions. */ +#define DB_REGION_INIT 24 /* DB: page-fault regions in create. */ +#define DB_REGION_NAME 25 /* DB: anonymous, named regions. */ +#define DB_MUTEXLOCKS 26 /* DB: turn off all mutex locks. */ +#define DB_PAGEYIELD 27 /* DB: yield the CPU on pool get. */ /* * Database configuration and initialization. @@ -162,52 +169,51 @@ struct __db_dbt { /* * Flags understood by both db_open(3) and db_appinit(3). */ -#define DB_CREATE 0x00001 /* O_CREAT: create file as necessary. */ -#define DB_NOMMAP 0x00002 /* Don't mmap underlying file. */ -#define DB_THREAD 0x00004 /* Free-thread DB package handles. */ +#define DB_CREATE 0x000001 /* O_CREAT: create file as necessary. */ +#define DB_NOMMAP 0x000002 /* Don't mmap underlying file. */ +#define DB_THREAD 0x000004 /* Free-thread DB package handles. */ /* * Flags understood by db_appinit(3). - * - * DB_MUTEXDEBUG is internal only, and not documented. */ -/* 0x00007 COMMON MASK. */ -#define DB_INIT_LOCK 0x00008 /* Initialize locking. */ -#define DB_INIT_LOG 0x00010 /* Initialize logging. */ -#define DB_INIT_MPOOL 0x00020 /* Initialize mpool. */ -#define DB_INIT_TXN 0x00040 /* Initialize transactions. */ -#define DB_MPOOL_PRIVATE 0x00080 /* Mpool: private memory pool. 
*/ -#define DB_MUTEXDEBUG 0x00100 /* Do not get/set mutexes in regions. */ -#define DB_RECOVER 0x00200 /* Run normal recovery. */ -#define DB_RECOVER_FATAL 0x00400 /* Run catastrophic recovery. */ -#define DB_TXN_NOSYNC 0x00800 /* Do not sync log on commit. */ -#define DB_USE_ENVIRON 0x01000 /* Use the environment. */ -#define DB_USE_ENVIRON_ROOT 0x02000 /* Use the environment if root. */ +/* 0x000007 COMMON MASK. */ +#define DB_INIT_LOCK 0x000008 /* Initialize locking. */ +#define DB_INIT_LOG 0x000010 /* Initialize logging. */ +#define DB_INIT_MPOOL 0x000020 /* Initialize mpool. */ +#define DB_INIT_TXN 0x000040 /* Initialize transactions. */ +#define DB_MPOOL_PRIVATE 0x000080 /* Mpool: private memory pool. */ +#define __UNUSED_100 0x000100 +#define DB_RECOVER 0x000200 /* Run normal recovery. */ +#define DB_RECOVER_FATAL 0x000400 /* Run catastrophic recovery. */ +#define DB_TXN_NOSYNC 0x000800 /* Do not sync log on commit. */ +#define DB_USE_ENVIRON 0x001000 /* Use the environment. */ +#define DB_USE_ENVIRON_ROOT 0x002000 /* Use the environment if root. */ /* CURRENTLY UNUSED LOCK FLAGS. */ -#define DB_TXN_LOCK_2PL 0x00000 /* Two-phase locking. */ -#define DB_TXN_LOCK_OPTIMISTIC 0x00000 /* Optimistic locking. */ -#define DB_TXN_LOCK_MASK 0x00000 /* Lock flags mask. */ +#define DB_TXN_LOCK_2PL 0x000000 /* Two-phase locking. */ +#define DB_TXN_LOCK_OPTIMIST 0x000000 /* Optimistic locking. */ +#define DB_TXN_LOCK_MASK 0x000000 /* Lock flags mask. */ /* CURRENTLY UNUSED LOG FLAGS. */ -#define DB_TXN_LOG_REDO 0x00000 /* Redo-only logging. */ -#define DB_TXN_LOG_UNDO 0x00000 /* Undo-only logging. */ -#define DB_TXN_LOG_UNDOREDO 0x00000 /* Undo/redo write-ahead logging. */ -#define DB_TXN_LOG_MASK 0x00000 /* Log flags mask. */ +#define DB_TXN_LOG_REDO 0x000000 /* Redo-only logging. */ +#define DB_TXN_LOG_UNDO 0x000000 /* Undo-only logging. */ +#define DB_TXN_LOG_UNDOREDO 0x000000 /* Undo/redo write-ahead logging. */ +#define DB_TXN_LOG_MASK 0x000000 /* Log flags mask. 
*/ /* * Flags understood by db_open(3). * - * DB_EXCL and DB_TEMPORARY are internal only, and not documented. - * DB_SEQUENTIAL is currently internal, but likely to be exported some day. + * DB_EXCL and DB_TEMPORARY are internal only, and are not documented. + * DB_SEQUENTIAL is currently internal, but may be exported some day. */ -/* 0x00007 COMMON MASK. */ -/* 0x07fff ALREADY USED. */ -#define DB_EXCL 0x08000 /* O_EXCL: exclusive open. */ -#define DB_RDONLY 0x10000 /* O_RDONLY: read-only. */ -#define DB_SEQUENTIAL 0x20000 /* Indicate sequential access. */ -#define DB_TEMPORARY 0x40000 /* Remove on last close. */ -#define DB_TRUNCATE 0x80000 /* O_TRUNCATE: replace existing DB. */ +/* 0x000007 COMMON MASK. */ +/* 0x003fff ALREADY USED. */ +#define __UNUSED_4000 0x004000 +#define DB_EXCL 0x008000 /* O_EXCL: exclusive open. */ +#define DB_RDONLY 0x010000 /* O_RDONLY: read-only. */ +#define DB_SEQUENTIAL 0x020000 /* Indicate sequential access. */ +#define DB_TEMPORARY 0x040000 /* Remove on last close. */ +#define DB_TRUNCATE 0x080000 /* O_TRUNCATE: replace existing DB. */ /* * Deadlock detector modes; used in the DBENV structure to configure the @@ -240,9 +246,9 @@ struct __db_env { /* Locking. */ DB_LOCKTAB *lk_info; /* Return from lock_open(). */ u_int8_t *lk_conflicts; /* Two dimensional conflict matrix. */ - int lk_modes; /* Number of lock modes in table. */ - u_int lk_max; /* Maximum number of locks. */ - u_int32_t lk_detect; /* Deadlock detect on every conflict. */ + u_int32_t lk_modes; /* Number of lock modes in table. */ + u_int32_t lk_max; /* Maximum number of locks. */ + u_int32_t lk_detect; /* Deadlock detect on all conflicts. */ /* Logging. */ DB_LOG *lg_info; /* Return from log_open(). */ @@ -255,7 +261,7 @@ struct __db_env { /* Transactions. */ DB_TXNMGR *tx_info; /* Return from txn_open(). */ - unsigned int tx_max; /* Maximum number of transactions. */ + u_int32_t tx_max; /* Maximum number of transactions. 
*/ int (*tx_recover) /* Dispatch function for recovery. */ __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); @@ -300,17 +306,17 @@ struct __db_info { void *(*db_malloc) __P((size_t)); /* Btree access method. */ - int bt_maxkey; /* Maximum keys per page. */ - int bt_minkey; /* Minimum keys per page. */ + u_int32_t bt_maxkey; /* Maximum keys per page. */ + u_int32_t bt_minkey; /* Minimum keys per page. */ int (*bt_compare) /* Comparison function. */ __P((const DBT *, const DBT *)); size_t (*bt_prefix) /* Prefix function. */ __P((const DBT *, const DBT *)); /* Hash access method. */ - unsigned int h_ffactor; /* Fill factor. */ - unsigned int h_nelem; /* Number of elements. */ - u_int32_t (*h_hash) /* Hash function. */ + u_int32_t h_ffactor; /* Fill factor. */ + u_int32_t h_nelem; /* Number of elements. */ + u_int32_t (*h_hash) /* Hash function. */ __P((const void *, u_int32_t)); /* Recno access method. */ @@ -353,6 +359,7 @@ struct __db_info { #define DB_SET 0x010000 /* c_get(), log_get() */ #define DB_SET_RANGE 0x020000 /* c_get() */ #define DB_SET_RECNO 0x040000 /* c_get() */ +#define DB_CURLSN 0x080000 /* log_put() */ /* * DB (user visible) error return codes. @@ -435,14 +442,14 @@ struct __db { void *(*db_malloc) __P((size_t)); /* Functions. */ - int (*close) __P((DB *, int)); + int (*close) __P((DB *, u_int32_t)); int (*cursor) __P((DB *, DB_TXN *, DBC **)); - int (*del) __P((DB *, DB_TXN *, DBT *, int)); + int (*del) __P((DB *, DB_TXN *, DBT *, u_int32_t)); int (*fd) __P((DB *, int *)); - int (*get) __P((DB *, DB_TXN *, DBT *, DBT *, int)); - int (*put) __P((DB *, DB_TXN *, DBT *, DBT *, int)); - int (*stat) __P((DB *, void *, void *(*)(size_t), int)); - int (*sync) __P((DB *, int)); + int (*get) __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t)); + int (*put) __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t)); + int (*stat) __P((DB *, void *, void *(*)(size_t), u_int32_t)); + int (*sync) __P((DB *, u_int32_t)); #define DB_AM_DUP 0x000001 /* DB_DUP (internal). 
*/ #define DB_AM_INMEM 0x000002 /* In-memory; no sync on close. */ @@ -483,9 +490,9 @@ struct __dbc { void *internal; /* Access method private. */ int (*c_close) __P((DBC *)); - int (*c_del) __P((DBC *, int)); - int (*c_get) __P((DBC *, DBT *, DBT *, int)); - int (*c_put) __P((DBC *, DBT *, DBT *, int)); + int (*c_del) __P((DBC *, u_int32_t)); + int (*c_get) __P((DBC *, DBT *, DBT *, u_int32_t)); + int (*c_put) __P((DBC *, DBT *, DBT *, u_int32_t)); }; /* Btree/recno statistics structure. */ @@ -524,10 +531,11 @@ struct __db_bt_stat { #if defined(__cplusplus) extern "C" { #endif -int db_appinit __P((const char *, char * const *, DB_ENV *, int)); +int db_appinit __P((const char *, char * const *, DB_ENV *, u_int32_t)); int db_appexit __P((DB_ENV *)); int db_jump_set __P((void *, int)); -int db_open __P((const char *, DBTYPE, int, int, DB_ENV *, DB_INFO *, DB **)); +int db_open __P((const char *, + DBTYPE, u_int32_t, int, DB_ENV *, DB_INFO *, DB **)); int db_value_set __P((int, int)); char *db_version __P((int *, int *, int *)); #if defined(__cplusplus) @@ -575,6 +583,21 @@ typedef enum { DB_LOCK_IWR /* Intent to read and write. */ } db_lockmode_t; +/* + * Status of a lock. + */ +typedef enum { + DB_LSTAT_ABORTED, /* Lock belongs to an aborted txn. */ + DB_LSTAT_ERR, /* Lock is bad. */ + DB_LSTAT_FREE, /* Lock is unallocated. */ + DB_LSTAT_HELD, /* Lock is currently held. */ + DB_LSTAT_NOGRANT, /* Lock was not granted. */ + DB_LSTAT_PENDING, /* Lock was waiting and has been + * promoted; waiting for the owner + * to run and upgrade it to held. */ + DB_LSTAT_WAITING /* Lock is on the wait queue. */ +} db_status_t; + /* Lock request structure. */ struct __db_lockreq { db_lockop_t op; /* Operation. */ @@ -596,19 +619,38 @@ extern const u_int8_t db_rw_conflicts[]; #define DB_LOCK_RIW_N 6 extern const u_int8_t db_riw_conflicts[]; +struct __db_lock_stat { + u_int32_t st_magic; /* Lock file magic number. */ + u_int32_t st_version; /* Lock file version number. 
*/ + u_int32_t st_maxlocks; /* Maximum number of locks in table. */ + u_int32_t st_nmodes; /* Number of lock modes. */ + u_int32_t st_numobjs; /* Number of objects. */ + u_int32_t st_nlockers; /* Number of lockers. */ + u_int32_t st_nconflicts; /* Number of lock conflicts. */ + u_int32_t st_nrequests; /* Number of lock gets. */ + u_int32_t st_nreleases; /* Number of lock puts. */ + u_int32_t st_ndeadlocks; /* Number of lock deadlocks. */ + u_int32_t st_region_wait; /* Region lock granted after wait. */ + u_int32_t st_region_nowait; /* Region lock granted without wait. */ + u_int32_t st_refcnt; /* Region reference count. */ + u_int32_t st_regsize; /* Region size. */ +}; + #if defined(__cplusplus) extern "C" { #endif int lock_close __P((DB_LOCKTAB *)); -int lock_detect __P((DB_LOCKTAB *, int, int)); +int lock_detect __P((DB_LOCKTAB *, u_int32_t, u_int32_t)); int lock_get __P((DB_LOCKTAB *, - u_int32_t, int, const DBT *, db_lockmode_t, DB_LOCK *)); + u_int32_t, u_int32_t, const DBT *, db_lockmode_t, DB_LOCK *)); int lock_id __P((DB_LOCKTAB *, u_int32_t *)); -int lock_open __P((const char *, int, int, DB_ENV *, DB_LOCKTAB **)); +int lock_open __P((const char *, + u_int32_t, int, DB_ENV *, DB_LOCKTAB **)); int lock_put __P((DB_LOCKTAB *, DB_LOCK)); +int lock_stat __P((DB_LOCKTAB *, DB_LOCK_STAT **, void *(*)(size_t))); int lock_unlink __P((const char *, int, DB_ENV *)); int lock_vec __P((DB_LOCKTAB *, - u_int32_t, int, DB_LOCKREQ *, int, DB_LOCKREQ **)); + u_int32_t, u_int32_t, DB_LOCKREQ *, int, DB_LOCKREQ **)); #if defined(__cplusplus) } #endif @@ -651,19 +693,21 @@ struct __db_log_stat { u_int32_t st_region_nowait; /* Region lock granted without wait. */ u_int32_t st_cur_file; /* Current log file number. */ u_int32_t st_cur_offset; /* Current log file offset. */ + u_int32_t st_refcnt; /* Region reference count. */ + u_int32_t st_regsize; /* Region size. 
*/ }; #if defined(__cplusplus) extern "C" { #endif -int log_archive __P((DB_LOG *, char **[], int, void *(*)(size_t))); +int log_archive __P((DB_LOG *, char **[], u_int32_t, void *(*)(size_t))); int log_close __P((DB_LOG *)); int log_compare __P((const DB_LSN *, const DB_LSN *)); int log_file __P((DB_LOG *, const DB_LSN *, char *, size_t)); int log_flush __P((DB_LOG *, const DB_LSN *)); -int log_get __P((DB_LOG *, DB_LSN *, DBT *, int)); -int log_open __P((const char *, int, int, DB_ENV *, DB_LOG **)); -int log_put __P((DB_LOG *, DB_LSN *, const DBT *, int)); +int log_get __P((DB_LOG *, DB_LSN *, DBT *, u_int32_t)); +int log_open __P((const char *, u_int32_t, int, DB_ENV *, DB_LOG **)); +int log_put __P((DB_LOG *, DB_LSN *, const DBT *, u_int32_t)); int log_register __P((DB_LOG *, DB *, const char *, DBTYPE, u_int32_t *)); int log_stat __P((DB_LOG *, DB_LOG_STAT **, void *(*)(size_t))); int log_unlink __P((const char *, int, DB_ENV *)); @@ -705,6 +749,17 @@ struct __db_mpool_stat { u_int32_t st_page_trickle; /* Pages written by memp_trickle. */ u_int32_t st_region_wait; /* Region lock granted after wait. */ u_int32_t st_region_nowait; /* Region lock granted without wait. */ + u_int32_t st_refcnt; /* Region reference count. */ + u_int32_t st_regsize; /* Region size. */ +}; + +/* Mpool file open information structure. */ +struct __db_mpool_finfo { + int ftype; /* File type. */ + DBT *pgcookie; /* Byte-string passed to pgin/pgout. */ + u_int8_t *fileid; /* Unique file ID. */ + int32_t lsn_offset; /* LSN offset in page. */ + u_int32_t clear_len; /* Cleared length on created pages. */ }; /* Mpool file statistics structure. 
*/ @@ -724,13 +779,13 @@ extern "C" { #endif int memp_close __P((DB_MPOOL *)); int memp_fclose __P((DB_MPOOLFILE *)); -int memp_fget __P((DB_MPOOLFILE *, db_pgno_t *, int, void *)); +int memp_fget __P((DB_MPOOLFILE *, db_pgno_t *, u_int32_t, void *)); int memp_fopen __P((DB_MPOOL *, const char *, - int, int, int, size_t, int, DBT *, u_int8_t *, DB_MPOOLFILE **)); -int memp_fput __P((DB_MPOOLFILE *, void *, int)); -int memp_fset __P((DB_MPOOLFILE *, void *, int)); + u_int32_t, int, size_t, DB_MPOOL_FINFO *, DB_MPOOLFILE **)); +int memp_fput __P((DB_MPOOLFILE *, void *, u_int32_t)); +int memp_fset __P((DB_MPOOLFILE *, void *, u_int32_t)); int memp_fsync __P((DB_MPOOLFILE *)); -int memp_open __P((const char *, int, int, DB_ENV *, DB_MPOOL **)); +int memp_open __P((const char *, u_int32_t, int, DB_ENV *, DB_MPOOL **)); int memp_register __P((DB_MPOOL *, int, int (*)(db_pgno_t, void *, DBT *), int (*)(db_pgno_t, void *, DBT *))); @@ -765,16 +820,21 @@ struct __db_txn_active { }; struct __db_txn_stat { - DB_LSN st_last_ckp; /* lsn of the last checkpoint */ - DB_LSN st_pending_ckp; /* last checkpoint did not finish */ - time_t st_time_ckp; /* time of last checkpoint */ - u_int32_t st_last_txnid; /* last transaction id given out */ - u_int32_t st_maxtxns; /* maximum number of active txns */ - u_int32_t st_naborts; /* number of aborted transactions */ - u_int32_t st_nbegins; /* number of begun transactions */ - u_int32_t st_ncommits; /* number of committed transactions */ - u_int32_t st_nactive; /* number of active transactions */ - DB_TXN_ACTIVE *st_txnarray; /* array of active transactions */ + DB_LSN st_last_ckp; /* lsn of the last checkpoint */ + DB_LSN st_pending_ckp; /* last checkpoint did not finish */ + time_t st_time_ckp; /* time of last checkpoint */ + u_int32_t st_last_txnid; /* last transaction id given out */ + u_int32_t st_maxtxns; /* maximum number of active txns */ + u_int32_t st_naborts; /* number of aborted transactions */ + u_int32_t st_nbegins; /* number 
of begun transactions */ + u_int32_t st_ncommits; /* number of committed transactions */ + u_int32_t st_nactive; /* number of active transactions */ + DB_TXN_ACTIVE + *st_txnarray; /* array of active transactions */ + u_int32_t st_region_wait; /* Region lock granted after wait. */ + u_int32_t st_region_nowait; /* Region lock granted without wait. */ + u_int32_t st_refcnt; /* Region reference count. */ + u_int32_t st_regsize; /* Region size. */ }; #if defined(__cplusplus) @@ -782,11 +842,11 @@ extern "C" { #endif int txn_abort __P((DB_TXN *)); int txn_begin __P((DB_TXNMGR *, DB_TXN *, DB_TXN **)); -int txn_checkpoint __P((const DB_TXNMGR *, int, int)); +int txn_checkpoint __P((const DB_TXNMGR *, u_int32_t, u_int32_t)); int txn_commit __P((DB_TXN *)); int txn_close __P((DB_TXNMGR *)); u_int32_t txn_id __P((DB_TXN *)); -int txn_open __P((const char *, int, int, DB_ENV *, DB_TXNMGR **)); +int txn_open __P((const char *, u_int32_t, int, DB_ENV *, DB_TXNMGR **)); int txn_prepare __P((DB_TXN *)); int txn_stat __P((DB_TXNMGR *, DB_TXN_STAT **, void *(*)(size_t))); int txn_unlink __P((const char *, int, DB_ENV *)); @@ -810,10 +870,17 @@ int txn_unlink __P((const char *, int, DB_ENV *)); */ #define DBM_SUFFIX ".db" +#if defined(_XPG4_2) +typedef struct { + char *dptr; + size_t dsize; +} datum; +#else typedef struct { char *dptr; int dsize; } datum; +#endif /* * Translate DBM calls into DB calls so that DB doesn't step on the @@ -894,7 +961,7 @@ typedef enum { typedef struct entry { char *key; - void *data; + char *data; } ENTRY; /* @@ -909,7 +976,7 @@ typedef struct entry { #if defined(__cplusplus) extern "C" { #endif -int __db_hcreate __P((unsigned int)); +int __db_hcreate __P((size_t)); void __db_hdestroy __P((void)); ENTRY *__db_hsearch __P((ENTRY, ACTION)); #if defined(__cplusplus) diff --git a/db2/db/db.c b/db2/db/db.c index 8df76349d1..9951ebd944 100644 --- a/db2/db/db.c +++ b/db2/db/db.c @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. 
* - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. */ /* @@ -44,20 +44,16 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)db.c 10.45 (Sleepycat) 12/4/97"; +static const char sccsid[] = "@(#)db.c 10.57 (Sleepycat) 5/7/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES #include <sys/types.h> -#include <sys/stat.h> #include <errno.h> -#include <fcntl.h> #include <stddef.h> -#include <stdio.h> #include <stdlib.h> #include <string.h> -#include <unistd.h> #endif #include "db_int.h" @@ -71,7 +67,7 @@ static const char sccsid[] = "@(#)db.c 10.45 (Sleepycat) 12/4/97"; #include "db_am.h" #include "common_ext.h" -static int db_close __P((DB *, int)); +static int db_close __P((DB *, u_int32_t)); static int db_fd __P((DB *, int *)); /* @@ -99,7 +95,8 @@ int db_open(fname, type, flags, mode, dbenv, dbinfo, dbpp) const char *fname; DBTYPE type; - int flags, mode; + u_int32_t flags; + int mode; DB_ENV *dbenv; DB_INFO *dbinfo; DB **dbpp; @@ -108,6 +105,7 @@ db_open(fname, type, flags, mode, dbenv, dbinfo, dbpp) DB *dbp; DBT pgcookie; DB_ENV *envp, t_dbenv; + DB_MPOOL_FINFO finfo; DB_PGINFO pginfo; HASHHDR *hashm; size_t cachesize; @@ -125,10 +123,26 @@ db_open(fname, type, flags, mode, dbenv, dbinfo, dbpp) if ((ret = __db_fchk(dbenv, "db_open", flags, OKFLAGS)) != 0) return (ret); - if (dbenv != NULL && - LF_ISSET(DB_THREAD) && !F_ISSET(dbenv, DB_ENV_THREAD)) { - __db_err(dbenv, "environment not created using DB_THREAD"); - return (EINVAL); + if (dbenv != NULL) { + /* + * You can't specify threads during the db_open() if the + * environment wasn't configured with them. + */ + if (LF_ISSET(DB_THREAD) && !F_ISSET(dbenv, DB_ENV_THREAD)) { + __db_err(dbenv, + "environment not created using DB_THREAD"); + return (EINVAL); + } + + /* + * Specifying a cachesize to db_open(3), after creating an + * environment, is a common mistake. 
+ */ + if (dbinfo != NULL && dbinfo->db_cachesize != 0) { + __db_err(dbenv, + "cachesize will be ignored if environment exists"); + return (EINVAL); + } } /* Initialize for error return. */ @@ -203,7 +217,7 @@ db_open(fname, type, flags, mode, dbenv, dbinfo, dbpp) /* Fill in the default file mode. */ if (mode == 0) - mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP; + mode = __db_omode("rwrw--"); /* Check if the user wants us to swap byte order. */ if (dbinfo != NULL) @@ -230,7 +244,7 @@ db_open(fname, type, flags, mode, dbenv, dbinfo, dbpp) if (fname != NULL && fname[0] != '\0') { /* Get the real file name. */ if ((ret = __db_appname(dbenv, - DB_APP_DATA, NULL, fname, NULL, &real_name)) != 0) + DB_APP_DATA, NULL, fname, 0, NULL, &real_name)) != 0) goto err; /* @@ -455,22 +469,6 @@ empty: /* } /* - * Set and/or correct the cache size; must be a multiple of the - * page size. - */ - if (dbinfo == NULL || dbinfo->db_cachesize == 0) - cachesize = dbp->pgsize * DB_MINCACHE; - else { - cachesize = dbinfo->db_cachesize; - if (cachesize & (dbp->pgsize - 1)) - cachesize += (~cachesize & (dbp->pgsize - 1)) + 1; - if (cachesize < dbp->pgsize * DB_MINCACHE) - cachesize = dbp->pgsize * DB_MINCACHE; - if (cachesize < 20 * 1024) - cachesize = 20 * 1024; - } - - /* * If no mpool supplied by the application, attach to a local, * created buffer pool. * @@ -499,10 +497,28 @@ empty: /* envp = dbenv; restore = 1; } + + /* + * Set and/or correct the cache size; must be a multiple of + * the page size. + */ + if (dbinfo == NULL || dbinfo->db_cachesize == 0) + cachesize = dbp->pgsize * DB_MINCACHE; + else { + cachesize = dbinfo->db_cachesize; + if (cachesize & (dbp->pgsize - 1)) + cachesize += + (~cachesize & (dbp->pgsize - 1)) + 1; + if (cachesize < dbp->pgsize * DB_MINCACHE) + cachesize = dbp->pgsize * DB_MINCACHE; + if (cachesize < 20 * 1024) + cachesize = 20 * 1024; + } envp->mp_size = cachesize; + if ((ret = memp_open(NULL, DB_CREATE | DB_MPOOL_PRIVATE | (F_ISSET(dbp, DB_AM_THREAD) ? 
DB_THREAD : 0), - S_IRUSR | S_IWUSR, envp, &dbp->mp)) != 0) + __db_omode("rw----"), envp, &dbp->mp)) != 0) goto err; if (restore) *dbenv = t_dbenv; @@ -566,9 +582,18 @@ empty: /* pgcookie.data = &pginfo; pgcookie.size = sizeof(DB_PGINFO); - if ((ret = memp_fopen(dbp->mp, fname, ftype, - F_ISSET(dbp, DB_AM_RDONLY) ? DB_RDONLY : 0, 0, dbp->pgsize, - 0, &pgcookie, dbp->lock.fileid, &dbp->mpf)) != 0) + /* + * Set up additional memp_fopen information. + */ + memset(&finfo, 0, sizeof(finfo)); + finfo.ftype = ftype; + finfo.pgcookie = &pgcookie; + finfo.fileid = dbp->lock.fileid; + finfo.lsn_offset = 0; + finfo.clear_len = DB_PAGE_CLEAR_LEN; + if ((ret = memp_fopen(dbp->mp, fname, + F_ISSET(dbp, DB_AM_RDONLY) ? DB_RDONLY : 0, + 0, dbp->pgsize, &finfo, &dbp->mpf)) != 0) goto err; /* @@ -673,7 +698,7 @@ err: /* Close the file descriptor. */ static int db_close(dbp, flags) DB *dbp; - int flags; + u_int32_t flags; { DBC *dbc; DB *tdbp; @@ -734,7 +759,7 @@ db_close(dbp, flags) } /* Sync the memory pool. */ - if ((t_ret = memp_fsync(dbp->mpf)) != 0 && + if (!LF_ISSET(DB_NOSYNC) && (t_ret = memp_fsync(dbp->mpf)) != 0 && t_ret != DB_INCOMPLETE && ret == 0) ret = t_ret; @@ -796,18 +821,11 @@ db_fd(dbp, fdp) DB *dbp; int *fdp; { - /* In-memory database can't have a file descriptor. */ - if (F_ISSET(dbp, DB_AM_INMEM)) - return (ENOENT); - /* * XXX - * Truly spectacular layering violation. As we don't open the - * underlying file until we need it, it may not be initialized. + * Truly spectacular layering violation. */ - if ((*fdp = dbp->mpf->fd) == -1) - return (ENOENT); - return (0); + return (__mp_xxx_fd(dbp->mpf, fdp)); } /* @@ -821,6 +839,11 @@ __db_pgerr(dbp, pgno) DB *dbp; db_pgno_t pgno; { + /* + * Three things are certain: + * Death, taxes, and lost data. + * Guess which has occurred. 
+ */ __db_err(dbp->dbenv, "unable to create/retrieve page %lu", (u_long)pgno); return (__db_panic(dbp)); diff --git a/db2/db/db.src b/db2/db/db.src index 07d98123ac..91d8b390a1 100644 --- a/db2/db/db.src +++ b/db2/db/db.src @@ -1,11 +1,11 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. - * @(#)db.src 10.4 (Sleepycat) 11/2/97 + * + * @(#)db.src 10.6 (Sleepycat) 4/28/98 */ -#include "config.h" PREFIX db @@ -153,4 +153,7 @@ END * noop -- do nothing, but get an LSN. */ BEGIN noop +ARG fileid u_int32_t lu +ARG pgno db_pgno_t lu +POINTER prevlsn DB_LSN * lu END diff --git a/db2/db/db_auto.c b/db2/db/db_auto.c index 5d35264103..5203e0a94c 100644 --- a/db2/db/db_auto.c +++ b/db2/db/db_auto.c @@ -14,8 +14,6 @@ #include "db_page.h" #include "db_dispatch.h" #include "db_am.h" -#include "common_ext.h" - /* * PUBLIC: int __db_addrem_log * PUBLIC: __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t, @@ -107,7 +105,7 @@ int __db_addrem_log(logp, txnid, ret_lsnp, flags, else memset(bp, 0, sizeof(*pagelsn)); bp += sizeof(*pagelsn); -#ifdef DEBUG +#ifdef DIAGNOSTIC if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size) fprintf(stderr, "Error in log record length"); #endif @@ -123,22 +121,23 @@ int __db_addrem_log(logp, txnid, ret_lsnp, flags, * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); */ int -__db_addrem_print(notused1, dbtp, lsnp, notused3, notused4) +__db_addrem_print(notused1, dbtp, lsnp, notused2, notused3) DB_LOG *notused1; DBT *dbtp; DB_LSN *lsnp; - int notused3; - void *notused4; + int notused2; + void *notused3; { __db_addrem_args *argp; u_int32_t i; - int c, ret; + u_int ch; + int ret; i = 0; - c = 0; + ch = 0; notused1 = NULL; - notused3 = 0; - notused4 = NULL; + notused2 = 0; + notused3 = NULL; if ((ret = __db_addrem_read(dbtp->data, &argp)) != 0) return (ret); @@ -156,20 +155,20 @@ __db_addrem_print(notused1, dbtp, lsnp, notused3, 
notused4) printf("\tnbytes: %lu\n", (u_long)argp->nbytes); printf("\thdr: "); for (i = 0; i < argp->hdr.size; i++) { - c = ((char *)argp->hdr.data)[i]; - if (isprint(c) || c == 0xa) - putchar(c); + ch = ((u_int8_t *)argp->hdr.data)[i]; + if (isprint(ch) || ch == 0xa) + putchar(ch); else - printf("%#x ", c); + printf("%#x ", ch); } printf("\n"); printf("\tdbt: "); for (i = 0; i < argp->dbt.size; i++) { - c = ((char *)argp->dbt.data)[i]; - if (isprint(c) || c == 0xa) - putchar(c); + ch = ((u_int8_t *)argp->dbt.data)[i]; + if (isprint(ch) || ch == 0xa) + putchar(ch); else - printf("%#x ", c); + printf("%#x ", ch); } printf("\n"); printf("\tpagelsn: [%lu][%lu]\n", @@ -296,7 +295,7 @@ int __db_split_log(logp, txnid, ret_lsnp, flags, else memset(bp, 0, sizeof(*pagelsn)); bp += sizeof(*pagelsn); -#ifdef DEBUG +#ifdef DIAGNOSTIC if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size) fprintf(stderr, "Error in log record length"); #endif @@ -312,22 +311,23 @@ int __db_split_log(logp, txnid, ret_lsnp, flags, * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); */ int -__db_split_print(notused1, dbtp, lsnp, notused3, notused4) +__db_split_print(notused1, dbtp, lsnp, notused2, notused3) DB_LOG *notused1; DBT *dbtp; DB_LSN *lsnp; - int notused3; - void *notused4; + int notused2; + void *notused3; { __db_split_args *argp; u_int32_t i; - int c, ret; + u_int ch; + int ret; i = 0; - c = 0; + ch = 0; notused1 = NULL; - notused3 = 0; - notused4 = NULL; + notused2 = 0; + notused3 = NULL; if ((ret = __db_split_read(dbtp->data, &argp)) != 0) return (ret); @@ -343,11 +343,11 @@ __db_split_print(notused1, dbtp, lsnp, notused3, notused4) printf("\tpgno: %lu\n", (u_long)argp->pgno); printf("\tpageimage: "); for (i = 0; i < argp->pageimage.size; i++) { - c = ((char *)argp->pageimage.data)[i]; - if (isprint(c) || c == 0xa) - putchar(c); + ch = ((u_int8_t *)argp->pageimage.data)[i]; + if (isprint(ch) || ch == 0xa) + putchar(ch); else - printf("%#x ", c); + printf("%#x ", ch); } 
printf("\n"); printf("\tpagelsn: [%lu][%lu]\n", @@ -490,7 +490,7 @@ int __db_big_log(logp, txnid, ret_lsnp, flags, else memset(bp, 0, sizeof(*nextlsn)); bp += sizeof(*nextlsn); -#ifdef DEBUG +#ifdef DIAGNOSTIC if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size) fprintf(stderr, "Error in log record length"); #endif @@ -506,22 +506,23 @@ int __db_big_log(logp, txnid, ret_lsnp, flags, * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); */ int -__db_big_print(notused1, dbtp, lsnp, notused3, notused4) +__db_big_print(notused1, dbtp, lsnp, notused2, notused3) DB_LOG *notused1; DBT *dbtp; DB_LSN *lsnp; - int notused3; - void *notused4; + int notused2; + void *notused3; { __db_big_args *argp; u_int32_t i; - int c, ret; + u_int ch; + int ret; i = 0; - c = 0; + ch = 0; notused1 = NULL; - notused3 = 0; - notused4 = NULL; + notused2 = 0; + notused3 = NULL; if ((ret = __db_big_read(dbtp->data, &argp)) != 0) return (ret); @@ -539,11 +540,11 @@ __db_big_print(notused1, dbtp, lsnp, notused3, notused4) printf("\tnext_pgno: %lu\n", (u_long)argp->next_pgno); printf("\tdbt: "); for (i = 0; i < argp->dbt.size; i++) { - c = ((char *)argp->dbt.data)[i]; - if (isprint(c) || c == 0xa) - putchar(c); + ch = ((u_int8_t *)argp->dbt.data)[i]; + if (isprint(ch) || ch == 0xa) + putchar(ch); else - printf("%#x ", c); + printf("%#x ", ch); } printf("\n"); printf("\tpagelsn: [%lu][%lu]\n", @@ -660,7 +661,7 @@ int __db_ovref_log(logp, txnid, ret_lsnp, flags, else memset(bp, 0, sizeof(*lsn)); bp += sizeof(*lsn); -#ifdef DEBUG +#ifdef DIAGNOSTIC if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size) fprintf(stderr, "Error in log record length"); #endif @@ -676,22 +677,23 @@ int __db_ovref_log(logp, txnid, ret_lsnp, flags, * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); */ int -__db_ovref_print(notused1, dbtp, lsnp, notused3, notused4) +__db_ovref_print(notused1, dbtp, lsnp, notused2, notused3) DB_LOG *notused1; DBT *dbtp; DB_LSN *lsnp; - int notused3; - void *notused4; + 
int notused2; + void *notused3; { __db_ovref_args *argp; u_int32_t i; - int c, ret; + u_int ch; + int ret; i = 0; - c = 0; + ch = 0; notused1 = NULL; - notused3 = 0; - notused4 = NULL; + notused2 = 0; + notused3 = NULL; if ((ret = __db_ovref_read(dbtp->data, &argp)) != 0) return (ret); @@ -823,7 +825,7 @@ int __db_relink_log(logp, txnid, ret_lsnp, flags, else memset(bp, 0, sizeof(*lsn_next)); bp += sizeof(*lsn_next); -#ifdef DEBUG +#ifdef DIAGNOSTIC if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size) fprintf(stderr, "Error in log record length"); #endif @@ -839,22 +841,23 @@ int __db_relink_log(logp, txnid, ret_lsnp, flags, * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); */ int -__db_relink_print(notused1, dbtp, lsnp, notused3, notused4) +__db_relink_print(notused1, dbtp, lsnp, notused2, notused3) DB_LOG *notused1; DBT *dbtp; DB_LSN *lsnp; - int notused3; - void *notused4; + int notused2; + void *notused3; { __db_relink_args *argp; u_int32_t i; - int c, ret; + u_int ch; + int ret; i = 0; - c = 0; + ch = 0; notused1 = NULL; - notused3 = 0; - notused4 = NULL; + notused2 = 0; + notused3 = NULL; if ((ret = __db_relink_read(dbtp->data, &argp)) != 0) return (ret); @@ -985,7 +988,7 @@ int __db_addpage_log(logp, txnid, ret_lsnp, flags, else memset(bp, 0, sizeof(*nextlsn)); bp += sizeof(*nextlsn); -#ifdef DEBUG +#ifdef DIAGNOSTIC if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size) fprintf(stderr, "Error in log record length"); #endif @@ -1001,22 +1004,23 @@ int __db_addpage_log(logp, txnid, ret_lsnp, flags, * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); */ int -__db_addpage_print(notused1, dbtp, lsnp, notused3, notused4) +__db_addpage_print(notused1, dbtp, lsnp, notused2, notused3) DB_LOG *notused1; DBT *dbtp; DB_LSN *lsnp; - int notused3; - void *notused4; + int notused2; + void *notused3; { __db_addpage_args *argp; u_int32_t i; - int c, ret; + u_int ch; + int ret; i = 0; - c = 0; + ch = 0; notused1 = NULL; - notused3 = 0; - notused4 
= NULL; + notused2 = 0; + notused3 = NULL; if ((ret = __db_addpage_read(dbtp->data, &argp)) != 0) return (ret); @@ -1159,7 +1163,7 @@ int __db_debug_log(logp, txnid, ret_lsnp, flags, } memcpy(bp, &arg_flags, sizeof(arg_flags)); bp += sizeof(arg_flags); -#ifdef DEBUG +#ifdef DIAGNOSTIC if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size) fprintf(stderr, "Error in log record length"); #endif @@ -1175,22 +1179,23 @@ int __db_debug_log(logp, txnid, ret_lsnp, flags, * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); */ int -__db_debug_print(notused1, dbtp, lsnp, notused3, notused4) +__db_debug_print(notused1, dbtp, lsnp, notused2, notused3) DB_LOG *notused1; DBT *dbtp; DB_LSN *lsnp; - int notused3; - void *notused4; + int notused2; + void *notused3; { __db_debug_args *argp; u_int32_t i; - int c, ret; + u_int ch; + int ret; i = 0; - c = 0; + ch = 0; notused1 = NULL; - notused3 = 0; - notused4 = NULL; + notused2 = 0; + notused3 = NULL; if ((ret = __db_debug_read(dbtp->data, &argp)) != 0) return (ret); @@ -1203,30 +1208,30 @@ __db_debug_print(notused1, dbtp, lsnp, notused3, notused4) (u_long)argp->prev_lsn.offset); printf("\top: "); for (i = 0; i < argp->op.size; i++) { - c = ((char *)argp->op.data)[i]; - if (isprint(c) || c == 0xa) - putchar(c); + ch = ((u_int8_t *)argp->op.data)[i]; + if (isprint(ch) || ch == 0xa) + putchar(ch); else - printf("%#x ", c); + printf("%#x ", ch); } printf("\n"); printf("\tfileid: %lu\n", (u_long)argp->fileid); printf("\tkey: "); for (i = 0; i < argp->key.size; i++) { - c = ((char *)argp->key.data)[i]; - if (isprint(c) || c == 0xa) - putchar(c); + ch = ((u_int8_t *)argp->key.data)[i]; + if (isprint(ch) || ch == 0xa) + putchar(ch); else - printf("%#x ", c); + printf("%#x ", ch); } printf("\n"); printf("\tdata: "); for (i = 0; i < argp->data.size; i++) { - c = ((char *)argp->data.data)[i]; - if (isprint(c) || c == 0xa) - putchar(c); + ch = ((u_int8_t *)argp->data.data)[i]; + if (isprint(ch) || ch == 0xa) + putchar(ch); else - 
printf("%#x ", c); + printf("%#x ", ch); } printf("\n"); printf("\targ_flags: %lu\n", (u_long)argp->arg_flags); @@ -1280,13 +1285,18 @@ __db_debug_read(recbuf, argpp) /* * PUBLIC: int __db_noop_log - * PUBLIC: __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t)); + * PUBLIC: __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t, + * PUBLIC: u_int32_t, db_pgno_t, DB_LSN *)); */ -int __db_noop_log(logp, txnid, ret_lsnp, flags) +int __db_noop_log(logp, txnid, ret_lsnp, flags, + fileid, pgno, prevlsn) DB_LOG *logp; DB_TXN *txnid; DB_LSN *ret_lsnp; u_int32_t flags; + u_int32_t fileid; + db_pgno_t pgno; + DB_LSN * prevlsn; { DBT logrec; DB_LSN *lsnp, null_lsn; @@ -1302,7 +1312,10 @@ int __db_noop_log(logp, txnid, ret_lsnp, flags) lsnp = &null_lsn; } else lsnp = &txnid->last_lsn; - logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN); + logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN) + + sizeof(fileid) + + sizeof(pgno) + + sizeof(*prevlsn); if ((logrec.data = (void *)__db_malloc(logrec.size)) == NULL) return (ENOMEM); @@ -1313,7 +1326,16 @@ int __db_noop_log(logp, txnid, ret_lsnp, flags) bp += sizeof(txn_num); memcpy(bp, lsnp, sizeof(DB_LSN)); bp += sizeof(DB_LSN); -#ifdef DEBUG + memcpy(bp, &fileid, sizeof(fileid)); + bp += sizeof(fileid); + memcpy(bp, &pgno, sizeof(pgno)); + bp += sizeof(pgno); + if (prevlsn != NULL) + memcpy(bp, prevlsn, sizeof(*prevlsn)); + else + memset(bp, 0, sizeof(*prevlsn)); + bp += sizeof(*prevlsn); +#ifdef DIAGNOSTIC if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size) fprintf(stderr, "Error in log record length"); #endif @@ -1329,22 +1351,23 @@ int __db_noop_log(logp, txnid, ret_lsnp, flags) * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); */ int -__db_noop_print(notused1, dbtp, lsnp, notused3, notused4) +__db_noop_print(notused1, dbtp, lsnp, notused2, notused3) DB_LOG *notused1; DBT *dbtp; DB_LSN *lsnp; - int notused3; - void *notused4; + int notused2; + void *notused3; { __db_noop_args *argp; u_int32_t i; - int 
c, ret; + u_int ch; + int ret; i = 0; - c = 0; + ch = 0; notused1 = NULL; - notused3 = 0; - notused4 = NULL; + notused2 = 0; + notused3 = NULL; if ((ret = __db_noop_read(dbtp->data, &argp)) != 0) return (ret); @@ -1355,6 +1378,10 @@ __db_noop_print(notused1, dbtp, lsnp, notused3, notused4) (u_long)argp->txnid->txnid, (u_long)argp->prev_lsn.file, (u_long)argp->prev_lsn.offset); + printf("\tfileid: %lu\n", (u_long)argp->fileid); + printf("\tpgno: %lu\n", (u_long)argp->pgno); + printf("\tprevlsn: [%lu][%lu]\n", + (u_long)argp->prevlsn.file, (u_long)argp->prevlsn.offset); printf("\n"); __db_free(argp); return (0); @@ -1383,6 +1410,12 @@ __db_noop_read(recbuf, argpp) bp += sizeof(argp->txnid->txnid); memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN)); bp += sizeof(DB_LSN); + memcpy(&argp->fileid, bp, sizeof(argp->fileid)); + bp += sizeof(argp->fileid); + memcpy(&argp->pgno, bp, sizeof(argp->pgno)); + bp += sizeof(argp->pgno); + memcpy(&argp->prevlsn, bp, sizeof(argp->prevlsn)); + bp += sizeof(argp->prevlsn); *argpp = argp; return (0); } diff --git a/db2/db/db_conv.c b/db2/db/db_conv.c index e9c4bf90bd..8b5cf5f4a7 100644 --- a/db2/db/db_conv.c +++ b/db2/db/db_conv.c @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. */ /* @@ -44,7 +44,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)db_conv.c 10.8 (Sleepycat) 1/8/98"; +static const char sccsid[] = "@(#)db_conv.c 10.13 (Sleepycat) 4/26/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -160,6 +160,13 @@ __db_convert(pg, pp, pagesize, pgin) } } + + /* + * The offsets in the inp array are used to determine + * the size of entries on a page; therefore they + * cannot be converted until we've done all the + * entries. 
+ */ if (!pgin) for (i = 0; i < NUM_ENT(h); i++) M_16_SWAP(h->inp[i]); @@ -179,8 +186,8 @@ __db_convert(pg, pp, pagesize, pgin) case B_DUPLICATE: case B_OVERFLOW: bo = (BOVERFLOW *)bk; - M_32_SWAP(bo->tlen); M_32_SWAP(bo->pgno); + M_32_SWAP(bo->tlen); break; } @@ -194,17 +201,18 @@ __db_convert(pg, pp, pagesize, pgin) M_16_SWAP(h->inp[i]); bi = GET_BINTERNAL(h, i); + M_16_SWAP(bi->len); + M_32_SWAP(bi->pgno); + M_32_SWAP(bi->nrecs); + switch (B_TYPE(bi->type)) { case B_KEYDATA: - M_16_SWAP(bi->len); - M_32_SWAP(bi->pgno); - M_32_SWAP(bi->nrecs); break; case B_DUPLICATE: case B_OVERFLOW: - bo = (BOVERFLOW *)bi; - M_32_SWAP(bo->tlen); + bo = (BOVERFLOW *)bi->data; M_32_SWAP(bo->pgno); + M_32_SWAP(bo->tlen); break; } @@ -224,6 +232,7 @@ __db_convert(pg, pp, pagesize, pgin) if (!pgin) M_16_SWAP(h->inp[i]); } + break; case P_OVERFLOW: case P_INVALID: /* Nothing to do. */ diff --git a/db2/db/db_dispatch.c b/db2/db/db_dispatch.c index 736575adfc..8645948614 100644 --- a/db2/db/db_dispatch.c +++ b/db2/db/db_dispatch.c @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. */ /* @@ -43,14 +43,13 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)db_dispatch.c 10.9 (Sleepycat) 1/17/98"; +static const char sccsid[] = "@(#)db_dispatch.c 10.14 (Sleepycat) 5/3/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES #include <sys/types.h> #include <errno.h> -#include <fcntl.h> #include <stddef.h> #include <stdlib.h> #include <string.h> @@ -62,6 +61,7 @@ static const char sccsid[] = "@(#)db_dispatch.c 10.9 (Sleepycat) 1/17/98"; #include "db_am.h" #include "common_ext.h" #include "log_auto.h" +#include "txn_auto.h" /* * Data structures to manage the DB dispatch table. The dispatch table @@ -114,7 +114,7 @@ __db_dispatch(logp, db, lsnp, redo, info) * seen it, then we call the appropriate recovery routine * in "abort mode". 
*/ - if (rectype == DB_log_register || + if (rectype == DB_log_register || rectype == DB_txn_ckp || __db_txnlist_find(info, txnid) == DB_NOTFOUND) return ((dispatch_table[rectype])(logp, db, lsnp, TXN_UNDO, info)); @@ -124,7 +124,7 @@ __db_dispatch(logp, db, lsnp, redo, info) * In the forward pass, if we haven't seen the transaction, * do nothing, else recovery it. */ - if (rectype == DB_log_register || + if (rectype == DB_log_register || rectype == DB_txn_ckp || __db_txnlist_find(info, txnid) != DB_NOTFOUND) return ((dispatch_table[rectype])(logp, db, lsnp, TXN_REDO, info)); @@ -188,14 +188,14 @@ int __db_txnlist_init(retp) void *retp; { - __db_txnhead *headp; + DB_TXNHEAD *headp; - if ((headp = (struct __db_txnhead *) - __db_malloc(sizeof(struct __db_txnhead))) == NULL) + if ((headp = (DB_TXNHEAD *)__db_malloc(sizeof(DB_TXNHEAD))) == NULL) return (ENOMEM); LIST_INIT(&headp->head); headp->maxid = 0; + headp->generation = 1; *(void **)retp = headp; return (0); @@ -212,25 +212,26 @@ __db_txnlist_add(listp, txnid) void *listp; u_int32_t txnid; { - __db_txnhead *hp; - __db_txnlist *elp; + DB_TXNHEAD *hp; + DB_TXNLIST *elp; - if ((elp = (__db_txnlist *)__db_malloc(sizeof(__db_txnlist))) == NULL) + if ((elp = (DB_TXNLIST *)__db_malloc(sizeof(DB_TXNLIST))) == NULL) return (ENOMEM); elp->txnid = txnid; - hp = (struct __db_txnhead *)listp; + hp = (DB_TXNHEAD *)listp; LIST_INSERT_HEAD(&hp->head, elp, links); if (txnid > hp->maxid) hp->maxid = txnid; + elp->generation = hp->generation; return (0); } /* * __db_txnlist_find -- - * Checks to see if txnid is in the txnid list, returns 1 if found, - * 0 if not found. + * Checks to see if a txnid with the current generation is in the + * txnid list. 
* * PUBLIC: int __db_txnlist_find __P((void *, u_int32_t)); */ @@ -239,45 +240,19 @@ __db_txnlist_find(listp, txnid) void *listp; u_int32_t txnid; { - __db_txnhead *hp; - __db_txnlist *p; + DB_TXNHEAD *hp; + DB_TXNLIST *p; - if ((hp = (struct __db_txnhead *)listp) == NULL) + if ((hp = (DB_TXNHEAD *)listp) == NULL) return (DB_NOTFOUND); - if (hp->maxid < txnid) { - hp->maxid = txnid; - return (DB_NOTFOUND); - } - for (p = hp->head.lh_first; p != NULL; p = p->links.le_next) - if (p->txnid == txnid) + if (p->txnid == txnid && hp->generation == p->generation) return (0); return (DB_NOTFOUND); } -#ifdef DEBUG -/* - * __db_txnlist_print -- - * Print out the transaction list. - * - * PUBLIC: void __db_txnlist_print __P((void *)); - */ -void -__db_txnlist_print(listp) - void *listp; -{ - __db_txnhead *hp; - __db_txnlist *p; - - hp = (struct __db_txnhead *)listp; - printf("Maxid: %lu\n", (u_long)hp->maxid); - for (p = hp->head.lh_first; p != NULL; p = p->links.le_next) - printf("TXNID: %lu\n", (u_long)p->txnid); -} -#endif - /* * __db_txnlist_end -- * Discard transaction linked list. @@ -288,13 +263,61 @@ void __db_txnlist_end(listp) void *listp; { - __db_txnhead *hp; - __db_txnlist *p; + DB_TXNHEAD *hp; + DB_TXNLIST *p; - hp = (struct __db_txnhead *)listp; + hp = (DB_TXNHEAD *)listp; while ((p = LIST_FIRST(&hp->head)) != LIST_END(&hp->head)) { LIST_REMOVE(p, links); __db_free(p); } __db_free(listp); } + +/* + * __db_txnlist_gen -- + * Change the current generation number. + * + * PUBLIC: void __db_txnlist_gen __P((void *, int)); + */ +void +__db_txnlist_gen(listp, incr) + void *listp; + int incr; +{ + DB_TXNHEAD *hp; + + /* + * During recovery generation numbers keep track of how many "restart" + * checkpoints we've seen. Restart checkpoints occur whenever we take + * a checkpoint and there are no outstanding transactions. When that + * happens, we can reset transaction IDs back to 1. 
It always happens + * at recovery and it prevents us from exhausting the transaction IDs + * name space. + */ + hp = (DB_TXNHEAD *)listp; + hp->generation += incr; +} + +#ifdef DEBUG +/* + * __db_txnlist_print -- + * Print out the transaction list. + * + * PUBLIC: void __db_txnlist_print __P((void *)); + */ +void +__db_txnlist_print(listp) + void *listp; +{ + DB_TXNHEAD *hp; + DB_TXNLIST *p; + + hp = (DB_TXNHEAD *)listp; + printf("Maxid: %lu Generation: %lu\n", (u_long)hp->maxid, + (u_long)hp->generation); + for (p = hp->head.lh_first; p != NULL; p = p->links.le_next) + printf("TXNID: %lu(%lu)\n", (u_long)p->txnid, + (u_long)p->generation); +} +#endif diff --git a/db2/db/db_dup.c b/db2/db/db_dup.c index 59dfb85b92..6379fc1729 100644 --- a/db2/db/db_dup.c +++ b/db2/db/db_dup.c @@ -1,35 +1,27 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. */ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)db_dup.c 10.11 (Sleepycat) 1/8/98"; +static const char sccsid[] = "@(#)db_dup.c 10.18 (Sleepycat) 5/31/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES #include <sys/types.h> -#include <sys/stat.h> #include <errno.h> -#include <fcntl.h> -#include <stddef.h> -#include <stdio.h> -#include <stdlib.h> #include <string.h> -#include <unistd.h> #endif #include "db_int.h" #include "db_page.h" -#include "db_swap.h" #include "btree.h" #include "db_am.h" -#include "common_ext.h" static int __db_addpage __P((DB *, PAGE **, db_indx_t *, int (*)(DB *, u_int32_t, PAGE **))); @@ -209,9 +201,8 @@ __db_dsplit(dbp, hp, indxp, size, newfunc) PAGE *h, *np, *tp; BKEYDATA *bk; DBT page_dbt; - db_indx_t indx, nindex, oindex, sum; - db_indx_t halfbytes, i, lastsum; - int did_indx, ret, s; + db_indx_t halfbytes, i, indx, lastsum, nindex, oindex, s, sum; + int did_indx, ret; h = *hp; indx = *indxp; @@ -219,7 +210,7 @@ __db_dsplit(dbp, hp, indxp, size, newfunc) /* 
Create a temporary page to do compaction onto. */ if ((tp = (PAGE *)__db_malloc(dbp->pgsize)) == NULL) return (ENOMEM); -#ifdef DEBUG +#ifdef DIAGNOSTIC memset(tp, 0xff, dbp->pgsize); #endif /* Create new page for the split. */ @@ -239,6 +230,7 @@ __db_dsplit(dbp, hp, indxp, size, newfunc) for (sum = 0, lastsum = 0, i = 0; i < NUM_ENT(h); i++) { if (i == indx) { sum += size; + did_indx = 1; if (lastsum < halfbytes && sum >= halfbytes) { /* We've crossed the halfway point. */ if ((db_indx_t)(halfbytes - lastsum) < @@ -252,7 +244,6 @@ __db_dsplit(dbp, hp, indxp, size, newfunc) } *indxp = i; lastsum = sum; - did_indx = 1; } if (B_TYPE(GET_BKEYDATA(h, i)->type) == B_KEYDATA) sum += BKEYDATA_SIZE(GET_BKEYDATA(h, i)->len); diff --git a/db2/db/db_overflow.c b/db2/db/db_overflow.c index 8c6619f228..d28740dcbe 100644 --- a/db2/db/db_overflow.c +++ b/db2/db/db_overflow.c @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. */ /* @@ -47,22 +47,19 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)db_overflow.c 10.7 (Sleepycat) 11/2/97"; +static const char sccsid[] = "@(#)db_overflow.c 10.11 (Sleepycat) 5/7/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES #include <sys/types.h> #include <errno.h> -#include <stdio.h> -#include <stdlib.h> #include <string.h> #endif #include "db_int.h" #include "db_page.h" #include "db_am.h" -#include "common_ext.h" /* * Big key/data code. @@ -91,9 +88,9 @@ __db_goff(dbp, dbt, tlen, pgno, bpp, bpsz) { PAGE *h; db_indx_t bytes; - int ret; u_int32_t curoff, needed, start; u_int8_t *p, *src; + int ret; /* * Check if the buffer is big enough; if it is not and we are @@ -259,13 +256,13 @@ __db_poff(dbp, dbt, pgnop, newfunc) * __db_ovref -- * Increment/decrement the reference count on an overflow page. 
* - * PUBLIC: int __db_ovref __P((DB *, db_pgno_t, int)); + * PUBLIC: int __db_ovref __P((DB *, db_pgno_t, int32_t)); */ int __db_ovref(dbp, pgno, adjust) DB *dbp; db_pgno_t pgno; - int adjust; + int32_t adjust; { PAGE *h; int ret; @@ -277,7 +274,7 @@ __db_ovref(dbp, pgno, adjust) if (DB_LOGGING(dbp)) if ((ret = __db_ovref_log(dbp->dbenv->lg_info, dbp->txn, - &LSN(h), 0, dbp->log_fileid, h->pgno, (int32_t)adjust, + &LSN(h), 0, dbp->log_fileid, h->pgno, adjust, &LSN(h))) != 0) return (ret); OV_REF(h) += adjust; @@ -353,8 +350,8 @@ __db_moff(dbp, dbt, pgno) { PAGE *pagep; u_int32_t cmp_bytes, key_left; - int ret; u_int8_t *p1, *p2; + int ret; /* While there are both keys to compare. */ for (ret = 0, p1 = dbt->data, diff --git a/db2/db/db_pr.c b/db2/db/db_pr.c index 1135a9e738..a294cdd135 100644 --- a/db2/db/db_pr.c +++ b/db2/db/db_pr.c @@ -1,14 +1,14 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. 
*/ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)db_pr.c 10.20 (Sleepycat) 1/8/98"; +static const char sccsid[] = "@(#)db_pr.c 10.29 (Sleepycat) 5/23/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -16,7 +16,6 @@ static const char sccsid[] = "@(#)db_pr.c 10.20 (Sleepycat) 1/8/98"; #include <ctype.h> #include <errno.h> -#include <stdio.h> #include <stdlib.h> #include <string.h> #include <unistd.h> @@ -160,7 +159,7 @@ __db_prdb(dbp) } fprintf(fp, "%s ", t); - __db_prflags(dbp->flags, fn); + __db_prflags(dbp->flags, fn, fp); fprintf(fp, "\n"); return (0); @@ -179,12 +178,16 @@ __db_prbtree(dbp) static const FN mfn[] = { { BTM_DUP, "duplicates" }, { BTM_RECNO, "recno" }, + { BTM_RECNUM, "btree:records" }, + { BTM_FIXEDLEN, "recno:fixed-length" }, + { BTM_RENUMBER, "recno:renumber" }, { 0 }, }; BTMETA *mp; BTREE *t; EPG *epg; FILE *fp; + PAGE *h; RECNO *rp; db_pgno_t i; int ret; @@ -193,19 +196,29 @@ __db_prbtree(dbp) fp = __db_prinit(NULL); (void)fprintf(fp, "%s\nOn-page metadata:\n", DB_LINE); - i = PGNO_METADATA; + i = PGNO_METADATA; if ((ret = __bam_pget(dbp, (PAGE **)&mp, &i, 0)) != 0) return (ret); (void)fprintf(fp, "magic %#lx\n", (u_long)mp->magic); - (void)fprintf(fp, "version %lu\n", (u_long)mp->version); + (void)fprintf(fp, "version %#lx\n", (u_long)mp->version); (void)fprintf(fp, "pagesize %lu\n", (u_long)mp->pagesize); (void)fprintf(fp, "maxkey: %lu minkey: %lu\n", (u_long)mp->maxkey, (u_long)mp->minkey); - (void)fprintf(fp, "free %lu\n", (u_long)mp->free); - (void)fprintf(fp, "flags %lu", (u_long)mp->flags); - __db_prflags(mp->flags, mfn); + + (void)fprintf(fp, "free %lu", (u_long)mp->free); + for (i = mp->free; i != PGNO_INVALID;) { + if ((ret = __bam_pget(dbp, &h, &i, 0)) != 0) + return (ret); + i = h->next_pgno; + (void)memp_fput(dbp->mpf, h, 0); + (void)fprintf(fp, ", %lu", (u_long)i); + } + (void)fprintf(fp, "\n"); + + (void)fprintf(fp, "flags %#lx", (u_long)mp->flags); + __db_prflags(mp->flags, mfn, fp); 
(void)fprintf(fp, "\n"); (void)memp_fput(dbp->mpf, mp, 0); @@ -576,7 +589,7 @@ __db_isbad(h, die) BKEYDATA *bk; FILE *fp; db_indx_t i; - int type; + u_int type; fp = __db_prinit(NULL); @@ -668,7 +681,8 @@ __db_pr(p, len) u_int32_t len; { FILE *fp; - int i, lastch; + u_int lastch; + int i; fp = __db_prinit(NULL); @@ -681,7 +695,7 @@ __db_pr(p, len) if (isprint(*p) || *p == '\n') fprintf(fp, "%c", *p); else - fprintf(fp, "%#x", (u_int)*p); + fprintf(fp, "0x%.2x", (u_int)*p); } if (len > 20) { fprintf(fp, "..."); @@ -693,6 +707,50 @@ __db_pr(p, len) } /* + * __db_prdbt -- + * Print out a DBT data element. + * + * PUBLIC: int __db_prdbt __P((DBT *, int, FILE *)); + */ +int +__db_prdbt(dbtp, checkprint, fp) + DBT *dbtp; + int checkprint; + FILE *fp; +{ + static const char hex[] = "0123456789abcdef"; + u_int8_t *p; + u_int32_t len; + + /* + * !!! + * This routine is the routine that dumps out items in the format + * used by db_dump(1) and db_load(1). This means that the format + * cannot change. + */ + if (checkprint) { + for (len = dbtp->size, p = dbtp->data; len--; ++p) + if (isprint(*p)) { + if (*p == '\\' && fprintf(fp, "\\") != 1) + return (EIO); + if (fprintf(fp, "%c", *p) != 1) + return (EIO); + } else + if (fprintf(fp, "\\%c%c", + hex[(u_int8_t)(*p & 0xf0) >> 4], + hex[*p & 0x0f]) != 3) + return (EIO); + } else + for (len = dbtp->size, p = dbtp->data; len--; ++p) + if (fprintf(fp, "%c%c", + hex[(u_int8_t)(*p & 0xf0) >> 4], + hex[*p & 0x0f]) != 2) + return (EIO); + + return (fprintf(fp, "\n") == 1 ? 0 : EIO); +} + +/* * __db_proff -- * Print out an off-page element. */ @@ -721,23 +779,21 @@ __db_proff(vp) * __db_prflags -- * Print out flags values. 
* - * PUBLIC: void __db_prflags __P((u_int32_t, const FN *)); + * PUBLIC: void __db_prflags __P((u_int32_t, const FN *, FILE *)); */ void -__db_prflags(flags, fn) +__db_prflags(flags, fn, fp) u_int32_t flags; FN const *fn; -{ FILE *fp; +{ const FN *fnp; int found; const char *sep; - fp = __db_prinit(NULL); - sep = " ("; for (found = 0, fnp = fn; fnp->mask != 0; ++fnp) - if (fnp->mask & flags) { + if (LF_ISSET(fnp->mask)) { fprintf(fp, "%s%s", sep, fnp->name); sep = ", "; found = 1; diff --git a/db2/db/db_rec.c b/db2/db/db_rec.c index 48e09e6f23..fe7c807384 100644 --- a/db2/db/db_rec.c +++ b/db2/db/db_rec.c @@ -1,30 +1,25 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. */ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)db_rec.c 10.12 (Sleepycat) 1/8/98"; +static const char sccsid[] = "@(#)db_rec.c 10.16 (Sleepycat) 4/28/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES #include <sys/types.h> -#endif -#include <ctype.h> -#include <errno.h> -#include <stddef.h> -#include <stdlib.h> #include <string.h> +#endif #include "db_int.h" #include "shqueue.h" #include "db_page.h" -#include "db_dispatch.h" #include "log.h" #include "hash.h" #include "btree.h" @@ -48,7 +43,8 @@ __db_addrem_recover(logp, dbtp, lsnp, redo, info) DB *file_dbp, *mdbp; DB_MPOOLFILE *mpf; PAGE *pagep; - int change, cmp_n, cmp_p, ret; + u_int32_t change; + int cmp_n, cmp_p, ret; REC_PRINT(__db_addrem_print); REC_INTRO(__db_addrem_read); @@ -193,7 +189,8 @@ __db_big_recover(logp, dbtp, lsnp, redo, info) DB *file_dbp, *mdbp; DB_MPOOLFILE *mpf; PAGE *pagep; - int change, cmp_n, cmp_p, ret; + u_int32_t change; + int cmp_n, cmp_p, ret; REC_PRINT(__db_big_print); REC_INTRO(__db_big_read); @@ -503,7 +500,8 @@ __db_addpage_recover(logp, dbtp, lsnp, redo, info) DB *file_dbp, *mdbp; DB_MPOOLFILE *mpf; PAGE *pagep; - int change, cmp_n, cmp_p, ret; + u_int32_t change; + 
int cmp_n, cmp_p, ret; REC_PRINT(__db_addpage_print); REC_INTRO(__db_addpage_read); @@ -601,8 +599,7 @@ __db_debug_recover(logp, dbtp, lsnp, redo, info) * __db_noop_recover -- * Recovery function for noop. * - * PUBLIC: int __db_noop_recover - * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); + * PUBLIC: int __db_noop_recover __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); */ int __db_noop_recover(logp, dbtp, lsnp, redo, info) @@ -613,16 +610,30 @@ __db_noop_recover(logp, dbtp, lsnp, redo, info) void *info; { __db_noop_args *argp; - int ret; - - COMPQUIET(redo, 0); - COMPQUIET(logp, NULL); + DB *file_dbp, *mdbp; + DB_MPOOLFILE *mpf; + PAGE *pagep; + u_int32_t change; + int cmp_n, cmp_p, ret; REC_PRINT(__db_noop_print); - REC_NOOP_INTRO(__db_noop_read); + REC_INTRO(__db_noop_read); + + if ((ret = memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) + goto out; + cmp_n = log_compare(lsnp, &LSN(pagep)); + cmp_p = log_compare(&LSN(pagep), &argp->prevlsn); + change = 0; + if (cmp_p == 0 && redo) { + LSN(pagep) = *lsnp; + change = DB_MPOOL_DIRTY; + } else if (cmp_n == 0 && !redo) { + LSN(pagep) = argp->prevlsn; + change = DB_MPOOL_DIRTY; + } *lsnp = argp->prev_lsn; - ret = 0; + ret = memp_fput(mpf, pagep, change); - REC_NOOP_CLOSE; +out: REC_CLOSE; } diff --git a/db2/db/db_ret.c b/db2/db/db_ret.c index 65441aa45a..9d9b599ad6 100644 --- a/db2/db/db_ret.c +++ b/db2/db/db_ret.c @@ -1,29 +1,26 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. 
*/ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)db_ret.c 10.10 (Sleepycat) 11/28/97"; +static const char sccsid[] = "@(#)db_ret.c 10.13 (Sleepycat) 5/7/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES #include <sys/types.h> #include <errno.h> -#include <stdio.h> -#include <stdlib.h> #include <string.h> #endif #include "db_int.h" #include "db_page.h" #include "btree.h" -#include "hash.h" #include "db_am.h" /* diff --git a/db2/db/db_thread.c b/db2/db/db_thread.c index d9086918dd..73e2a51286 100644 --- a/db2/db/db_thread.c +++ b/db2/db/db_thread.c @@ -1,27 +1,25 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. */ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)db_thread.c 8.13 (Sleepycat) 10/25/97"; +static const char sccsid[] = "@(#)db_thread.c 8.15 (Sleepycat) 4/26/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES #include <sys/types.h> #include <errno.h> -#include <stdlib.h> #include <string.h> #endif #include "db_int.h" #include "db_page.h" -#include "shqueue.h" #include "db_am.h" static int __db_getlockid __P((DB *, DB *)); diff --git a/db2/db185/db185.c b/db2/db185/db185.c index 7f6a16de49..893dfa3c7f 100644 --- a/db2/db185/db185.c +++ b/db2/db185/db185.c @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. */ @@ -9,9 +9,9 @@ #ifndef lint static const char copyright[] = -"@(#) Copyright (c) 1997\n\ +"@(#) Copyright (c) 1996, 1997, 1998\n\ Sleepycat Software Inc. 
All rights reserved.\n"; -static const char sccsid[] = "@(#)db185.c 8.14 (Sleepycat) 10/25/97"; +static const char sccsid[] = "@(#)db185.c 8.17 (Sleepycat) 5/7/98"; #endif #ifndef NO_SYSTEM_INCLUDES @@ -20,7 +20,6 @@ static const char sccsid[] = "@(#)db185.c 8.14 (Sleepycat) 10/25/97"; #include <errno.h> #include <fcntl.h> #include <stdlib.h> -#include <stdio.h> #include <string.h> #include <unistd.h> #endif @@ -114,6 +113,16 @@ __dbopen(file, oflags, mode, type, openinfo) * and DB 2.0 doesn't. * * !!! + * Setting the file name to NULL specifies that we're creating + * a temporary backing file, in DB 2.X. If we're opening the + * DB file read-only, change the flags to read-write, because + * temporary backing files cannot be opened read-only, and DB + * 2.X will return an error. We are cheating here -- if the + * application does a put on the database, it will succeed -- + * although that would be a stupid thing for the application + * to do. + * + * !!! * Note, the file name in DB 1.85 was a const -- we don't do * that in DB 2.0, so do that cast. */ @@ -122,6 +131,10 @@ __dbopen(file, oflags, mode, type, openinfo) (void)__os_close(__os_open(file, oflags, mode)); dbinfop->re_source = (char *)file; file = NULL; + + if (O_RDONLY) + oflags &= ~O_RDONLY; + oflags |= O_RDWR; } if ((ri = openinfo) != NULL) { @@ -181,15 +194,14 @@ __dbopen(file, oflags, mode, type, openinfo) * Store the returned pointer to the real DB 2.0 structure in the * internal pointer. Ugly, but we're not going for pretty, here. */ - if ((__set_errno(db_open(file, - type, __db_oflags(oflags), mode, NULL, dbinfop, &dbp))) != 0) { + if ((errno = db_open(file, + type, __db_oflags(oflags), mode, NULL, dbinfop, &dbp)) != 0) { __db_free(db185p); return (NULL); } /* Create the cursor used for sequential ops. 
*/ - if ((__set_errno(dbp->cursor(dbp, NULL, &((DB185 *)db185p)->dbc))) - != 0) { + if ((errno = dbp->cursor(dbp, NULL, &((DB185 *)db185p)->dbc)) != 0) { s_errno = errno; (void)dbp->close(dbp, 0); __db_free(db185p); diff --git a/db2/db185/db185_int.h b/db2/db185/db185_int.h index f3e24b9026..f7d7af5347 100644 --- a/db2/db185/db185_int.h +++ b/db2/db185/db185_int.h @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1997 + * Copyright (c) 1997, 1998 * Sleepycat Software. All rights reserved. */ /* @@ -40,7 +40,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * @(#)db185_int.h 8.4 (Sleepycat) 7/27/97 + * @(#)db185_int.h 8.7 (Sleepycat) 4/10/98 */ #ifndef _DB185_H_ @@ -90,11 +90,11 @@ typedef struct __db185 { /* Structure used to pass parameters to the btree routines. */ typedef struct { #define R_DUP 0x01 /* duplicate keys */ - u_long flags; - u_int cachesize; /* bytes to cache */ - int maxkeypage; /* maximum keys per page */ - int minkeypage; /* minimum keys per page */ - u_int psize; /* page size */ + u_int32_t flags; + u_int32_t cachesize; /* bytes to cache */ + u_int32_t maxkeypage; /* maximum keys per page */ + u_int32_t minkeypage; /* minimum keys per page */ + u_int32_t psize; /* page size */ int (*compare) /* comparison function */ __P((const DBT *, const DBT *)); size_t (*prefix) /* prefix function */ @@ -104,10 +104,10 @@ typedef struct { /* Structure used to pass parameters to the hashing routines. 
*/ typedef struct { - u_int bsize; /* bucket size */ - u_int ffactor; /* fill factor */ - u_int nelem; /* number of elements */ - u_int cachesize; /* bytes to cache */ + u_int32_t bsize; /* bucket size */ + u_int32_t ffactor; /* fill factor */ + u_int32_t nelem; /* number of elements */ + u_int32_t cachesize; /* bytes to cache */ u_int32_t /* hash function */ (*hash) __P((const void *, size_t)); int lorder; /* byte order */ @@ -118,9 +118,9 @@ typedef struct { #define R_FIXEDLEN 0x01 /* fixed-length records */ #define R_NOKEY 0x02 /* key not required */ #define R_SNAPSHOT 0x04 /* snapshot the input */ - u_long flags; - u_int cachesize; /* bytes to cache */ - u_int psize; /* page size */ + u_int32_t flags; + u_int32_t cachesize; /* bytes to cache */ + u_int32_t psize; /* page size */ int lorder; /* byte order */ size_t reclen; /* record length (fixed-length records) */ u_char bval; /* delimiting byte (variable-length records */ diff --git a/db2/db_185.h b/db2/db_185.h index f3b02c71e9..0e1b87879b 100644 --- a/db2/db_185.h +++ b/db2/db_185.h @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. */ /* @@ -36,7 +36,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * @(#)db_185.h.src 8.5 (Sleepycat) 1/15/98 + * @(#)db_185.h.src 8.7 (Sleepycat) 4/10/98 */ #ifndef _DB_185_H_ @@ -127,11 +127,11 @@ typedef struct __db { /* Structure used to pass parameters to the btree routines. 
*/ typedef struct { #define R_DUP 0x01 /* duplicate keys */ - u_long flags; - u_int cachesize; /* bytes to cache */ - int maxkeypage; /* maximum keys per page */ - int minkeypage; /* minimum keys per page */ - u_int psize; /* page size */ + u_int32_t flags; + u_int32_t cachesize; /* bytes to cache */ + u_int32_t maxkeypage; /* maximum keys per page */ + u_int32_t minkeypage; /* minimum keys per page */ + u_int32_t psize; /* page size */ int (*compare) /* comparison function */ __P((const DBT *, const DBT *)); size_t (*prefix) /* prefix function */ @@ -144,10 +144,10 @@ typedef struct { /* Structure used to pass parameters to the hashing routines. */ typedef struct { - u_int bsize; /* bucket size */ - u_int ffactor; /* fill factor */ - u_int nelem; /* number of elements */ - u_int cachesize; /* bytes to cache */ + u_int32_t bsize; /* bucket size */ + u_int32_t ffactor; /* fill factor */ + u_int32_t nelem; /* number of elements */ + u_int32_t cachesize; /* bytes to cache */ u_int32_t /* hash function */ (*hash) __P((const void *, size_t)); int lorder; /* byte order */ @@ -158,9 +158,9 @@ typedef struct { #define R_FIXEDLEN 0x01 /* fixed-length records */ #define R_NOKEY 0x02 /* key not required */ #define R_SNAPSHOT 0x04 /* snapshot the input */ - u_long flags; - u_int cachesize; /* bytes to cache */ - u_int psize; /* page size */ + u_int32_t flags; + u_int32_t cachesize; /* bytes to cache */ + u_int32_t psize; /* page size */ int lorder; /* byte order */ size_t reclen; /* record length (fixed-length records) */ u_char bval; /* delimiting byte (variable-length records */ diff --git a/db2/db_int.h b/db2/db_int.h index eacd3f9806..b48b104a91 100644 --- a/db2/db_int.h +++ b/db2/db_int.h @@ -1,10 +1,10 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. 
* - * @(#)db_int.h.src 10.41 (Sleepycat) 1/8/98 + * @(#)db_int.h.src 10.62 (Sleepycat) 5/23/98 */ #ifndef _DB_INTERNAL_H_ @@ -12,8 +12,6 @@ #include "db.h" /* Standard DB include file. */ #include "queue.h" -#include "os_func.h" -#include "os_ext.h" /******************************************************* * General purpose constants and macros. @@ -77,8 +75,8 @@ #define R_ADDR(base, offset) ((void *)((u_int8_t *)((base)->addr) + offset)) #define R_OFFSET(base, p) ((u_int8_t *)(p) - (u_int8_t *)(base)->addr) -/* Free and free-string macros that overwrite memory during debugging. */ -#ifdef DEBUG +/* Free and free-string macros that overwrite memory. */ +#ifdef DIAGNOSTIC #undef FREE #define FREE(p, len) { \ memset(p, 0xff, len); \ @@ -117,36 +115,41 @@ typedef struct __fn { #undef DB_LINE #define DB_LINE "=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=" +/* Global variables. */ +typedef struct __db_globals { + int db_mutexlocks; /* DB_MUTEXLOCKS */ + int db_region_anon; /* DB_REGION_ANON, DB_REGION_NAME */ + int db_region_init; /* DB_REGION_INIT */ + int db_tsl_spins; /* DB_TSL_SPINS */ + int db_pageyield; /* DB_PAGEYIELD */ +} DB_GLOBALS; +extern DB_GLOBALS __db_global_values; +#define DB_GLOBAL(v) __db_global_values.v + /* Unused, or not-used-yet variable. "Shut that bloody compiler up!" */ #define COMPQUIET(n, v) (n) = (v) +/* + * Win16 needs specific syntax on callback functions. Nobody else cares. + */ +#ifndef DB_CALLBACK +#define DB_CALLBACK /* Nothing. */ +#endif + /******************************************************* * Files. *******************************************************/ -#ifndef MAXPATHLEN /* Maximum path length. */ -#ifdef PATH_MAX -#define MAXPATHLEN PATH_MAX -#else + /* + * We use 1024 as the maximum path length. It's too hard to figure out what + * the real path length is, as it was traditionally stored in <sys/param.h>, + * and that file isn't always available. 
+ */ +#undef MAXPATHLEN #define MAXPATHLEN 1024 -#endif -#endif #define PATH_DOT "." /* Current working directory. */ #define PATH_SEPARATOR "/" /* Path separator character. */ -#ifndef S_IRUSR /* UNIX specific file permissions. */ -#define S_IRUSR 0000400 /* R for owner */ -#define S_IWUSR 0000200 /* W for owner */ -#define S_IRGRP 0000040 /* R for group */ -#define S_IWGRP 0000020 /* W for group */ -#define S_IROTH 0000004 /* R for other */ -#define S_IWOTH 0000002 /* W for other */ -#endif - -#ifndef S_ISDIR /* UNIX specific: directory test. */ -#define S_ISDIR(m) ((m & 0170000) == 0040000) -#endif - /******************************************************* * Mutex support. *******************************************************/ @@ -176,12 +179,12 @@ typedef unsigned char tsl_t; typedef struct _db_mutex_t { #ifdef HAVE_SPINLOCKS tsl_t tsl_resource; /* Resource test and set. */ -#ifdef DEBUG - u_long pid; /* Lock holder: 0 or process pid. */ +#ifdef DIAGNOSTIC + u_int32_t pid; /* Lock holder: 0 or process pid. */ #endif #else u_int32_t off; /* Backing file offset. */ - u_long pid; /* Lock holder: 0 or process pid. */ + u_int32_t pid; /* Lock holder: 0 or process pid. */ #endif u_int32_t spins; /* Spins before block. */ u_int32_t mutex_set_wait; /* Granted after wait. */ @@ -195,11 +198,11 @@ typedef struct _db_mutex_t { *******************************************************/ /* Lock/unlock a DB thread. */ #define DB_THREAD_LOCK(dbp) \ - (F_ISSET(dbp, DB_AM_THREAD) ? \ - __db_mutex_lock((db_mutex_t *)(dbp)->mutexp, -1) : 0) + if (F_ISSET(dbp, DB_AM_THREAD)) \ + (void)__db_mutex_lock((db_mutex_t *)(dbp)->mutexp, -1); #define DB_THREAD_UNLOCK(dbp) \ - (F_ISSET(dbp, DB_AM_THREAD) ? \ - __db_mutex_unlock((db_mutex_t *)(dbp)->mutexp, -1) : 0) + if (F_ISSET(dbp, DB_AM_THREAD)) \ + (void)__db_mutex_unlock((db_mutex_t *)(dbp)->mutexp, -1); /* Btree/recno local statistics structure. 
*/ struct __db_bt_lstat; typedef struct __db_bt_lstat DB_BTREE_LSTAT; @@ -228,7 +231,7 @@ typedef enum { } APPNAME; /******************************************************* - * Regions. + * Shared memory regions. *******************************************************/ /* * The shared memory regions share an initial structure so that the general @@ -240,16 +243,69 @@ typedef enum { */ typedef struct _rlayout { db_mutex_t lock; /* Region mutex. */ +#define DB_REGIONMAGIC 0x120897 + u_int32_t valid; /* Valid magic number. */ u_int32_t refcnt; /* Region reference count. */ size_t size; /* Region length. */ int majver; /* Major version number. */ int minver; /* Minor version number. */ int patch; /* Patch version number. */ +#define INVALID_SEGID -1 + int segid; /* shmget(2) ID, or Win16 segment ID. */ -#define DB_R_DELETED 0x01 /* Region was deleted. */ +#define REGION_ANONYMOUS 0x01 /* Region is/should be in anon mem. */ u_int32_t flags; } RLAYOUT; +/* + * DB creates all regions on 4K boundaries out of sheer paranoia, so that + * we don't make the underlying VM unhappy. + */ +#define DB_VMPAGESIZE (4 * 1024) +#define DB_ROUNDOFF(i) { \ + (i) += DB_VMPAGESIZE - 1; \ + (i) -= (i) % DB_VMPAGESIZE; \ +} + +/* + * The interface to region attach is nasty, there is a lot of complex stuff + * going on, which has to be retained between create/attach and detach. The + * REGINFO structure keeps track of it. + */ +struct __db_reginfo; typedef struct __db_reginfo REGINFO; +struct __db_reginfo { + /* Arguments. */ + DB_ENV *dbenv; /* Region naming info. */ + APPNAME appname; /* Region naming info. */ + char *path; /* Region naming info. */ + const char *file; /* Region naming info. */ + int mode; /* Region mode, if a file. */ + size_t size; /* Region size. */ + u_int32_t dbflags; /* Region file open flags, if a file. */ + + /* Results. */ + char *name; /* Region name. */ + void *addr; /* Region address. */ + int fd; /* Fcntl(2) locking file descriptor. 
+ NB: this is only valid if a regular + file is backing the shared region, + and mmap(2) is being used to map it + into our address space. */ + int segid; /* shmget(2) ID, or Win16 segment ID. */ + + /* Shared flags. */ +/* 0x0001 COMMON MASK with RLAYOUT structure. */ +#define REGION_CANGROW 0x0002 /* Can grow. */ +#define REGION_CREATED 0x0004 /* Created. */ +#define REGION_HOLDINGSYS 0x0008 /* Holding system resources. */ +#define REGION_LASTDETACH 0x0010 /* Delete on last detach. */ +#define REGION_MALLOC 0x0020 /* Created in malloc'd memory. */ +#define REGION_PRIVATE 0x0040 /* Private to thread/process. */ +#define REGION_REMOVED 0x0080 /* Already deleted. */ +#define REGION_SIZEDEF 0x0100 /* Use default region size if exists. */ + u_int32_t flags; +}; + /******************************************************* * Mpool. *******************************************************/ @@ -281,7 +337,7 @@ typedef struct __dbpginfo { #define DB_LOGGING(dbp) \ (F_ISSET(dbp, DB_AM_LOGGING) && !F_ISSET(dbp, DB_AM_RECOVER)) -#ifdef DEBUG +#ifdef DIAGNOSTIC /* * Debugging macro to log operations. * If DEBUG_WOP is defined, log operations that modify the database. @@ -318,7 +374,7 @@ typedef struct __dbpginfo { #else #define DEBUG_LREAD(D, T, O, K, A, F) #define DEBUG_LWRITE(D, T, O, K, A, F) -#endif /* DEBUG */ +#endif /* DIAGNOSTIC */ /******************************************************* * Transactions and recovery. @@ -339,4 +395,8 @@ struct __db_txn { size_t off; /* Detail structure within region. */ TAILQ_ENTRY(__db_txn) links; }; + +#include "os_func.h" +#include "os_ext.h" + #endif /* !_DB_INTERNAL_H_ */ diff --git a/db2/dbm/dbm.c b/db2/dbm/dbm.c index 81f4bba69c..261fe81ff2 100644 --- a/db2/dbm/dbm.c +++ b/db2/dbm/dbm.c @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. 
*/ /* @@ -47,15 +47,14 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)dbm.c 10.10 (Sleepycat) 1/16/98"; +static const char sccsid[] = "@(#)dbm.c 10.16 (Sleepycat) 5/7/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES -#include <sys/param.h> +#include <sys/types.h> #include <errno.h> #include <fcntl.h> -#include <stdio.h> #include <string.h> #endif @@ -82,7 +81,7 @@ __db_dbm_init(file) if (__cur_db != NULL) (void)dbm_close(__cur_db); if ((__cur_db = - dbm_open(file, O_CREAT | O_RDWR, S_IRUSR | S_IWUSR)) != NULL) + dbm_open(file, O_CREAT | O_RDWR, __db_omode("rw----"))) != NULL) return (0); if ((__cur_db = dbm_open(file, O_RDONLY, 0)) != NULL) return (0); @@ -244,19 +243,19 @@ __db_ndbm_fetch(db, key) { DBT _key, _data; datum data; - int status; + int ret; memset(&_key, 0, sizeof(DBT)); memset(&_data, 0, sizeof(DBT)); _key.size = key.dsize; _key.data = key.dptr; - status = db->get((DB *)db, NULL, &_key, &_data, 0); - if (status) { - data.dptr = NULL; - data.dsize = 0; - } else { + if ((ret = db->get((DB *)db, NULL, &_key, &_data, 0)) == 0) { data.dptr = _data.data; data.dsize = _data.size; + } else { + data.dptr = NULL; + data.dsize = 0; + __set_errno (ret == DB_NOTFOUND ? ENOENT : ret); } return (data); } @@ -273,7 +272,7 @@ __db_ndbm_firstkey(db) { DBT _key, _data; datum key; - int status; + int ret; DBC *cp; @@ -285,13 +284,13 @@ __db_ndbm_firstkey(db) memset(&_key, 0, sizeof(DBT)); memset(&_data, 0, sizeof(DBT)); - status = (cp->c_get)(cp, &_key, &_data, DB_FIRST); - if (status) { - key.dptr = NULL; - key.dsize = 0; - } else { + if ((ret = (cp->c_get)(cp, &_key, &_data, DB_FIRST)) == 0) { key.dptr = _key.data; key.dsize = _key.size; + } else { + key.dptr = NULL; + key.dsize = 0; + __set_errno (ret == DB_NOTFOUND ? 
ENOENT : ret); } return (key); } @@ -309,7 +308,7 @@ __db_ndbm_nextkey(db) DBC *cp; DBT _key, _data; datum key; - int status; + int ret; if ((cp = TAILQ_FIRST(&db->curs_queue)) == NULL) if ((errno = db->cursor(db, NULL, &cp)) != 0) { @@ -319,13 +318,13 @@ __db_ndbm_nextkey(db) memset(&_key, 0, sizeof(DBT)); memset(&_data, 0, sizeof(DBT)); - status = (cp->c_get)(cp, &_key, &_data, DB_NEXT); - if (status) { - key.dptr = NULL; - key.dsize = 0; - } else { + if ((ret = (cp->c_get)(cp, &_key, &_data, DB_NEXT)) == 0) { key.dptr = _key.data; key.dsize = _key.size; + } else { + key.dptr = NULL; + key.dsize = 0; + __set_errno (ret == DB_NOTFOUND ? ENOENT : ret); } return (key); } @@ -347,14 +346,10 @@ __db_ndbm_delete(db, key) memset(&_key, 0, sizeof(DBT)); _key.data = key.dptr; _key.size = key.dsize; - ret = (((DB *)db)->del)((DB *)db, NULL, &_key, 0); - if (ret < 0) - errno = ENOENT; - else if (ret > 0) { - errno = ret; - ret = -1; - } - return (ret); + if ((ret = (((DB *)db)->del)((DB *)db, NULL, &_key, 0)) == 0) + return (0); + errno = ret == DB_NOTFOUND ? ENOENT : ret; + return (-1); } weak_alias (__db_ndbm_delete, dbm_delete) @@ -371,6 +366,7 @@ __db_ndbm_store(db, key, data, flags) int flags; { DBT _key, _data; + int ret; memset(&_key, 0, sizeof(DBT)); memset(&_data, 0, sizeof(DBT)); @@ -378,8 +374,13 @@ __db_ndbm_store(db, key, data, flags) _key.size = key.dsize; _data.data = data.dptr; _data.size = data.dsize; - return (db->put((DB *)db, - NULL, &_key, &_data, (flags == DBM_INSERT) ? DB_NOOVERWRITE : 0)); + if ((ret = db->put((DB *)db, NULL, + &_key, &_data, flags == DBM_INSERT ? DB_NOOVERWRITE : 0)) == 0) + return (0); + if (ret == DB_KEYEXIST) + return (1); + errno = ret; + return (-1); } weak_alias (__db_ndbm_store, dbm_store) diff --git a/db2/hash/hash.c b/db2/hash/hash.c index 5193ece561..5e0660b727 100644 --- a/db2/hash/hash.c +++ b/db2/hash/hash.c @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. 
* - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. */ /* @@ -47,23 +47,19 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)hash.c 10.36 (Sleepycat) 1/8/98"; +static const char sccsid[] = "@(#)hash.c 10.45 (Sleepycat) 5/11/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES #include <sys/types.h> -#include <sys/stat.h> #include <errno.h> -#include <fcntl.h> -#include <stdio.h> #include <stdlib.h> #include <string.h> -#include <unistd.h> #endif -#include "shqueue.h" #include "db_int.h" +#include "shqueue.h" #include "db_page.h" #include "db_am.h" #include "db_ext.h" @@ -71,20 +67,20 @@ static const char sccsid[] = "@(#)hash.c 10.36 (Sleepycat) 1/8/98"; #include "log.h" static int __ham_c_close __P((DBC *)); -static int __ham_c_del __P((DBC *, int)); -static int __ham_c_get __P((DBC *, DBT *, DBT *, int)); -static int __ham_c_put __P((DBC *, DBT *, DBT *, int)); +static int __ham_c_del __P((DBC *, u_int32_t)); +static int __ham_c_get __P((DBC *, DBT *, DBT *, u_int32_t)); +static int __ham_c_put __P((DBC *, DBT *, DBT *, u_int32_t)); static int __ham_c_init __P((DB *, DB_TXN *, DBC **)); static int __ham_cursor __P((DB *, DB_TXN *, DBC **)); -static int __ham_delete __P((DB *, DB_TXN *, DBT *, int)); -static int __ham_dup_return __P((HTAB *, HASH_CURSOR *, DBT *, int)); -static int __ham_get __P((DB *, DB_TXN *, DBT *, DBT *, int)); -static void __ham_init_htab __P((HTAB *, u_int)); +static int __ham_delete __P((DB *, DB_TXN *, DBT *, u_int32_t)); +static int __ham_dup_return __P((HTAB *, HASH_CURSOR *, DBT *, u_int32_t)); +static int __ham_get __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t)); +static void __ham_init_htab __P((HTAB *, u_int32_t, u_int32_t)); static int __ham_lookup __P((HTAB *, HASH_CURSOR *, const DBT *, u_int32_t, db_lockmode_t)); static int __ham_overwrite __P((HTAB *, HASH_CURSOR *, DBT *)); -static int __ham_put __P((DB *, DB_TXN *, DBT *, DBT *, int)); -static int 
__ham_sync __P((DB *, int)); +static int __ham_put __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t)); +static int __ham_sync __P((DB *, u_int32_t)); /************************** INTERFACE ROUTINES ***************************/ /* OPEN/CLOSE */ @@ -175,9 +171,9 @@ __ham_open(dbp, dbinfo) goto out; } - hashp->hdr->ffactor = - dbinfo != NULL && dbinfo->h_ffactor ? dbinfo->h_ffactor : 0; - __ham_init_htab(hashp, dbinfo != NULL ? dbinfo->h_nelem : 0); + __ham_init_htab(hashp, + dbinfo != NULL ? dbinfo->h_nelem : 0, + dbinfo != NULL ? dbinfo->h_ffactor : 0); if (F_ISSET(dbp, DB_AM_DUP)) F_SET(hashp->hdr, DB_HASH_DUP); if ((ret = __ham_dirty_page(hashp, (PAGE *)hashp->hdr)) != 0) @@ -230,7 +226,7 @@ out: (void)__ham_close(dbp); } /* - * PUBLIC: int __ham_close __P((DB *)); + * PUBLIC: int __ham_close __P((DB *)); */ int __ham_close(dbp) @@ -264,13 +260,14 @@ __ham_close(dbp) * Returns 0 on No Error */ static void -__ham_init_htab(hashp, nelem) +__ham_init_htab(hashp, nelem, ffactor) HTAB *hashp; - u_int nelem; + u_int32_t nelem, ffactor; { int32_t l2, nbuckets; - hashp->hdr->nelem = 0; + memset(hashp->hdr, 0, sizeof(HASHHDR)); + hashp->hdr->ffactor = ffactor; hashp->hdr->pagesize = hashp->dbp->pgsize; ZERO_LSN(hashp->hdr->lsn); hashp->hdr->magic = DB_HASHMAGIC; @@ -287,8 +284,6 @@ __ham_init_htab(hashp, nelem) nbuckets = 1 << l2; - hashp->hdr->spares[l2] = 0; - hashp->hdr->spares[l2 + 1] = 0; hashp->hdr->ovfl_point = l2; hashp->hdr->last_freed = PGNO_INVALID; @@ -310,7 +305,7 @@ __ham_init_htab(hashp, nelem) static int __ham_sync(dbp, flags) DB *dbp; - int flags; + u_int32_t flags; { int ret; @@ -342,10 +337,9 @@ __ham_get(dbp, txn, key, data, flags) DB_TXN *txn; DBT *key; DBT *data; - int flags; + u_int32_t flags; { DB *ldbp; - DBC *cp; HTAB *hashp; HASH_CURSOR *hcp; int ret, t_ret; @@ -362,7 +356,6 @@ __ham_get(dbp, txn, key, data, flags) hashp = (HTAB *)ldbp->internal; SET_LOCKER(ldbp, txn); GET_META(ldbp, hashp); - cp = TAILQ_FIRST(&ldbp->curs_queue); 
hashp->hash_accesses++; hcp = (HASH_CURSOR *)TAILQ_FIRST(&ldbp->curs_queue)->internal; @@ -386,14 +379,14 @@ __ham_put(dbp, txn, key, data, flags) DB_TXN *txn; DBT *key; DBT *data; - int flags; + u_int32_t flags; { DB *ldbp; - HTAB *hashp; - HASH_CURSOR *hcp; DBT tmp_val, *myval; - int ret, t_ret; + HASH_CURSOR *hcp; + HTAB *hashp; u_int32_t nbytes; + int ret, t_ret; DEBUG_LWRITE(dbp, txn, "ham_put", key, data, flags); if ((ret = __db_putchk(dbp, key, data, @@ -531,7 +524,7 @@ __ham_delete(dbp, txn, key, flags) DB *dbp; DB_TXN *txn; DBT *key; - int flags; + u_int32_t flags; { DB *ldbp; HTAB *hashp; @@ -539,7 +532,8 @@ __ham_delete(dbp, txn, key, flags) int ret, t_ret; DEBUG_LWRITE(dbp, txn, "ham_delete", key, NULL, flags); - if ((ret = __db_delchk(dbp, flags, F_ISSET(dbp, DB_AM_RDONLY))) != 0) + if ((ret = + __db_delchk(dbp, key, flags, F_ISSET(dbp, DB_AM_RDONLY))) != 0) return (ret); ldbp = dbp; @@ -639,12 +633,12 @@ __ham_c_iclose(dbp, dbc) static int __ham_c_del(cursor, flags) DBC *cursor; - int flags; + u_int32_t flags; { DB *ldbp; - HTAB *hashp; HASH_CURSOR *hcp; HASH_CURSOR save_curs; + HTAB *hashp; db_pgno_t ppgno, chg_pgno; int ret, t_ret; @@ -756,7 +750,7 @@ __ham_c_del(cursor, flags) normal: ret = __ham_del_pair(hashp, hcp, 1); out: if ((t_ret = __ham_item_done(hashp, hcp, ret == 0)) != 0 && ret == 0) - t_ret = ret; + ret = t_ret; if (ret != 0) *hcp = save_curs; RELEASE_META(hashp->dbp, hashp); @@ -770,7 +764,7 @@ __ham_c_get(cursor, key, data, flags) DBC *cursor; DBT *key; DBT *data; - int flags; + u_int32_t flags; { DB *ldbp; HTAB *hashp; @@ -805,7 +799,7 @@ __ham_c_get(cursor, key, data, flags) ret = __ham_item_prev(hashp, hcp, DB_LOCK_READ); break; } - /* FALL THROUGH */ + /* FALLTHROUGH */ case DB_LAST: ret = __ham_item_last(hashp, hcp, DB_LOCK_READ); break; @@ -893,7 +887,7 @@ __ham_c_get(cursor, key, data, flags) } } out1: if ((t_ret = __ham_item_done(hashp, hcp, 0)) != 0 && ret == 0) - t_ret = ret; + ret = t_ret; out: if (ret) *hcp = save_curs; 
RELEASE_META(hashp->dbp, hashp); @@ -907,17 +901,17 @@ __ham_c_put(cursor, key, data, flags) DBC *cursor; DBT *key; DBT *data; - int flags; + u_int32_t flags; { DB *ldbp; - HTAB *hashp; HASH_CURSOR *hcp, save_curs; - int ret, t_ret; + HTAB *hashp; u_int32_t nbytes; + int ret, t_ret; DEBUG_LWRITE(cursor->dbp, cursor->txn, "ham_c_put", flags == DB_KEYFIRST || flags == DB_KEYLAST ? key : NULL, - NULL, flags); + data, flags); ldbp = cursor->dbp; if (F_ISSET(cursor->dbp, DB_AM_THREAD) && (ret = __db_gethandle(cursor->dbp, __ham_hdup, &ldbp)) != 0) @@ -1087,14 +1081,14 @@ __ham_dup_return(hashp, hcp, val, flags) HTAB *hashp; HASH_CURSOR *hcp; DBT *val; - int flags; + u_int32_t flags; { PAGE *pp; DBT *myval, tmp_val; db_indx_t ndx; db_pgno_t pgno; u_int8_t *hk, type; - int indx, ret; + int ret; db_indx_t len; /* Check for duplicate and return the first one. */ @@ -1145,7 +1139,6 @@ __ham_dup_return(hashp, hcp, val, flags) memcpy(&pgno, HOFFDUP_PGNO(P_ENTRY(hcp->pagep, ndx)), sizeof(db_pgno_t)); if (flags == DB_LAST || flags == DB_PREV) { - indx = (int)hcp->dndx; if ((ret = __db_dend(hashp->dbp, pgno, &hcp->dpagep)) != 0) return (ret); @@ -1451,14 +1444,15 @@ __ham_c_update(hcp, chg_pgno, len, add, is_dup) * __ham_hdup -- * This function gets called when we create a duplicate handle for a * threaded DB. It should create the private part of the DB structure. + * * PUBLIC: int __ham_hdup __P((DB *, DB *)); */ int __ham_hdup(orig, new) DB *orig, *new; { - HTAB *hashp; DBC *curs; + HTAB *hashp; int ret; if ((hashp = (HTAB *)__db_malloc(sizeof(HTAB))) == NULL) diff --git a/db2/hash/hash.src b/db2/hash/hash.src index 8cbcee73f7..8a512830b8 100644 --- a/db2/hash/hash.src +++ b/db2/hash/hash.src @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. 
*/ /* @@ -43,11 +43,9 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * @(#)hash.src 10.2 (Sleepycat) 11/2/97 + * @(#)hash.src 10.3 (Sleepycat) 4/10/98 */ -#include "config.h" - /* * This is the source file used to create the logging functions for the * hash package. Each access method (or set of routines wishing to register diff --git a/db2/hash/hash_auto.c b/db2/hash/hash_auto.c index 830ea46a4e..41b1ebed01 100644 --- a/db2/hash/hash_auto.c +++ b/db2/hash/hash_auto.c @@ -15,8 +15,6 @@ #include "db_dispatch.h" #include "hash.h" #include "db_am.h" -#include "common_ext.h" - /* * PUBLIC: int __ham_insdel_log * PUBLIC: __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t, @@ -104,7 +102,7 @@ int __ham_insdel_log(logp, txnid, ret_lsnp, flags, memcpy(bp, data->data, data->size); bp += data->size; } -#ifdef DEBUG +#ifdef DIAGNOSTIC if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size) fprintf(stderr, "Error in log record length"); #endif @@ -120,22 +118,23 @@ int __ham_insdel_log(logp, txnid, ret_lsnp, flags, * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); */ int -__ham_insdel_print(notused1, dbtp, lsnp, notused3, notused4) +__ham_insdel_print(notused1, dbtp, lsnp, notused2, notused3) DB_LOG *notused1; DBT *dbtp; DB_LSN *lsnp; - int notused3; - void *notused4; + int notused2; + void *notused3; { __ham_insdel_args *argp; u_int32_t i; - int c, ret; + u_int ch; + int ret; i = 0; - c = 0; + ch = 0; notused1 = NULL; - notused3 = 0; - notused4 = NULL; + notused2 = 0; + notused3 = NULL; if ((ret = __ham_insdel_read(dbtp->data, &argp)) != 0) return (ret); @@ -154,20 +153,20 @@ __ham_insdel_print(notused1, dbtp, lsnp, notused3, notused4) (u_long)argp->pagelsn.file, (u_long)argp->pagelsn.offset); printf("\tkey: "); for (i = 0; i < argp->key.size; i++) { - c = ((char *)argp->key.data)[i]; - if (isprint(c) || c == 0xa) - putchar(c); + ch = ((u_int8_t *)argp->key.data)[i]; + if (isprint(ch) || ch == 0xa) + putchar(ch); else - 
printf("%#x ", c); + printf("%#x ", ch); } printf("\n"); printf("\tdata: "); for (i = 0; i < argp->data.size; i++) { - c = ((char *)argp->data.data)[i]; - if (isprint(c) || c == 0xa) - putchar(c); + ch = ((u_int8_t *)argp->data.data)[i]; + if (isprint(ch) || ch == 0xa) + putchar(ch); else - printf("%#x ", c); + printf("%#x ", ch); } printf("\n"); printf("\n"); @@ -300,7 +299,7 @@ int __ham_newpage_log(logp, txnid, ret_lsnp, flags, else memset(bp, 0, sizeof(*nextlsn)); bp += sizeof(*nextlsn); -#ifdef DEBUG +#ifdef DIAGNOSTIC if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size) fprintf(stderr, "Error in log record length"); #endif @@ -316,22 +315,23 @@ int __ham_newpage_log(logp, txnid, ret_lsnp, flags, * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); */ int -__ham_newpage_print(notused1, dbtp, lsnp, notused3, notused4) +__ham_newpage_print(notused1, dbtp, lsnp, notused2, notused3) DB_LOG *notused1; DBT *dbtp; DB_LSN *lsnp; - int notused3; - void *notused4; + int notused2; + void *notused3; { __ham_newpage_args *argp; u_int32_t i; - int c, ret; + u_int ch; + int ret; i = 0; - c = 0; + ch = 0; notused1 = NULL; - notused3 = 0; - notused4 = NULL; + notused2 = 0; + notused3 = NULL; if ((ret = __ham_newpage_read(dbtp->data, &argp)) != 0) return (ret); @@ -462,7 +462,7 @@ int __ham_splitmeta_log(logp, txnid, ret_lsnp, flags, else memset(bp, 0, sizeof(*metalsn)); bp += sizeof(*metalsn); -#ifdef DEBUG +#ifdef DIAGNOSTIC if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size) fprintf(stderr, "Error in log record length"); #endif @@ -478,22 +478,23 @@ int __ham_splitmeta_log(logp, txnid, ret_lsnp, flags, * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); */ int -__ham_splitmeta_print(notused1, dbtp, lsnp, notused3, notused4) +__ham_splitmeta_print(notused1, dbtp, lsnp, notused2, notused3) DB_LOG *notused1; DBT *dbtp; DB_LSN *lsnp; - int notused3; - void *notused4; + int notused2; + void *notused3; { __ham_splitmeta_args *argp; u_int32_t i; - int c, 
ret; + u_int ch; + int ret; i = 0; - c = 0; + ch = 0; notused1 = NULL; - notused3 = 0; - notused4 = NULL; + notused2 = 0; + notused3 = NULL; if ((ret = __ham_splitmeta_read(dbtp->data, &argp)) != 0) return (ret); @@ -622,7 +623,7 @@ int __ham_splitdata_log(logp, txnid, ret_lsnp, flags, else memset(bp, 0, sizeof(*pagelsn)); bp += sizeof(*pagelsn); -#ifdef DEBUG +#ifdef DIAGNOSTIC if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size) fprintf(stderr, "Error in log record length"); #endif @@ -638,22 +639,23 @@ int __ham_splitdata_log(logp, txnid, ret_lsnp, flags, * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); */ int -__ham_splitdata_print(notused1, dbtp, lsnp, notused3, notused4) +__ham_splitdata_print(notused1, dbtp, lsnp, notused2, notused3) DB_LOG *notused1; DBT *dbtp; DB_LSN *lsnp; - int notused3; - void *notused4; + int notused2; + void *notused3; { __ham_splitdata_args *argp; u_int32_t i; - int c, ret; + u_int ch; + int ret; i = 0; - c = 0; + ch = 0; notused1 = NULL; - notused3 = 0; - notused4 = NULL; + notused2 = 0; + notused3 = NULL; if ((ret = __ham_splitdata_read(dbtp->data, &argp)) != 0) return (ret); @@ -669,11 +671,11 @@ __ham_splitdata_print(notused1, dbtp, lsnp, notused3, notused4) printf("\tpgno: %lu\n", (u_long)argp->pgno); printf("\tpageimage: "); for (i = 0; i < argp->pageimage.size; i++) { - c = ((char *)argp->pageimage.data)[i]; - if (isprint(c) || c == 0xa) - putchar(c); + ch = ((u_int8_t *)argp->pageimage.data)[i]; + if (isprint(ch) || ch == 0xa) + putchar(ch); else - printf("%#x ", c); + printf("%#x ", ch); } printf("\n"); printf("\tpagelsn: [%lu][%lu]\n", @@ -813,7 +815,7 @@ int __ham_replace_log(logp, txnid, ret_lsnp, flags, } memcpy(bp, &makedup, sizeof(makedup)); bp += sizeof(makedup); -#ifdef DEBUG +#ifdef DIAGNOSTIC if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size) fprintf(stderr, "Error in log record length"); #endif @@ -829,22 +831,23 @@ int __ham_replace_log(logp, txnid, ret_lsnp, flags, * PUBLIC: 
__P((DB_LOG *, DBT *, DB_LSN *, int, void *)); */ int -__ham_replace_print(notused1, dbtp, lsnp, notused3, notused4) +__ham_replace_print(notused1, dbtp, lsnp, notused2, notused3) DB_LOG *notused1; DBT *dbtp; DB_LSN *lsnp; - int notused3; - void *notused4; + int notused2; + void *notused3; { __ham_replace_args *argp; u_int32_t i; - int c, ret; + u_int ch; + int ret; i = 0; - c = 0; + ch = 0; notused1 = NULL; - notused3 = 0; - notused4 = NULL; + notused2 = 0; + notused3 = NULL; if ((ret = __ham_replace_read(dbtp->data, &argp)) != 0) return (ret); @@ -863,20 +866,20 @@ __ham_replace_print(notused1, dbtp, lsnp, notused3, notused4) printf("\toff: %ld\n", (long)argp->off); printf("\tolditem: "); for (i = 0; i < argp->olditem.size; i++) { - c = ((char *)argp->olditem.data)[i]; - if (isprint(c) || c == 0xa) - putchar(c); + ch = ((u_int8_t *)argp->olditem.data)[i]; + if (isprint(ch) || ch == 0xa) + putchar(ch); else - printf("%#x ", c); + printf("%#x ", ch); } printf("\n"); printf("\tnewitem: "); for (i = 0; i < argp->newitem.size; i++) { - c = ((char *)argp->newitem.data)[i]; - if (isprint(c) || c == 0xa) - putchar(c); + ch = ((u_int8_t *)argp->newitem.data)[i]; + if (isprint(ch) || ch == 0xa) + putchar(ch); else - printf("%#x ", c); + printf("%#x ", ch); } printf("\n"); printf("\tmakedup: %lu\n", (u_long)argp->makedup); @@ -1014,7 +1017,7 @@ int __ham_newpgno_log(logp, txnid, ret_lsnp, flags, else memset(bp, 0, sizeof(*metalsn)); bp += sizeof(*metalsn); -#ifdef DEBUG +#ifdef DIAGNOSTIC if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size) fprintf(stderr, "Error in log record length"); #endif @@ -1030,22 +1033,23 @@ int __ham_newpgno_log(logp, txnid, ret_lsnp, flags, * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); */ int -__ham_newpgno_print(notused1, dbtp, lsnp, notused3, notused4) +__ham_newpgno_print(notused1, dbtp, lsnp, notused2, notused3) DB_LOG *notused1; DBT *dbtp; DB_LSN *lsnp; - int notused3; - void *notused4; + int notused2; + void *notused3; { 
__ham_newpgno_args *argp; u_int32_t i; - int c, ret; + u_int ch; + int ret; i = 0; - c = 0; + ch = 0; notused1 = NULL; - notused3 = 0; - notused4 = NULL; + notused2 = 0; + notused3 = NULL; if ((ret = __ham_newpgno_read(dbtp->data, &argp)) != 0) return (ret); @@ -1182,7 +1186,7 @@ int __ham_ovfl_log(logp, txnid, ret_lsnp, flags, else memset(bp, 0, sizeof(*metalsn)); bp += sizeof(*metalsn); -#ifdef DEBUG +#ifdef DIAGNOSTIC if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size) fprintf(stderr, "Error in log record length"); #endif @@ -1198,22 +1202,23 @@ int __ham_ovfl_log(logp, txnid, ret_lsnp, flags, * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); */ int -__ham_ovfl_print(notused1, dbtp, lsnp, notused3, notused4) +__ham_ovfl_print(notused1, dbtp, lsnp, notused2, notused3) DB_LOG *notused1; DBT *dbtp; DB_LSN *lsnp; - int notused3; - void *notused4; + int notused2; + void *notused3; { __ham_ovfl_args *argp; u_int32_t i; - int c, ret; + u_int ch; + int ret; i = 0; - c = 0; + ch = 0; notused1 = NULL; - notused3 = 0; - notused4 = NULL; + notused2 = 0; + notused3 = NULL; if ((ret = __ham_ovfl_read(dbtp->data, &argp)) != 0) return (ret); @@ -1364,7 +1369,7 @@ int __ham_copypage_log(logp, txnid, ret_lsnp, flags, memcpy(bp, page->data, page->size); bp += page->size; } -#ifdef DEBUG +#ifdef DIAGNOSTIC if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size) fprintf(stderr, "Error in log record length"); #endif @@ -1380,22 +1385,23 @@ int __ham_copypage_log(logp, txnid, ret_lsnp, flags, * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); */ int -__ham_copypage_print(notused1, dbtp, lsnp, notused3, notused4) +__ham_copypage_print(notused1, dbtp, lsnp, notused2, notused3) DB_LOG *notused1; DBT *dbtp; DB_LSN *lsnp; - int notused3; - void *notused4; + int notused2; + void *notused3; { __ham_copypage_args *argp; u_int32_t i; - int c, ret; + u_int ch; + int ret; i = 0; - c = 0; + ch = 0; notused1 = NULL; - notused3 = 0; - notused4 = NULL; + notused2 = 0; + 
notused3 = NULL; if ((ret = __ham_copypage_read(dbtp->data, &argp)) != 0) return (ret); @@ -1418,11 +1424,11 @@ __ham_copypage_print(notused1, dbtp, lsnp, notused3, notused4) (u_long)argp->nnextlsn.file, (u_long)argp->nnextlsn.offset); printf("\tpage: "); for (i = 0; i < argp->page.size; i++) { - c = ((char *)argp->page.data)[i]; - if (isprint(c) || c == 0xa) - putchar(c); + ch = ((u_int8_t *)argp->page.data)[i]; + if (isprint(ch) || ch == 0xa) + putchar(ch); else - printf("%#x ", c); + printf("%#x ", ch); } printf("\n"); printf("\n"); diff --git a/db2/hash/hash_conv.c b/db2/hash/hash_conv.c index 9cebe72390..c6d0ba4d4e 100644 --- a/db2/hash/hash_conv.c +++ b/db2/hash/hash_conv.c @@ -1,13 +1,13 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. */ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)hash_conv.c 10.4 (Sleepycat) 9/15/97"; +static const char sccsid[] = "@(#)hash_conv.c 10.5 (Sleepycat) 4/10/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES diff --git a/db2/hash/hash_debug.c b/db2/hash/hash_debug.c index 979ddd7b87..232906ae34 100644 --- a/db2/hash/hash_debug.c +++ b/db2/hash/hash_debug.c @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. 
*/ /* @@ -43,7 +43,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)hash_debug.c 10.2 (Sleepycat) 6/21/97"; +static const char sccsid[] = "@(#)hash_debug.c 10.6 (Sleepycat) 5/7/98"; #endif /* not lint */ #ifdef DEBUG @@ -60,9 +60,6 @@ static const char sccsid[] = "@(#)hash_debug.c 10.2 (Sleepycat) 6/21/97"; */ #ifndef NO_SYSTEM_INCLUDES #include <sys/types.h> - -#include <stdio.h> -#include <string.h> #endif #include "db_int.h" @@ -83,10 +80,9 @@ __ham_dump_bucket(hashp, bucket) { PAGE *p; db_pgno_t pgno; - int ret; for (pgno = BUCKET_TO_PAGE(hashp, bucket); pgno != PGNO_INVALID;) { - if ((ret = memp_fget(hashp->dbp->mpf, &pgno, 0, &p)) != 0) + if (memp_fget(hashp->dbp->mpf, &pgno, 0, &p) != 0) break; (void)__db_prpage(p, 1); pgno = p->next_pgno; diff --git a/db2/hash/hash_dup.c b/db2/hash/hash_dup.c index f8b0adb933..ba248ddb17 100644 --- a/db2/hash/hash_dup.c +++ b/db2/hash/hash_dup.c @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. */ /* @@ -42,7 +42,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)hash_dup.c 10.10 (Sleepycat) 1/8/98"; +static const char sccsid[] = "@(#)hash_dup.c 10.14 (Sleepycat) 5/7/98"; #endif /* not lint */ /* @@ -61,15 +61,11 @@ static const char sccsid[] = "@(#)hash_dup.c 10.10 (Sleepycat) 1/8/98"; #ifndef NO_SYSTEM_INCLUDES #include <sys/types.h> -#include <stdio.h> -#include <stdlib.h> #include <string.h> -#include <unistd.h> #endif #include "db_int.h" #include "db_page.h" -#include "db_swap.h" #include "hash.h" static int __ham_check_move __P((HTAB *, HASH_CURSOR *, int32_t)); @@ -89,14 +85,14 @@ static int __ham_make_dup __P((const DBT *, DBT *d, void **, u_int32_t *)); * Case 4: The element is large enough to push the duplicate set onto a * separate page. 
* - * PUBLIC: int __ham_add_dup __P((HTAB *, HASH_CURSOR *, DBT *, int)); + * PUBLIC: int __ham_add_dup __P((HTAB *, HASH_CURSOR *, DBT *, u_int32_t)); */ int __ham_add_dup(hashp, hcp, nval, flags) HTAB *hashp; HASH_CURSOR *hcp; DBT *nval; - int flags; + u_int32_t flags; { DBT pval, tmp_val; u_int32_t del_len, new_size; @@ -367,9 +363,9 @@ __ham_check_move(hashp, hcp, add_len) DB_LSN new_lsn; PAGE *next_pagep; db_pgno_t next_pgno; - int rectype, ret; - u_int32_t new_datalen, old_len; + u_int32_t new_datalen, old_len, rectype; u_int8_t *hk; + int ret; /* * Check if we can do whatever we need to on this page. If not, @@ -419,7 +415,8 @@ __ham_check_move(hashp, hcp, add_len) (ret = __ham_put_page(hashp->dbp, next_pagep, 0)) != 0) return (ret); - if ((ret = __ham_get_page(hashp->dbp, next_pgno, &next_pagep)) != 0) + if ((ret = + __ham_get_page(hashp->dbp, next_pgno, &next_pagep)) != 0) return (ret); if (P_FREESPACE(next_pagep) >= new_datalen) diff --git a/db2/hash/hash_func.c b/db2/hash/hash_func.c index 1bf12c4948..9131098e5e 100644 --- a/db2/hash/hash_func.c +++ b/db2/hash/hash_func.c @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. */ /* @@ -47,7 +47,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)hash_func.c 10.7 (Sleepycat) 9/16/97"; +static const char sccsid[] = "@(#)hash_func.c 10.8 (Sleepycat) 4/10/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES diff --git a/db2/hash/hash_page.c b/db2/hash/hash_page.c index 09a4a0c374..ce692f2e41 100644 --- a/db2/hash/hash_page.c +++ b/db2/hash/hash_page.c @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. 
*/ /* @@ -47,7 +47,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)hash_page.c 10.31 (Sleepycat) 1/8/98"; +static const char sccsid[] = "@(#)hash_page.c 10.40 (Sleepycat) 6/2/98"; #endif /* not lint */ /* @@ -70,15 +70,11 @@ static const char sccsid[] = "@(#)hash_page.c 10.31 (Sleepycat) 1/8/98"; #include <sys/types.h> #include <errno.h> -#include <stdio.h> -#include <stdlib.h> #include <string.h> -#include <unistd.h> #endif #include "db_int.h" #include "db_page.h" -#include "db_swap.h" #include "hash.h" static int __ham_lock_bucket __P((DB *, HASH_CURSOR *, db_lockmode_t)); @@ -266,6 +262,7 @@ __ham_item_last(hashp, cursorp, mode) F_SET(cursorp, H_OK); return (__ham_item_prev(hashp, cursorp, mode)); } + /* * PUBLIC: int __ham_item_first __P((HTAB *, HASH_CURSOR *, db_lockmode_t)); */ @@ -285,8 +282,10 @@ __ham_item_first(hashp, cursorp, mode) } /* - * Returns a pointer to key/data pair on a page. In the case of bigkeys, - * just returns the page number and index of the bigkey pointer pair. + * __ham_item_prev -- + * Returns a pointer to key/data pair on a page. In the case of + * bigkeys, just returns the page number and index of the bigkey + * pointer pair. * * PUBLIC: int __ham_item_prev __P((HTAB *, HASH_CURSOR *, db_lockmode_t)); */ @@ -487,12 +486,61 @@ __ham_putitem(p, dbt, type) NUM_ENT(p) += 1; } +/* + * PUBLIC: void __ham_reputpair + * PUBLIC: __P((PAGE *p, u_int32_t, u_int32_t, const DBT *, const DBT *)); + * + * This is a special case to restore a key/data pair to its original + * location during recovery. We are guaranteed that the pair fits + * on the page and is not the last pair on the page (because if it's + * the last pair, the normal insert works). + */ +void +__ham_reputpair(p, psize, ndx, key, data) + PAGE *p; + u_int32_t psize, ndx; + const DBT *key, *data; +{ + db_indx_t i, movebytes, newbytes; + u_int8_t *from; + + /* First shuffle the existing items up on the page. */ + movebytes = + (ndx == 0 ? 
psize : p->inp[H_DATAINDEX(ndx - 1)]) - HOFFSET(p); + newbytes = key->size + data->size; + from = (u_int8_t *)p + HOFFSET(p); + memmove(from - newbytes, from, movebytes); + + /* + * Adjust the indices and move them up 2 spaces. Note that we + * have to check the exit condition inside the loop just in case + * we are dealing with index 0 (db_indx_t's are unsigned). + */ + for (i = NUM_ENT(p) - 1; ; i-- ) { + p->inp[i + 2] = p->inp[i] - newbytes; + if (i == H_KEYINDEX(ndx)) + break; + } + + /* Put the key and data on the page. */ + p->inp[H_KEYINDEX(ndx)] = + (ndx == 0 ? psize : p->inp[H_DATAINDEX(ndx - 1)]) - key->size; + p->inp[H_DATAINDEX(ndx)] = p->inp[H_KEYINDEX(ndx)] - data->size; + memcpy(P_ENTRY(p, H_KEYINDEX(ndx)), key->data, key->size); + memcpy(P_ENTRY(p, H_DATAINDEX(ndx)), data->data, data->size); + + /* Adjust page info. */ + HOFFSET(p) -= newbytes; + NUM_ENT(p) += 2; +} + /* * PUBLIC: int __ham_del_pair __P((HTAB *, HASH_CURSOR *, int)); - * XXX TODO: if the item is an offdup, delete the other pages and - * then remove the pair. If the offpage page is 0, then you can - * just remove the pair. + * + * XXX + * TODO: if the item is an offdup, delete the other pages and then remove + * the pair. If the offpage page is 0, then you can just remove the pair. */ int __ham_del_pair(hashp, cursorp, reclaim_page) @@ -648,8 +696,9 @@ __ham_del_pair(hashp, cursorp, reclaim_page) /* * Cursor is advanced to the beginning of the next page. */ - cursorp->bndx = NDX_INVALID; + cursorp->bndx = 0; cursorp->pgno = PGNO(p); + F_SET(cursorp, H_DELETED); chg_pgno = PGNO(p); if ((ret = __ham_dirty_page(hashp, p)) != 0 || (ret = __ham_del_page(hashp->dbp, n_pagep)) != 0) @@ -748,8 +797,8 @@ __ham_replpair(hashp, hcp, dbt, make_dup) { DBT old_dbt, tdata, tmp; DB_LSN new_lsn; + int32_t change; /* XXX: Possible overflow. 
*/ u_int32_t len; - int32_t change; int is_big, ret, type; u_int8_t *beg, *dest, *end, *hk, *src; @@ -789,7 +838,7 @@ __ham_replpair(hashp, hcp, dbt, make_dup) change += dbt->doff + dbt->dlen - len; - if (change > (int)P_FREESPACE(hcp->pagep) || is_big) { + if (change > (int32_t)P_FREESPACE(hcp->pagep) || is_big) { /* * Case 3 -- two subcases. * A. This is not really a partial operation, but an overwrite. @@ -954,7 +1003,7 @@ __ham_split_page(hashp, obucket, nbucket) HTAB *hashp; u_int32_t obucket, nbucket; { - DBT key, val, page_dbt; + DBT key, page_dbt; DB_ENV *dbenv; DB_LSN new_lsn; PAGE **pp, *old_pagep, *temp_pagep, *new_pagep; @@ -995,7 +1044,7 @@ __ham_split_page(hashp, obucket, nbucket) big_len = 0; big_buf = NULL; - val.flags = key.flags = 0; + key.flags = 0; while (temp_pagep != NULL) { for (n = 0; n < (db_indx_t)H_NUMPAIRS(temp_pagep); n++) { if ((ret = @@ -1103,8 +1152,8 @@ __ham_split_page(hashp, obucket, nbucket) ret == 0) ret = tret; -err: if (0) { - if (old_pagep != NULL) + if (0) { +err: if (old_pagep != NULL) (void)__ham_put_page(hashp->dbp, old_pagep, 1); if (new_pagep != NULL) (void)__ham_put_page(hashp->dbp, new_pagep, 1); @@ -1121,8 +1170,8 @@ err: if (0) { * to which we just added something. This allows us to link overflow * pages and return the new page having correctly put the last page. 
* - * PUBLIC: int __ham_add_el __P((HTAB *, HASH_CURSOR *, const DBT *, const DBT *, - * PUBLIC: int)); + * PUBLIC: int __ham_add_el + * PUBLIC: __P((HTAB *, HASH_CURSOR *, const DBT *, const DBT *, int)); */ int __ham_add_el(hashp, hcp, key, val, type) @@ -1136,8 +1185,8 @@ __ham_add_el(hashp, hcp, key, val, type) DB_LSN new_lsn; HOFFPAGE doff, koff; db_pgno_t next_pgno; - u_int32_t data_size, key_size, pairsize; - int do_expand, is_keybig, is_databig, rectype, ret; + u_int32_t data_size, key_size, pairsize, rectype; + int do_expand, is_keybig, is_databig, ret; int key_type, data_type; do_expand = 0; @@ -1268,13 +1317,14 @@ __ham_add_el(hashp, hcp, key, val, type) * another. Works for all types of hash entries (H_OFFPAGE, H_KEYDATA, * H_DUPLICATE, H_OFFDUP). Since we log splits at a high level, we * do not need to do any logging here. - * PUBLIC: void __ham_copy_item __P((HTAB *, PAGE *, int, PAGE *)); + * + * PUBLIC: void __ham_copy_item __P((HTAB *, PAGE *, u_int32_t, PAGE *)); */ void __ham_copy_item(hashp, src_page, src_ndx, dest_page) HTAB *hashp; PAGE *src_page; - int src_ndx; + u_int32_t src_ndx; PAGE *dest_page; { u_int32_t len; @@ -1409,7 +1459,7 @@ __ham_del_page(dbp, pagep) LSN(pagep) = new_lsn; } -#ifdef DEBUG +#ifdef DIAGNOSTIC { db_pgno_t __pgno; DB_LSN __lsn; @@ -1563,13 +1613,13 @@ __ham_overflow_page(dbp, type, pp) #ifdef DEBUG /* * PUBLIC: #ifdef DEBUG - * PUBLIC: int __bucket_to_page __P((HTAB *, int)); + * PUBLIC: db_pgno_t __bucket_to_page __P((HTAB *, db_pgno_t)); * PUBLIC: #endif */ -int +db_pgno_t __bucket_to_page(hashp, n) HTAB *hashp; - int n; + db_pgno_t n; { int ret_val; @@ -1580,7 +1630,6 @@ __bucket_to_page(hashp, n) } #endif - /* * Create a bunch of overflow pages at the current split point. * PUBLIC: void __ham_init_ovflpages __P((HTAB *)); @@ -1660,8 +1709,9 @@ __ham_get_cpage(hashp, hcp, mode) * Get a new page at the cursor, putting the last page if necessary. 
* If the flag is set to H_ISDUP, then we are talking about the * duplicate page, not the main page. - * PUBLIC: int __ham_next_cpage __P((HTAB *, HASH_CURSOR *, db_pgno_t, - * PUBLIC: int, int)); + * + * PUBLIC: int __ham_next_cpage + * PUBLIC: __P((HTAB *, HASH_CURSOR *, db_pgno_t, int, u_int32_t)); */ int __ham_next_cpage(hashp, hcp, pgno, dirty, flags) @@ -1669,22 +1719,22 @@ __ham_next_cpage(hashp, hcp, pgno, dirty, flags) HASH_CURSOR *hcp; db_pgno_t pgno; int dirty; - int flags; + u_int32_t flags; { PAGE *p; int ret; - if (flags & H_ISDUP && hcp->dpagep != NULL && + if (LF_ISSET(H_ISDUP) && hcp->dpagep != NULL && (ret = __ham_put_page(hashp->dbp, hcp->dpagep, dirty)) != 0) return (ret); - else if (!(flags & H_ISDUP) && hcp->pagep != NULL && + else if (!LF_ISSET(H_ISDUP) && hcp->pagep != NULL && (ret = __ham_put_page(hashp->dbp, hcp->pagep, dirty)) != 0) return (ret); if ((ret = __ham_get_page(hashp->dbp, pgno, &p)) != 0) return (ret); - if (flags & H_ISDUP) { + if (LF_ISSET(H_ISDUP)) { hcp->dpagep = p; hcp->dpgno = pgno; hcp->dndx = 0; diff --git a/db2/hash/hash_rec.c b/db2/hash/hash_rec.c index 09508251a2..efaf61c638 100644 --- a/db2/hash/hash_rec.c +++ b/db2/hash/hash_rec.c @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. 
*/ /* @@ -47,14 +47,13 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)hash_rec.c 10.15 (Sleepycat) 12/4/97"; +static const char sccsid[] = "@(#)hash_rec.c 10.19 (Sleepycat) 5/23/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES #include <sys/types.h> #include <errno.h> -#include <stdlib.h> #include <string.h> #endif @@ -64,7 +63,6 @@ static const char sccsid[] = "@(#)hash_rec.c 10.15 (Sleepycat) 12/4/97"; #include "hash.h" #include "btree.h" #include "log.h" -#include "db_dispatch.h" #include "common_ext.h" /* @@ -131,13 +129,23 @@ __ham_insdel_recover(logp, dbtp, lsnp, redo, info) if ((op == DELPAIR && cmp_n == 0 && !redo) || (op == PUTPAIR && cmp_p == 0 && redo)) { - /* Need to redo a PUT or undo a delete. */ - __ham_putitem(pagep, &argp->key, - !redo || PAIR_ISKEYBIG(argp->opcode) ? - H_OFFPAGE : H_KEYDATA); - __ham_putitem(pagep, &argp->data, - !redo || PAIR_ISDATABIG(argp->opcode) ? - H_OFFPAGE : H_KEYDATA); + /* + * Need to redo a PUT or undo a delete. If we are undoing a + * delete, we've got to restore the item back to its original + * position. That's a royal pain in the butt (because we do + * not store item lengths on the page), but there's no choice. + */ + if (op != DELPAIR || + argp->ndx == (u_int32_t)H_NUMPAIRS(pagep)) { + __ham_putitem(pagep, &argp->key, + !redo || PAIR_ISKEYBIG(argp->opcode) ? + H_OFFPAGE : H_KEYDATA); + __ham_putitem(pagep, &argp->data, + !redo || PAIR_ISDATABIG(argp->opcode) ? + H_OFFPAGE : H_KEYDATA); + } else + (void) __ham_reputpair(pagep, hashp->hdr->pagesize, + argp->ndx, &argp->key, &argp->data); LSN(pagep) = redo ? 
*lsnp : argp->pagelsn; if ((ret = __ham_put_page(file_dbp, pagep, 1)) != 0) @@ -453,7 +461,7 @@ __ham_newpgno_recover(logp, dbtp, lsnp, redo, info) DBT *dbtp; DB_LSN *lsnp; int redo; - void *info; + void *info; { __ham_newpgno_args *argp; DB *mdbp, *file_dbp; @@ -574,7 +582,7 @@ __ham_splitmeta_recover(logp, dbtp, lsnp, redo, info) DBT *dbtp; DB_LSN *lsnp; int redo; - void *info; + void *info; { __ham_splitmeta_args *argp; DB *mdbp, *file_dbp; @@ -649,7 +657,7 @@ __ham_splitdata_recover(logp, dbtp, lsnp, redo, info) DBT *dbtp; DB_LSN *lsnp; int redo; - void *info; + void *info; { __ham_splitdata_args *argp; DB *mdbp, *file_dbp; diff --git a/db2/hash/hash_stat.c b/db2/hash/hash_stat.c index 99c6078d86..b57ca0950d 100644 --- a/db2/hash/hash_stat.c +++ b/db2/hash/hash_stat.c @@ -1,14 +1,14 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. */ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)hash_stat.c 10.6 (Sleepycat) 7/2/97"; +static const char sccsid[] = "@(#)hash_stat.c 10.8 (Sleepycat) 4/26/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -20,7 +20,6 @@ static const char sccsid[] = "@(#)hash_stat.c 10.6 (Sleepycat) 7/2/97"; #include "db_int.h" #include "db_page.h" #include "hash.h" -#include "common_ext.h" /* * __ham_stat -- diff --git a/db2/include/btree.h b/db2/include/btree.h index 878096b7b2..1660d331e7 100644 --- a/db2/include/btree.h +++ b/db2/include/btree.h @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. */ /* @@ -43,7 +43,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * @(#)btree.h 10.17 (Sleepycat) 9/23/97 + * @(#)btree.h 10.21 (Sleepycat) 5/23/98 */ /* Forward structure declarations. 
*/ @@ -103,28 +103,39 @@ struct __recno; typedef struct __recno RECNO; * to return deleted entries. To simplify both the mnemonic representation * and the code that checks for various cases, we construct a set of bitmasks. */ -#define S_READ 0x0001 /* Read locks. */ -#define S_WRITE 0x0002 /* Write locks. */ - -#define S_APPEND 0x0040 /* Append to the tree. */ -#define S_DELNO 0x0080 /* Don't return deleted items. */ -#define S_DUPFIRST 0x0100 /* Return first duplicate. */ -#define S_DUPLAST 0x0200 /* Return last duplicate. */ -#define S_EXACT 0x0400 /* Exact items only. */ -#define S_PARENT 0x0800 /* Lock page pair. */ - -#define S_DELETE (S_WRITE | S_DUPFIRST | S_DELNO | S_EXACT) +#define S_READ 0x00001 /* Read locks. */ +#define S_WRITE 0x00002 /* Write locks. */ + +#define S_APPEND 0x00040 /* Append to the tree. */ +#define S_DELNO 0x00080 /* Don't return deleted items. */ +#define S_DUPFIRST 0x00100 /* Return first duplicate. */ +#define S_DUPLAST 0x00200 /* Return last duplicate. */ +#define S_EXACT 0x00400 /* Exact items only. */ +#define S_PARENT 0x00800 /* Lock page pair. */ +#define S_STACK 0x01000 /* Need a complete stack. */ + +#define S_DELETE (S_WRITE | S_DUPFIRST | S_DELNO | S_EXACT | S_STACK) #define S_FIND (S_READ | S_DUPFIRST | S_DELNO) -#define S_INSERT (S_WRITE | S_DUPLAST) -#define S_KEYFIRST (S_WRITE | S_DUPFIRST) -#define S_KEYLAST (S_WRITE | S_DUPLAST) +#define S_INSERT (S_WRITE | S_DUPLAST | S_STACK) +#define S_KEYFIRST (S_WRITE | S_DUPFIRST | S_STACK) +#define S_KEYLAST (S_WRITE | S_DUPLAST | S_STACK) #define S_WRPAIR (S_WRITE | S_DUPLAST | S_PARENT) /* + * If doing insert search (including keyfirst or keylast operations) or a + * split search on behalf of an insert, it's okay to return the entry one + * past the end of the page. + */ +#define PAST_END_OK(f) \ + ((f) == S_INSERT || \ + (f) == S_KEYFIRST || (f) == S_KEYLAST || (f) == S_WRPAIR) + +/* * Flags to __bam_iitem(). */ -#define BI_NEWKEY 0x01 /* New key. 
*/ -#define BI_DELETED 0x02 /* Key/data pair only placeholder. */ +#define BI_DELETED 0x01 /* Key/data pair only placeholder. */ +#define BI_DOINCR 0x02 /* Increment the record count. */ +#define BI_NEWKEY 0x04 /* New key. */ /* * Various routines pass around page references. A page reference can be a @@ -138,6 +149,21 @@ struct __epg { }; /* + * All cursors are queued from the master DB structure. Convert the user's + * DB reference to the master DB reference. We lock the master DB mutex + * so that we can walk the cursor queue. There's no race in accessing the + * cursors, because if we're modifying a page, we have a write lock on it, + * and therefore no other thread than the current one can have a cursor that + * references the page. + */ +#define CURSOR_SETUP(dbp) { \ + (dbp) = (dbp)->master; \ + DB_THREAD_LOCK(dbp); \ +} +#define CURSOR_TEARDOWN(dbp) \ + DB_THREAD_UNLOCK(dbp); + +/* * Btree cursor. * * Arguments passed to __bam_ca_replace(). diff --git a/db2/include/btree_ext.h b/db2/include/btree_ext.h index 9c34c8c6bf..b8a137364c 100644 --- a/db2/include/btree_ext.h +++ b/db2/include/btree_ext.h @@ -2,7 +2,7 @@ #ifndef _btree_ext_h_ #define _btree_ext_h_ int __bam_close __P((DB *)); -int __bam_sync __P((DB *, int)); +int __bam_sync __P((DB *, u_int32_t)); int __bam_cmp __P((DB *, const DBT *, EPG *)); int __bam_defcmp __P((const DBT *, const DBT *)); size_t __bam_defpfx __P((const DBT *, const DBT *)); @@ -11,7 +11,7 @@ int __bam_pgout __P((db_pgno_t, void *, DBT *)); int __bam_mswap __P((PAGE *)); int __bam_cursor __P((DB *, DB_TXN *, DBC **)); int __bam_c_iclose __P((DB *, DBC *)); -int __bam_get __P((DB *, DB_TXN *, DBT *, DBT *, int)); +int __bam_get __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t)); int __bam_ovfl_chk __P((DB *, CURSOR *, u_int32_t, int)); int __bam_cprint __P((DB *)); int __bam_ca_delete __P((DB *, db_pgno_t, u_int32_t, CURSOR *, int)); @@ -23,8 +23,8 @@ void __bam_ca_replace __P((DB *, db_pgno_t, u_int32_t, ca_replace_arg)); void 
__bam_ca_split __P((DB *, db_pgno_t, db_pgno_t, db_pgno_t, u_int32_t, int)); -int __bam_delete __P((DB *, DB_TXN *, DBT *, int)); -int __ram_delete __P((DB *, DB_TXN *, DBT *, int)); +int __bam_delete __P((DB *, DB_TXN *, DBT *, u_int32_t)); +int __ram_delete __P((DB *, DB_TXN *, DBT *, u_int32_t)); int __bam_ditem __P((DB *, PAGE *, u_int32_t)); int __bam_adjindx __P((DB *, PAGE *, u_int32_t, u_int32_t, int)); int __bam_dpage __P((DB *, const DBT *)); @@ -35,10 +35,10 @@ int __bam_free __P((DB *, PAGE *)); int __bam_lt __P((DB *)); int __bam_lget __P((DB *, int, db_pgno_t, db_lockmode_t, DB_LOCK *)); int __bam_lput __P((DB *, DB_LOCK)); -int __bam_pget __P((DB *, PAGE **, db_pgno_t *, int)); -int __bam_put __P((DB *, DB_TXN *, DBT *, DBT *, int)); +int __bam_pget __P((DB *, PAGE **, db_pgno_t *, u_int32_t)); +int __bam_put __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t)); int __bam_iitem __P((DB *, - PAGE **, db_indx_t *, DBT *, DBT *, int, int)); + PAGE **, db_indx_t *, DBT *, DBT *, u_int32_t, u_int32_t)); int __bam_ritem __P((DB *, PAGE *, u_int32_t, DBT *)); int __bam_pg_alloc_recover __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); @@ -64,19 +64,19 @@ void __ram_ca __P((DB *, db_recno_t, ca_recno_arg)); int __ram_cprint __P((DB *)); int __ram_getno __P((DB *, const DBT *, db_recno_t *, int)); int __ram_snapshot __P((DB *)); -int __bam_rsearch __P((DB *, db_recno_t *, u_int, int, int *)); -int __bam_adjust __P((DB *, BTREE *, int)); +int __bam_rsearch __P((DB *, db_recno_t *, u_int32_t, int, int *)); +int __bam_adjust __P((DB *, BTREE *, int32_t)); int __bam_nrecs __P((DB *, db_recno_t *)); db_recno_t __bam_total __P((PAGE *)); int __bam_search __P((DB *, - const DBT *, u_int, int, db_recno_t *, int *)); + const DBT *, u_int32_t, int, db_recno_t *, int *)); int __bam_stkrel __P((DB *)); int __bam_stkgrow __P((BTREE *)); int __bam_split __P((DB *, void *)); int __bam_broot __P((DB *, PAGE *, PAGE *, PAGE *)); int __ram_root __P((DB *, PAGE *, PAGE *, PAGE *)); int 
__bam_copy __P((DB *, PAGE *, PAGE *, u_int32_t, u_int32_t)); -int __bam_stat __P((DB *, void *, void *(*)(size_t), int)); +int __bam_stat __P((DB *, void *, void *(*)(size_t), u_int32_t)); void __bam_add_mstat __P((DB_BTREE_LSTAT *, DB_BTREE_LSTAT *)); int __bam_pg_alloc_log __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t, diff --git a/db2/include/clib_ext.h b/db2/include/clib_ext.h index eb982bf85f..f5510a1629 100644 --- a/db2/include/clib_ext.h +++ b/db2/include/clib_ext.h @@ -53,9 +53,6 @@ int snprintf __P((char *, size_t, const char *, ...)); int snprintf(); #endif #endif -#ifndef HAVE_STRDUP -char *strdup __P((const char *)); -#endif #ifndef HAVE_STRERROR char *strerror __P((int)); #endif diff --git a/db2/include/common_ext.h b/db2/include/common_ext.h index b362c9c32e..4674f9ce01 100644 --- a/db2/include/common_ext.h +++ b/db2/include/common_ext.h @@ -2,8 +2,8 @@ #ifndef _common_ext_h_ #define _common_ext_h_ int __db_appname __P((DB_ENV *, - APPNAME, const char *, const char *, int *, char **)); -int __db_apprec __P((DB_ENV *, int)); + APPNAME, const char *, const char *, u_int32_t, int *, char **)); +int __db_apprec __P((DB_ENV *, u_int32_t)); int __db_byteorder __P((DB_ENV *, int)); #ifdef __STDC__ void __db_err __P((const DB_ENV *dbenv, const char *fmt, ...)); @@ -11,35 +11,32 @@ void __db_err __P((const DB_ENV *dbenv, const char *fmt, ...)); void __db_err(); #endif int __db_panic __P((DB *)); -int __db_fchk __P((DB_ENV *, const char *, int, int)); -int __db_fcchk __P((DB_ENV *, const char *, int, int, int)); -int __db_cdelchk __P((const DB *, int, int, int)); -int __db_cgetchk __P((const DB *, DBT *, DBT *, int, int)); +int __db_fchk __P((DB_ENV *, const char *, u_int32_t, u_int32_t)); +int __db_fcchk + __P((DB_ENV *, const char *, u_int32_t, u_int32_t, u_int32_t)); +int __db_cdelchk __P((const DB *, u_int32_t, int, int)); +int __db_cgetchk __P((const DB *, DBT *, DBT *, u_int32_t, int)); int __db_cputchk __P((const DB *, - const DBT *, DBT *, int, int, 
int)); -int __db_delchk __P((const DB *, int, int)); -int __db_getchk __P((const DB *, const DBT *, DBT *, int)); -int __db_putchk __P((const DB *, DBT *, const DBT *, int, int, int)); -int __db_statchk __P((const DB *, int)); -int __db_syncchk __P((const DB *, int)); + const DBT *, DBT *, u_int32_t, int, int)); +int __db_delchk __P((const DB *, DBT *, u_int32_t, int)); +int __db_getchk __P((const DB *, const DBT *, DBT *, u_int32_t)); +int __db_putchk + __P((const DB *, DBT *, const DBT *, u_int32_t, int, int)); +int __db_statchk __P((const DB *, u_int32_t)); +int __db_syncchk __P((const DB *, u_int32_t)); int __db_ferr __P((const DB_ENV *, const char *, int)); u_int32_t __db_log2 __P((u_int32_t)); -int __db_rcreate __P((DB_ENV *, APPNAME, - const char *, const char *, int, size_t, int, int *, void *)); -int __db_rinit __P((DB_ENV *, RLAYOUT *, int, size_t, int)); -int __db_ropen __P((DB_ENV *, - APPNAME, const char *, const char *, int, int *, void *)); -int __db_rclose __P((DB_ENV *, int, void *)); -int __db_runlink __P((DB_ENV *, - APPNAME, const char *, const char *, int)); -int __db_rgrow __P((DB_ENV *, int, size_t)); -int __db_rremap __P((DB_ENV *, void *, size_t, size_t, int, void *)); +int __db_rattach __P((REGINFO *)); +int __db_rdetach __P((REGINFO *)); +int __db_runlink __P((REGINFO *, int)); +int __db_rgrow __P((REGINFO *, size_t)); +int __db_rreattach __P((REGINFO *, size_t)); void __db_shalloc_init __P((void *, size_t)); int __db_shalloc __P((void *, size_t, size_t, void *)); void __db_shalloc_free __P((void *, void *)); size_t __db_shalloc_count __P((void *)); size_t __db_shsizeof __P((void *)); -void __db_shalloc_dump __P((FILE *, void *)); -int __db_tablesize __P((u_int)); -void __db_hashinit __P((void *, int)); +void __db_shalloc_dump __P((void *, FILE *)); +int __db_tablesize __P((u_int32_t)); +void __db_hashinit __P((void *, u_int32_t)); #endif /* _common_ext_h_ */ diff --git a/db2/include/cxx_int.h b/db2/include/cxx_int.h index 
bf7a09602d..0a59de4391 100644 --- a/db2/include/cxx_int.h +++ b/db2/include/cxx_int.h @@ -1,10 +1,10 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1997 + * Copyright (c) 1997, 1998 * Sleepycat Software. All rights reserved. * - * @(#)cxx_int.h 10.4 (Sleepycat) 8/22/97 + * @(#)cxx_int.h 10.5 (Sleepycat) 4/10/98 */ #ifndef _CXX_INT_H_ diff --git a/db2/include/db.h.src b/db2/include/db.h.src index ebdaa27470..97ad55693f 100644 --- a/db2/include/db.h.src +++ b/db2/include/db.h.src @@ -1,10 +1,10 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. * - * @(#)db.h.src 10.102 (Sleepycat) 1/18/98 + * @(#)db.h.src 10.131 (Sleepycat) 6/2/98 */ #ifndef _DB_H_ @@ -54,8 +54,7 @@ * * !!! * We also provide the standard u_int, u_long etc., if they're not provided - * by the system. This isn't completely necessary, but the example programs - * need them. + * by the system. */ #ifndef __BIT_TYPES_DEFINED__ #define __BIT_TYPES_DEFINED__ @@ -72,9 +71,9 @@ @u_long_decl@ #define DB_VERSION_MAJOR 2 -#define DB_VERSION_MINOR 3 -#define DB_VERSION_PATCH 16 -#define DB_VERSION_STRING "Sleepycat Software: DB 2.3.16: (1/19/98)" +#define DB_VERSION_MINOR 4 +#define DB_VERSION_PATCH 14 +#define DB_VERSION_STRING "Sleepycat Software: DB 2.4.14: (6/2/98)" typedef u_int32_t db_pgno_t; /* Page number type. */ typedef u_int16_t db_indx_t; /* Page offset type. 
*/ @@ -95,6 +94,7 @@ struct __db_bt_stat; typedef struct __db_bt_stat DB_BTREE_STAT; struct __db_dbt; typedef struct __db_dbt DBT; struct __db_env; typedef struct __db_env DB_ENV; struct __db_info; typedef struct __db_info DB_INFO; +struct __db_lock_stat; typedef struct __db_lock_stat DB_LOCK_STAT; struct __db_lockregion; typedef struct __db_lockregion DB_LOCKREGION; struct __db_lockreq; typedef struct __db_lockreq DB_LOCKREQ; struct __db_locktab; typedef struct __db_locktab DB_LOCKTAB; @@ -102,6 +102,7 @@ struct __db_log; typedef struct __db_log DB_LOG; struct __db_log_stat; typedef struct __db_log_stat DB_LOG_STAT; struct __db_lsn; typedef struct __db_lsn DB_LSN; struct __db_mpool; typedef struct __db_mpool DB_MPOOL; +struct __db_mpool_finfo;typedef struct __db_mpool_finfo DB_MPOOL_FINFO; struct __db_mpool_fstat;typedef struct __db_mpool_fstat DB_MPOOL_FSTAT; struct __db_mpool_stat; typedef struct __db_mpool_stat DB_MPOOL_STAT; struct __db_mpoolfile; typedef struct __db_mpoolfile DB_MPOOLFILE; @@ -134,7 +135,7 @@ struct __db_dbt { * There are a set of functions that the application can replace with its * own versions, and some other knobs which can be turned at run-time. */ -#define DB_FUNC_CALLOC 1 /* ANSI C calloc. */ +#define DB_FUNC_CALLOC 1 /* DELETED: ANSI C calloc. */ #define DB_FUNC_CLOSE 2 /* POSIX 1003.1 close. */ #define DB_FUNC_DIRFREE 3 /* DB: free directory list. */ #define DB_FUNC_DIRLIST 4 /* DB: create directory list. */ @@ -149,12 +150,18 @@ struct __db_dbt { #define DB_FUNC_REALLOC 13 /* ANSI C realloc. */ #define DB_FUNC_SEEK 14 /* POSIX 1003.1 lseek. */ #define DB_FUNC_SLEEP 15 /* DB: sleep secs/usecs. */ -#define DB_FUNC_STRDUP 16 /* DB: strdup(3). */ +#define DB_FUNC_STRDUP 16 /* DELETED: DB: strdup(3). */ #define DB_FUNC_UNLINK 17 /* POSIX 1003.1 unlink. */ #define DB_FUNC_UNMAP 18 /* DB: unmap shared memory file. */ #define DB_FUNC_WRITE 19 /* POSIX 1003.1 write. */ #define DB_FUNC_YIELD 20 /* DB: yield thread to scheduler. 
*/ #define DB_TSL_SPINS 21 /* DB: initialize spin count. */ +#define DB_FUNC_RUNLINK 22 /* DB: remove a shared region. */ +#define DB_REGION_ANON 23 /* DB: anonymous, unnamed regions. */ +#define DB_REGION_INIT 24 /* DB: page-fault regions in create. */ +#define DB_REGION_NAME 25 /* DB: anonymous, named regions. */ +#define DB_MUTEXLOCKS 26 /* DB: turn off all mutex locks. */ +#define DB_PAGEYIELD 27 /* DB: yield the CPU on pool get. */ /* * Database configuration and initialization. @@ -162,52 +169,51 @@ struct __db_dbt { /* * Flags understood by both db_open(3) and db_appinit(3). */ -#define DB_CREATE 0x00001 /* O_CREAT: create file as necessary. */ -#define DB_NOMMAP 0x00002 /* Don't mmap underlying file. */ -#define DB_THREAD 0x00004 /* Free-thread DB package handles. */ +#define DB_CREATE 0x000001 /* O_CREAT: create file as necessary. */ +#define DB_NOMMAP 0x000002 /* Don't mmap underlying file. */ +#define DB_THREAD 0x000004 /* Free-thread DB package handles. */ /* * Flags understood by db_appinit(3). - * - * DB_MUTEXDEBUG is internal only, and not documented. */ -/* 0x00007 COMMON MASK. */ -#define DB_INIT_LOCK 0x00008 /* Initialize locking. */ -#define DB_INIT_LOG 0x00010 /* Initialize logging. */ -#define DB_INIT_MPOOL 0x00020 /* Initialize mpool. */ -#define DB_INIT_TXN 0x00040 /* Initialize transactions. */ -#define DB_MPOOL_PRIVATE 0x00080 /* Mpool: private memory pool. */ -#define DB_MUTEXDEBUG 0x00100 /* Do not get/set mutexes in regions. */ -#define DB_RECOVER 0x00200 /* Run normal recovery. */ -#define DB_RECOVER_FATAL 0x00400 /* Run catastrophic recovery. */ -#define DB_TXN_NOSYNC 0x00800 /* Do not sync log on commit. */ -#define DB_USE_ENVIRON 0x01000 /* Use the environment. */ -#define DB_USE_ENVIRON_ROOT 0x02000 /* Use the environment if root. */ +/* 0x000007 COMMON MASK. */ +#define DB_INIT_LOCK 0x000008 /* Initialize locking. */ +#define DB_INIT_LOG 0x000010 /* Initialize logging. */ +#define DB_INIT_MPOOL 0x000020 /* Initialize mpool. 
*/ +#define DB_INIT_TXN 0x000040 /* Initialize transactions. */ +#define DB_MPOOL_PRIVATE 0x000080 /* Mpool: private memory pool. */ +#define __UNUSED_100 0x000100 +#define DB_RECOVER 0x000200 /* Run normal recovery. */ +#define DB_RECOVER_FATAL 0x000400 /* Run catastrophic recovery. */ +#define DB_TXN_NOSYNC 0x000800 /* Do not sync log on commit. */ +#define DB_USE_ENVIRON 0x001000 /* Use the environment. */ +#define DB_USE_ENVIRON_ROOT 0x002000 /* Use the environment if root. */ /* CURRENTLY UNUSED LOCK FLAGS. */ -#define DB_TXN_LOCK_2PL 0x00000 /* Two-phase locking. */ -#define DB_TXN_LOCK_OPTIMISTIC 0x00000 /* Optimistic locking. */ -#define DB_TXN_LOCK_MASK 0x00000 /* Lock flags mask. */ +#define DB_TXN_LOCK_2PL 0x000000 /* Two-phase locking. */ +#define DB_TXN_LOCK_OPTIMIST 0x000000 /* Optimistic locking. */ +#define DB_TXN_LOCK_MASK 0x000000 /* Lock flags mask. */ /* CURRENTLY UNUSED LOG FLAGS. */ -#define DB_TXN_LOG_REDO 0x00000 /* Redo-only logging. */ -#define DB_TXN_LOG_UNDO 0x00000 /* Undo-only logging. */ -#define DB_TXN_LOG_UNDOREDO 0x00000 /* Undo/redo write-ahead logging. */ -#define DB_TXN_LOG_MASK 0x00000 /* Log flags mask. */ +#define DB_TXN_LOG_REDO 0x000000 /* Redo-only logging. */ +#define DB_TXN_LOG_UNDO 0x000000 /* Undo-only logging. */ +#define DB_TXN_LOG_UNDOREDO 0x000000 /* Undo/redo write-ahead logging. */ +#define DB_TXN_LOG_MASK 0x000000 /* Log flags mask. */ /* * Flags understood by db_open(3). * - * DB_EXCL and DB_TEMPORARY are internal only, and not documented. - * DB_SEQUENTIAL is currently internal, but likely to be exported some day. + * DB_EXCL and DB_TEMPORARY are internal only, and are not documented. + * DB_SEQUENTIAL is currently internal, but may be exported some day. */ -/* 0x00007 COMMON MASK. */ -/* 0x07fff ALREADY USED. */ -#define DB_EXCL 0x08000 /* O_EXCL: exclusive open. */ -#define DB_RDONLY 0x10000 /* O_RDONLY: read-only. */ -#define DB_SEQUENTIAL 0x20000 /* Indicate sequential access. 
*/ -#define DB_TEMPORARY 0x40000 /* Remove on last close. */ -#define DB_TRUNCATE 0x80000 /* O_TRUNCATE: replace existing DB. */ +/* 0x000007 COMMON MASK. */ +/* 0x003fff ALREADY USED. */ +#define __UNUSED_4000 0x004000 +#define DB_EXCL 0x008000 /* O_EXCL: exclusive open. */ +#define DB_RDONLY 0x010000 /* O_RDONLY: read-only. */ +#define DB_SEQUENTIAL 0x020000 /* Indicate sequential access. */ +#define DB_TEMPORARY 0x040000 /* Remove on last close. */ +#define DB_TRUNCATE 0x080000 /* O_TRUNCATE: replace existing DB. */ /* * Deadlock detector modes; used in the DBENV structure to configure the @@ -240,9 +246,9 @@ struct __db_env { /* Locking. */ DB_LOCKTAB *lk_info; /* Return from lock_open(). */ u_int8_t *lk_conflicts; /* Two dimensional conflict matrix. */ - int lk_modes; /* Number of lock modes in table. */ - u_int lk_max; /* Maximum number of locks. */ - u_int32_t lk_detect; /* Deadlock detect on every conflict. */ + u_int32_t lk_modes; /* Number of lock modes in table. */ + u_int32_t lk_max; /* Maximum number of locks. */ + u_int32_t lk_detect; /* Deadlock detect on all conflicts. */ /* Logging. */ DB_LOG *lg_info; /* Return from log_open(). */ @@ -255,7 +261,7 @@ struct __db_env { /* Transactions. */ DB_TXNMGR *tx_info; /* Return from txn_open(). */ - unsigned int tx_max; /* Maximum number of transactions. */ + u_int32_t tx_max; /* Maximum number of transactions. */ int (*tx_recover) /* Dispatch function for recovery. */ __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); @@ -300,17 +306,17 @@ struct __db_info { void *(*db_malloc) __P((size_t)); /* Btree access method. */ - int bt_maxkey; /* Maximum keys per page. */ - int bt_minkey; /* Minimum keys per page. */ + u_int32_t bt_maxkey; /* Maximum keys per page. */ + u_int32_t bt_minkey; /* Minimum keys per page. */ int (*bt_compare) /* Comparison function. */ __P((const DBT *, const DBT *)); size_t (*bt_prefix) /* Prefix function. */ __P((const DBT *, const DBT *)); /* Hash access method. 
*/ - unsigned int h_ffactor; /* Fill factor. */ - unsigned int h_nelem; /* Number of elements. */ - u_int32_t (*h_hash) /* Hash function. */ + u_int32_t h_ffactor; /* Fill factor. */ + u_int32_t h_nelem; /* Number of elements. */ + u_int32_t (*h_hash) /* Hash function. */ __P((const void *, u_int32_t)); /* Recno access method. */ @@ -353,6 +359,7 @@ struct __db_info { #define DB_SET 0x010000 /* c_get(), log_get() */ #define DB_SET_RANGE 0x020000 /* c_get() */ #define DB_SET_RECNO 0x040000 /* c_get() */ +#define DB_CURLSN 0x080000 /* log_put() */ /* * DB (user visible) error return codes. @@ -435,14 +442,14 @@ struct __db { void *(*db_malloc) __P((size_t)); /* Functions. */ - int (*close) __P((DB *, int)); + int (*close) __P((DB *, u_int32_t)); int (*cursor) __P((DB *, DB_TXN *, DBC **)); - int (*del) __P((DB *, DB_TXN *, DBT *, int)); + int (*del) __P((DB *, DB_TXN *, DBT *, u_int32_t)); int (*fd) __P((DB *, int *)); - int (*get) __P((DB *, DB_TXN *, DBT *, DBT *, int)); - int (*put) __P((DB *, DB_TXN *, DBT *, DBT *, int)); - int (*stat) __P((DB *, void *, void *(*)(size_t), int)); - int (*sync) __P((DB *, int)); + int (*get) __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t)); + int (*put) __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t)); + int (*stat) __P((DB *, void *, void *(*)(size_t), u_int32_t)); + int (*sync) __P((DB *, u_int32_t)); #define DB_AM_DUP 0x000001 /* DB_DUP (internal). */ #define DB_AM_INMEM 0x000002 /* In-memory; no sync on close. */ @@ -483,9 +490,9 @@ struct __dbc { void *internal; /* Access method private. */ int (*c_close) __P((DBC *)); - int (*c_del) __P((DBC *, int)); - int (*c_get) __P((DBC *, DBT *, DBT *, int)); - int (*c_put) __P((DBC *, DBT *, DBT *, int)); + int (*c_del) __P((DBC *, u_int32_t)); + int (*c_get) __P((DBC *, DBT *, DBT *, u_int32_t)); + int (*c_put) __P((DBC *, DBT *, DBT *, u_int32_t)); }; /* Btree/recno statistics structure. 
*/ @@ -524,10 +531,11 @@ struct __db_bt_stat { #if defined(__cplusplus) extern "C" { #endif -int db_appinit __P((const char *, char * const *, DB_ENV *, int)); +int db_appinit __P((const char *, char * const *, DB_ENV *, u_int32_t)); int db_appexit __P((DB_ENV *)); int db_jump_set __P((void *, int)); -int db_open __P((const char *, DBTYPE, int, int, DB_ENV *, DB_INFO *, DB **)); +int db_open __P((const char *, + DBTYPE, u_int32_t, int, DB_ENV *, DB_INFO *, DB **)); int db_value_set __P((int, int)); char *db_version __P((int *, int *, int *)); #if defined(__cplusplus) @@ -575,6 +583,21 @@ typedef enum { DB_LOCK_IWR /* Intent to read and write. */ } db_lockmode_t; +/* + * Status of a lock. + */ +typedef enum { + DB_LSTAT_ABORTED, /* Lock belongs to an aborted txn. */ + DB_LSTAT_ERR, /* Lock is bad. */ + DB_LSTAT_FREE, /* Lock is unallocated. */ + DB_LSTAT_HELD, /* Lock is currently held. */ + DB_LSTAT_NOGRANT, /* Lock was not granted. */ + DB_LSTAT_PENDING, /* Lock was waiting and has been + * promoted; waiting for the owner + * to run and upgrade it to held. */ + DB_LSTAT_WAITING /* Lock is on the wait queue. */ +} db_status_t; + /* Lock request structure. */ struct __db_lockreq { db_lockop_t op; /* Operation. */ @@ -596,19 +619,38 @@ extern const u_int8_t db_rw_conflicts[]; #define DB_LOCK_RIW_N 6 extern const u_int8_t db_riw_conflicts[]; +struct __db_lock_stat { + u_int32_t st_magic; /* Lock file magic number. */ + u_int32_t st_version; /* Lock file version number. */ + u_int32_t st_maxlocks; /* Maximum number of locks in table. */ + u_int32_t st_nmodes; /* Number of lock modes. */ + u_int32_t st_numobjs; /* Number of objects. */ + u_int32_t st_nlockers; /* Number of lockers. */ + u_int32_t st_nconflicts; /* Number of lock conflicts. */ + u_int32_t st_nrequests; /* Number of lock gets. */ + u_int32_t st_nreleases; /* Number of lock puts. */ + u_int32_t st_ndeadlocks; /* Number of lock deadlocks. */ + u_int32_t st_region_wait; /* Region lock granted after wait. 
*/ + u_int32_t st_region_nowait; /* Region lock granted without wait. */ + u_int32_t st_refcnt; /* Region reference count. */ + u_int32_t st_regsize; /* Region size. */ +}; + #if defined(__cplusplus) extern "C" { #endif int lock_close __P((DB_LOCKTAB *)); -int lock_detect __P((DB_LOCKTAB *, int, int)); +int lock_detect __P((DB_LOCKTAB *, u_int32_t, u_int32_t)); int lock_get __P((DB_LOCKTAB *, - u_int32_t, int, const DBT *, db_lockmode_t, DB_LOCK *)); + u_int32_t, u_int32_t, const DBT *, db_lockmode_t, DB_LOCK *)); int lock_id __P((DB_LOCKTAB *, u_int32_t *)); -int lock_open __P((const char *, int, int, DB_ENV *, DB_LOCKTAB **)); +int lock_open __P((const char *, + u_int32_t, int, DB_ENV *, DB_LOCKTAB **)); int lock_put __P((DB_LOCKTAB *, DB_LOCK)); +int lock_stat __P((DB_LOCKTAB *, DB_LOCK_STAT **, void *(*)(size_t))); int lock_unlink __P((const char *, int, DB_ENV *)); int lock_vec __P((DB_LOCKTAB *, - u_int32_t, int, DB_LOCKREQ *, int, DB_LOCKREQ **)); + u_int32_t, u_int32_t, DB_LOCKREQ *, int, DB_LOCKREQ **)); #if defined(__cplusplus) } #endif @@ -651,19 +693,21 @@ struct __db_log_stat { u_int32_t st_region_nowait; /* Region lock granted without wait. */ u_int32_t st_cur_file; /* Current log file number. */ u_int32_t st_cur_offset; /* Current log file offset. */ + u_int32_t st_refcnt; /* Region reference count. */ + u_int32_t st_regsize; /* Region size. 
*/ }; #if defined(__cplusplus) extern "C" { #endif -int log_archive __P((DB_LOG *, char **[], int, void *(*)(size_t))); +int log_archive __P((DB_LOG *, char **[], u_int32_t, void *(*)(size_t))); int log_close __P((DB_LOG *)); int log_compare __P((const DB_LSN *, const DB_LSN *)); int log_file __P((DB_LOG *, const DB_LSN *, char *, size_t)); int log_flush __P((DB_LOG *, const DB_LSN *)); -int log_get __P((DB_LOG *, DB_LSN *, DBT *, int)); -int log_open __P((const char *, int, int, DB_ENV *, DB_LOG **)); -int log_put __P((DB_LOG *, DB_LSN *, const DBT *, int)); +int log_get __P((DB_LOG *, DB_LSN *, DBT *, u_int32_t)); +int log_open __P((const char *, u_int32_t, int, DB_ENV *, DB_LOG **)); +int log_put __P((DB_LOG *, DB_LSN *, const DBT *, u_int32_t)); int log_register __P((DB_LOG *, DB *, const char *, DBTYPE, u_int32_t *)); int log_stat __P((DB_LOG *, DB_LOG_STAT **, void *(*)(size_t))); int log_unlink __P((const char *, int, DB_ENV *)); @@ -705,6 +749,17 @@ struct __db_mpool_stat { u_int32_t st_page_trickle; /* Pages written by memp_trickle. */ u_int32_t st_region_wait; /* Region lock granted after wait. */ u_int32_t st_region_nowait; /* Region lock granted without wait. */ + u_int32_t st_refcnt; /* Region reference count. */ + u_int32_t st_regsize; /* Region size. */ +}; + +/* Mpool file open information structure. */ +struct __db_mpool_finfo { + int ftype; /* File type. */ + DBT *pgcookie; /* Byte-string passed to pgin/pgout. */ + u_int8_t *fileid; /* Unique file ID. */ + int32_t lsn_offset; /* LSN offset in page. */ + u_int32_t clear_len; /* Cleared length on created pages. */ }; /* Mpool file statistics structure. 
*/ @@ -724,13 +779,13 @@ extern "C" { #endif int memp_close __P((DB_MPOOL *)); int memp_fclose __P((DB_MPOOLFILE *)); -int memp_fget __P((DB_MPOOLFILE *, db_pgno_t *, int, void *)); +int memp_fget __P((DB_MPOOLFILE *, db_pgno_t *, u_int32_t, void *)); int memp_fopen __P((DB_MPOOL *, const char *, - int, int, int, size_t, int, DBT *, u_int8_t *, DB_MPOOLFILE **)); -int memp_fput __P((DB_MPOOLFILE *, void *, int)); -int memp_fset __P((DB_MPOOLFILE *, void *, int)); + u_int32_t, int, size_t, DB_MPOOL_FINFO *, DB_MPOOLFILE **)); +int memp_fput __P((DB_MPOOLFILE *, void *, u_int32_t)); +int memp_fset __P((DB_MPOOLFILE *, void *, u_int32_t)); int memp_fsync __P((DB_MPOOLFILE *)); -int memp_open __P((const char *, int, int, DB_ENV *, DB_MPOOL **)); +int memp_open __P((const char *, u_int32_t, int, DB_ENV *, DB_MPOOL **)); int memp_register __P((DB_MPOOL *, int, int (*)(db_pgno_t, void *, DBT *), int (*)(db_pgno_t, void *, DBT *))); @@ -765,16 +820,21 @@ struct __db_txn_active { }; struct __db_txn_stat { - DB_LSN st_last_ckp; /* lsn of the last checkpoint */ - DB_LSN st_pending_ckp; /* last checkpoint did not finish */ - time_t st_time_ckp; /* time of last checkpoint */ - u_int32_t st_last_txnid; /* last transaction id given out */ - u_int32_t st_maxtxns; /* maximum number of active txns */ - u_int32_t st_naborts; /* number of aborted transactions */ - u_int32_t st_nbegins; /* number of begun transactions */ - u_int32_t st_ncommits; /* number of committed transactions */ - u_int32_t st_nactive; /* number of active transactions */ - DB_TXN_ACTIVE *st_txnarray; /* array of active transactions */ + DB_LSN st_last_ckp; /* lsn of the last checkpoint */ + DB_LSN st_pending_ckp; /* last checkpoint did not finish */ + time_t st_time_ckp; /* time of last checkpoint */ + u_int32_t st_last_txnid; /* last transaction id given out */ + u_int32_t st_maxtxns; /* maximum number of active txns */ + u_int32_t st_naborts; /* number of aborted transactions */ + u_int32_t st_nbegins; /* number 
of begun transactions */ + u_int32_t st_ncommits; /* number of committed transactions */ + u_int32_t st_nactive; /* number of active transactions */ + DB_TXN_ACTIVE + *st_txnarray; /* array of active transactions */ + u_int32_t st_region_wait; /* Region lock granted after wait. */ + u_int32_t st_region_nowait; /* Region lock granted without wait. */ + u_int32_t st_refcnt; /* Region reference count. */ + u_int32_t st_regsize; /* Region size. */ }; #if defined(__cplusplus) @@ -782,11 +842,11 @@ extern "C" { #endif int txn_abort __P((DB_TXN *)); int txn_begin __P((DB_TXNMGR *, DB_TXN *, DB_TXN **)); -int txn_checkpoint __P((const DB_TXNMGR *, int, int)); +int txn_checkpoint __P((const DB_TXNMGR *, u_int32_t, u_int32_t)); int txn_commit __P((DB_TXN *)); int txn_close __P((DB_TXNMGR *)); u_int32_t txn_id __P((DB_TXN *)); -int txn_open __P((const char *, int, int, DB_ENV *, DB_TXNMGR **)); +int txn_open __P((const char *, u_int32_t, int, DB_ENV *, DB_TXNMGR **)); int txn_prepare __P((DB_TXN *)); int txn_stat __P((DB_TXNMGR *, DB_TXN_STAT **, void *(*)(size_t))); int txn_unlink __P((const char *, int, DB_ENV *)); @@ -810,10 +870,17 @@ int txn_unlink __P((const char *, int, DB_ENV *)); */ #define DBM_SUFFIX ".db" +#if defined(_XPG4_2) +typedef struct { + char *dptr; + size_t dsize; +} datum; +#else typedef struct { char *dptr; int dsize; } datum; +#endif /* * Translate DBM calls into DB calls so that DB doesn't step on the @@ -894,7 +961,7 @@ typedef enum { typedef struct entry { char *key; - void *data; + char *data; } ENTRY; /* @@ -909,7 +976,7 @@ typedef struct entry { #if defined(__cplusplus) extern "C" { #endif -int __db_hcreate __P((unsigned int)); +int __db_hcreate __P((size_t)); void __db_hdestroy __P((void)); ENTRY *__db_hsearch __P((ENTRY, ACTION)); #if defined(__cplusplus) diff --git a/db2/include/db_185.h.src b/db2/include/db_185.h.src index a88eb4e525..a928ca8fd5 100644 --- a/db2/include/db_185.h.src +++ b/db2/include/db_185.h.src @@ -1,7 +1,7 @@ /*- * See the 
file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. */ /* @@ -36,7 +36,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * @(#)db_185.h.src 8.5 (Sleepycat) 1/15/98 + * @(#)db_185.h.src 8.7 (Sleepycat) 4/10/98 */ #ifndef _DB_185_H_ @@ -127,11 +127,11 @@ typedef struct __db { /* Structure used to pass parameters to the btree routines. */ typedef struct { #define R_DUP 0x01 /* duplicate keys */ - u_long flags; - u_int cachesize; /* bytes to cache */ - int maxkeypage; /* maximum keys per page */ - int minkeypage; /* minimum keys per page */ - u_int psize; /* page size */ + u_int32_t flags; + u_int32_t cachesize; /* bytes to cache */ + u_int32_t maxkeypage; /* maximum keys per page */ + u_int32_t minkeypage; /* minimum keys per page */ + u_int32_t psize; /* page size */ int (*compare) /* comparison function */ __P((const DBT *, const DBT *)); size_t (*prefix) /* prefix function */ @@ -144,10 +144,10 @@ typedef struct { /* Structure used to pass parameters to the hashing routines. 
*/ typedef struct { - u_int bsize; /* bucket size */ - u_int ffactor; /* fill factor */ - u_int nelem; /* number of elements */ - u_int cachesize; /* bytes to cache */ + u_int32_t bsize; /* bucket size */ + u_int32_t ffactor; /* fill factor */ + u_int32_t nelem; /* number of elements */ + u_int32_t cachesize; /* bytes to cache */ u_int32_t /* hash function */ (*hash) __P((const void *, size_t)); int lorder; /* byte order */ @@ -158,9 +158,9 @@ typedef struct { #define R_FIXEDLEN 0x01 /* fixed-length records */ #define R_NOKEY 0x02 /* key not required */ #define R_SNAPSHOT 0x04 /* snapshot the input */ - u_long flags; - u_int cachesize; /* bytes to cache */ - u_int psize; /* page size */ + u_int32_t flags; + u_int32_t cachesize; /* bytes to cache */ + u_int32_t psize; /* page size */ int lorder; /* byte order */ size_t reclen; /* record length (fixed-length records) */ u_char bval; /* delimiting byte (variable-length records */ diff --git a/db2/include/db_am.h b/db2/include/db_am.h index 304e3fd959..0c189244a2 100644 --- a/db2/include/db_am.h +++ b/db2/include/db_am.h @@ -1,10 +1,10 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. * - * @(#)db_am.h 10.8 (Sleepycat) 1/8/98 + * @(#)db_am.h 10.9 (Sleepycat) 4/10/98 */ #ifndef _DB_AM_H #define _DB_AM_H diff --git a/db2/include/db_auto.h b/db2/include/db_auto.h index 4c7b4da970..1b07c748e8 100644 --- a/db2/include/db_auto.h +++ b/db2/include/db_auto.h @@ -114,6 +114,9 @@ typedef struct _db_noop_args { u_int32_t type; DB_TXN *txnid; DB_LSN prev_lsn; + u_int32_t fileid; + db_pgno_t pgno; + DB_LSN prevlsn; } __db_noop_args; #endif diff --git a/db2/include/db_cxx.h b/db2/include/db_cxx.h index 83523c5559..fc04d5d66b 100644 --- a/db2/include/db_cxx.h +++ b/db2/include/db_cxx.h @@ -1,10 +1,10 @@ /*- * See the file LICENSE for redistribution information. 
* - * Copyright (c) 1997 + * Copyright (c) 1997, 1998 * Sleepycat Software. All rights reserved. * - * @(#)db_cxx.h 10.13 (Sleepycat) 11/25/97 + * @(#)db_cxx.h 10.17 (Sleepycat) 5/2/98 */ #ifndef _DB_CXX_H_ @@ -178,11 +178,11 @@ class _exported DbLock friend DbLockTab; public: - DbLock(unsigned int); + DbLock(u_int); DbLock(); - unsigned int get_lock_id(); - void set_lock_id(unsigned int); + u_int get_lock_id(); + void set_lock_id(u_int); int put(DbLockTab *locktab); @@ -202,16 +202,16 @@ class _exported DbLockTab friend DbEnv; public: int close(); - int detect(int flags, int atype); - int get(u_int32_t locker, int flags, const Dbt *obj, + int detect(u_int32_t flags, int atype); + int get(u_int32_t locker, u_int32_t flags, const Dbt *obj, db_lockmode_t lock_mode, DbLock *lock); int id(u_int32_t *idp); - int vec(u_int32_t locker, int flags, DB_LOCKREQ list[], + int vec(u_int32_t locker, u_int32_t flags, DB_LOCKREQ list[], int nlist, DB_LOCKREQ **elistp); // Create or remove new locktab files // - static int open(const char *dir, int flags, int mode, + static int open(const char *dir, u_int32_t flags, int mode, DbEnv* dbenv, DbLockTab **regionp); static int unlink(const char *dir, int force, DbEnv* dbenv); @@ -252,13 +252,13 @@ class _exported DbLog { friend DbEnv; public: - int archive(char **list[], int flags, void *(*db_malloc)(size_t)); + int archive(char **list[], u_int32_t flags, void *(*db_malloc)(size_t)); int close(); static int compare(const DbLsn *lsn0, const DbLsn *lsn1); int file(DbLsn *lsn, char *namep, int len); int flush(const DbLsn *lsn); - int get(DbLsn *lsn, Dbt *data, int flags); - int put(DbLsn *lsn, const Dbt *data, int flags); + int get(DbLsn *lsn, Dbt *data, u_int32_t flags); + int put(DbLsn *lsn, const Dbt *data, u_int32_t flags); // Normally these would be called register and unregister to // parallel the C interface, but "register" is a reserved word. 
@@ -268,7 +268,7 @@ public: // Create or remove new log files // - static int open(const char *dir, int flags, int mode, + static int open(const char *dir, u_int32_t flags, int mode, DbEnv* dbenv, DbLog **regionp); static int unlink(const char *dir, int force, DbEnv* dbenv); @@ -300,17 +300,17 @@ private: class _exported DbMpoolFile { +friend DbEnv; public: int close(); - int get(db_pgno_t *pgnoaddr, int flags, void *pagep); - int put(void *pgaddr, int flags); - int set(void *pgaddr, int flags); + int get(db_pgno_t *pgnoaddr, u_int32_t flags, void *pagep); + int put(void *pgaddr, u_int32_t flags); + int set(void *pgaddr, u_int32_t flags); int sync(); static int open(DbMpool *mp, const char *file, - int ftype, int flags, int mode, - size_t pagesize, int lsn_offset, - Dbt *pgcookie, u_int8_t *uid, DbMpoolFile **mpf); + u_int32_t flags, int mode, size_t pagesize, + DB_MPOOL_FINFO *finfop, DbMpoolFile **mpf); private: // We can add data to this class if needed @@ -356,7 +356,7 @@ public: // Create or remove new mpool files // - static int open(const char *dir, int flags, int mode, + static int open(const char *dir, u_int32_t flags, int mode, DbEnv* dbenv, DbMpool **regionp); static int unlink(const char *dir, int force, DbEnv* dbenv); @@ -391,13 +391,13 @@ class _exported DbTxnMgr friend DbEnv; public: int begin(DbTxn *pid, DbTxn **tid); - int checkpoint(int kbyte, int min) const; + int checkpoint(u_int32_t kbyte, u_int32_t min) const; int close(); int stat(DB_TXN_STAT **statp, void *(*db_malloc)(size_t)); // Create or remove new txnmgr files // - static int open(const char *dir, int flags, int mode, + static int open(const char *dir, u_int32_t flags, int mode, DbEnv* dbenv, DbTxnMgr **regionp); static int unlink(const char *dir, int force, DbEnv* dbenv); @@ -510,12 +510,12 @@ public: // Hash access method. // Fill factor. 
- unsigned int get_h_ffactor() const; - void set_h_ffactor(unsigned int); + u_int32_t get_h_ffactor() const; + void set_h_ffactor(u_int32_t); // Number of elements. - unsigned int get_h_nelem() const; - void set_h_nelem(unsigned int); + u_int32_t get_h_nelem() const; + void set_h_nelem(u_int32_t); // Hash function. typedef u_int32_t (*h_hash_fcn)(const void *, u_int32_t); @@ -584,7 +584,7 @@ public: // application with these arguments. Do not use it if you // need to set other parameters via the access methods. // - DbEnv(const char *homeDir, char *const *db_config, int flags); + DbEnv(const char *homeDir, char *const *db_config, u_int32_t flags); // Use this constructor if you wish to *delay* the initialization // of the db library. This is useful if you need to set @@ -596,7 +596,7 @@ public: // Used in conjunction with the default constructor to // complete the initialization of the db library. // - int appinit(const char *homeDir, char *const *db_config, int flags); + int appinit(const char *homeDir, char *const *db_config, u_int32_t flags); // Called automatically when DbEnv is destroyed, or can be // called at any time to shut down Db. @@ -673,8 +673,8 @@ public: void set_lk_modes(int); // Maximum number of locks. - unsigned int get_lk_max() const; - void set_lk_max(unsigned int); + u_int32_t get_lk_max() const; + void set_lk_max(u_int32_t); // Deadlock detect on every conflict. u_int32_t get_lk_detect() const; @@ -714,8 +714,8 @@ public: DbTxnMgr *get_tx_info() const; // Maximum number of transactions. - unsigned int get_tx_max() const; - void set_tx_max(unsigned int); + u_int32_t get_tx_max() const; + void set_tx_max(u_int32_t); // Dispatch function for recovery. 
typedef int (*tx_recover_fcn)(DB_LOG *, DBT *, DB_LSN *, int, void *); @@ -781,18 +781,18 @@ class _exported Db friend DbEnv; public: - int close(int flags); + int close(u_int32_t flags); int cursor(DbTxn *txnid, Dbc **cursorp); - int del(DbTxn *txnid, Dbt *key, int flags); + int del(DbTxn *txnid, Dbt *key, u_int32_t flags); int fd(int *fdp); - int get(DbTxn *txnid, Dbt *key, Dbt *data, int flags); - int put(DbTxn *txnid, Dbt *key, Dbt *data, int flags); - int stat(void *sp, void *(*db_malloc)(size_t), int flags); - int sync(int flags); + int get(DbTxn *txnid, Dbt *key, Dbt *data, u_int32_t flags); + int put(DbTxn *txnid, Dbt *key, Dbt *data, u_int32_t flags); + int stat(void *sp, void *(*db_malloc)(size_t), u_int32_t flags); + int sync(u_int32_t flags); DBTYPE get_type() const; - static int open(const char *fname, DBTYPE type, int flags, + static int open(const char *fname, DBTYPE type, u_int32_t flags, int mode, DbEnv *dbenv, DbInfo *info, Db **dbpp); private: @@ -867,9 +867,9 @@ class _exported Dbc : protected DBC public: int close(); - int del(int flags); - int get(Dbt* key, Dbt *data, int flags); - int put(Dbt* key, Dbt *data, int flags); + int del(u_int32_t flags); + int get(Dbt* key, Dbt *data, u_int32_t flags); + int put(Dbt* key, Dbt *data, u_int32_t flags); private: // No data is permitted in this class (see comment at top) diff --git a/db2/include/db_dispatch.h b/db2/include/db_dispatch.h index b93ec39b54..8f5e217402 100644 --- a/db2/include/db_dispatch.h +++ b/db2/include/db_dispatch.h @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. */ /* @@ -36,26 +36,30 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * @(#)db_dispatch.h 10.1 (Sleepycat) 4/12/97 + * @(#)db_dispatch.h 10.4 (Sleepycat) 5/3/98 */ #ifndef _DB_DISPATCH_H #define _DB_DISPATCH_H +struct __db_txnhead; typedef struct __db_txnhead DB_TXNHEAD; +struct __db_txnlist; typedef struct __db_txnlist DB_TXNLIST; + /* * Declarations and typedefs for the list of transaction IDs used during * recovery. */ - -typedef struct __db_txnhead { - LIST_HEAD(__db_headlink, _db_txnlist) head; +struct __db_txnhead { + LIST_HEAD(__db_headlink, __db_txnlist) head; u_int32_t maxid; -} __db_txnhead; + int32_t generation; +}; -typedef struct _db_txnlist { - LIST_ENTRY(_db_txnlist) links; - u_int32_t txnid; -} __db_txnlist; +struct __db_txnlist { + LIST_ENTRY(__db_txnlist) links; + u_int32_t txnid; + int32_t generation; +}; #define DB_log_BEGIN 0 #define DB_txn_BEGIN 5 diff --git a/db2/include/db_ext.h b/db2/include/db_ext.h index 122d8f13c1..8a03db9f64 100644 --- a/db2/include/db_ext.h +++ b/db2/include/db_ext.h @@ -53,7 +53,8 @@ int __db_debug_print __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); int __db_debug_read __P((void *, __db_debug_args **)); int __db_noop_log - __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t)); + __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t, + u_int32_t, db_pgno_t, DB_LSN *)); int __db_noop_print __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); int __db_noop_read __P((void *, __db_noop_args **)); @@ -67,8 +68,9 @@ int __db_add_recovery __P((DB_ENV *, int __db_txnlist_init __P((void *)); int __db_txnlist_add __P((void *, u_int32_t)); int __db_txnlist_find __P((void *, u_int32_t)); -void __db_txnlist_print __P((void *)); void __db_txnlist_end __P((void *)); +void __db_txnlist_gen __P((void *, int)); +void __db_txnlist_print __P((void *)); int __db_dput __P((DB *, DBT *, PAGE **, db_indx_t *, int (*)(DB *, u_int32_t, PAGE **))); int __db_drem __P((DB *, @@ -83,7 +85,7 @@ int __db_goff __P((DB *, DBT *, u_int32_t, db_pgno_t, void **, u_int32_t *)); int __db_poff __P((DB *, const DBT *, db_pgno_t *, int (*)(DB *, 
u_int32_t, PAGE **))); -int __db_ovref __P((DB *, db_pgno_t, int)); +int __db_ovref __P((DB *, db_pgno_t, int32_t)); int __db_doff __P((DB *, db_pgno_t, int (*)(DB *, PAGE *))); int __db_moff __P((DB *, const DBT *, db_pgno_t)); void __db_loadme __P((void)); @@ -97,7 +99,8 @@ int __db_prnpage __P((DB_MPOOLFILE *, db_pgno_t)); int __db_prpage __P((PAGE *, int)); int __db_isbad __P((PAGE *, int)); void __db_pr __P((u_int8_t *, u_int32_t)); -void __db_prflags __P((u_int32_t, const FN *)); +int __db_prdbt __P((DBT *, int, FILE *)); +void __db_prflags __P((u_int32_t, const FN *, FILE *)); int __db_addrem_recover __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); int __db_split_recover __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); @@ -108,8 +111,7 @@ int __db_relink_recover int __db_addpage_recover __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); int __db_debug_recover __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); -int __db_noop_recover - __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); +int __db_noop_recover __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); int __db_ret __P((DB *, PAGE *, u_int32_t, DBT *, void **, u_int32_t *)); int __db_retcopy __P((DBT *, diff --git a/db2/include/db_int.h.src b/db2/include/db_int.h.src index 48790d6c9a..d67e2c428c 100644 --- a/db2/include/db_int.h.src +++ b/db2/include/db_int.h.src @@ -1,10 +1,10 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. * - * @(#)db_int.h.src 10.41 (Sleepycat) 1/8/98 + * @(#)db_int.h.src 10.62 (Sleepycat) 5/23/98 */ #ifndef _DB_INTERNAL_H_ @@ -12,8 +12,6 @@ #include "db.h" /* Standard DB include file. */ #include "queue.h" -#include "os_func.h" -#include "os_ext.h" /******************************************************* * General purpose constants and macros. 
@@ -77,8 +75,8 @@ #define R_ADDR(base, offset) ((void *)((u_int8_t *)((base)->addr) + offset)) #define R_OFFSET(base, p) ((u_int8_t *)(p) - (u_int8_t *)(base)->addr) -/* Free and free-string macros that overwrite memory during debugging. */ -#ifdef DEBUG +/* Free and free-string macros that overwrite memory. */ +#ifdef DIAGNOSTIC #undef FREE #define FREE(p, len) { \ memset(p, 0xff, len); \ @@ -117,36 +115,41 @@ typedef struct __fn { #undef DB_LINE #define DB_LINE "=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=" +/* Global variables. */ +typedef struct __db_globals { + int db_mutexlocks; /* DB_MUTEXLOCKS */ + int db_region_anon; /* DB_REGION_ANON, DB_REGION_NAME */ + int db_region_init; /* DB_REGION_INIT */ + int db_tsl_spins; /* DB_TSL_SPINS */ + int db_pageyield; /* DB_PAGEYIELD */ +} DB_GLOBALS; +extern DB_GLOBALS __db_global_values; +#define DB_GLOBAL(v) __db_global_values.v + /* Unused, or not-used-yet variable. "Shut that bloody compiler up!" */ #define COMPQUIET(n, v) (n) = (v) +/* + * Win16 needs specific syntax on callback functions. Nobody else cares. + */ +#ifndef DB_CALLBACK +#define DB_CALLBACK /* Nothing. */ +#endif + /******************************************************* * Files. *******************************************************/ -#ifndef MAXPATHLEN /* Maximum path length. */ -#ifdef PATH_MAX -#define MAXPATHLEN PATH_MAX -#else + /* + * We use 1024 as the maximum path length. It's too hard to figure out what + * the real path length is, as it was traditionally stored in <sys/param.h>, + * and that file isn't always available. + */ +#undef MAXPATHLEN #define MAXPATHLEN 1024 -#endif -#endif #define PATH_DOT "." /* Current working directory. */ #define PATH_SEPARATOR "/" /* Path separator character. */ -#ifndef S_IRUSR /* UNIX specific file permissions. 
*/ -#define S_IRUSR 0000400 /* R for owner */ -#define S_IWUSR 0000200 /* W for owner */ -#define S_IRGRP 0000040 /* R for group */ -#define S_IWGRP 0000020 /* W for group */ -#define S_IROTH 0000004 /* R for other */ -#define S_IWOTH 0000002 /* W for other */ -#endif - -#ifndef S_ISDIR /* UNIX specific: directory test. */ -#define S_ISDIR(m) ((m & 0170000) == 0040000) -#endif - /******************************************************* * Mutex support. *******************************************************/ @@ -176,12 +179,12 @@ typedef struct __fn { typedef struct _db_mutex_t { #ifdef HAVE_SPINLOCKS tsl_t tsl_resource; /* Resource test and set. */ -#ifdef DEBUG - u_long pid; /* Lock holder: 0 or process pid. */ +#ifdef DIAGNOSTIC + u_int32_t pid; /* Lock holder: 0 or process pid. */ #endif #else u_int32_t off; /* Backing file offset. */ - u_long pid; /* Lock holder: 0 or process pid. */ + u_int32_t pid; /* Lock holder: 0 or process pid. */ #endif u_int32_t spins; /* Spins before block. */ u_int32_t mutex_set_wait; /* Granted after wait. */ @@ -195,11 +198,11 @@ typedef struct _db_mutex_t { *******************************************************/ /* Lock/unlock a DB thread. */ #define DB_THREAD_LOCK(dbp) \ - (F_ISSET(dbp, DB_AM_THREAD) ? \ - __db_mutex_lock((db_mutex_t *)(dbp)->mutexp, -1) : 0) + if (F_ISSET(dbp, DB_AM_THREAD)) \ + (void)__db_mutex_lock((db_mutex_t *)(dbp)->mutexp, -1); #define DB_THREAD_UNLOCK(dbp) \ - (F_ISSET(dbp, DB_AM_THREAD) ? \ - __db_mutex_unlock((db_mutex_t *)(dbp)->mutexp, -1) : 0) + if (F_ISSET(dbp, DB_AM_THREAD)) \ + (void)__db_mutex_unlock((db_mutex_t *)(dbp)->mutexp, -1); /* Btree/recno local statistics structure. */ struct __db_bt_lstat; typedef struct __db_bt_lstat DB_BTREE_LSTAT; @@ -228,7 +231,7 @@ typedef enum { } APPNAME; /******************************************************* - * Regions. + * Shared memory regions. 
*******************************************************/ /* * The shared memory regions share an initial structure so that the general @@ -240,16 +243,69 @@ typedef enum { */ typedef struct _rlayout { db_mutex_t lock; /* Region mutex. */ +#define DB_REGIONMAGIC 0x120897 + u_int32_t valid; /* Valid magic number. */ u_int32_t refcnt; /* Region reference count. */ size_t size; /* Region length. */ int majver; /* Major version number. */ int minver; /* Minor version number. */ int patch; /* Patch version number. */ +#define INVALID_SEGID -1 + int segid; /* shmget(2) ID, or Win16 segment ID. */ -#define DB_R_DELETED 0x01 /* Region was deleted. */ +#define REGION_ANONYMOUS 0x01 /* Region is/should be in anon mem. */ u_int32_t flags; } RLAYOUT; +/* + * DB creates all regions on 4K boundaries out of sheer paranoia, so that + * we don't make the underlying VM unhappy. + */ +#define DB_VMPAGESIZE (4 * 1024) +#define DB_ROUNDOFF(i) { \ + (i) += DB_VMPAGESIZE - 1; \ + (i) -= (i) % DB_VMPAGESIZE; \ +} + +/* + * The interface to region attach is nasty, there is a lot of complex stuff + * going on, which has to be retained between create/attach and detach. The + * REGINFO structure keeps track of it. + */ +struct __db_reginfo; typedef struct __db_reginfo REGINFO; +struct __db_reginfo { + /* Arguments. */ + DB_ENV *dbenv; /* Region naming info. */ + APPNAME appname; /* Region naming info. */ + char *path; /* Region naming info. */ + const char *file; /* Region naming info. */ + int mode; /* Region mode, if a file. */ + size_t size; /* Region size. */ + u_int32_t dbflags; /* Region file open flags, if a file. */ + + /* Results. */ + char *name; /* Region name. */ + void *addr; /* Region address. */ + int fd; /* Fcntl(2) locking file descriptor. + NB: this is only valid if a regular + file is backing the shared region, + and mmap(2) is being used to map it + into our address space. */ + int segid; /* shmget(2) ID, or Win16 segment ID. */ + + /* Shared flags. 
*/ +/* 0x0001 COMMON MASK with RLAYOUT structure. */ +#define REGION_CANGROW 0x0002 /* Can grow. */ +#define REGION_CREATED 0x0004 /* Created. */ +#define REGION_HOLDINGSYS 0x0008 /* Holding system resources. */ +#define REGION_LASTDETACH 0x0010 /* Delete on last detach. */ +#define REGION_MALLOC 0x0020 /* Created in malloc'd memory. */ +#define REGION_PRIVATE 0x0040 /* Private to thread/process. */ +#define REGION_REMOVED 0x0080 /* Already deleted. */ +#define REGION_SIZEDEF 0x0100 /* Use default region size if exists. */ + u_int32_t flags; +}; + /******************************************************* * Mpool. *******************************************************/ @@ -281,7 +337,7 @@ typedef struct __dbpginfo { #define DB_LOGGING(dbp) \ (F_ISSET(dbp, DB_AM_LOGGING) && !F_ISSET(dbp, DB_AM_RECOVER)) -#ifdef DEBUG +#ifdef DIAGNOSTIC /* * Debugging macro to log operations. * If DEBUG_WOP is defined, log operations that modify the database. @@ -318,7 +374,7 @@ typedef struct __dbpginfo { #else #define DEBUG_LREAD(D, T, O, K, A, F) #define DEBUG_LWRITE(D, T, O, K, A, F) -#endif /* DEBUG */ +#endif /* DIAGNOSTIC */ /******************************************************* * Transactions and recovery. @@ -339,4 +395,8 @@ struct __db_txn { size_t off; /* Detail structure within region. */ TAILQ_ENTRY(__db_txn) links; }; + +#include "os_func.h" +#include "os_ext.h" + #endif /* !_DB_INTERNAL_H_ */ diff --git a/db2/include/db_page.h b/db2/include/db_page.h index 30f6072fc3..e1846cbbbd 100644 --- a/db2/include/db_page.h +++ b/db2/include/db_page.h @@ -1,10 +1,10 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. * - * @(#)db_page.h 10.13 (Sleepycat) 9/24/97 + * @(#)db_page.h 10.15 (Sleepycat) 5/1/98 */ #ifndef _DB_PAGE_H_ @@ -29,6 +29,14 @@ #define PGNO_INVALID 0 /* Metadata page number, therefore illegal. */ #define PGNO_ROOT 1 /* Root is page #1. 
*/ +/* + * When we create pages in mpool, we ask mpool to clear some number of bytes + * in the header. This number must be at least as big as the regular page + * headers and cover enough of the btree and hash meta-data pages to obliterate + * the magic and version numbers. + */ +#define DB_PAGE_CLEAR_LEN 32 + /************************************************************************ BTREE METADATA PAGE LAYOUT ************************************************************************/ diff --git a/db2/include/db_shash.h b/db2/include/db_shash.h index b94e0f1d41..35ade395fc 100644 --- a/db2/include/db_shash.h +++ b/db2/include/db_shash.h @@ -1,10 +1,10 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. * - * @(#)db_shash.h 10.2 (Sleepycat) 9/16/97 + * @(#)db_shash.h 10.3 (Sleepycat) 4/10/98 */ /* Hash Headers */ diff --git a/db2/include/db_swap.h b/db2/include/db_swap.h index 278282f5e4..9f94ed721b 100644 --- a/db2/include/db_swap.h +++ b/db2/include/db_swap.h @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. */ /* @@ -36,7 +36,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * @(#)db_swap.h 10.3 (Sleepycat) 6/10/97 + * @(#)db_swap.h 10.5 (Sleepycat) 4/10/98 */ #ifndef _DB_SWAP_H_ @@ -74,7 +74,7 @@ /* * Little endian <==> big endian 16-bit swap macros. 
* M_16_SWAP swap a memory location - * P_16_COPY copy potentially unaligned from one location to another + * P_16_COPY copy potentially unaligned 2 byte quantities * P_16_SWAP swap a referenced memory location */ #define M_16_SWAP(a) { \ diff --git a/db2/include/hash.h b/db2/include/hash.h index ae6d3843c6..e55c2102cb 100644 --- a/db2/include/hash.h +++ b/db2/include/hash.h @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. */ /* @@ -43,7 +43,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * @(#)hash.h 10.7 (Sleepycat) 11/1/97 + * @(#)hash.h 10.8 (Sleepycat) 4/10/98 */ /* Cursor structure definitions. */ diff --git a/db2/include/hash_ext.h b/db2/include/hash_ext.h index 9b97d35a42..7086adcc44 100644 --- a/db2/include/hash_ext.h +++ b/db2/include/hash_ext.h @@ -2,7 +2,7 @@ #ifndef _hash_ext_h_ #define _hash_ext_h_ int __ham_open __P((DB *, DB_INFO *)); -int __ham_close __P((DB *)); +int __ham_close __P((DB *)); int __ham_c_iclose __P((DB *, DBC *)); int __ham_expand_table __P((HTAB *)); u_int32_t __ham_call_hash __P((HTAB *, u_int8_t *, int32_t)); @@ -75,7 +75,7 @@ int __ham_mswap __P((void *)); #ifdef DEBUG void __ham_dump_bucket __P((HTAB *, u_int32_t)); #endif -int __ham_add_dup __P((HTAB *, HASH_CURSOR *, DBT *, int)); +int __ham_add_dup __P((HTAB *, HASH_CURSOR *, DBT *, u_int32_t)); void __ham_move_offpage __P((HTAB *, PAGE *, u_int32_t, db_pgno_t)); u_int32_t __ham_func2 __P((const void *, u_int32_t)); u_int32_t __ham_func3 __P((const void *, u_int32_t)); @@ -90,14 +90,16 @@ int __ham_item_first __P((HTAB *, HASH_CURSOR *, db_lockmode_t)); int __ham_item_prev __P((HTAB *, HASH_CURSOR *, db_lockmode_t)); int __ham_item_next __P((HTAB *, HASH_CURSOR *, db_lockmode_t)); void __ham_putitem __P((PAGE *p, const DBT *, int)); +void __ham_reputpair + __P((PAGE *p, u_int32_t, u_int32_t, 
const DBT *, const DBT *)); int __ham_del_pair __P((HTAB *, HASH_CURSOR *, int)); int __ham_replpair __P((HTAB *, HASH_CURSOR *, DBT *, u_int32_t)); void __ham_onpage_replace __P((PAGE *, size_t, u_int32_t, int32_t, int32_t, DBT *)); int __ham_split_page __P((HTAB *, u_int32_t, u_int32_t)); -int __ham_add_el __P((HTAB *, HASH_CURSOR *, const DBT *, const DBT *, - int)); -void __ham_copy_item __P((HTAB *, PAGE *, int, PAGE *)); +int __ham_add_el + __P((HTAB *, HASH_CURSOR *, const DBT *, const DBT *, int)); +void __ham_copy_item __P((HTAB *, PAGE *, u_int32_t, PAGE *)); int __ham_add_ovflpage __P((HTAB *, PAGE *, int, PAGE **)); int __ham_new_page __P((HTAB *, u_int32_t, u_int32_t, PAGE **)); int __ham_del_page __P((DB *, PAGE *)); @@ -106,12 +108,12 @@ int __ham_dirty_page __P((HTAB *, PAGE *)); int __ham_get_page __P((DB *, db_pgno_t, PAGE **)); int __ham_overflow_page __P((DB *, u_int32_t, PAGE **)); #ifdef DEBUG -int __bucket_to_page __P((HTAB *, int)); +db_pgno_t __bucket_to_page __P((HTAB *, db_pgno_t)); #endif void __ham_init_ovflpages __P((HTAB *)); int __ham_get_cpage __P((HTAB *, HASH_CURSOR *, db_lockmode_t)); -int __ham_next_cpage __P((HTAB *, HASH_CURSOR *, db_pgno_t, - int, int)); +int __ham_next_cpage + __P((HTAB *, HASH_CURSOR *, db_pgno_t, int, u_int32_t)); void __ham_dpair __P((DB *, PAGE *, u_int32_t)); int __ham_insdel_recover __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); diff --git a/db2/include/lock.h b/db2/include/lock.h index 5031b65d06..47a38b8783 100644 --- a/db2/include/lock.h +++ b/db2/include/lock.h @@ -1,16 +1,19 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. 
* - * @(#)lock.h 10.10 (Sleepycat) 11/13/97 + * @(#)lock.h 10.15 (Sleepycat) 5/10/98 */ typedef struct __db_lockobj DB_LOCKOBJ; #define DB_DEFAULT_LOCK_FILE "__db_lock.share" -#define DB_LOCK_DEFAULT_N 5000 + +#ifndef DB_LOCK_DEFAULT_N +#define DB_LOCK_DEFAULT_N 5000 /* Default # of locks in region. */ +#endif /* * The locker id space is divided between the transaction manager and the lock @@ -54,9 +57,9 @@ struct __db_lockregion { /* Macros to lock/unlock the region. */ #define LOCK_LOCKREGION(lt) \ - (void)__db_mutex_lock(&(lt)->region->hdr.lock, (lt)->fd) + (void)__db_mutex_lock(&(lt)->region->hdr.lock, (lt)->reginfo.fd) #define UNLOCK_LOCKREGION(lt) \ - (void)__db_mutex_unlock(&(lt)->region->hdr.lock, (lt)->fd) + (void)__db_mutex_unlock(&(lt)->region->hdr.lock, (lt)->reginfo.fd) /* * Since we will be keeping DBTs in shared memory, we need the equivalent @@ -69,9 +72,6 @@ typedef struct __sh_dbt { #define SH_DBT_PTR(p) ((void *)(((u_int8_t *)(p)) + (p)->off)) -/* - * The lock table is the per-process cookie returned from a lock_open call. - */ struct __db_lockobj { SH_DBT lockobj; /* Identifies object locked. */ SH_TAILQ_ENTRY links; /* Links for free list. */ @@ -98,12 +98,14 @@ struct __db_lockobj { #define holders dlinks._holders #define heldby dlinks._heldby +/* + * The lock table is the per-process cookie returned from a lock_open call. + */ struct __db_locktab { DB_ENV *dbenv; /* Environment. */ - int fd; /* mapped file descriptor */ - DB_LOCKREGION *region; /* address of shared memory region */ + REGINFO reginfo; /* Region information. */ + DB_LOCKREGION *region; /* Address of shared memory region. */ DB_HASHTAB *hashtab; /* Beginning of hash table. */ - size_t reg_size; /* last known size of lock region */ void *mem; /* Beginning of string space. */ u_int8_t *conflicts; /* Pointer to conflict matrix. */ }; @@ -113,21 +115,6 @@ struct __db_locktab { T->conflicts[HELD * T->region->nmodes + WANTED] /* - * Status of a lock. 
- */ -typedef enum { - DB_LSTAT_ABORTED, /* Lock belongs to an aborted txn. */ - DB_LSTAT_ERR, /* Lock is bad. */ - DB_LSTAT_FREE, /* Lock is unallocated. */ - DB_LSTAT_HELD, /* Lock is currently held. */ - DB_LSTAT_NOGRANT, /* Lock was not granted. */ - DB_LSTAT_PENDING, /* Lock was waiting and has been - * promoted; waiting for the owner - * to run and upgrade it to held. */ - DB_LSTAT_WAITING /* Lock is on the wait queue. */ -} db_status_t; - -/* * Resources in the lock region. Used to indicate which resource * is running low when we need to grow the region. */ @@ -187,17 +174,4 @@ struct __db_lock { ALIGN((N) * sizeof(DB_LOCKOBJ), sizeof(size_t)) + \ ALIGN(STRING_SIZE(N), sizeof(size_t))) -#ifdef DEBUG -#define LOCK_DEBUG_LOCKERS 0x0001 -#define LOCK_DEBUG_LOCK 0x0002 -#define LOCK_DEBUG_OBJ 0x0004 -#define LOCK_DEBUG_CONF 0x0008 -#define LOCK_DEBUG_MEM 0x0010 -#define LOCK_DEBUG_BUCKET 0x0020 -#define LOCK_DEBUG_OBJECTS 0x0040 -#define LOCK_DEBUG_ALL 0xFFFF - -#define LOCK_DEBUG_NOMUTEX 0x0100 -#endif - #include "lock_ext.h" diff --git a/db2/include/lock_ext.h b/db2/include/lock_ext.h index d983b29069..1e0522c6b5 100644 --- a/db2/include/lock_ext.h +++ b/db2/include/lock_ext.h @@ -1,14 +1,17 @@ /* DO NOT EDIT: automatically built by dist/distrib. 
*/ #ifndef _lock_ext_h_ #define _lock_ext_h_ -void __lock_dump_region __P((DB_LOCKTAB *, u_int)); int __lock_is_locked __P((DB_LOCKTAB *, u_int32_t, DBT *, db_lockmode_t)); +void __lock_printlock __P((DB_LOCKTAB *, struct __db_lock *, int)); int __lock_getobj __P((DB_LOCKTAB *, u_int32_t, const DBT *, u_int32_t type, DB_LOCKOBJ **)); +int __lock_validate_region __P((DB_LOCKTAB *)); +int __lock_grow_region __P((DB_LOCKTAB *, int, size_t)); +void __lock_dump_region __P((DB_LOCKTAB *, char *, FILE *)); int __lock_cmp __P((const DBT *, DB_LOCKOBJ *)); int __lock_locker_cmp __P((u_int32_t, DB_LOCKOBJ *)); -int __lock_ohash __P((const DBT *)); -u_int32_t __lock_locker_hash __P((u_int32_t)); +u_int32_t __lock_ohash __P((const DBT *)); u_int32_t __lock_lhash __P((DB_LOCKOBJ *)); +u_int32_t __lock_locker_hash __P((u_int32_t)); #endif /* _lock_ext_h_ */ diff --git a/db2/include/log.h b/db2/include/log.h index 4e27b038d3..7d5161cc9d 100644 --- a/db2/include/log.h +++ b/db2/include/log.h @@ -1,10 +1,10 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. * - * @(#)log.h 10.19 (Sleepycat) 1/17/98 + * @(#)log.h 10.25 (Sleepycat) 4/10/98 */ #ifndef _LOG_H_ @@ -15,9 +15,10 @@ struct __hdr; typedef struct __hdr HDR; struct __log; typedef struct __log LOG; struct __log_persist; typedef struct __log_persist LOGP; +#ifndef MAXLFNAME #define MAXLFNAME 99999 /* Maximum log file name. */ #define LFNAME "log.%05d" /* Log file name template. */ - +#endif /* Default log name. 
*/ #define DB_DEFAULT_LOG_FILE "__db_log.share" @@ -31,17 +32,19 @@ struct __log_persist; typedef struct __log_persist LOGP; if (F_ISSET(dblp, DB_AM_THREAD)) \ (void)__db_mutex_unlock((dblp)->mutexp, -1); #define LOCK_LOGREGION(dblp) \ - (void)__db_mutex_lock(&((RLAYOUT *)(dblp)->lp)->lock, (dblp)->fd) + (void)__db_mutex_lock(&((RLAYOUT *)(dblp)->lp)->lock, \ + (dblp)->reginfo.fd) #define UNLOCK_LOGREGION(dblp) \ - (void)__db_mutex_unlock(&((RLAYOUT *)(dblp)->lp)->lock, (dblp)->fd) + (void)__db_mutex_unlock(&((RLAYOUT *)(dblp)->lp)->lock, \ + (dblp)->reginfo.fd) /* * The per-process table that maps log file-id's to DB structures. */ typedef struct __db_entry { - DB *dbp; /* Associated DB structure. */ - int refcount; /* Reference counted. */ - int deleted; /* File was not found during open. */ + DB *dbp; /* Associated DB structure. */ + u_int32_t refcount; /* Reference counted. */ + int deleted; /* File was not found during open. */ } DB_ENTRY; /* @@ -75,10 +78,9 @@ struct __db_log { LOG *lp; /* Address of the shared LOG. */ DB_ENV *dbenv; /* Reference to error information. */ + REGINFO reginfo; /* Region information. */ - void *maddr; /* Address of mmap'd region. */ void *addr; /* Address of shalloc() region. */ - int fd; /* Region file descriptor. */ char *dir; /* Directory argument. */ @@ -131,7 +133,7 @@ struct __log { u_int32_t w_off; /* Current write offset in the file. */ - DB_LSN c_lsn; /* LSN of the last checkpoint. */ + DB_LSN chkpt_lsn; /* LSN of the last checkpoint. */ time_t chkpt; /* Time of the last checkpoint. */ DB_LOG_STAT stat; /* Log statistics. */ @@ -159,9 +161,8 @@ struct __fname { u_int32_t id; /* Logging file id. */ DBTYPE s_type; /* Saved DB type. */ - u_int32_t fileid_off; /* Unique file id offset. */ - size_t name_off; /* Name offset. */ + u_int8_t ufid[DB_FILE_ID_LEN]; /* Unique file id. */ }; /* File open/close register log record opcodes. 
*/ diff --git a/db2/include/log_ext.h b/db2/include/log_ext.h index 8640b134cd..bf3bcb02ce 100644 --- a/db2/include/log_ext.h +++ b/db2/include/log_ext.h @@ -13,8 +13,8 @@ int __log_register_read __P((void *, __log_register_args **)); int __log_init_print __P((DB_ENV *)); int __log_init_recover __P((DB_ENV *)); int __log_findckp __P((DB_LOG *, DB_LSN *)); -int __log_get __P((DB_LOG *, DB_LSN *, DBT *, int, int)); -int __log_put __P((DB_LOG *, DB_LSN *, const DBT *, int)); +int __log_get __P((DB_LOG *, DB_LSN *, DBT *, u_int32_t, int)); +int __log_put __P((DB_LOG *, DB_LSN *, const DBT *, u_int32_t)); int __log_name __P((DB_LOG *, int, char **)); int __log_register_recover __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); diff --git a/db2/include/mp.h b/db2/include/mp.h index 4efbf9b95e..8635efa722 100644 --- a/db2/include/mp.h +++ b/db2/include/mp.h @@ -1,10 +1,10 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. * - * @(#)mp.h 10.25 (Sleepycat) 1/8/98 + * @(#)mp.h 10.33 (Sleepycat) 5/4/98 */ struct __bh; typedef struct __bh BH; @@ -16,10 +16,12 @@ struct __mpoolfile; typedef struct __mpoolfile MPOOLFILE; #define DB_DEFAULT_MPOOL_FILE "__db_mpool.share" /* - * We default to 128K (16 8K pages) if the user doesn't specify, and + * We default to 128K (16 8K pages) if the user doesn't specify, and * require a minimum of 20K. */ +#ifndef DB_CACHESIZE_DEF #define DB_CACHESIZE_DEF (128 * 1024) +#endif #define DB_CACHESIZE_MIN ( 20 * 1024) #define INVALID 0 /* Invalid shared memory offset. 
*/ @@ -79,30 +81,30 @@ struct __mpoolfile; typedef struct __mpoolfile MPOOLFILE; #define LOCKINIT(dbmp, mutexp) \ if (F_ISSET(dbmp, MP_LOCKHANDLE | MP_LOCKREGION)) \ (void)__db_mutex_init(mutexp, \ - MUTEX_LOCK_OFFSET((dbmp)->maddr, mutexp)) + MUTEX_LOCK_OFFSET((dbmp)->reginfo.addr, mutexp)) #define LOCKHANDLE(dbmp, mutexp) \ if (F_ISSET(dbmp, MP_LOCKHANDLE)) \ - (void)__db_mutex_lock(mutexp, (dbmp)->fd) + (void)__db_mutex_lock(mutexp, (dbmp)->reginfo.fd) #define UNLOCKHANDLE(dbmp, mutexp) \ if (F_ISSET(dbmp, MP_LOCKHANDLE)) \ - (void)__db_mutex_unlock(mutexp, (dbmp)->fd) + (void)__db_mutex_unlock(mutexp, (dbmp)->reginfo.fd) #define LOCKREGION(dbmp) \ if (F_ISSET(dbmp, MP_LOCKREGION)) \ (void)__db_mutex_lock(&((RLAYOUT *)(dbmp)->mp)->lock, \ - (dbmp)->fd) + (dbmp)->reginfo.fd) #define UNLOCKREGION(dbmp) \ if (F_ISSET(dbmp, MP_LOCKREGION)) \ (void)__db_mutex_unlock(&((RLAYOUT *)(dbmp)->mp)->lock, \ - (dbmp)->fd) + (dbmp)->reginfo.fd) #define LOCKBUFFER(dbmp, bhp) \ if (F_ISSET(dbmp, MP_LOCKREGION)) \ - (void)__db_mutex_lock(&(bhp)->mutex, (dbmp)->fd) + (void)__db_mutex_lock(&(bhp)->mutex, (dbmp)->reginfo.fd) #define UNLOCKBUFFER(dbmp, bhp) \ if (F_ISSET(dbmp, MP_LOCKREGION)) \ - (void)__db_mutex_unlock(&(bhp)->mutex, (dbmp)->fd) + (void)__db_mutex_unlock(&(bhp)->mutex, (dbmp)->reginfo.fd) /* * DB_MPOOL -- @@ -120,20 +122,16 @@ struct __db_mpool { /* These fields are not protected. */ DB_ENV *dbenv; /* Reference to error information. */ + REGINFO reginfo; /* Region information. */ MPOOL *mp; /* Address of the shared MPOOL. */ - void *maddr; /* Address of mmap'd region. */ void *addr; /* Address of shalloc() region. */ DB_HASHTAB *htab; /* Hash table of bucket headers. */ - int fd; /* Underlying mmap'd fd. */ - -#define MP_ISPRIVATE 0x01 /* Private, so local memory. */ -#define MP_LOCKHANDLE 0x02 /* Threaded, lock handles and region. */ -#define MP_LOCKREGION 0x04 /* Concurrent access, lock region. */ -#define MP_MALLOC 0x08 /* If region in allocated memory. 
*/ +#define MP_LOCKHANDLE 0x01 /* Threaded, lock handles and region. */ +#define MP_LOCKREGION 0x02 /* Concurrent access, lock region. */ u_int32_t flags; }; @@ -146,8 +144,8 @@ struct __db_mpreg { int ftype; /* File type. */ /* Pgin, pgout routines. */ - int (*pgin) __P((db_pgno_t, void *, DBT *)); - int (*pgout) __P((db_pgno_t, void *, DBT *)); + int (DB_CALLBACK *pgin) __P((db_pgno_t, void *, DBT *)); + int (DB_CALLBACK *pgout) __P((db_pgno_t, void *, DBT *)); }; /* @@ -207,7 +205,7 @@ struct __mpool { size_t htab_buckets; /* Number of hash table entries. */ DB_LSN lsn; /* Maximum checkpoint LSN. */ - int lsn_cnt; /* Checkpoint buffers left to write. */ + u_int32_t lsn_cnt; /* Checkpoint buffers left to write. */ DB_MPOOL_STAT stat; /* Global mpool statistics. */ @@ -225,7 +223,9 @@ struct __mpoolfile { u_int32_t ref; /* Reference count. */ int ftype; /* File type. */ - int lsn_off; /* Page's LSN offset. */ + + int32_t lsn_off; /* Page's LSN offset. */ + u_int32_t clear_len; /* Bytes to clear on page create. */ size_t path_off; /* File name location. */ size_t fileid_off; /* File identification location. */ @@ -233,9 +233,10 @@ struct __mpoolfile { size_t pgcookie_len; /* Pgin/pgout cookie length. */ size_t pgcookie_off; /* Pgin/pgout cookie location. */ - int lsn_cnt; /* Checkpoint buffers left to write. */ + u_int32_t lsn_cnt; /* Checkpoint buffers left to write. */ db_pgno_t last_pgno; /* Last page in the file. */ + db_pgno_t orig_last_pgno; /* Original last page in the file. */ #define MP_CAN_MMAP 0x01 /* If the file can be mmap'd. */ #define MP_TEMP 0x02 /* Backing file is a temporary. 
*/ diff --git a/db2/include/mp_ext.h b/db2/include/mp_ext.h index 1928820637..3650839475 100644 --- a/db2/include/mp_ext.h +++ b/db2/include/mp_ext.h @@ -7,13 +7,13 @@ int __memp_pgread __P((DB_MPOOLFILE *, BH *, int)); int __memp_pgwrite __P((DB_MPOOLFILE *, BH *, int *, int *)); int __memp_pg __P((DB_MPOOLFILE *, BH *, int)); void __memp_bhfree __P((DB_MPOOL *, MPOOLFILE *, BH *, int)); -int __memp_fopen __P((DB_MPOOL *, MPOOLFILE *, const char *, int, - int, int, size_t, int, DBT *, u_int8_t *, int, DB_MPOOLFILE **)); +int __memp_fopen __P((DB_MPOOL *, MPOOLFILE *, const char *, + u_int32_t, int, size_t, int, DB_MPOOL_FINFO *, DB_MPOOLFILE **)); char * __memp_fn __P((DB_MPOOLFILE *)); char * __memp_fns __P((DB_MPOOL *, MPOOLFILE *)); -void __memp_debug __P((DB_MPOOL *, FILE *, int)); +void __memp_dump_region __P((DB_MPOOL *, char *, FILE *)); int __memp_ralloc __P((DB_MPOOL *, size_t, size_t *, void *)); int __memp_ropen - __P((DB_MPOOL *, const char *, size_t, int, int)); -int __memp_rclose __P((DB_MPOOL *)); + __P((DB_MPOOL *, const char *, size_t, int, int, u_int32_t)); +int __mp_xxx_fd __P((DB_MPOOLFILE *, int *)); #endif /* _mp_ext_h_ */ diff --git a/db2/include/mutex_ext.h b/db2/include/mutex_ext.h index f0e68f3659..b48da5d2f4 100644 --- a/db2/include/mutex_ext.h +++ b/db2/include/mutex_ext.h @@ -1,7 +1,7 @@ /* DO NOT EDIT: automatically built by dist/distrib. 
*/ #ifndef _mutex_ext_h_ #define _mutex_ext_h_ -void __db_mutex_init __P((db_mutex_t *, u_int32_t)); +int __db_mutex_init __P((db_mutex_t *, u_int32_t)); int __db_mutex_lock __P((db_mutex_t *, int)); int __db_mutex_unlock __P((db_mutex_t *, int)); #endif /* _mutex_ext_h_ */ diff --git a/db2/include/os_ext.h b/db2/include/os_ext.h index 9c66a248c8..889a45a44e 100644 --- a/db2/include/os_ext.h +++ b/db2/include/os_ext.h @@ -2,23 +2,29 @@ #ifndef _os_ext_h_ #define _os_ext_h_ int __db_abspath __P((const char *)); +char *__db_strdup __P((const char *)); void *__db_calloc __P((size_t, size_t)); void *__db_malloc __P((size_t)); void *__db_realloc __P((void *, size_t)); -int __os_oldwin __P((void)); int __os_dirlist __P((const char *, char ***, int *)); void __os_dirfree __P((char **, int)); int __db_fileid __P((DB_ENV *, const char *, int, u_int8_t *)); int __db_fsync __P((int)); -int __os_map __P((int, size_t, int, int, void **)); -int __os_unmap __P((void *, size_t)); -int __db_oflags __P((int)); -int __db_open __P((const char *, int, int, int, int *)); +int __db_mapanon_ok __P((int)); +int __db_mapinit __P((void)); +int __db_mapregion __P((char *, REGINFO *)); +int __db_unmapregion __P((REGINFO *)); +int __db_unlinkregion __P((char *, REGINFO *)); +int __db_mapfile __P((char *, int, size_t, int, void **)); +int __db_unmapfile __P((void *, size_t)); +u_int32_t __db_oflags __P((int)); +int __db_omode __P((const char *)); +int __db_open __P((const char *, u_int32_t, u_int32_t, int, int *)); int __db_close __P((int)); char *__db_rpath __P((const char *)); int __db_read __P((int, void *, size_t, ssize_t *)); int __db_write __P((int, void *, size_t, ssize_t *)); -int __os_seek __P((int, size_t, db_pgno_t, u_long, int)); +int __os_seek __P((int, size_t, db_pgno_t, u_int32_t, int, int)); int __os_sleep __P((u_long, u_long)); int __os_spin __P((void)); int __os_exists __P((const char *, int *)); diff --git a/db2/include/os_func.h b/db2/include/os_func.h index 
b825fed5db..12794d550d 100644 --- a/db2/include/os_func.h +++ b/db2/include/os_func.h @@ -1,40 +1,40 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1997 + * Copyright (c) 1997, 1998 * Sleepycat Software. All rights reserved. * - * @(#)os_func.h 10.5 (Sleepycat) 12/4/97 + * @(#)os_func.h 10.8 (Sleepycat) 4/19/98 */ /* Calls which can be replaced by the application. */ struct __db_jumptab { - int (*db_close) __P((int)); /* DB_FUNC_CLOSE */ - void (*db_dirfree) __P((char **, int)); /* DB_FUNC_DIRFREE */ - int (*db_dirlist) /* DB_FUNC_DIRLIST */ + int (*j_close) __P((int)); /* DB_FUNC_CLOSE */ + void (*j_dirfree) __P((char **, int)); /* DB_FUNC_DIRFREE */ + int (*j_dirlist) /* DB_FUNC_DIRLIST */ __P((const char *, char ***, int *)); - int (*db_exists) /* DB_FUNC_EXISTS */ + int (*j_exists) /* DB_FUNC_EXISTS */ __P((const char *, int *)); - void (*db_free) __P((void *)); /* DB_FUNC_FREE */ - int (*db_fsync) __P((int)); /* DB_FUNC_FSYNC */ - int (*db_ioinfo) __P((const char *, /* DB_FUNC_IOINFO */ + void (*j_free) __P((void *)); /* DB_FUNC_FREE */ + int (*j_fsync) __P((int)); /* DB_FUNC_FSYNC */ + int (*j_ioinfo) __P((const char *, /* DB_FUNC_IOINFO */ int, u_int32_t *, u_int32_t *, u_int32_t *)); - void *(*db_malloc) __P((size_t)); /* DB_FUNC_MALLOC */ - int (*db_map) /* DB_FUNC_MAP */ - __P((int, size_t, int, int, void **)); - int (*db_open) /* DB_FUNC_OPEN */ + void *(*j_malloc) __P((size_t)); /* DB_FUNC_MALLOC */ + int (*j_map) /* DB_FUNC_MAP */ + __P((char *, int, size_t, int, int, int, void **)); + int (*j_open) /* DB_FUNC_OPEN */ __P((const char *, int, ...)); - ssize_t (*db_read) __P((int, void *, size_t)); /* DB_FUNC_READ */ - void *(*db_realloc) __P((void *, size_t)); /* DB_FUNC_REALLOC */ - int (*db_seek) /* DB_FUNC_SEEK */ - __P((int, size_t, db_pgno_t, u_long, int)); - int (*db_sleep) __P((u_long, u_long)); /* DB_FUNC_SLEEP */ - char *(*db_strdup) __P((const char *)); /* DB_FUNC_STRDUP */ - int (*db_unlink) __P((const char 
*)); /* DB_FUNC_UNLINK */ - int (*db_unmap) __P((void *, size_t)); /* DB_FUNC_UNMAP */ - ssize_t (*db_write) /* DB_FUNC_WRITE */ + ssize_t (*j_read) __P((int, void *, size_t)); /* DB_FUNC_READ */ + void *(*j_realloc) __P((void *, size_t)); /* DB_FUNC_REALLOC */ + int (*j_runlink) __P((char *)); /* DB_FUNC_RUNLINK */ + int (*j_seek) /* DB_FUNC_SEEK */ + __P((int, size_t, db_pgno_t, u_int32_t, int, int)); + int (*j_sleep) __P((u_long, u_long)); /* DB_FUNC_SLEEP */ + int (*j_unlink) __P((const char *)); /* DB_FUNC_UNLINK */ + int (*j_unmap) __P((void *, size_t)); /* DB_FUNC_UNMAP */ + ssize_t (*j_write) /* DB_FUNC_WRITE */ __P((int, const void *, size_t)); - int (*db_yield) __P((void)); /* DB_FUNC_YIELD */ + int (*j_yield) __P((void)); /* DB_FUNC_YIELD */ }; extern struct __db_jumptab __db_jump; @@ -43,7 +43,7 @@ extern struct __db_jumptab __db_jump; * Names used by DB to call through the jump table. * * The naming scheme goes like this: if the functionality the application can - * replace is the same as the DB functionality, e.g., calloc, or dirlist, then + * replace is the same as the DB functionality, e.g., malloc, or dirlist, then * we use the name __db_XXX, and the application is expected to replace the * complete functionality, which may or may not map directly to an ANSI C or * POSIX 1003.1 interface. If the functionality that the aplication replaces @@ -53,20 +53,17 @@ extern struct __db_jumptab __db_jump; * part of DB is the only code that should use the __os_XXX names, all other * parts of DB should be calling __db_XXX functions. */ -#define __os_close __db_jump.db_close /* __db_close is a wrapper. */ -#define __db_dirfree __db_jump.db_dirfree -#define __db_dirlist __db_jump.db_dirlist -#define __db_exists __db_jump.db_exists -#define __db_free __db_jump.db_free -#define __os_fsync __db_jump.db_fsync /* __db_fsync is a wrapper. 
*/ -#define __db_ioinfo __db_jump.db_ioinfo -#define __db_map __db_jump.db_map -#define __os_open __db_jump.db_open /* __db_open is a wrapper. */ -#define __os_read __db_jump.db_read /* __db_read is a wrapper. */ -#define __db_seek __db_jump.db_seek -#define __db_sleep __db_jump.db_sleep -#define __db_strdup __db_jump.db_strdup -#define __os_unlink __db_jump.db_unlink /* __db_unlink is a wrapper. */ -#define __db_unmap __db_jump.db_unmap -#define __os_write __db_jump.db_write /* __db_write is a wrapper. */ -#define __db_yield __db_jump.db_yield +#define __os_close __db_jump.j_close /* __db_close is a wrapper. */ +#define __db_dirfree __db_jump.j_dirfree +#define __db_dirlist __db_jump.j_dirlist +#define __db_exists __db_jump.j_exists +#define __db_free __db_jump.j_free +#define __os_fsync __db_jump.j_fsync /* __db_fsync is a wrapper. */ +#define __db_ioinfo __db_jump.j_ioinfo +#define __os_open __db_jump.j_open /* __db_open is a wrapper. */ +#define __os_read __db_jump.j_read /* __db_read is a wrapper. */ +#define __db_seek __db_jump.j_seek +#define __db_sleep __db_jump.j_sleep +#define __os_unlink __db_jump.j_unlink /* __db_unlink is a wrapper. */ +#define __os_write __db_jump.j_write /* __db_write is a wrapper. */ +#define __db_yield __db_jump.j_yield diff --git a/db2/include/queue.h b/db2/include/queue.h index 0909c86c60..f606eb0497 100644 --- a/db2/include/queue.h +++ b/db2/include/queue.h @@ -1,6 +1,6 @@ /* BSDI $Id$ */ -/* +/* * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * diff --git a/db2/include/shqueue.h b/db2/include/shqueue.h index c596d33e92..00e5d76251 100644 --- a/db2/include/shqueue.h +++ b/db2/include/shqueue.h @@ -1,10 +1,10 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. 
* - * @(#)shqueue.h 8.12 (Sleepycat) 9/10/97 + * @(#)shqueue.h 8.13 (Sleepycat) 4/10/98 */ #ifndef _SYS_SHQUEUE_H_ #define _SYS_SHQUEUE_H_ diff --git a/db2/include/txn.h b/db2/include/txn.h index c64ac3fc52..a2512ed152 100644 --- a/db2/include/txn.h +++ b/db2/include/txn.h @@ -1,10 +1,10 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. * - * @(#)txn.h 10.11 (Sleepycat) 10/25/97 + * @(#)txn.h 10.15 (Sleepycat) 4/21/98 */ #ifndef _TXN_H_ #define _TXN_H_ @@ -52,12 +52,11 @@ struct __db_txnmgr { TAILQ_HEAD(_chain, __db_txn) txn_chain; /* These fields are not protected. */ + REGINFO reginfo; /* Region information. */ DB_ENV *dbenv; /* Environment. */ int (*recover) /* Recovery dispatch routine */ __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); - int fd; /* mapped file descriptor */ - u_int flags; /* DB_TXN_NOSYNC, DB_THREAD */ - size_t reg_size; /* how large we think the region is */ + u_int32_t flags; /* DB_TXN_NOSYNC, DB_THREAD */ DB_TXNREGION *region; /* address of shared memory region */ void *mem; /* address of the shalloc space */ }; @@ -102,17 +101,16 @@ struct __db_txnregion { (void)__db_mutex_unlock((tmgrp)->mutexp, -1) #define LOCK_TXNREGION(tmgrp) \ - (void)__db_mutex_lock(&(tmgrp)->region->hdr.lock, (tmgrp)->fd) + (void)__db_mutex_lock(&(tmgrp)->region->hdr.lock, (tmgrp)->reginfo.fd) #define UNLOCK_TXNREGION(tmgrp) \ - (void)__db_mutex_unlock(&(tmgrp)->region->hdr.lock, (tmgrp)->fd) + (void)__db_mutex_unlock(&(tmgrp)->region->hdr.lock, (tmgrp)->reginfo.fd) /* * Log record types. 
*/ -#define TXN_BEGIN 1 -#define TXN_COMMIT 2 -#define TXN_PREPARE 3 -#define TXN_CHECKPOINT 4 +#define TXN_COMMIT 1 +#define TXN_PREPARE 2 +#define TXN_CHECKPOINT 3 #include "txn_auto.h" #include "txn_ext.h" diff --git a/db2/lock/lock.c b/db2/lock/lock.c index 0846d3c29f..3d20e0d65b 100644 --- a/db2/lock/lock.c +++ b/db2/lock/lock.c @@ -1,28 +1,21 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. */ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)lock.c 10.43 (Sleepycat) 1/8/98"; +static const char sccsid[] = "@(#)lock.c 10.52 (Sleepycat) 5/10/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES #include <sys/types.h> -#include <sys/mman.h> -#include <sys/stat.h> #include <errno.h> -#include <fcntl.h> -#include <stddef.h> -#include <stdio.h> -#include <stdlib.h> #include <string.h> -#include <unistd.h> #endif #include "db_int.h" @@ -34,248 +27,15 @@ static const char sccsid[] = "@(#)lock.c 10.43 (Sleepycat) 1/8/98"; #include "db_am.h" static void __lock_checklocker __P((DB_LOCKTAB *, struct __db_lock *, int)); -static int __lock_count_locks __P((DB_LOCKREGION *)); -static int __lock_count_objs __P((DB_LOCKREGION *)); -static int __lock_create __P((const char *, int, DB_ENV *)); static void __lock_freeobj __P((DB_LOCKTAB *, DB_LOCKOBJ *)); -static int __lock_get_internal __P((DB_LOCKTAB *, u_int32_t, int, const DBT *, - db_lockmode_t, struct __db_lock **)); -static int __lock_grow_region __P((DB_LOCKTAB *, int, size_t)); +static int __lock_get_internal __P((DB_LOCKTAB *, u_int32_t, u_int32_t, + const DBT *, db_lockmode_t, struct __db_lock **)); static int __lock_put_internal __P((DB_LOCKTAB *, struct __db_lock *, int)); static void __lock_remove_waiter __P((DB_LOCKTAB *, DB_LOCKOBJ *, struct __db_lock *, db_status_t)); -static void __lock_reset_region __P((DB_LOCKTAB *)); -static int __lock_validate_region __P((DB_LOCKTAB 
*)); -#ifdef DEBUG -static void __lock_dump_locker __P((DB_LOCKTAB *, DB_LOCKOBJ *)); -static void __lock_dump_object __P((DB_LOCKTAB *, DB_LOCKOBJ *)); -static void __lock_printlock __P((DB_LOCKTAB *, struct __db_lock *, int)); -#endif - -/* - * Create and initialize a lock region in shared memory. - */ - -/* - * __lock_create -- - * Create the lock region. Returns an errno. In most cases, - * the errno should be that returned by __db_ropen, in which case - * an EAGAIN means that we should retry, and an EEXIST means that - * the region exists and we didn't need to create it. Any other - * sort of errno should be treated as a system error, leading to a - * failure of the original interface call. - */ -static int -__lock_create(path, mode, dbenv) - const char *path; - int mode; - DB_ENV *dbenv; -{ - struct __db_lock *lp; - struct lock_header *tq_head; - struct obj_header *obj_head; - DB_LOCKOBJ *op; - DB_LOCKREGION *lrp; - u_int maxlocks; - u_int32_t i; - int fd, lock_modes, nelements, ret; - const u_int8_t *conflicts; - u_int8_t *curaddr; - - maxlocks = dbenv == NULL || dbenv->lk_max == 0 ? - DB_LOCK_DEFAULT_N : dbenv->lk_max; - lock_modes = dbenv == NULL || dbenv->lk_modes == 0 ? - DB_LOCK_RW_N : dbenv->lk_modes; - conflicts = dbenv == NULL || dbenv->lk_conflicts == NULL ? - db_rw_conflicts : dbenv->lk_conflicts; - - if ((ret = - __db_rcreate(dbenv, DB_APP_NONE, path, DB_DEFAULT_LOCK_FILE, mode, - LOCK_REGION_SIZE(lock_modes, maxlocks, __db_tablesize(maxlocks)), - 0, &fd, &lrp)) != 0) - return (ret); - - /* Region exists; now initialize it. 
*/ - lrp->table_size = __db_tablesize(maxlocks); - lrp->magic = DB_LOCKMAGIC; - lrp->version = DB_LOCKVERSION; - lrp->id = 0; - lrp->maxlocks = maxlocks; - lrp->need_dd = 0; - lrp->detect = DB_LOCK_NORUN; - lrp->numobjs = maxlocks; - lrp->nlockers = 0; - lrp->mem_bytes = ALIGN(STRING_SIZE(maxlocks), sizeof(size_t)); - lrp->increment = lrp->hdr.size / 2; - lrp->nmodes = lock_modes; - lrp->nconflicts = 0; - lrp->nrequests = 0; - lrp->nreleases = 0; - lrp->ndeadlocks = 0; - - /* - * As we write the region, we've got to maintain the alignment - * for the structures that follow each chunk. This information - * ends up being encapsulated both in here as well as in the - * lock.h file for the XXX_SIZE macros. - */ - /* Initialize conflict matrix. */ - curaddr = (u_int8_t *)lrp + sizeof(DB_LOCKREGION); - memcpy(curaddr, conflicts, lock_modes * lock_modes); - curaddr += lock_modes * lock_modes; - - /* - * Initialize hash table. - */ - curaddr = (u_int8_t *)ALIGNP(curaddr, LOCK_HASH_ALIGN); - lrp->hash_off = curaddr - (u_int8_t *)lrp; - nelements = lrp->table_size; - __db_hashinit(curaddr, nelements); - curaddr += nelements * sizeof(DB_HASHTAB); - - /* - * Initialize locks onto a free list. Since locks contains mutexes, - * we need to make sure that each lock is aligned on a MUTEX_ALIGNMENT - * boundary. - */ - curaddr = (u_int8_t *)ALIGNP(curaddr, MUTEX_ALIGNMENT); - tq_head = &lrp->free_locks; - SH_TAILQ_INIT(tq_head); - - for (i = 0; i++ < maxlocks; - curaddr += ALIGN(sizeof(struct __db_lock), MUTEX_ALIGNMENT)) { - lp = (struct __db_lock *)curaddr; - lp->status = DB_LSTAT_FREE; - SH_TAILQ_INSERT_HEAD(tq_head, lp, links, __db_lock); - } - - /* Initialize objects onto a free list. 
*/ - obj_head = &lrp->free_objs; - SH_TAILQ_INIT(obj_head); - - for (i = 0; i++ < maxlocks; curaddr += sizeof(DB_LOCKOBJ)) { - op = (DB_LOCKOBJ *)curaddr; - SH_TAILQ_INSERT_HEAD(obj_head, op, links, __db_lockobj); - } - - /* - * Initialize the string space; as for all shared memory allocation - * regions, this requires size_t alignment, since we store the - * lengths of malloc'd areas in the area.. - */ - curaddr = (u_int8_t *)ALIGNP(curaddr, sizeof(size_t)); - lrp->mem_off = curaddr - (u_int8_t *)lrp; - __db_shalloc_init(curaddr, lrp->mem_bytes); - - /* Release the lock. */ - (void)__db_mutex_unlock(&lrp->hdr.lock, fd); - - /* Now unmap the region. */ - if ((ret = __db_rclose(dbenv, fd, lrp)) != 0) { - (void)lock_unlink(path, 1 /* force */, dbenv); - return (ret); - } - - return (0); -} int -lock_open(path, flags, mode, dbenv, ltp) - const char *path; - int flags, mode; - DB_ENV *dbenv; - DB_LOCKTAB **ltp; -{ - DB_LOCKTAB *lt; - int ret, retry_cnt; - - /* Validate arguments. */ -#ifdef HAVE_SPINLOCKS -#define OKFLAGS (DB_CREATE | DB_THREAD) -#else -#define OKFLAGS (DB_CREATE) -#endif - if ((ret = __db_fchk(dbenv, "lock_open", flags, OKFLAGS)) != 0) - return (ret); - - /* - * Create the lock table structure. - */ - if ((lt = (DB_LOCKTAB *)__db_calloc(1, sizeof(DB_LOCKTAB))) == NULL) { - __db_err(dbenv, "%s", strerror(ENOMEM)); - return (ENOMEM); - } - lt->dbenv = dbenv; - - /* - * Now, create the lock region if it doesn't already exist. - */ - retry_cnt = 0; -retry: if (LF_ISSET(DB_CREATE) && - (ret = __lock_create(path, mode, dbenv)) != 0) - if (ret == EAGAIN && ++retry_cnt < 3) { - (void)__db_sleep(1, 0); - goto retry; - } else if (ret == EEXIST) /* We did not create the region */ - LF_CLR(DB_CREATE); - else - goto out; - - /* - * Finally, open the region, map it in, and increment the - * reference count. 
- */ - retry_cnt = 0; -retry1: if ((ret = __db_ropen(dbenv, DB_APP_NONE, path, DB_DEFAULT_LOCK_FILE, - LF_ISSET(~(DB_CREATE | DB_THREAD)), &lt->fd, &lt->region)) != 0) { - if (ret == EAGAIN && ++retry_cnt < 3) { - (void)__db_sleep(1, 0); - goto retry1; - } - goto out; - } - - if (lt->region->magic != DB_LOCKMAGIC) { - __db_err(dbenv, "lock_open: Bad magic number"); - ret = EINVAL; - goto out; - } - - /* Check for automatic deadlock detection. */ - if (dbenv->lk_detect != DB_LOCK_NORUN) { - if (lt->region->detect != DB_LOCK_NORUN && - dbenv->lk_detect != DB_LOCK_DEFAULT && - lt->region->detect != dbenv->lk_detect) { - __db_err(dbenv, - "lock_open: incompatible deadlock detector mode"); - ret = EINVAL; - goto out; - } - if (lt->region->detect == DB_LOCK_NORUN) - lt->region->detect = dbenv->lk_detect; - } - - /* Set up remaining pointers into region. */ - lt->conflicts = (u_int8_t *)lt->region + sizeof(DB_LOCKREGION); - lt->hashtab = - (DB_HASHTAB *)((u_int8_t *)lt->region + lt->region->hash_off); - lt->mem = (void *)((u_int8_t *)lt->region + lt->region->mem_off); - lt->reg_size = lt->region->hdr.size; - - *ltp = lt; - return (0); - -/* Error handling. 
*/ -out: if (lt->region != NULL) - (void)__db_rclose(lt->dbenv, lt->fd, lt->region); - if (LF_ISSET(DB_CREATE)) - (void)lock_unlink(path, 1, lt->dbenv); - __db_free(lt); - return (ret); -} - -int -lock_id (lt, idp) +lock_id(lt, idp) DB_LOCKTAB *lt; u_int32_t *idp; { @@ -294,8 +54,8 @@ lock_id (lt, idp) int lock_vec(lt, locker, flags, list, nlist, elistp) DB_LOCKTAB *lt; - u_int32_t locker; - int flags, nlist; + u_int32_t locker, flags; + int nlist; DB_LOCKREQ *list, **elistp; { struct __db_lock *lp; @@ -345,7 +105,7 @@ lock_vec(lt, locker, flags, list, nlist, elistp) for (lp = SH_LIST_FIRST(&sh_locker->heldby, __db_lock); lp != NULL; lp = SH_LIST_FIRST(&sh_locker->heldby, __db_lock)) { - if ((ret = __lock_put_internal(lt, lp, 0)) != 0) + if ((ret = __lock_put_internal(lt, lp, 1)) != 0) break; } __lock_freeobj(lt, sh_locker); @@ -436,8 +196,7 @@ lock_vec(lt, locker, flags, list, nlist, elistp) int lock_get(lt, locker, flags, obj, lock_mode, lock) DB_LOCKTAB *lt; - u_int32_t locker; - int flags; + u_int32_t locker, flags; const DBT *obj; db_lockmode_t lock_mode; DB_LOCK *lock; @@ -496,35 +255,6 @@ lock_put(lt, lock) return (ret); } -int -lock_close(lt) - DB_LOCKTAB *lt; -{ - int ret; - - if ((ret = __db_rclose(lt->dbenv, lt->fd, lt->region)) != 0) - return (ret); - - /* Free lock table. */ - __db_free(lt); - return (0); -} - -int -lock_unlink(path, force, dbenv) - const char *path; - int force; - DB_ENV *dbenv; -{ - return (__db_runlink(dbenv, - DB_APP_NONE, path, DB_DEFAULT_LOCK_FILE, force)); -} - -/* - * XXX This looks like it could be void, but I'm leaving it returning - * an int because I think it will have to when we go through and add - * the appropriate error checking for the EINTR on mutexes. - */ static int __lock_put_internal(lt, lockp, do_all) DB_LOCKTAB *lt; @@ -593,7 +323,7 @@ __lock_put_internal(lt, lockp, do_all) SH_TAILQ_INSERT_TAIL(&sh_obj->holders, lp_w, links); /* Wake up waiter. 
*/ - (void)__db_mutex_unlock(&lp_w->mutex, lt->fd); + (void)__db_mutex_unlock(&lp_w->mutex, lt->reginfo.fd); state_changed = 1; } @@ -626,8 +356,7 @@ __lock_put_internal(lt, lockp, do_all) static int __lock_get_internal(lt, locker, flags, obj, lock_mode, lockp) DB_LOCKTAB *lt; - u_int32_t locker; - int flags; + u_int32_t locker, flags; const DBT *obj; db_lockmode_t lock_mode; struct __db_lock **lockp; @@ -741,7 +470,7 @@ __lock_get_internal(lt, locker, flags, obj, lock_mode, lockp) */ (void)__db_mutex_init(&newl->mutex, MUTEX_LOCK_OFFSET(lt->region, &newl->mutex)); - (void)__db_mutex_lock(&newl->mutex, lt->fd); + (void)__db_mutex_lock(&newl->mutex, lt->reginfo.fd); /* * Now, insert the lock onto its locker's list. @@ -772,7 +501,7 @@ __lock_get_internal(lt, locker, flags, obj, lock_mode, lockp) if (lrp->detect != DB_LOCK_NORUN) ret = lock_detect(lt, 0, lrp->detect); - (void)__db_mutex_lock(&newl->mutex, lt->fd); + (void)__db_mutex_lock(&newl->mutex, lt->reginfo.fd); LOCK_LOCKREGION(lt); if (newl->status != DB_LSTAT_PENDING) { @@ -802,306 +531,6 @@ __lock_get_internal(lt, locker, flags, obj, lock_mode, lockp) } /* - * This is called at every interface to verify if the region - * has changed size, and if so, to remap the region in and - * reset the process pointers. - */ -static int -__lock_validate_region(lt) - DB_LOCKTAB *lt; -{ - int ret; - - if (lt->reg_size == lt->region->hdr.size) - return (0); - - /* Grow the region. */ - if ((ret = __db_rremap(lt->dbenv, lt->region, - lt->reg_size, lt->region->hdr.size, lt->fd, &lt->region)) != 0) - return (ret); - - __lock_reset_region(lt); - - return (0); -} - -/* - * We have run out of space; time to grow the region. 
- */ -static int -__lock_grow_region(lt, which, howmuch) - DB_LOCKTAB *lt; - int which; - size_t howmuch; -{ - struct __db_lock *newl; - struct lock_header *lock_head; - struct obj_header *obj_head; - DB_LOCKOBJ *op; - DB_LOCKREGION *lrp; - float lock_ratio, obj_ratio; - size_t incr, oldsize, used; - u_int32_t i, newlocks, newmem, newobjs; - int ret, usedlocks, usedmem, usedobjs; - u_int8_t *curaddr; - - lrp = lt->region; - oldsize = lrp->hdr.size; - incr = lrp->increment; - - /* Figure out how much of each sort of space we have. */ - usedmem = lrp->mem_bytes - __db_shalloc_count(lt->mem); - usedobjs = lrp->numobjs - __lock_count_objs(lrp); - usedlocks = lrp->maxlocks - __lock_count_locks(lrp); - - /* - * Figure out what fraction of the used space belongs to each - * different type of "thing" in the region. Then partition the - * new space up according to this ratio. - */ - used = usedmem + - usedlocks * ALIGN(sizeof(struct __db_lock), MUTEX_ALIGNMENT) + - usedobjs * sizeof(DB_LOCKOBJ); - - lock_ratio = usedlocks * - ALIGN(sizeof(struct __db_lock), MUTEX_ALIGNMENT) / (float)used; - obj_ratio = usedobjs * sizeof(DB_LOCKOBJ) / (float)used; - - newlocks = (u_int32_t)(lock_ratio * - incr / ALIGN(sizeof(struct __db_lock), MUTEX_ALIGNMENT)); - newobjs = (u_int32_t)(obj_ratio * incr / sizeof(DB_LOCKOBJ)); - newmem = incr - - (newobjs * sizeof(DB_LOCKOBJ) + - newlocks * ALIGN(sizeof(struct __db_lock), MUTEX_ALIGNMENT)); - - /* - * Make sure we allocate enough memory for the object being - * requested. 
- */ - switch (which) { - case DB_LOCK_LOCK: - if (newlocks == 0) { - newlocks = 10; - incr += newlocks * sizeof(struct __db_lock); - } - break; - case DB_LOCK_OBJ: - if (newobjs == 0) { - newobjs = 10; - incr += newobjs * sizeof(DB_LOCKOBJ); - } - break; - case DB_LOCK_MEM: - if (newmem < howmuch * 2) { - incr += howmuch * 2 - newmem; - newmem = howmuch * 2; - } - break; - } - - newmem += ALIGN(incr, sizeof(size_t)) - incr; - incr = ALIGN(incr, sizeof(size_t)); - - /* - * Since we are going to be allocating locks at the beginning of the - * new chunk, we need to make sure that the chunk is MUTEX_ALIGNMENT - * aligned. We did not guarantee this when we created the region, so - * we may need to pad the old region by extra bytes to ensure this - * alignment. - */ - incr += ALIGN(oldsize, MUTEX_ALIGNMENT) - oldsize; - - __db_err(lt->dbenv, - "Growing lock region: %lu locks %lu objs %lu bytes", - (u_long)newlocks, (u_long)newobjs, (u_long)newmem); - - if ((ret = __db_rgrow(lt->dbenv, lt->fd, incr)) != 0) - return (ret); - if ((ret = __db_rremap(lt->dbenv, - lt->region, oldsize, oldsize + incr, lt->fd, &lt->region)) != 0) - return (ret); - __lock_reset_region(lt); - - /* Update region parameters. */ - lrp = lt->region; - lrp->increment = incr << 1; - lrp->maxlocks += newlocks; - lrp->numobjs += newobjs; - lrp->mem_bytes += newmem; - - curaddr = (u_int8_t *)lrp + oldsize; - curaddr = (u_int8_t *)ALIGNP(curaddr, MUTEX_ALIGNMENT); - - /* Put new locks onto the free list. */ - lock_head = &lrp->free_locks; - for (i = 0; i++ < newlocks; - curaddr += ALIGN(sizeof(struct __db_lock), MUTEX_ALIGNMENT)) { - newl = (struct __db_lock *)curaddr; - SH_TAILQ_INSERT_HEAD(lock_head, newl, links, __db_lock); - } - - /* Put new objects onto the free list. 
*/ - obj_head = &lrp->free_objs; - for (i = 0; i++ < newobjs; curaddr += sizeof(DB_LOCKOBJ)) { - op = (DB_LOCKOBJ *)curaddr; - SH_TAILQ_INSERT_HEAD(obj_head, op, links, __db_lockobj); - } - - *((size_t *)curaddr) = newmem - sizeof(size_t); - curaddr += sizeof(size_t); - __db_shalloc_free(lt->mem, curaddr); - - return (0); -} - -#ifdef DEBUG -/* - * __lock_dump_region -- - * - * PUBLIC: void __lock_dump_region __P((DB_LOCKTAB *, u_int)); - */ -void -__lock_dump_region(lt, flags) - DB_LOCKTAB *lt; - u_int flags; -{ - struct __db_lock *lp; - DB_LOCKOBJ *op; - DB_LOCKREGION *lrp; - u_int32_t i, j; - - lrp = lt->region; - - printf("Lock region parameters\n"); - printf("%s:0x%x\t%s:%lu\t%s:%lu\t%s:%lu\n%s:%lu\t%s:%lu\t%s:%lu\t\n", - "magic ", lrp->magic, - "version ", (u_long)lrp->version, - "processes ", (u_long)lrp->hdr.refcnt, - "maxlocks ", (u_long)lrp->maxlocks, - "table size ", (u_long)lrp->table_size, - "nmodes ", (u_long)lrp->nmodes, - "numobjs ", (u_long)lrp->numobjs); - printf("%s:%lu\t%s:%lu\t%s:%lu\n%s:%lu\t%s:%lu\t%s:%lu\n", - "size ", (u_long)lrp->hdr.size, - "nlockers ", (u_long)lrp->nlockers, - "hash_off ", (u_long)lrp->hash_off, - "increment ", (u_long)lrp->increment, - "mem_off ", (u_long)lrp->mem_off, - "mem_bytes ", (u_long)lrp->mem_bytes); -#ifndef HAVE_SPINLOCKS - printf("Mutex: off %lu", (u_long)lrp->hdr.lock.off); -#endif - printf(" waits %lu nowaits %lu", - (u_long)lrp->hdr.lock.mutex_set_wait, - (u_long)lrp->hdr.lock.mutex_set_nowait); - printf("\n%s:%lu\t%s:%lu\t%s:%lu\t%s:%lu\n", - "nconflicts ", (u_long)lrp->nconflicts, - "nrequests ", (u_long)lrp->nrequests, - "nreleases ", (u_long)lrp->nreleases, - "ndeadlocks ", (u_long)lrp->ndeadlocks); - printf("need_dd %lu\n", (u_long)lrp->need_dd); - if (flags & LOCK_DEBUG_CONF) { - printf("\nConflict matrix\n"); - - for (i = 0; i < lrp->nmodes; i++) { - for (j = 0; j < lrp->nmodes; j++) - printf("%lu\t", - (u_long)lt->conflicts[i * lrp->nmodes + j]); - printf("\n"); - } - } - - for (i = 0; i < 
lrp->table_size; i++) { - op = SH_TAILQ_FIRST(&lt->hashtab[i], __db_lockobj); - if (op != NULL && flags & LOCK_DEBUG_BUCKET) - printf("Bucket %lu:\n", (unsigned long)i); - while (op != NULL) { - if (op->type == DB_LOCK_LOCKER && - flags & LOCK_DEBUG_LOCKERS) - __lock_dump_locker(lt, op); - else if (flags & LOCK_DEBUG_OBJECTS && - op->type == DB_LOCK_OBJTYPE) - __lock_dump_object(lt, op); - op = SH_TAILQ_NEXT(op, links, __db_lockobj); - } - } - - if (flags & LOCK_DEBUG_LOCK) { - printf("\nLock Free List\n"); - for (lp = SH_TAILQ_FIRST(&lrp->free_locks, __db_lock); - lp != NULL; - lp = SH_TAILQ_NEXT(lp, links, __db_lock)) { - printf("0x%x: %lu\t%lu\t%lu\t0x%x\n", (u_int)lp, - (u_long)lp->holder, (u_long)lp->mode, - (u_long)lp->status, (u_int)lp->obj); - } - } - - if (flags & LOCK_DEBUG_LOCK) { - printf("\nObject Free List\n"); - for (op = SH_TAILQ_FIRST(&lrp->free_objs, __db_lockobj); - op != NULL; - op = SH_TAILQ_NEXT(op, links, __db_lockobj)) - printf("0x%x\n", (u_int)op); - } - - if (flags & LOCK_DEBUG_MEM) { - printf("\nMemory Free List\n"); - __db_shalloc_dump(stdout, lt->mem); - } -} - -static void -__lock_dump_locker(lt, op) - DB_LOCKTAB *lt; - DB_LOCKOBJ *op; -{ - struct __db_lock *lp; - u_int32_t locker; - void *ptr; - - ptr = SH_DBT_PTR(&op->lockobj); - memcpy(&locker, ptr, sizeof(u_int32_t)); - printf("L %lx", (u_long)locker); - - lp = SH_LIST_FIRST(&op->heldby, __db_lock); - if (lp == NULL) { - printf("\n"); - return; - } - for (; lp != NULL; lp = SH_LIST_NEXT(lp, locker_links, __db_lock)) - __lock_printlock(lt, lp, 0); -} - -static void -__lock_dump_object(lt, op) - DB_LOCKTAB *lt; - DB_LOCKOBJ *op; -{ - struct __db_lock *lp; - u_int32_t j; - char *ptr; - - ptr = SH_DBT_PTR(&op->lockobj); - for (j = 0; j < op->lockobj.size; ptr++, j++) - printf("%c", (int)*ptr); - printf("\n"); - - printf("H:"); - for (lp = - SH_TAILQ_FIRST(&op->holders, __db_lock); - lp != NULL; - lp = SH_TAILQ_NEXT(lp, links, __db_lock)) - __lock_printlock(lt, lp, 0); - lp = 
SH_TAILQ_FIRST(&op->waiters, __db_lock); - if (lp != NULL) { - printf("\nW:"); - for (; lp != NULL; lp = SH_TAILQ_NEXT(lp, links, __db_lock)) - __lock_printlock(lt, lp, 0); - } -} - -/* * __lock_is_locked -- * * PUBLIC: int __lock_is_locked @@ -1136,7 +565,12 @@ __lock_is_locked(lt, locker, dbt, mode) return (0); } -static void +/* + * __lock_printlock -- + * + * PUBLIC: void __lock_printlock __P((DB_LOCKTAB *, struct __db_lock *, int)); + */ +void __lock_printlock(lt, lp, ispgno) DB_LOCKTAB *lt; struct __db_lock *lp; @@ -1213,39 +647,6 @@ __lock_printlock(lt, lp, ispgno) printf("\n"); } } -#endif - -static int -__lock_count_locks(lrp) - DB_LOCKREGION *lrp; -{ - struct __db_lock *newl; - int count; - - count = 0; - for (newl = SH_TAILQ_FIRST(&lrp->free_locks, __db_lock); - newl != NULL; - newl = SH_TAILQ_NEXT(newl, links, __db_lock)) - count++; - - return (count); -} - -static int -__lock_count_objs(lrp) - DB_LOCKREGION *lrp; -{ - DB_LOCKOBJ *obj; - int count; - - count = 0; - for (obj = SH_TAILQ_FIRST(&lrp->free_objs, __db_lockobj); - obj != NULL; - obj = SH_TAILQ_NEXT(obj, links, __db_lockobj)) - count++; - - return (count); -} /* * PUBLIC: int __lock_getobj __P((DB_LOCKTAB *, @@ -1354,19 +755,7 @@ __lock_remove_waiter(lt, sh_obj, lockp, status) lockp->status = status; /* Wake whoever is waiting on this lock. 
*/ - (void)__db_mutex_unlock(&lockp->mutex, lt->fd); -} - -static void -__lock_freeobj(lt, obj) - DB_LOCKTAB *lt; - DB_LOCKOBJ *obj; -{ - HASHREMOVE_EL(lt->hashtab, - __db_lockobj, links, obj, lt->region->table_size, __lock_lhash); - if (obj->lockobj.size > sizeof(obj->objdata)) - __db_shalloc_free(lt->mem, SH_DBT_PTR(&obj->lockobj)); - SH_TAILQ_INSERT_HEAD(&lt->region->free_objs, obj, links, __db_lockobj); + (void)__db_mutex_unlock(&lockp->mutex, lt->reginfo.fd); } static void @@ -1384,17 +773,18 @@ __lock_checklocker(lt, lockp, do_remove) if (__lock_getobj(lt, lockp->holder, NULL, DB_LOCK_LOCKER, &sh_locker) == 0 && SH_LIST_FIRST(&sh_locker->heldby, __db_lock) == NULL) { __lock_freeobj(lt, sh_locker); - lt->region->nlockers--; + lt->region->nlockers--; } } static void -__lock_reset_region(lt) +__lock_freeobj(lt, obj) DB_LOCKTAB *lt; + DB_LOCKOBJ *obj; { - lt->conflicts = (u_int8_t *)lt->region + sizeof(DB_LOCKREGION); - lt->hashtab = - (DB_HASHTAB *)((u_int8_t *)lt->region + lt->region->hash_off); - lt->mem = (void *)((u_int8_t *)lt->region + lt->region->mem_off); - lt->reg_size = lt->region->hdr.size; + HASHREMOVE_EL(lt->hashtab, + __db_lockobj, links, obj, lt->region->table_size, __lock_lhash); + if (obj->lockobj.size > sizeof(obj->objdata)) + __db_shalloc_free(lt->mem, SH_DBT_PTR(&obj->lockobj)); + SH_TAILQ_INSERT_HEAD(&lt->region->free_objs, obj, links, __db_lockobj); } diff --git a/db2/lock/lock_conflict.c b/db2/lock/lock_conflict.c index ff0287f07e..870aa0dc17 100644 --- a/db2/lock/lock_conflict.c +++ b/db2/lock/lock_conflict.c @@ -1,14 +1,14 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. 
*/ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)lock_conflict.c 10.2 (Sleepycat) 6/21/97"; +static const char sccsid[] = "@(#)lock_conflict.c 10.3 (Sleepycat) 4/10/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES diff --git a/db2/lock/lock_deadlock.c b/db2/lock/lock_deadlock.c index 93c438ca36..4de492944e 100644 --- a/db2/lock/lock_deadlock.c +++ b/db2/lock/lock_deadlock.c @@ -1,25 +1,21 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. */ #include "config.h" #ifndef lint -static const char copyright[] = -"@(#) Copyright (c) 1997\n\ - Sleepycat Software Inc. All rights reserved.\n"; -static const char sccsid[] = "@(#)lock_deadlock.c 10.26 (Sleepycat) 11/25/97"; -#endif +static const char sccsid[] = "@(#)lock_deadlock.c 10.32 (Sleepycat) 4/26/98"; +#endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES #include <sys/types.h> #include <errno.h> #include <string.h> -#include <stdlib.h> #endif #include "db_int.h" @@ -59,14 +55,14 @@ static int __dd_build static u_int32_t *__dd_find __P((u_int32_t *, locker_info *, u_int32_t)); -#ifdef DEBUG +#ifdef DIAGNOSTIC static void __dd_debug __P((DB_ENV *, locker_info *, u_int32_t *, u_int32_t)); #endif int lock_detect(lt, flags, atype) DB_LOCKTAB *lt; - int flags, atype; + u_int32_t flags, atype; { DB_ENV *dbenv; locker_info *idmap; @@ -96,7 +92,7 @@ lock_detect(lt, flags, atype) if (nlockers == 0) return (0); -#ifdef DEBUG +#ifdef DIAGNOSTIC if (dbenv->db_verbose != 0) __dd_debug(dbenv, idmap, bitmap, nlockers); #endif @@ -202,7 +198,7 @@ __dd_build(dbenv, bmp, nlockers, idmap) u_int8_t *pptr; locker_info *id_array; u_int32_t *bitmap, count, *entryp, i, id, nentries, *tmpmap; - int is_first, ret; + int is_first; lt = dbenv->lk_info; @@ -322,8 +318,8 @@ retry: count = lt->region->nlockers; lp != NULL; is_first = 0, lp = SH_TAILQ_NEXT(lp, links, __db_lock)) { - if ((ret = 
__lock_getobj(lt, lp->holder, - NULL, DB_LOCK_LOCKER, &lockerp)) != 0) { + if (__lock_getobj(lt, lp->holder, + NULL, DB_LOCK_LOCKER, &lockerp) != 0) { __db_err(dbenv, "warning unable to find object"); continue; @@ -357,8 +353,8 @@ retry: count = lt->region->nlockers; for (id = 0; id < count; id++) { if (!id_array[id].valid) continue; - if ((ret = __lock_getobj(lt, - id_array[id].id, NULL, DB_LOCK_LOCKER, &lockerp)) != 0) { + if (__lock_getobj(lt, + id_array[id].id, NULL, DB_LOCK_LOCKER, &lockerp) != 0) { __db_err(dbenv, "No locks for locker %lu", (u_long)id_array[id].id); continue; @@ -448,7 +444,7 @@ __dd_abort(dbenv, info) SH_LIST_REMOVE(lockp, locker_links, __db_lock); sh_obj = (DB_LOCKOBJ *)((u_int8_t *)lockp + lockp->obj); SH_TAILQ_REMOVE(&sh_obj->waiters, lockp, links, __db_lock); - (void)__db_mutex_unlock(&lockp->mutex, lt->fd); + (void)__db_mutex_unlock(&lockp->mutex, lt->reginfo.fd); ret = 0; @@ -456,7 +452,7 @@ out: UNLOCK_LOCKREGION(lt); return (ret); } -#ifdef DEBUG +#ifdef DIAGNOSTIC static void __dd_debug(dbenv, idmap, bitmap, nlockers) DB_ENV *dbenv; diff --git a/db2/lock/lock_region.c b/db2/lock/lock_region.c new file mode 100644 index 0000000000..b597560744 --- /dev/null +++ b/db2/lock/lock_region.c @@ -0,0 +1,726 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 1997, 1998 + * Sleepycat Software. All rights reserved. 
+ */ + +#include "config.h" + +#ifndef lint +static const char sccsid[] = "@(#)lock_region.c 10.15 (Sleepycat) 6/2/98"; +#endif /* not lint */ + +#ifndef NO_SYSTEM_INCLUDES +#include <sys/types.h> + +#include <ctype.h> +#include <errno.h> +#include <string.h> +#endif + +#include "db_int.h" +#include "shqueue.h" +#include "db_shash.h" +#include "lock.h" +#include "common_ext.h" + +static u_int32_t __lock_count_locks __P((DB_LOCKREGION *)); +static u_int32_t __lock_count_objs __P((DB_LOCKREGION *)); +static void __lock_dump_locker __P((DB_LOCKTAB *, DB_LOCKOBJ *, FILE *)); +static void __lock_dump_object __P((DB_LOCKTAB *, DB_LOCKOBJ *, FILE *)); +static const char *__lock_dump_status __P((db_status_t)); +static void __lock_reset_region __P((DB_LOCKTAB *)); +static int __lock_tabinit __P((DB_ENV *, DB_LOCKREGION *)); + +int +lock_open(path, flags, mode, dbenv, ltp) + const char *path; + u_int32_t flags; + int mode; + DB_ENV *dbenv; + DB_LOCKTAB **ltp; +{ + DB_LOCKTAB *lt; + u_int32_t lock_modes, maxlocks, regflags; + int ret; + + /* Validate arguments. */ +#ifdef HAVE_SPINLOCKS +#define OKFLAGS (DB_CREATE | DB_THREAD) +#else +#define OKFLAGS (DB_CREATE) +#endif + if ((ret = __db_fchk(dbenv, "lock_open", flags, OKFLAGS)) != 0) + return (ret); + + /* Create the lock table structure. */ + if ((lt = (DB_LOCKTAB *)__db_calloc(1, sizeof(DB_LOCKTAB))) == NULL) { + __db_err(dbenv, "%s", strerror(ENOMEM)); + return (ENOMEM); + } + lt->dbenv = dbenv; + + /* Grab the values that we need to compute the region size. */ + lock_modes = DB_LOCK_RW_N; + maxlocks = DB_LOCK_DEFAULT_N; + regflags = REGION_SIZEDEF; + if (dbenv != NULL) { + if (dbenv->lk_modes != 0) { + lock_modes = dbenv->lk_modes; + regflags = 0; + } + if (dbenv->lk_max != 0) { + maxlocks = dbenv->lk_max; + regflags = 0; + } + } + + /* Join/create the lock region. 
*/ + lt->reginfo.dbenv = dbenv; + lt->reginfo.appname = DB_APP_NONE; + if (path == NULL) + lt->reginfo.path = NULL; + else + if ((lt->reginfo.path = (char *)__db_strdup(path)) == NULL) + goto err; + lt->reginfo.file = DB_DEFAULT_LOCK_FILE; + lt->reginfo.mode = mode; + lt->reginfo.size = + LOCK_REGION_SIZE(lock_modes, maxlocks, __db_tablesize(maxlocks)); + lt->reginfo.dbflags = flags; + lt->reginfo.addr = NULL; + lt->reginfo.fd = -1; + lt->reginfo.flags = regflags; + + if ((ret = __db_rattach(&lt->reginfo)) != 0) + goto err; + + /* Now set up the pointer to the region. */ + lt->region = lt->reginfo.addr; + + /* Initialize the region if we created it. */ + if (F_ISSET(&lt->reginfo, REGION_CREATED)) { + lt->region->maxlocks = maxlocks; + lt->region->nmodes = lock_modes; + if ((ret = __lock_tabinit(dbenv, lt->region)) != 0) + goto err; + } else { + /* Check for an unexpected region. */ + if (lt->region->magic != DB_LOCKMAGIC) { + __db_err(dbenv, + "lock_open: %s: bad magic number", path); + ret = EINVAL; + goto err; + } + } + + /* Check for automatic deadlock detection. */ + if (dbenv != NULL && dbenv->lk_detect != DB_LOCK_NORUN) { + if (lt->region->detect != DB_LOCK_NORUN && + dbenv->lk_detect != DB_LOCK_DEFAULT && + lt->region->detect != dbenv->lk_detect) { + __db_err(dbenv, + "lock_open: incompatible deadlock detector mode"); + ret = EINVAL; + goto err; + } + if (lt->region->detect == DB_LOCK_NORUN) + lt->region->detect = dbenv->lk_detect; + } + + /* Set up remaining pointers into region. 
*/ + lt->conflicts = (u_int8_t *)lt->region + sizeof(DB_LOCKREGION); + lt->hashtab = + (DB_HASHTAB *)((u_int8_t *)lt->region + lt->region->hash_off); + lt->mem = (void *)((u_int8_t *)lt->region + lt->region->mem_off); + + UNLOCK_LOCKREGION(lt); + *ltp = lt; + return (0); + +err: if (lt->reginfo.addr != NULL) { + UNLOCK_LOCKREGION(lt); + (void)__db_rdetach(&lt->reginfo); + if (F_ISSET(&lt->reginfo, REGION_CREATED)) + (void)lock_unlink(path, 1, dbenv); + } + + if (lt->reginfo.path != NULL) + FREES(lt->reginfo.path); + FREE(lt, sizeof(*lt)); + return (ret); +} + +/* + * __lock_tabinit -- + * Initialize the lock region. + */ +static int +__lock_tabinit(dbenv, lrp) + DB_ENV *dbenv; + DB_LOCKREGION *lrp; +{ + struct __db_lock *lp; + struct lock_header *tq_head; + struct obj_header *obj_head; + DB_LOCKOBJ *op; + u_int32_t i, nelements; + const u_int8_t *conflicts; + u_int8_t *curaddr; + + conflicts = dbenv == NULL || dbenv->lk_conflicts == NULL ? + db_rw_conflicts : dbenv->lk_conflicts; + + lrp->table_size = __db_tablesize(lrp->maxlocks); + lrp->magic = DB_LOCKMAGIC; + lrp->version = DB_LOCKVERSION; + lrp->id = 0; + /* + * These fields (lrp->maxlocks, lrp->nmodes) are initialized + * in the caller, since we had to grab those values to size + * the region. + */ + lrp->need_dd = 0; + lrp->detect = DB_LOCK_NORUN; + lrp->numobjs = lrp->maxlocks; + lrp->nlockers = 0; + lrp->mem_bytes = ALIGN(STRING_SIZE(lrp->maxlocks), sizeof(size_t)); + lrp->increment = lrp->hdr.size / 2; + lrp->nconflicts = 0; + lrp->nrequests = 0; + lrp->nreleases = 0; + lrp->ndeadlocks = 0; + + /* + * As we write the region, we've got to maintain the alignment + * for the structures that follow each chunk. This information + * ends up being encapsulated both in here as well as in the + * lock.h file for the XXX_SIZE macros. + */ + /* Initialize conflict matrix. 
*/ + curaddr = (u_int8_t *)lrp + sizeof(DB_LOCKREGION); + memcpy(curaddr, conflicts, lrp->nmodes * lrp->nmodes); + curaddr += lrp->nmodes * lrp->nmodes; + + /* + * Initialize hash table. + */ + curaddr = (u_int8_t *)ALIGNP(curaddr, LOCK_HASH_ALIGN); + lrp->hash_off = curaddr - (u_int8_t *)lrp; + nelements = lrp->table_size; + __db_hashinit(curaddr, nelements); + curaddr += nelements * sizeof(DB_HASHTAB); + + /* + * Initialize locks onto a free list. Since locks contains mutexes, + * we need to make sure that each lock is aligned on a MUTEX_ALIGNMENT + * boundary. + */ + curaddr = (u_int8_t *)ALIGNP(curaddr, MUTEX_ALIGNMENT); + tq_head = &lrp->free_locks; + SH_TAILQ_INIT(tq_head); + + for (i = 0; i++ < lrp->maxlocks; + curaddr += ALIGN(sizeof(struct __db_lock), MUTEX_ALIGNMENT)) { + lp = (struct __db_lock *)curaddr; + lp->status = DB_LSTAT_FREE; + SH_TAILQ_INSERT_HEAD(tq_head, lp, links, __db_lock); + } + + /* Initialize objects onto a free list. */ + obj_head = &lrp->free_objs; + SH_TAILQ_INIT(obj_head); + + for (i = 0; i++ < lrp->maxlocks; curaddr += sizeof(DB_LOCKOBJ)) { + op = (DB_LOCKOBJ *)curaddr; + SH_TAILQ_INSERT_HEAD(obj_head, op, links, __db_lockobj); + } + + /* + * Initialize the string space; as for all shared memory allocation + * regions, this requires size_t alignment, since we store the + * lengths of malloc'd areas in the area. 
+ */ + curaddr = (u_int8_t *)ALIGNP(curaddr, sizeof(size_t)); + lrp->mem_off = curaddr - (u_int8_t *)lrp; + __db_shalloc_init(curaddr, lrp->mem_bytes); + return (0); +} + +int +lock_close(lt) + DB_LOCKTAB *lt; +{ + int ret; + + if ((ret = __db_rdetach(&lt->reginfo)) != 0) + return (ret); + + if (lt->reginfo.path != NULL) + FREES(lt->reginfo.path); + FREE(lt, sizeof(*lt)); + + return (0); +} + +int +lock_unlink(path, force, dbenv) + const char *path; + int force; + DB_ENV *dbenv; +{ + REGINFO reginfo; + int ret; + + memset(&reginfo, 0, sizeof(reginfo)); + reginfo.dbenv = dbenv; + reginfo.appname = DB_APP_NONE; + if (path != NULL && (reginfo.path = (char *)__db_strdup(path)) == NULL) + return (ENOMEM); + reginfo.file = DB_DEFAULT_LOCK_FILE; + ret = __db_runlink(&reginfo, force); + if (reginfo.path != NULL) + FREES(reginfo.path); + return (ret); +} + +/* + * __lock_validate_region -- + * Called at every interface to verify if the region has changed size, + * and if so, to remap the region in and reset the process' pointers. + * + * PUBLIC: int __lock_validate_region __P((DB_LOCKTAB *)); + */ +int +__lock_validate_region(lt) + DB_LOCKTAB *lt; +{ + int ret; + + if (lt->reginfo.size == lt->region->hdr.size) + return (0); + + /* Detach/reattach the region. */ + if ((ret = __db_rreattach(&lt->reginfo, lt->region->hdr.size)) != 0) + return (ret); + + /* Reset region information. */ + lt->region = lt->reginfo.addr; + __lock_reset_region(lt); + + return (0); +} + +/* + * __lock_grow_region -- + * We have run out of space; time to grow the region. 
+ * + * PUBLIC: int __lock_grow_region __P((DB_LOCKTAB *, int, size_t)); + */ +int +__lock_grow_region(lt, which, howmuch) + DB_LOCKTAB *lt; + int which; + size_t howmuch; +{ + struct __db_lock *newl; + struct lock_header *lock_head; + struct obj_header *obj_head; + DB_LOCKOBJ *op; + DB_LOCKREGION *lrp; + float lock_ratio, obj_ratio; + size_t incr, oldsize, used, usedmem; + u_int32_t i, newlocks, newmem, newobjs, usedlocks, usedobjs; + u_int8_t *curaddr; + int ret; + + lrp = lt->region; + oldsize = lrp->hdr.size; + incr = lrp->increment; + + /* Figure out how much of each sort of space we have. */ + usedmem = lrp->mem_bytes - __db_shalloc_count(lt->mem); + usedobjs = lrp->numobjs - __lock_count_objs(lrp); + usedlocks = lrp->maxlocks - __lock_count_locks(lrp); + + /* + * Figure out what fraction of the used space belongs to each + * different type of "thing" in the region. Then partition the + * new space up according to this ratio. + */ + used = usedmem + + usedlocks * ALIGN(sizeof(struct __db_lock), MUTEX_ALIGNMENT) + + usedobjs * sizeof(DB_LOCKOBJ); + + lock_ratio = usedlocks * + ALIGN(sizeof(struct __db_lock), MUTEX_ALIGNMENT) / (float)used; + obj_ratio = usedobjs * sizeof(DB_LOCKOBJ) / (float)used; + + newlocks = (u_int32_t)(lock_ratio * + incr / ALIGN(sizeof(struct __db_lock), MUTEX_ALIGNMENT)); + newobjs = (u_int32_t)(obj_ratio * incr / sizeof(DB_LOCKOBJ)); + newmem = incr - + (newobjs * sizeof(DB_LOCKOBJ) + + newlocks * ALIGN(sizeof(struct __db_lock), MUTEX_ALIGNMENT)); + + /* + * Make sure we allocate enough memory for the object being + * requested. 
+ */ + switch (which) { + case DB_LOCK_LOCK: + if (newlocks == 0) { + newlocks = 10; + incr += newlocks * sizeof(struct __db_lock); + } + break; + case DB_LOCK_OBJ: + if (newobjs == 0) { + newobjs = 10; + incr += newobjs * sizeof(DB_LOCKOBJ); + } + break; + case DB_LOCK_MEM: + if (newmem < howmuch * 2) { + incr += howmuch * 2 - newmem; + newmem = howmuch * 2; + } + break; + } + + newmem += ALIGN(incr, sizeof(size_t)) - incr; + incr = ALIGN(incr, sizeof(size_t)); + + /* + * Since we are going to be allocating locks at the beginning of the + * new chunk, we need to make sure that the chunk is MUTEX_ALIGNMENT + * aligned. We did not guarantee this when we created the region, so + * we may need to pad the old region by extra bytes to ensure this + * alignment. + */ + incr += ALIGN(oldsize, MUTEX_ALIGNMENT) - oldsize; + + __db_err(lt->dbenv, + "Growing lock region: %lu locks %lu objs %lu bytes", + (u_long)newlocks, (u_long)newobjs, (u_long)newmem); + + if ((ret = __db_rgrow(&lt->reginfo, oldsize + incr)) != 0) + return (ret); + lt->region = lt->reginfo.addr; + __lock_reset_region(lt); + + /* Update region parameters. */ + lrp = lt->region; + lrp->increment = incr << 1; + lrp->maxlocks += newlocks; + lrp->numobjs += newobjs; + lrp->mem_bytes += newmem; + + curaddr = (u_int8_t *)lrp + oldsize; + curaddr = (u_int8_t *)ALIGNP(curaddr, MUTEX_ALIGNMENT); + + /* Put new locks onto the free list. */ + lock_head = &lrp->free_locks; + for (i = 0; i++ < newlocks; + curaddr += ALIGN(sizeof(struct __db_lock), MUTEX_ALIGNMENT)) { + newl = (struct __db_lock *)curaddr; + SH_TAILQ_INSERT_HEAD(lock_head, newl, links, __db_lock); + } + + /* Put new objects onto the free list. 
*/ + obj_head = &lrp->free_objs; + for (i = 0; i++ < newobjs; curaddr += sizeof(DB_LOCKOBJ)) { + op = (DB_LOCKOBJ *)curaddr; + SH_TAILQ_INSERT_HEAD(obj_head, op, links, __db_lockobj); + } + + *((size_t *)curaddr) = newmem - sizeof(size_t); + curaddr += sizeof(size_t); + __db_shalloc_free(lt->mem, curaddr); + + return (0); +} + +static void +__lock_reset_region(lt) + DB_LOCKTAB *lt; +{ + lt->conflicts = (u_int8_t *)lt->region + sizeof(DB_LOCKREGION); + lt->hashtab = + (DB_HASHTAB *)((u_int8_t *)lt->region + lt->region->hash_off); + lt->mem = (void *)((u_int8_t *)lt->region + lt->region->mem_off); +} + +/* + * lock_stat -- + * Return LOCK statistics. + */ +int +lock_stat(lt, gspp, db_malloc) + DB_LOCKTAB *lt; + DB_LOCK_STAT **gspp; + void *(*db_malloc) __P((size_t)); +{ + DB_LOCKREGION *rp; + + *gspp = NULL; + + if ((*gspp = db_malloc == NULL ? + (DB_LOCK_STAT *)__db_malloc(sizeof(**gspp)) : + (DB_LOCK_STAT *)db_malloc(sizeof(**gspp))) == NULL) + return (ENOMEM); + + /* Copy out the global statistics. 
*/ + LOCK_LOCKREGION(lt); + + rp = lt->region; + (*gspp)->st_magic = rp->magic; + (*gspp)->st_version = rp->version; + (*gspp)->st_maxlocks = rp->maxlocks; + (*gspp)->st_nmodes = rp->nmodes; + (*gspp)->st_numobjs = rp->numobjs; + (*gspp)->st_nlockers = rp->nlockers; + (*gspp)->st_nconflicts = rp->nconflicts; + (*gspp)->st_nrequests = rp->nrequests; + (*gspp)->st_nreleases = rp->nreleases; + (*gspp)->st_ndeadlocks = rp->ndeadlocks; + (*gspp)->st_region_nowait = rp->hdr.lock.mutex_set_nowait; + (*gspp)->st_region_wait = rp->hdr.lock.mutex_set_wait; + (*gspp)->st_refcnt = rp->hdr.refcnt; + (*gspp)->st_regsize = rp->hdr.size; + + UNLOCK_LOCKREGION(lt); + + return (0); +} + +static u_int32_t +__lock_count_locks(lrp) + DB_LOCKREGION *lrp; +{ + struct __db_lock *newl; + u_int32_t count; + + count = 0; + for (newl = SH_TAILQ_FIRST(&lrp->free_locks, __db_lock); + newl != NULL; + newl = SH_TAILQ_NEXT(newl, links, __db_lock)) + count++; + + return (count); +} + +static u_int32_t +__lock_count_objs(lrp) + DB_LOCKREGION *lrp; +{ + DB_LOCKOBJ *obj; + u_int32_t count; + + count = 0; + for (obj = SH_TAILQ_FIRST(&lrp->free_objs, __db_lockobj); + obj != NULL; + obj = SH_TAILQ_NEXT(obj, links, __db_lockobj)) + count++; + + return (count); +} + +#define LOCK_DUMP_CONF 0x001 /* Conflict matrix. */ +#define LOCK_DUMP_FREE 0x002 /* Display lock free list. */ +#define LOCK_DUMP_LOCKERS 0x004 /* Display lockers. */ +#define LOCK_DUMP_MEM 0x008 /* Display region memory. */ +#define LOCK_DUMP_OBJECTS 0x010 /* Display objects. */ +#define LOCK_DUMP_ALL 0x01f /* Display all. */ + +/* + * __lock_dump_region -- + * + * PUBLIC: void __lock_dump_region __P((DB_LOCKTAB *, char *, FILE *)); + */ +void +__lock_dump_region(lt, area, fp) + DB_LOCKTAB *lt; + char *area; + FILE *fp; +{ + struct __db_lock *lp; + DB_LOCKOBJ *op; + DB_LOCKREGION *lrp; + u_int32_t flags, i, j; + int label; + + /* Make it easy to call from the debugger. 
*/ + if (fp == NULL) + fp = stderr; + + for (flags = 0; *area != '\0'; ++area) + switch (*area) { + case 'A': + LF_SET(LOCK_DUMP_ALL); + break; + case 'c': + LF_SET(LOCK_DUMP_CONF); + break; + case 'f': + LF_SET(LOCK_DUMP_FREE); + break; + case 'l': + LF_SET(LOCK_DUMP_LOCKERS); + break; + case 'm': + LF_SET(LOCK_DUMP_MEM); + break; + case 'o': + LF_SET(LOCK_DUMP_OBJECTS); + break; + } + + lrp = lt->region; + + fprintf(fp, "%s\nLock region parameters\n", DB_LINE); + fprintf(fp, "%s: %lu, %s: %lu, %s: %lu, %s: %lu\n%s: %lu, %s: %lu\n", + "table size", (u_long)lrp->table_size, + "hash_off", (u_long)lrp->hash_off, + "increment", (u_long)lrp->increment, + "mem_off", (u_long)lrp->mem_off, + "mem_bytes", (u_long)lrp->mem_bytes, + "need_dd", (u_long)lrp->need_dd); + + if (LF_ISSET(LOCK_DUMP_CONF)) { + fprintf(fp, "\n%s\nConflict matrix\n", DB_LINE); + for (i = 0; i < lrp->nmodes; i++) { + for (j = 0; j < lrp->nmodes; j++) + fprintf(fp, "%lu\t", + (u_long)lt->conflicts[i * lrp->nmodes + j]); + fprintf(fp, "\n"); + } + } + + if (LF_ISSET(LOCK_DUMP_LOCKERS | LOCK_DUMP_OBJECTS)) { + fprintf(fp, "%s\nLock hash buckets\n", DB_LINE); + for (i = 0; i < lrp->table_size; i++) { + label = 1; + for (op = SH_TAILQ_FIRST(&lt->hashtab[i], __db_lockobj); + op != NULL; + op = SH_TAILQ_NEXT(op, links, __db_lockobj)) { + if (LF_ISSET(LOCK_DUMP_LOCKERS) && + op->type == DB_LOCK_LOCKER) { + if (label) { + fprintf(fp, + "Bucket %lu:\n", (u_long)i); + label = 0; + } + __lock_dump_locker(lt, op, fp); + } + if (LF_ISSET(LOCK_DUMP_OBJECTS) && + op->type == DB_LOCK_OBJTYPE) { + if (label) { + fprintf(fp, + "Bucket %lu:\n", (u_long)i); + label = 0; + } + __lock_dump_object(lt, op, fp); + } + } + } + } + + if (LF_ISSET(LOCK_DUMP_FREE)) { + fprintf(fp, "%s\nLock free list\n", DB_LINE); + for (lp = SH_TAILQ_FIRST(&lrp->free_locks, __db_lock); + lp != NULL; + lp = SH_TAILQ_NEXT(lp, links, __db_lock)) + fprintf(fp, "0x%x: %lu\t%lu\t%s\t0x%x\n", (u_int)lp, + (u_long)lp->holder, (u_long)lp->mode, + 
__lock_dump_status(lp->status), (u_int)lp->obj); + + fprintf(fp, "%s\nObject free list\n", DB_LINE); + for (op = SH_TAILQ_FIRST(&lrp->free_objs, __db_lockobj); + op != NULL; + op = SH_TAILQ_NEXT(op, links, __db_lockobj)) + fprintf(fp, "0x%x\n", (u_int)op); + } + + if (LF_ISSET(LOCK_DUMP_MEM)) + __db_shalloc_dump(lt->mem, fp); +} + +static void +__lock_dump_locker(lt, op, fp) + DB_LOCKTAB *lt; + DB_LOCKOBJ *op; + FILE *fp; +{ + struct __db_lock *lp; + u_int32_t locker; + void *ptr; + + ptr = SH_DBT_PTR(&op->lockobj); + memcpy(&locker, ptr, sizeof(u_int32_t)); + fprintf(fp, "L %lx", (u_long)locker); + + lp = SH_LIST_FIRST(&op->heldby, __db_lock); + if (lp == NULL) { + fprintf(fp, "\n"); + return; + } + for (; lp != NULL; lp = SH_LIST_NEXT(lp, locker_links, __db_lock)) + __lock_printlock(lt, lp, 0); +} + +static void +__lock_dump_object(lt, op, fp) + DB_LOCKTAB *lt; + DB_LOCKOBJ *op; + FILE *fp; +{ + struct __db_lock *lp; + u_int32_t j; + u_int8_t *ptr; + u_int ch; + + ptr = SH_DBT_PTR(&op->lockobj); + for (j = 0; j < op->lockobj.size; ptr++, j++) { + ch = *ptr; + fprintf(fp, isprint(ch) ? 
"%c" : "\\%o", ch); + } + fprintf(fp, "\n"); + + fprintf(fp, "H:"); + for (lp = + SH_TAILQ_FIRST(&op->holders, __db_lock); + lp != NULL; + lp = SH_TAILQ_NEXT(lp, links, __db_lock)) + __lock_printlock(lt, lp, 0); + lp = SH_TAILQ_FIRST(&op->waiters, __db_lock); + if (lp != NULL) { + fprintf(fp, "\nW:"); + for (; lp != NULL; lp = SH_TAILQ_NEXT(lp, links, __db_lock)) + __lock_printlock(lt, lp, 0); + } +} + +static const char * +__lock_dump_status(status) + db_status_t status; +{ + switch (status) { + case DB_LSTAT_ABORTED: + return ("aborted"); + case DB_LSTAT_ERR: + return ("err"); + case DB_LSTAT_FREE: + return ("free"); + case DB_LSTAT_HELD: + return ("held"); + case DB_LSTAT_NOGRANT: + return ("nogrant"); + case DB_LSTAT_PENDING: + return ("pending"); + case DB_LSTAT_WAITING: + return ("waiting"); + } + return ("unknown status"); +} diff --git a/db2/lock/lock_util.c b/db2/lock/lock_util.c index 6c1e30f27c..7274a50422 100644 --- a/db2/lock/lock_util.c +++ b/db2/lock/lock_util.c @@ -1,25 +1,20 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. */ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)lock_util.c 10.5 (Sleepycat) 1/8/98"; +static const char sccsid[] = "@(#)lock_util.c 10.9 (Sleepycat) 4/26/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES #include <sys/types.h> -#include <fcntl.h> -#include <stddef.h> -#include <stdio.h> -#include <stdlib.h> #include <string.h> -#include <unistd.h> #endif #include "db_int.h" @@ -30,11 +25,13 @@ static const char sccsid[] = "@(#)lock_util.c 10.5 (Sleepycat) 1/8/98"; #include "lock.h" /* - * This function is used to compare a DBT that is about to be entered - * into a hash table with an object already in the hash table. Note - * that it just returns true on equal and 0 on not-equal. 
Therefore this - * cannot be used as a sort function; its purpose is to be used as a - * hash comparison function. + * __lock_cmp -- + * This function is used to compare a DBT that is about to be entered + * into a hash table with an object already in the hash table. Note + * that it just returns true on equal and 0 on not-equal. Therefore + * this function cannot be used as a sort function; its purpose is to + * be used as a hash comparison function. + * * PUBLIC: int __lock_cmp __P((const DBT *, DB_LOCKOBJ *)); */ int @@ -46,6 +43,7 @@ __lock_cmp(dbt, lock_obj) if (lock_obj->type != DB_LOCK_OBJTYPE) return (0); + obj_data = SH_DBT_PTR(&lock_obj->lockobj); return (dbt->size == lock_obj->lockobj.size && memcmp(dbt->data, obj_data, dbt->size) == 0); @@ -69,35 +67,86 @@ __lock_locker_cmp(locker, lock_obj) } /* - * PUBLIC: int __lock_ohash __P((const DBT *)); + * The next two functions are the hash functions used to store objects in the + * lock hash table. They are hashing the same items, but one (__lock_ohash) + * takes a DBT (used for hashing a parameter passed from the user) and the + * other (__lock_lhash) takes a DB_LOCKOBJ (used for hashing something that is + * already in the lock manager). In both cases, we have a special check to + * fast path the case where we think we are doing a hash on a DB page/fileid + * pair. If the size is right, then we do the fast hash. + * + * We know that DB uses struct __db_ilocks for its lock objects. The first + * four bytes are the 4-byte page number and the next DB_FILE_ID_LEN bytes + * are a unique file id, where the first 4 bytes on UNIX systems are the file + * inode number, and the first 4 bytes on Windows systems are the FileIndexLow + * bytes. So, we use the XOR of the page number and the first four bytes of + * the file id to produce a 32-bit hash value. 
+ * + * We have no particular reason to believe that this algorithm will produce + * a good hash, but we want a fast hash more than we want a good one, when + * we're coming through this code path. */ -int -__lock_ohash(dbt) - const DBT *dbt; -{ - return (__ham_func5(dbt->data, dbt->size)); +#define FAST_HASH(P) { \ + u_int32_t __h; \ + u_int8_t *__cp, *__hp; \ + __hp = (u_int8_t *)&__h; \ + __cp = (u_int8_t *)(P); \ + __hp[0] = __cp[0] ^ __cp[4]; \ + __hp[1] = __cp[1] ^ __cp[5]; \ + __hp[2] = __cp[2] ^ __cp[6]; \ + __hp[3] = __cp[3] ^ __cp[7]; \ + return (__h); \ } /* - * PUBLIC: u_int32_t __lock_locker_hash __P((u_int32_t)); + * __lock_ohash -- + * + * PUBLIC: u_int32_t __lock_ohash __P((const DBT *)); */ u_int32_t -__lock_locker_hash(locker) - u_int32_t locker; +__lock_ohash(dbt) + const DBT *dbt; { - return (__ham_func5(&locker, sizeof(locker))); + if (dbt->size == sizeof(struct __db_ilock)) + FAST_HASH(dbt->data); + + return (__ham_func5(dbt->data, dbt->size)); } /* + * __lock_lhash -- + * * PUBLIC: u_int32_t __lock_lhash __P((DB_LOCKOBJ *)); */ u_int32_t __lock_lhash(lock_obj) DB_LOCKOBJ *lock_obj; { + u_int32_t tmp; void *obj_data; obj_data = SH_DBT_PTR(&lock_obj->lockobj); + if (lock_obj->type == DB_LOCK_LOCKER) { + memcpy(&tmp, obj_data, sizeof(u_int32_t)); + return (tmp); + } + + if (lock_obj->lockobj.size == sizeof(struct __db_ilock)) + FAST_HASH(obj_data); + return (__ham_func5(obj_data, lock_obj->lockobj.size)); } +/* + * __lock_locker_hash -- + * Hash function for entering lockers into the hash table. Since these + * are simply 32-bit unsigned integers, just return the locker value. + * + * PUBLIC: u_int32_t __lock_locker_hash __P((u_int32_t)); + */ +u_int32_t +__lock_locker_hash(locker) + u_int32_t locker; +{ + return (locker); +} diff --git a/db2/log/log.c b/db2/log/log.c index 8013d42aef..d642c9f9ef 100644 --- a/db2/log/log.c +++ b/db2/log/log.c @@ -1,21 +1,19 @@ /*- * See the file LICENSE for redistribution information. 
* - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. */ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)log.c 10.39 (Sleepycat) 1/17/98"; +static const char sccsid[] = "@(#)log.c 10.54 (Sleepycat) 5/31/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES #include <sys/types.h> -#include <sys/stat.h> #include <errno.h> -#include <fcntl.h> #include <stdlib.h> #include <string.h> #include <unistd.h> @@ -23,7 +21,6 @@ static const char sccsid[] = "@(#)log.c 10.39 (Sleepycat) 1/17/98"; #include "db_int.h" #include "shqueue.h" -#include "db_shash.h" #include "log.h" #include "db_dispatch.h" #include "txn_auto.h" @@ -38,15 +35,14 @@ static int __log_recover __P((DB_LOG *)); int log_open(path, flags, mode, dbenv, lpp) const char *path; - int flags; + u_int32_t flags; int mode; DB_ENV *dbenv; DB_LOG **lpp; { DB_LOG *dblp; LOG *lp; - size_t len; - int fd, newregion, ret, retry_cnt; + int ret; /* Validate arguments. */ #ifdef HAVE_SPINLOCKS @@ -57,22 +53,13 @@ log_open(path, flags, mode, dbenv, lpp) if ((ret = __db_fchk(dbenv, "log_open", flags, OKFLAGS)) != 0) return (ret); - /* - * We store 4-byte offsets into the file, so the maximum file - * size can't be larger than that. - */ - if (dbenv != NULL && dbenv->lg_max > UINT32_T_MAX) { - __db_err(dbenv, "log_open: maximum file size too large"); - return (EINVAL); - } - /* Create and initialize the DB_LOG structure. */ if ((dblp = (DB_LOG *)__db_calloc(1, sizeof(DB_LOG))) == NULL) return (ENOMEM); if (path != NULL && (dblp->dir = __db_strdup(path)) == NULL) { - __db_free(dblp); - return (ENOMEM); + ret = ENOMEM; + goto err; } dblp->dbenv = dbenv; @@ -85,102 +72,87 @@ log_open(path, flags, mode, dbenv, lpp) * file names there. Make it fairly large so that we don't have to * grow it. */ - len = 30 * 1024; +#define DEF_LOG_SIZE (30 * 1024) /* Map in the region. 
*/ - retry_cnt = newregion = 0; -retry: if (LF_ISSET(DB_CREATE)) { - ret = __db_rcreate(dbenv, DB_APP_LOG, path, - DB_DEFAULT_LOG_FILE, mode, len, 0, &fd, &dblp->maddr); - if (ret == 0) { - /* Put the LOG structure first in the region. */ - lp = dblp->maddr; - - /* Initialize the rest of the region as free space. */ - dblp->addr = (u_int8_t *)dblp->maddr + sizeof(LOG); - __db_shalloc_init(dblp->addr, len - sizeof(LOG)); - - /* Initialize the LOG structure. */ - lp->persist.lg_max = dbenv == NULL ? 0 : dbenv->lg_max; - if (lp->persist.lg_max == 0) - lp->persist.lg_max = DEFAULT_MAX; - lp->persist.magic = DB_LOGMAGIC; - lp->persist.version = DB_LOGVERSION; - lp->persist.mode = mode; - SH_TAILQ_INIT(&lp->fq); - - /* Initialize LOG LSNs. */ - lp->lsn.file = 1; - lp->lsn.offset = 0; - - newregion = 1; - } else if (ret != EEXIST) + dblp->reginfo.dbenv = dbenv; + dblp->reginfo.appname = DB_APP_LOG; + if (path == NULL) + dblp->reginfo.path = NULL; + else + if ((dblp->reginfo.path = __db_strdup(path)) == NULL) goto err; - } - - /* If we didn't or couldn't create the region, try and join it. */ - if (!newregion && - (ret = __db_ropen(dbenv, DB_APP_LOG, - path, DB_DEFAULT_LOG_FILE, 0, &fd, &dblp->maddr)) != 0) { - /* - * If we fail because the file isn't available, wait a - * second and try again. - */ - if (ret == EAGAIN && ++retry_cnt < 3) { - (void)__db_sleep(1, 0); - goto retry; - } + dblp->reginfo.file = DB_DEFAULT_LOG_FILE; + dblp->reginfo.mode = mode; + dblp->reginfo.size = DEF_LOG_SIZE; + dblp->reginfo.dbflags = flags; + dblp->reginfo.flags = REGION_SIZEDEF; + if ((ret = __db_rattach(&dblp->reginfo)) != 0) goto err; - } - /* Set up the common information. */ - dblp->lp = dblp->maddr; - dblp->addr = (u_int8_t *)dblp->maddr + sizeof(LOG); - dblp->fd = fd; + /* + * The LOG structure is first in the region, the rest of the region + * is free space. + */ + dblp->lp = dblp->reginfo.addr; + dblp->addr = (u_int8_t *)dblp->lp + sizeof(LOG); + + /* Initialize a created region. 
*/ + if (F_ISSET(&dblp->reginfo, REGION_CREATED)) { + __db_shalloc_init(dblp->addr, DEF_LOG_SIZE - sizeof(LOG)); + + /* Initialize the LOG structure. */ + lp = dblp->lp; + lp->persist.lg_max = dbenv == NULL ? 0 : dbenv->lg_max; + if (lp->persist.lg_max == 0) + lp->persist.lg_max = DEFAULT_MAX; + lp->persist.magic = DB_LOGMAGIC; + lp->persist.version = DB_LOGVERSION; + lp->persist.mode = mode; + SH_TAILQ_INIT(&lp->fq); + + /* Initialize LOG LSNs. */ + lp->lsn.file = 1; + lp->lsn.offset = 0; + } - /* Initialize thread information. */ + /* Initialize thread information, mutex. */ if (LF_ISSET(DB_THREAD)) { F_SET(dblp, DB_AM_THREAD); - - if (!newregion) - LOCK_LOGREGION(dblp); if ((ret = __db_shalloc(dblp->addr, - sizeof(db_mutex_t), MUTEX_ALIGNMENT, &dblp->mutexp)) == 0) - (void)__db_mutex_init(dblp->mutexp, -1); - if (!newregion) - UNLOCK_LOGREGION(dblp); - if (ret != 0) { - (void)log_close(dblp); - if (newregion) - (void)log_unlink(path, 1, dbenv); - return (ret); - } + sizeof(db_mutex_t), MUTEX_ALIGNMENT, &dblp->mutexp)) != 0) + goto err; + (void)__db_mutex_init(dblp->mutexp, -1); } /* - * If doing recovery, try and recover any previous log files - * before releasing the lock. + * If doing recovery, try and recover any previous log files before + * releasing the lock. */ - if (newregion) { - ret = __log_recover(dblp); - UNLOCK_LOGREGION(dblp); + if (F_ISSET(&dblp->reginfo, REGION_CREATED) && + (ret = __log_recover(dblp)) != 0) + goto err; - if (ret != 0) { - (void)log_close(dblp); - (void)log_unlink(path, 1, dbenv); - return (ret); - } - } + UNLOCK_LOGREGION(dblp); *lpp = dblp; return (0); -err: /* - * We never get here with an allocated thread-mutex, so we do - * not have to worry about freeing it. 
- */ - FREE(dblp, sizeof(DB_LOG)); - return (ret); +err: if (dblp->reginfo.addr != NULL) { + if (dblp->mutexp != NULL) + __db_shalloc_free(dblp->addr, dblp->mutexp); + + UNLOCK_LOGREGION(dblp); + (void)__db_rdetach(&dblp->reginfo); + if (F_ISSET(&dblp->reginfo, REGION_CREATED)) + (void)log_unlink(path, 1, dbenv); + } + if (dblp->reginfo.path != NULL) + FREES(dblp->reginfo.path); + if (dblp->dir != NULL) + FREES(dblp->dir); + FREE(dblp, sizeof(*dblp)); + return (ret); } /* @@ -234,7 +206,7 @@ __log_recover(dblp) continue; memcpy(&chk, dbt.data, sizeof(u_int32_t)); if (chk == DB_txn_ckp) { - lp->c_lsn = lsn; + lp->chkpt_lsn = lsn; found_checkpoint = 1; } } @@ -273,7 +245,7 @@ __log_recover(dblp) continue; memcpy(&chk, dbt.data, sizeof(u_int32_t)); if (chk == DB_txn_ckp) { - lp->c_lsn = lsn; + lp->chkpt_lsn = lsn; found_checkpoint = 1; } } @@ -281,7 +253,7 @@ __log_recover(dblp) /* If we never find a checkpoint, that's okay, just 0 it out. */ if (!found_checkpoint) - ZERO_LSN(lp->c_lsn); + ZERO_LSN(lp->chkpt_lsn); __db_err(dblp->dbenv, "Recovering the log: last valid LSN: file: %lu offset %lu", @@ -380,7 +352,7 @@ __log_valid(dblp, lp, cnt) if ((ret = __db_open(p, DB_RDONLY | DB_SEQUENTIAL, DB_RDONLY | DB_SEQUENTIAL, 0, &fd)) != 0 || - (ret = __db_seek(fd, 0, 0, sizeof(HDR), SEEK_SET)) != 0 || + (ret = __db_seek(fd, 0, 0, sizeof(HDR), 0, SEEK_SET)) != 0 || (ret = __db_read(fd, &persist, sizeof(LOGP), &nw)) != 0 || nw != sizeof(LOGP)) { if (ret == 0) @@ -429,8 +401,6 @@ log_close(dblp) { int ret, t_ret; - ret = 0; - /* Discard the per-thread pointer. */ if (dblp->mutexp != NULL) { LOCK_LOGREGION(dblp); @@ -439,9 +409,7 @@ log_close(dblp) } /* Close the region. */ - if ((t_ret = - __db_rclose(dblp->dbenv, dblp->fd, dblp->maddr)) != 0 && ret == 0) - ret = t_ret; + ret = __db_rdetach(&dblp->reginfo); /* Close open files, release allocated memory. 
*/ if (dblp->lfd != -1 && (t_ret = __db_close(dblp->lfd)) != 0 && ret == 0) @@ -456,8 +424,9 @@ log_close(dblp) if (dblp->dir != NULL) FREES(dblp->dir); - /* Free the structure. */ - FREE(dblp, sizeof(DB_LOG)); + if (dblp->reginfo.path != NULL) + FREES(dblp->reginfo.path); + FREE(dblp, sizeof(*dblp)); return (ret); } @@ -472,8 +441,19 @@ log_unlink(path, force, dbenv) int force; DB_ENV *dbenv; { - return (__db_runlink(dbenv, - DB_APP_LOG, path, DB_DEFAULT_LOG_FILE, force)); + REGINFO reginfo; + int ret; + + memset(&reginfo, 0, sizeof(reginfo)); + reginfo.dbenv = dbenv; + reginfo.appname = DB_APP_LOG; + if (path != NULL && (reginfo.path = __db_strdup(path)) == NULL) + return (ENOMEM); + reginfo.file = DB_DEFAULT_LOG_FILE; + ret = __db_runlink(&reginfo, force); + if (reginfo.path != NULL) + FREES(reginfo.path); + return (ret); } /* @@ -511,6 +491,9 @@ log_stat(dblp, gspp, db_malloc) (*gspp)->st_cur_file = lp->lsn.file; (*gspp)->st_cur_offset = lp->lsn.offset; + (*gspp)->st_refcnt = lp->rlayout.refcnt; + (*gspp)->st_regsize = lp->rlayout.size; + UNLOCK_LOGREGION(dblp); return (0); diff --git a/db2/log/log.src b/db2/log/log.src index f3d9f32b2d..12883bd1e3 100644 --- a/db2/log/log.src +++ b/db2/log/log.src @@ -1,43 +1,12 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. * - * @(#)log.src 10.4 (Sleepycat) 1/17/98 - * - * This is the source file used to create the logging functions for the - * log package. Each access method (or set of routines wishing to register - * record types with the transaction system) should have a file like this. - * Each type of log record and its parameters is defined. The basic - * format of a record definition is: - * - * BEGIN <RECORD_TYPE> - * ARG|STRING|POINTER <variable name> <variable type> <printf format> - * ... - * END - * ARG the argument is a simple parameter of the type * specified. 
- * DBT the argument is a DBT (db.h) containing a length and pointer. - * PTR the argument is a pointer to the data type specified; the entire - * type should be logged. - * - * There are a set of shell scripts of the form xxx.sh that generate c - * code and or h files to process these. (This is probably better done - * in a single PERL script, but for now, this works.) - * - * The DB recovery system requires the following three fields appear in - * every record, and will assign them to the per-record-type structures - * as well as making them the first parameters to the appropriate logging - * call. - * rectype: record-type, identifies the structure and log/read call - * txnid: transaction id, a DBT in this implementation - * prev: the last LSN for this transaction + * @(#)log.src 10.5 (Sleepycat) 4/10/98 */ -/* - * Use the argument of PREFIX as the prefix for all record types, - * routines, id numbers, etc. - */ PREFIX log /* Used for registering name/id translations at open or close. */ diff --git a/db2/log/log_archive.c b/db2/log/log_archive.c index 91ae5abe0b..7db0cc3e36 100644 --- a/db2/log/log_archive.c +++ b/db2/log/log_archive.c @@ -1,14 +1,14 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1997 + * Copyright (c) 1997, 1998 * Sleepycat Software. All rights reserved. */ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)log_archive.c 10.30 (Sleepycat) 1/8/98"; +static const char sccsid[] = "@(#)log_archive.c 10.37 (Sleepycat) 5/3/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -24,8 +24,8 @@ static const char sccsid[] = "@(#)log_archive.c 10.30 (Sleepycat) 1/8/98"; #include "db_dispatch.h" #include "shqueue.h" #include "log.h" -#include "clib_ext.h" #include "common_ext.h" +#include "clib_ext.h" /* XXX: needed for getcwd. 
*/ static int __absname __P((char *, char *, char **)); static int __build_data __P((DB_LOG *, char *, char ***, void *(*)(size_t))); @@ -40,7 +40,7 @@ int log_archive(dblp, listp, flags, db_malloc) DB_LOG *dblp; char ***listp; - int flags; + u_int32_t flags; void *(*db_malloc) __P((size_t)); { DBT rec; @@ -89,6 +89,11 @@ log_archive(dblp, listp, flags, db_malloc) break; case 0: if ((ret = __log_findckp(dblp, &stable_lsn)) != 0) { + /* + * A return of DB_NOTFOUND means that we didn't find + * any records in the log (so we are not going to be + * deleting any log files). + */ if (ret != DB_NOTFOUND) return (ret); *listp = NULL; @@ -269,7 +274,7 @@ lg_free: if (F_ISSET(&rec, DB_DBT_MALLOC) && rec.data != NULL) /* Get the real name. */ if ((ret = __db_appname(dblp->dbenv, - DB_APP_DATA, NULL, array[last], NULL, &real_name)) != 0) + DB_APP_DATA, NULL, array[last], 0, NULL, &real_name)) != 0) goto err2; /* If the file doesn't exist, ignore it. */ @@ -335,21 +340,25 @@ __absname(pref, name, newnamep) char *pref, *name, **newnamep; { size_t l_pref, l_name; + int isabspath; char *newname; - l_pref = strlen(pref); l_name = strlen(name); + isabspath = __db_abspath(name); + l_pref = isabspath ? 0 : strlen(pref); /* Malloc space for concatenating the two. */ - if ((newname = (char *)__db_malloc(l_pref + l_name + 2)) == NULL) + if ((*newnamep = + newname = (char *)__db_malloc(l_pref + l_name + 2)) == NULL) return (ENOMEM); - /* Build the name. */ - memcpy(newname, pref, l_pref); - if (strchr(PATH_SEPARATOR, newname[l_pref - 1]) == NULL) - newname[l_pref++] = PATH_SEPARATOR[0]; + /* Build the name. If `name' is an absolute path, ignore any prefix. 
*/ + if (!isabspath) { + memcpy(newname, pref, l_pref); + if (strchr(PATH_SEPARATOR, newname[l_pref - 1]) == NULL) + newname[l_pref++] = PATH_SEPARATOR[0]; + } memcpy(newname + l_pref, name, l_name + 1); - *newnamep = newname; return (0); } @@ -409,5 +418,5 @@ static int __cmpfunc(p1, p2) const void *p1, *p2; { - return (strcmp(*((char **)p1), *((char **)p2))); + return (strcmp(*((char * const *)p1), *((char * const *)p2))); } diff --git a/db2/log/log_auto.c b/db2/log/log_auto.c index 2fe17834c3..b17b1ffb2f 100644 --- a/db2/log/log_auto.c +++ b/db2/log/log_auto.c @@ -15,8 +15,6 @@ #include "db_dispatch.h" #include "log.h" #include "db_am.h" -#include "common_ext.h" - /* * PUBLIC: int __log_register_log * PUBLIC: __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t, @@ -92,7 +90,7 @@ int __log_register_log(logp, txnid, ret_lsnp, flags, bp += sizeof(id); memcpy(bp, &ftype, sizeof(ftype)); bp += sizeof(ftype); -#ifdef DEBUG +#ifdef DIAGNOSTIC if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size) fprintf(stderr, "Error in log record length"); #endif @@ -108,22 +106,23 @@ int __log_register_log(logp, txnid, ret_lsnp, flags, * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); */ int -__log_register_print(notused1, dbtp, lsnp, notused3, notused4) +__log_register_print(notused1, dbtp, lsnp, notused2, notused3) DB_LOG *notused1; DBT *dbtp; DB_LSN *lsnp; - int notused3; - void *notused4; + int notused2; + void *notused3; { __log_register_args *argp; u_int32_t i; - int c, ret; + u_int ch; + int ret; i = 0; - c = 0; + ch = 0; notused1 = NULL; - notused3 = 0; - notused4 = NULL; + notused2 = 0; + notused3 = NULL; if ((ret = __log_register_read(dbtp->data, &argp)) != 0) return (ret); @@ -137,20 +136,20 @@ __log_register_print(notused1, dbtp, lsnp, notused3, notused4) printf("\topcode: %lu\n", (u_long)argp->opcode); printf("\tname: "); for (i = 0; i < argp->name.size; i++) { - c = ((char *)argp->name.data)[i]; - if (isprint(c) || c == 0xa) - putchar(c); + ch = ((u_int8_t 
*)argp->name.data)[i]; + if (isprint(ch) || ch == 0xa) + putchar(ch); else - printf("%#x ", c); + printf("%#x ", ch); } printf("\n"); printf("\tuid: "); for (i = 0; i < argp->uid.size; i++) { - c = ((char *)argp->uid.data)[i]; - if (isprint(c) || c == 0xa) - putchar(c); + ch = ((u_int8_t *)argp->uid.data)[i]; + if (isprint(ch) || ch == 0xa) + putchar(ch); else - printf("%#x ", c); + printf("%#x ", ch); } printf("\n"); printf("\tid: %lu\n", (u_long)argp->id); diff --git a/db2/log/log_compare.c b/db2/log/log_compare.c index 601b25c626..320b34af4d 100644 --- a/db2/log/log_compare.c +++ b/db2/log/log_compare.c @@ -1,13 +1,13 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. */ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)log_compare.c 10.2 (Sleepycat) 6/21/97"; +static const char sccsid[] = "@(#)log_compare.c 10.3 (Sleepycat) 4/10/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES diff --git a/db2/log/log_findckp.c b/db2/log/log_findckp.c index 115a00e8aa..82bd5890e6 100644 --- a/db2/log/log_findckp.c +++ b/db2/log/log_findckp.c @@ -1,21 +1,20 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. */ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)log_findckp.c 10.12 (Sleepycat) 10/25/97"; +static const char sccsid[] = "@(#)log_findckp.c 10.15 (Sleepycat) 4/26/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES #include <sys/types.h> #include <errno.h> -#include <stdlib.h> #include <string.h> #endif @@ -44,10 +43,10 @@ static const char sccsid[] = "@(#)log_findckp.c 10.12 (Sleepycat) 10/25/97"; * We find one at 500. This means that we can truncate the log before * 500 or run recovery beginning at 500. * - * Returns 0 if we find a checkpoint. 
+ * Returns 0 if we find a suitable checkpoint or we retrieved the + * first record in the log from which to start. + * Returns DB_NOTFOUND if there are no log records. * Returns errno on error. - * Returns DB_NOTFOUND if we could not find a suitable start point and - * we should start from the beginning. * * PUBLIC: int __log_findckp __P((DB_LOG *, DB_LSN *)); */ @@ -70,9 +69,12 @@ __log_findckp(lp, lsnp) memset(&data, 0, sizeof(data)); if (F_ISSET(lp, DB_AM_THREAD)) F_SET(&data, DB_DBT_MALLOC); - if ((ret = log_get(lp, &last_ckp, &data, DB_CHECKPOINT)) != 0) - return (ret == ENOENT ? DB_NOTFOUND : ret); ZERO_LSN(ckp_lsn); + if ((ret = log_get(lp, &last_ckp, &data, DB_CHECKPOINT)) != 0) + if (ret == ENOENT) + goto get_first; + else + return (ret); next_lsn = last_ckp; do { @@ -115,16 +117,12 @@ __log_findckp(lp, lsnp) * beginning of the log. */ if (log_compare(&last_ckp, &ckp_lsn) > 0) { - if ((ret = log_get(lp, &last_ckp, &data, DB_FIRST)) != 0) +get_first: if ((ret = log_get(lp, &last_ckp, &data, DB_FIRST)) != 0) return (ret); if (F_ISSET(lp, DB_AM_THREAD)) __db_free(data.data); } *lsnp = last_ckp; - if (verbose) - __db_err(lp->dbenv, "Rolling forward from [%lu][%lu]", - (u_long)last_ckp.file, (u_long)last_ckp.offset); - return (IS_ZERO_LSN(last_ckp) ? DB_NOTFOUND : 0); } diff --git a/db2/log/log_get.c b/db2/log/log_get.c index ab6f6247cb..9a055de0a6 100644 --- a/db2/log/log_get.c +++ b/db2/log/log_get.c @@ -1,21 +1,19 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. 
*/ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)log_get.c 10.24 (Sleepycat) 1/17/98"; +static const char sccsid[] = "@(#)log_get.c 10.32 (Sleepycat) 5/6/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES #include <sys/types.h> #include <errno.h> -#include <fcntl.h> -#include <stdlib.h> #include <string.h> #include <unistd.h> #endif @@ -36,9 +34,8 @@ log_get(dblp, alsn, dbt, flags) DB_LOG *dblp; DB_LSN *alsn; DBT *dbt; - int flags; + u_int32_t flags; { - LOG *lp; int ret; /* Validate arguments. */ @@ -66,8 +63,6 @@ log_get(dblp, alsn, dbt, flags) return (__db_ferr(dblp->dbenv, "threaded data", 1)); } - lp = dblp->lp; - LOCK_LOGREGION(dblp); /* @@ -97,14 +92,15 @@ log_get(dblp, alsn, dbt, flags) * __log_get -- * Get a log record; internal version. * - * PUBLIC: int __log_get __P((DB_LOG *, DB_LSN *, DBT *, int, int)); + * PUBLIC: int __log_get __P((DB_LOG *, DB_LSN *, DBT *, u_int32_t, int)); */ int __log_get(dblp, alsn, dbt, flags, silent) DB_LOG *dblp; DB_LSN *alsn; DBT *dbt; - int flags, silent; + u_int32_t flags; + int silent; { DB_LSN nlsn; HDR hdr; @@ -122,7 +118,7 @@ __log_get(dblp, alsn, dbt, flags, silent) nlsn = dblp->c_lsn; switch (flags) { case DB_CHECKPOINT: - nlsn = lp->c_lsn; + nlsn = lp->chkpt_lsn; if (IS_ZERO_LSN(nlsn)) { __db_err(dblp->dbenv, "log_get: unable to find checkpoint record: no checkpoint set."); @@ -219,7 +215,8 @@ retry: } /* Seek to the header offset and read the header. */ - if ((ret = __db_seek(dblp->c_fd, 0, 0, nlsn.offset, SEEK_SET)) != 0) { + if ((ret = + __db_seek(dblp->c_fd, 0, 0, nlsn.offset, 0, SEEK_SET)) != 0) { fail = "seek"; goto err1; } @@ -272,7 +269,13 @@ retry: goto cksum; } - /* Allocate temporary memory to hold the record. */ + /* + * Allocate temporary memory to hold the record. + * + * XXX + * We're calling malloc(3) with a region locked. This isn't + * a good idea. 
+ */ if ((tbuf = (char *)__db_malloc(len)) == NULL) { ret = ENOMEM; goto err1; diff --git a/db2/log/log_put.c b/db2/log/log_put.c index 65a3990799..d00e7dde21 100644 --- a/db2/log/log_put.c +++ b/db2/log/log_put.c @@ -1,21 +1,19 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. */ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)log_put.c 10.24 (Sleepycat) 1/17/98"; +static const char sccsid[] = "@(#)log_put.c 10.35 (Sleepycat) 5/6/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES #include <sys/types.h> #include <errno.h> -#include <fcntl.h> -#include <stdlib.h> #include <string.h> #include <time.h> #include <unistd.h> @@ -43,18 +41,19 @@ log_put(dblp, lsn, dbt, flags) DB_LOG *dblp; DB_LSN *lsn; const DBT *dbt; - int flags; + u_int32_t flags; { int ret; /* Validate arguments. */ -#define OKFLAGS (DB_CHECKPOINT | DB_FLUSH) +#define OKFLAGS (DB_CHECKPOINT | DB_FLUSH | DB_CURLSN) if (flags != 0) { if ((ret = __db_fchk(dblp->dbenv, "log_put", flags, OKFLAGS)) != 0) return (ret); switch (flags) { case DB_CHECKPOINT: + case DB_CURLSN: case DB_FLUSH: case 0: break; @@ -73,14 +72,14 @@ log_put(dblp, lsn, dbt, flags) * __log_put -- * Write a log record; internal version. * - * PUBLIC: int __log_put __P((DB_LOG *, DB_LSN *, const DBT *, int)); + * PUBLIC: int __log_put __P((DB_LOG *, DB_LSN *, const DBT *, u_int32_t)); */ int __log_put(dblp, lsn, dbt, flags) DB_LOG *dblp; DB_LSN *lsn; const DBT *dbt; - int flags; + u_int32_t flags; { DBT fid_dbt, t; DB_LSN r_unused; @@ -91,6 +90,17 @@ __log_put(dblp, lsn, dbt, flags) lp = dblp->lp; + /* + * If the application just wants to know where we are, fill in + * the information. Currently used by the transaction manager + * to avoid writing TXN_begin records. 
+ */ + if (LF_ISSET(DB_CURLSN)) { + lsn->file = lp->lsn.file; + lsn->offset = lp->lsn.offset; + return (0); + } + /* If this information won't fit in the file, swap files. */ if (lp->lsn.offset + sizeof(HDR) + dbt->size > lp->persist.lg_max) { if (sizeof(HDR) + @@ -151,7 +161,7 @@ __log_put(dblp, lsn, dbt, flags) * Append the set of file name information into the log. */ if (flags == DB_CHECKPOINT) { - lp->c_lsn = *lsn; + lp->chkpt_lsn = *lsn; for (fnp = SH_TAILQ_FIRST(&dblp->lp->fq, __fname); fnp != NULL; fnp = SH_TAILQ_NEXT(fnp, q, __fname)) { @@ -159,7 +169,7 @@ __log_put(dblp, lsn, dbt, flags) t.data = R_ADDR(dblp, fnp->name_off); t.size = strlen(t.data) + 1; memset(&fid_dbt, 0, sizeof(fid_dbt)); - fid_dbt.data = R_ADDR(dblp, fnp->fileid_off); + fid_dbt.data = fnp->ufid; fid_dbt.size = DB_FILE_ID_LEN; if ((ret = __log_register_log(dblp, NULL, &r_unused, 0, LOG_CHECKPOINT, &t, &fid_dbt, fnp->id, fnp->s_type)) @@ -324,7 +334,11 @@ __log_flush(dblp, lsn) */ lp->s_lsn = lp->f_lsn; if (!current) - --lp->s_lsn.offset; + if (lp->s_lsn.offset == 0) { + --lp->s_lsn.file; + lp->s_lsn.offset = lp->persist.lg_max; + } else + --lp->s_lsn.offset; return (0); } @@ -416,7 +430,7 @@ __log_write(dblp, addr, len) * Seek to the offset in the file (someone may have written it * since we last did). */ - if ((ret = __db_seek(dblp->lfd, 0, 0, lp->w_off, SEEK_SET)) != 0) + if ((ret = __db_seek(dblp->lfd, 0, 0, lp->w_off, 0, SEEK_SET)) != 0) return (ret); if ((ret = __db_write(dblp->lfd, addr, len, &nw)) != 0) return (ret); @@ -461,7 +475,7 @@ log_file(dblp, lsn, namep, len) return (ret); /* Check to make sure there's enough room and copy the name. 
*/ - if (len < strlen(p)) { + if (len < strlen(p) + 1) { *namep = '\0'; return (ENOMEM); } @@ -518,5 +532,5 @@ __log_name(dblp, filenumber, namep) (void)snprintf(name, sizeof(name), LFNAME, filenumber); return (__db_appname(dblp->dbenv, - DB_APP_LOG, dblp->dir, name, NULL, namep)); + DB_APP_LOG, dblp->dir, name, 0, NULL, namep)); } diff --git a/db2/log/log_rec.c b/db2/log/log_rec.c index 69334f8bc8..5deac46298 100644 --- a/db2/log/log_rec.c +++ b/db2/log/log_rec.c @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. */ /* @@ -40,16 +40,13 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)log_rec.c 10.16 (Sleepycat) 1/17/98"; +static const char sccsid[] = "@(#)log_rec.c 10.20 (Sleepycat) 4/28/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES #include <sys/types.h> #include <errno.h> -#include <fcntl.h> -#include <stddef.h> -#include <stdlib.h> #include <string.h> #endif @@ -90,7 +87,7 @@ __log_register_recover(logp, dbtp, lsnp, redo, info) if ((argp->opcode == LOG_CHECKPOINT && redo == TXN_OPENFILES) || (argp->opcode == LOG_OPEN && - (redo == TXN_REDO || redo == TXN_OPENFILES || + (redo == TXN_REDO || redo == TXN_OPENFILES || redo == TXN_FORWARD_ROLL)) || (argp->opcode == LOG_CLOSE && (redo == TXN_UNDO || redo == TXN_BACKWARD_ROLL))) { @@ -121,6 +118,7 @@ __log_register_recover(logp, dbtp, lsnp, redo, info) if (!logp->dbentry[argp->id].deleted) ret = EINVAL; } else if (--logp->dbentry[argp->id].refcount == 0) { + F_SET(logp->dbentry[argp->id].dbp, DB_AM_RECOVER); ret = logp->dbentry[argp->id].dbp->close( logp->dbentry[argp->id].dbp, 0); logp->dbentry[argp->id].dbp = NULL; diff --git a/db2/log/log_register.c b/db2/log/log_register.c index 9907d6e25a..a6fc4c1b3b 100644 --- a/db2/log/log_register.c +++ b/db2/log/log_register.c @@ -1,20 +1,19 @@ /*- * See the file LICENSE for redistribution information. 
* - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. */ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)log_register.c 10.14 (Sleepycat) 1/19/98"; +static const char sccsid[] = "@(#)log_register.c 10.18 (Sleepycat) 5/3/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES #include <sys/types.h> #include <errno.h> -#include <stdlib.h> #include <string.h> #endif @@ -42,12 +41,12 @@ log_register(dblp, dbp, name, type, idp) u_int32_t fid; int inserted, ret; char *fullname; - void *fidp, *namep; + void *namep; fid = 0; inserted = 0; fullname = NULL; - fnp = fidp = namep = NULL; + fnp = namep = NULL; /* Check the arguments. */ if (type != DB_BTREE && type != DB_HASH && type != DB_RECNO) { @@ -57,7 +56,7 @@ log_register(dblp, dbp, name, type, idp) /* Get the log file id. */ if ((ret = __db_appname(dblp->dbenv, - DB_APP_DATA, NULL, name, NULL, &fullname)) != 0) + DB_APP_DATA, NULL, name, 0, NULL, &fullname)) != 0) return (ret); LOCK_LOGREGION(dblp); @@ -70,8 +69,7 @@ log_register(dblp, dbp, name, type, idp) fnp != NULL; fnp = SH_TAILQ_NEXT(fnp, q, __fname)) { if (fid <= fnp->id) fid = fnp->id + 1; - if (!memcmp(dbp->lock.fileid, - R_ADDR(dblp, fnp->fileid_off), DB_FILE_ID_LEN)) { + if (!memcmp(dbp->lock.fileid, fnp->ufid, DB_FILE_ID_LEN)) { ++fnp->ref; fid = fnp->id; goto found; @@ -84,15 +82,7 @@ log_register(dblp, dbp, name, type, idp) fnp->ref = 1; fnp->id = fid; fnp->s_type = type; - - if ((ret = __db_shalloc(dblp->addr, DB_FILE_ID_LEN, 0, &fidp)) != 0) - goto err; - /* - * XXX Now that uids are fixed size, we can put them in the fnp - * structure. 
- */ - fnp->fileid_off = R_OFFSET(dblp, fidp); - memcpy(fidp, dbp->lock.fileid, DB_FILE_ID_LEN); + memcpy(fnp->ufid, dbp->lock.fileid, DB_FILE_ID_LEN); len = strlen(name) + 1; if ((ret = __db_shalloc(dblp->addr, len, 0, &namep)) != 0) @@ -126,8 +116,6 @@ err: /* SH_TAILQ_REMOVE(&dblp->lp->fq, fnp, q, __fname); if (namep != NULL) __db_shalloc_free(dblp->addr, namep); - if (fidp != NULL) - __db_shalloc_free(dblp->addr, fidp); if (fnp != NULL) __db_shalloc_free(dblp->addr, fnp); } @@ -176,7 +164,7 @@ log_unregister(dblp, fid) r_name.data = R_ADDR(dblp, fnp->name_off); r_name.size = strlen(r_name.data) + 1; memset(&fid_dbt, 0, sizeof(fid_dbt)); - fid_dbt.data = R_ADDR(dblp, fnp->fileid_off); + fid_dbt.data = fnp->ufid; fid_dbt.size = DB_FILE_ID_LEN; if ((ret = __log_register_log(dblp, NULL, &r_unused, 0, LOG_CLOSE, &r_name, &fid_dbt, fid, fnp->s_type)) != 0) @@ -190,7 +178,6 @@ log_unregister(dblp, fid) if (fnp->ref > 1) --fnp->ref; else { - __db_shalloc_free(dblp->addr, R_ADDR(dblp, fnp->fileid_off)); __db_shalloc_free(dblp->addr, R_ADDR(dblp, fnp->name_off)); SH_TAILQ_REMOVE(&dblp->lp->fq, fnp, q, __fname); __db_shalloc_free(dblp->addr, fnp); diff --git a/db2/mp/mp_bh.c b/db2/mp/mp_bh.c index c23abdda24..d89f9c2ded 100644 --- a/db2/mp/mp_bh.c +++ b/db2/mp/mp_bh.c @@ -1,13 +1,13 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. 
*/ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)mp_bh.c 10.28 (Sleepycat) 1/8/98"; +static const char sccsid[] = "@(#)mp_bh.c 10.38 (Sleepycat) 5/20/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -59,8 +59,10 @@ __memp_bhwrite(dbmp, mfp, bhp, restartp, wrotep) dbmfp != NULL; dbmfp = TAILQ_NEXT(dbmfp, q)) if (dbmfp->mfp == mfp) { if (F_ISSET(dbmfp, MP_READONLY) && - __memp_upgrade(dbmp, dbmfp, mfp)) + __memp_upgrade(dbmp, dbmfp, mfp)) { + UNLOCKHANDLE(dbmp, dbmp->mutexp); return (0); + } break; } UNLOCKHANDLE(dbmp, dbmp->mutexp); @@ -111,8 +113,8 @@ __memp_bhwrite(dbmp, mfp, bhp, restartp, wrotep) if (F_ISSET(mfp, MP_TEMP)) return (0); - if (__memp_fopen(dbmp, mfp, R_ADDR(dbmp, mfp->path_off), mfp->ftype, - 0, 0, mfp->stat.st_pagesize, 0, NULL, NULL, 0, &dbmfp) != 0) + if (__memp_fopen(dbmp, mfp, R_ADDR(dbmp, mfp->path_off), + 0, 0, mfp->stat.st_pagesize, 0, NULL, &dbmfp) != 0) return (0); found: return (__memp_pgwrite(dbmfp, bhp, restartp, wrotep)); @@ -152,7 +154,7 @@ __memp_pgread(dbmfp, bhp, can_create) ret = 0; LOCKHANDLE(dbmp, dbmfp->mutexp); if (dbmfp->fd == -1 || (ret = - __db_seek(dbmfp->fd, pagesize, bhp->pgno, 0, SEEK_SET)) != 0) { + __db_seek(dbmfp->fd, pagesize, bhp->pgno, 0, 0, SEEK_SET)) != 0) { if (!can_create) { if (dbmfp->fd == -1) ret = EINVAL; @@ -164,8 +166,17 @@ __memp_pgread(dbmfp, bhp, can_create) } UNLOCKHANDLE(dbmp, dbmfp->mutexp); - /* Clear any uninitialized data. */ - memset(bhp->buf, 0, pagesize); + /* Clear the created page. */ + if (mfp->clear_len == 0) + memset(bhp->buf, 0, pagesize); + else { + memset(bhp->buf, 0, mfp->clear_len); +#ifdef DIAGNOSTIC + memset(bhp->buf + mfp->clear_len, + 0xff, pagesize - mfp->clear_len); +#endif + } + goto pgin; } @@ -186,8 +197,16 @@ __memp_pgread(dbmfp, bhp, can_create) goto err; } - /* Clear any uninitialized data. 
*/ - memset(bhp->buf + nr, 0, pagesize - nr); + /* + * If we didn't fail until we tried the read, don't clear the + * whole page, it wouldn't be insane for a filesystem to just + * always behave that way. Else, clear any uninitialized data. + */ + if (nr == 0) + memset(bhp->buf, 0, + mfp->clear_len == 0 ? pagesize : mfp->clear_len); + else + memset(bhp->buf + nr, 0, pagesize - nr); } /* Call any pgin function. */ @@ -308,31 +327,31 @@ __memp_pgwrite(dbmfp, bhp, restartp, wrotep) /* Temporary files may not yet have been created. */ LOCKHANDLE(dbmp, dbmfp->mutexp); - if (dbmfp->fd == -1) - if ((ret = __db_appname(dbenv, DB_APP_TMP, - NULL, NULL, &dbmfp->fd, NULL)) != 0 || dbmfp->fd == -1) { - UNLOCKHANDLE(dbmp, dbmfp->mutexp); - __db_err(dbenv, - "unable to create temporary backing file"); - goto err; - } + if (dbmfp->fd == -1 && + ((ret = __db_appname(dbenv, DB_APP_TMP, NULL, NULL, + DB_CREATE | DB_EXCL | DB_TEMPORARY, &dbmfp->fd, NULL)) != 0 || + dbmfp->fd == -1)) { + UNLOCKHANDLE(dbmp, dbmfp->mutexp); + __db_err(dbenv, "unable to create temporary backing file"); + goto err; + } - /* Write the page out. */ - if ((ret = __db_seek(dbmfp->fd, pagesize, bhp->pgno, 0, SEEK_SET)) != 0) + /* + * Write the page out. + * + * XXX + * Shut the compiler up; it doesn't understand the correlation between + * the failing clauses to __db_lseek and __db_write and this ret != 0. + */ + COMPQUIET(fail, NULL); + if ((ret = + __db_seek(dbmfp->fd, pagesize, bhp->pgno, 0, 0, SEEK_SET)) != 0) fail = "seek"; else if ((ret = __db_write(dbmfp->fd, bhp->buf, pagesize, &nw)) != 0) fail = "write"; UNLOCKHANDLE(dbmp, dbmfp->mutexp); - if (ret != 0) { - /* - * XXX - * Shut the compiler up; it doesn't understand the correlation - * between the failing clauses to __db_lseek and __db_write and - * this ret != 0. 
- */ - COMPQUIET(fail, NULL); + if (ret != 0) goto syserr; - } if (nw != (ssize_t)pagesize) { ret = EIO; @@ -548,7 +567,7 @@ __memp_upgrade(dbmp, dbmfp, mfp) * way we could have gotten a file descriptor of any kind. */ if ((ret = __db_appname(dbmp->dbenv, DB_APP_DATA, - NULL, R_ADDR(dbmp, mfp->path_off), NULL, &rpath)) != 0) + NULL, R_ADDR(dbmp, mfp->path_off), 0, NULL, &rpath)) != 0) return (ret); if (__db_open(rpath, 0, 0, 0, &fd) != 0) { F_SET(dbmfp, MP_UPGRADE_FAIL); diff --git a/db2/mp/mp_fget.c b/db2/mp/mp_fget.c index f5955c4c6f..c8ae2e9d98 100644 --- a/db2/mp/mp_fget.c +++ b/db2/mp/mp_fget.c @@ -1,21 +1,19 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. */ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)mp_fget.c 10.33 (Sleepycat) 12/2/97"; +static const char sccsid[] = "@(#)mp_fget.c 10.48 (Sleepycat) 6/2/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES #include <sys/types.h> -#include <sys/stat.h> #include <errno.h> -#include <stdlib.h> #include <string.h> #endif @@ -25,8 +23,6 @@ static const char sccsid[] = "@(#)mp_fget.c 10.33 (Sleepycat) 12/2/97"; #include "mp.h" #include "common_ext.h" -int __sleep_on_every_page_get; /* XXX: thread debugging option. */ - /* * memp_fget -- * Get a page from the file. @@ -35,7 +31,7 @@ int memp_fget(dbmfp, pgnoaddr, flags, addrp) DB_MPOOLFILE *dbmfp; db_pgno_t *pgnoaddr; - int flags; + u_int32_t flags; void *addrp; { BH *bhp; @@ -43,11 +39,12 @@ memp_fget(dbmfp, pgnoaddr, flags, addrp) MPOOL *mp; MPOOLFILE *mfp; size_t bucket, mf_offset; - u_long cnt; - int b_incr, b_inserted, readonly_alloc, ret; - void *addr; + u_int32_t st_hsearch; + int b_incr, first, ret; dbmp = dbmfp->dbmp; + mp = dbmp->mp; + mfp = dbmfp->mfp; /* * Validate arguments. 
@@ -79,32 +76,62 @@ memp_fget(dbmfp, pgnoaddr, flags, addrp) } } -#ifdef DEBUG +#ifdef DIAGNOSTIC /* * XXX * We want to switch threads as often as possible. Sleep every time * we get a new page to make it more likely. */ - if (__sleep_on_every_page_get && + if (DB_GLOBAL(db_pageyield) && (__db_yield == NULL || __db_yield() != 0)) __db_sleep(0, 1); #endif - mp = dbmp->mp; - mfp = dbmfp->mfp; + /* Initialize remaining local variables. */ mf_offset = R_OFFSET(dbmp, mfp); - addr = NULL; bhp = NULL; - b_incr = b_inserted = ret = 0; + st_hsearch = 0; + b_incr = ret = 0; + + /* Determine the hash bucket where this page will live. */ + bucket = BUCKET(mp, mf_offset, *pgnoaddr); LOCKREGION(dbmp); /* - * If mmap'ing the file, just return a pointer. However, if another - * process has opened the file for writing since we mmap'd it, start - * playing the game by their rules, i.e. everything goes through the - * cache. All pages previously returned should be safe, as long as - * a locking protocol was observed. + * Check for the last or last + 1 page requests. + * + * Examine and update the file's last_pgno value. We don't care if + * the last_pgno value immediately changes due to another thread -- + * at this instant in time, the value is correct. We do increment the + * current last_pgno value if the thread is asking for a new page, + * however, to ensure that two threads creating pages don't get the + * same one. + */ + if (LF_ISSET(DB_MPOOL_LAST | DB_MPOOL_NEW)) { + if (LF_ISSET(DB_MPOOL_NEW)) + ++mfp->last_pgno; + *pgnoaddr = mfp->last_pgno; + bucket = BUCKET(mp, mf_offset, mfp->last_pgno); + + if (LF_ISSET(DB_MPOOL_NEW)) + goto alloc; + } + + /* + * If mmap'ing the file and the page is not past the end of the file, + * just return a pointer. + * + * The page may be past the end of the file, so check the page number + * argument against the original length of the file. 
If we previously + * returned pages past the original end of the file, last_pgno will + * have been updated to match the "new" end of the file, and checking + * against it would return pointers past the end of the mmap'd region. + * + * If another process has opened the file for writing since we mmap'd + * it, we will start playing the game by their rules, i.e. everything + * goes through the cache. All pages previously returned will be safe, + * as long as the correct locking protocol was observed. * * XXX * We don't discard the map because we don't know when all of the @@ -112,203 +139,180 @@ memp_fget(dbmfp, pgnoaddr, flags, addrp) * It would be possible to do so by reference counting the open * pages from the mmap, but it's unclear to me that it's worth it. */ - if (dbmfp->addr != NULL && F_ISSET(dbmfp->mfp, MP_CAN_MMAP)) { - readonly_alloc = 0; - if (LF_ISSET(DB_MPOOL_LAST)) - *pgnoaddr = mfp->last_pgno; - else { + if (dbmfp->addr != NULL && F_ISSET(mfp, MP_CAN_MMAP)) + if (*pgnoaddr > mfp->orig_last_pgno) { /* * !!! - * Allocate a page that can never really exist. See - * the comment above about non-existent pages and the - * hash access method. + * See the comment above about non-existent pages and + * the hash access method. */ - if (LF_ISSET(DB_MPOOL_CREATE | DB_MPOOL_NEW)) - readonly_alloc = 1; - else if (*pgnoaddr > mfp->last_pgno) { + if (!LF_ISSET(DB_MPOOL_CREATE)) { __db_err(dbmp->dbenv, "%s: page %lu doesn't exist", __memp_fn(dbmfp), (u_long)*pgnoaddr); ret = EINVAL; goto err; } - } - if (!readonly_alloc) { - addr = R_ADDR(dbmfp, *pgnoaddr * mfp->stat.st_pagesize); - + } else { + *(void **)addrp = + R_ADDR(dbmfp, *pgnoaddr * mfp->stat.st_pagesize); ++mp->stat.st_map; ++mfp->stat.st_map; + goto done; + } - goto mapret; + /* Search the hash chain for the page. 
*/ + for (bhp = SH_TAILQ_FIRST(&dbmp->htab[bucket], __bh); + bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, hq, __bh)) { + ++st_hsearch; + if (bhp->pgno != *pgnoaddr || bhp->mf_offset != mf_offset) + continue; + + /* Increment the reference count. */ + if (bhp->ref == UINT16_T_MAX) { + __db_err(dbmp->dbenv, + "%s: page %lu: reference count overflow", + __memp_fn(dbmfp), (u_long)bhp->pgno); + ret = EINVAL; + goto err; } - } - /* Check if requesting the last page or a new page. */ - if (LF_ISSET(DB_MPOOL_LAST)) - *pgnoaddr = mfp->last_pgno; + /* + * Increment the reference count. We may discard the region + * lock as we evaluate and/or read the buffer, so we need to + * ensure that it doesn't move and that its contents remain + * unchanged. + */ + ++bhp->ref; + b_incr = 1; - if (LF_ISSET(DB_MPOOL_NEW)) { - *pgnoaddr = mfp->last_pgno + 1; - goto alloc; - } + /* + * Any buffer we find might be trouble. + * + * BH_LOCKED -- + * I/O is in progress. Because we've incremented the buffer + * reference count, we know the buffer can't move. Unlock + * the region lock, wait for the I/O to complete, and reacquire + * the region. + */ + for (first = 1; F_ISSET(bhp, BH_LOCKED); first = 0) { + UNLOCKREGION(dbmp); - /* Check the BH hash bucket queue. */ - bucket = BUCKET(mp, mf_offset, *pgnoaddr); - for (cnt = 0, - bhp = SH_TAILQ_FIRST(&dbmp->htab[bucket], __bh); - bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, hq, __bh)) { - ++cnt; - if (bhp->pgno == *pgnoaddr && bhp->mf_offset == mf_offset) { - addr = bhp->buf; - ++mp->stat.st_hash_searches; - if (cnt > mp->stat.st_hash_longest) - mp->stat.st_hash_longest = cnt; - mp->stat.st_hash_examined += cnt; - goto found; + /* + * Explicitly yield the processor if it's not the first + * pass through this loop -- if we don't, we might end + * up running to the end of our CPU quantum as we will + * simply be swapping between the two locks. 
+ */ + if (!first && (__db_yield == NULL || __db_yield() != 0)) + __db_sleep(0, 1); + + LOCKBUFFER(dbmp, bhp); + /* Wait for I/O to finish... */ + UNLOCKBUFFER(dbmp, bhp); + LOCKREGION(dbmp); } - } - if (cnt != 0) { - ++mp->stat.st_hash_searches; - if (cnt > mp->stat.st_hash_longest) - mp->stat.st_hash_longest = cnt; - mp->stat.st_hash_examined += cnt; + + /* + * BH_TRASH -- + * The contents of the buffer are garbage. Shouldn't happen, + * and this read is likely to fail, but might as well try. + */ + if (F_ISSET(bhp, BH_TRASH)) + goto reread; + + /* + * BH_CALLPGIN -- + * The buffer was converted so it could be written, and the + * contents need to be converted again. + */ + if (F_ISSET(bhp, BH_CALLPGIN)) { + if ((ret = __memp_pg(dbmfp, bhp, 1)) != 0) + goto err; + F_CLR(bhp, BH_CALLPGIN); + } + + ++mp->stat.st_cache_hit; + ++mfp->stat.st_cache_hit; + *(void **)addrp = bhp->buf; + goto done; } -alloc: /* - * Allocate a new buffer header and data space, and mark the contents - * as useless. - */ +alloc: /* Allocate new buffer header and data space. */ if ((ret = __memp_ralloc(dbmp, sizeof(BH) - sizeof(u_int8_t) + mfp->stat.st_pagesize, NULL, &bhp)) != 0) goto err; - addr = bhp->buf; -#ifdef DEBUG - if ((ALIGNTYPE)addr & (sizeof(size_t) - 1)) { + +#ifdef DIAGNOSTIC + if ((ALIGNTYPE)bhp->buf & (sizeof(size_t) - 1)) { __db_err(dbmp->dbenv, "Internal error: BH data NOT size_t aligned."); - abort(); + ret = EINVAL; + goto err; } #endif + /* Initialize the BH fields. */ memset(bhp, 0, sizeof(BH)); LOCKINIT(dbmp, &bhp->mutex); + bhp->ref = 1; + bhp->pgno = *pgnoaddr; + bhp->mf_offset = mf_offset; /* * Prepend the bucket header to the head of the appropriate MPOOL * bucket hash list. Append the bucket header to the tail of the * MPOOL LRU chain. - * - * We have to do this before we read in the page so we can discard - * our region lock without screwing up the world. 
*/ - bucket = BUCKET(mp, mf_offset, *pgnoaddr); SH_TAILQ_INSERT_HEAD(&dbmp->htab[bucket], bhp, hq, __bh); SH_TAILQ_INSERT_TAIL(&mp->bhq, bhp, q); - ++mp->stat.st_page_clean; - b_inserted = 1; - - /* Set the page number, and associated MPOOLFILE. */ - bhp->mf_offset = mf_offset; - bhp->pgno = *pgnoaddr; /* - * If we know we created the page, zero it out and continue. + * If we created the page, zero it out and continue. * * !!! - * Note: DB_MPOOL_NEW deliberately doesn't call the pgin function. + * Note: DB_MPOOL_NEW specifically doesn't call the pgin function. * If DB_MPOOL_CREATE is used, then the application's pgin function * has to be able to handle pages of 0's -- if it uses DB_MPOOL_NEW, * it can detect all of its page creates, and not bother. * * Otherwise, read the page into memory, optionally creating it if * DB_MPOOL_CREATE is set. - * - * Increment the reference count for created buffers, but importantly, - * increment the reference count for buffers we're about to read so - * that the buffer can't move. */ - ++bhp->ref; - b_incr = 1; + if (LF_ISSET(DB_MPOOL_NEW)) { + if (mfp->clear_len == 0) + memset(bhp->buf, 0, mfp->stat.st_pagesize); + else { + memset(bhp->buf, 0, mfp->clear_len); +#ifdef DIAGNOSTIC + memset(bhp->buf + mfp->clear_len, 0xff, + mfp->stat.st_pagesize - mfp->clear_len); +#endif + } - if (LF_ISSET(DB_MPOOL_NEW)) - memset(addr, 0, mfp->stat.st_pagesize); - else { + ++mp->stat.st_page_create; + ++mfp->stat.st_page_create; + } else { /* * It's possible for the read function to fail, which means - * that we fail as well. + * that we fail as well. Note, the __memp_pgread() function + * discards the region lock, so the buffer must be pinned + * down so that it cannot move and its contents are unchanged. */ reread: if ((ret = __memp_pgread(dbmfp, - bhp, LF_ISSET(DB_MPOOL_CREATE | DB_MPOOL_NEW))) != 0) - goto err; - - /* - * !!! - * The __memp_pgread call discarded and reacquired the region - * lock. 
Because the buffer reference count was incremented - * before the region lock was discarded the buffer can't move - * and its contents can't change. - */ - ++mp->stat.st_cache_miss; - ++mfp->stat.st_cache_miss; - } - - if (0) { -found: /* Increment the reference count. */ - if (bhp->ref == UINT16_T_MAX) { - __db_err(dbmp->dbenv, - "%s: too many references to page %lu", - __memp_fn(dbmfp), bhp->pgno); - ret = EINVAL; - goto err; - } - ++bhp->ref; - b_incr = 1; - - /* - * Any found buffer might be trouble. - * - * BH_LOCKED -- - * I/O in progress, wait for it to finish. Because the buffer - * reference count was incremented before the region lock was - * discarded we know the buffer can't move and its contents - * can't change. - */ - for (cnt = 0; F_ISSET(bhp, BH_LOCKED); ++cnt) { - UNLOCKREGION(dbmp); - + bhp, LF_ISSET(DB_MPOOL_CREATE))) != 0) { /* - * Sleep so that we don't simply spin, switching locks. - * (See the comment in include/mp.h.) + * !!! + * Discard the buffer unless another thread is waiting + * on our I/O to complete. Regardless, the header has + * the BH_TRASH flag set. */ - if (cnt != 0 && - (__db_yield == NULL || __db_yield() != 0)) - __db_sleep(0, 1); - - LOCKBUFFER(dbmp, bhp); - /* Waiting for I/O to finish... */ - UNLOCKBUFFER(dbmp, bhp); - LOCKREGION(dbmp); - } - - /* - * BH_TRASH -- - * The buffer is garbage. - */ - if (F_ISSET(bhp, BH_TRASH)) - goto reread; - - /* - * BH_CALLPGIN -- - * The buffer was written, and the contents need to be - * converted again. - */ - if (F_ISSET(bhp, BH_CALLPGIN)) { - if ((ret = __memp_pg(dbmfp, bhp, 1)) != 0) - goto err; - F_CLR(bhp, BH_CALLPGIN); + if (bhp->ref == 1) + __memp_bhfree(dbmp, mfp, bhp, 1); + goto err; } - ++mp->stat.st_cache_hit; - ++mfp->stat.st_cache_hit; + ++mp->stat.st_cache_miss; + ++mfp->stat.st_cache_miss; } /* @@ -319,23 +323,30 @@ found: /* Increment the reference count. 
*/ if (bhp->pgno > mfp->last_pgno) mfp->last_pgno = bhp->pgno; -mapret: LOCKHANDLE(dbmp, dbmfp->mutexp); + ++mp->stat.st_page_clean; + *(void **)addrp = bhp->buf; + +done: /* Update the chain search statistics. */ + if (st_hsearch) { + ++mp->stat.st_hash_searches; + if (st_hsearch > mp->stat.st_hash_longest) + mp->stat.st_hash_longest = st_hsearch; + mp->stat.st_hash_examined += st_hsearch; + } + + UNLOCKREGION(dbmp); + + LOCKHANDLE(dbmp, dbmfp->mutexp); ++dbmfp->pinref; UNLOCKHANDLE(dbmp, dbmfp->mutexp); - if (0) { -err: /* - * If no other process is already waiting on a created buffer, - * go ahead and discard it, it's not useful. - */ - if (b_incr) - --bhp->ref; - if (b_inserted && bhp->ref == 0) - __memp_bhfree(dbmp, mfp, bhp, 1); - } + return (0); +err: /* Discard our reference. */ + if (b_incr) + --bhp->ref; UNLOCKREGION(dbmp); - *(void **)addrp = addr; + *(void **)addrp = NULL; return (ret); } diff --git a/db2/mp/mp_fopen.c b/db2/mp/mp_fopen.c index 0f41122373..a4cbac8d4e 100644 --- a/db2/mp/mp_fopen.c +++ b/db2/mp/mp_fopen.c @@ -1,24 +1,20 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. 
*/ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)mp_fopen.c 10.37 (Sleepycat) 1/18/98"; +static const char sccsid[] = "@(#)mp_fopen.c 10.47 (Sleepycat) 5/4/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES #include <sys/types.h> -#include <sys/mman.h> -#include <sys/stat.h> #include <errno.h> -#include <stdlib.h> #include <string.h> -#include <unistd.h> #endif #include "db_int.h" @@ -28,22 +24,21 @@ static const char sccsid[] = "@(#)mp_fopen.c 10.37 (Sleepycat) 1/18/98"; #include "common_ext.h" static int __memp_mf_close __P((DB_MPOOL *, DB_MPOOLFILE *)); -static int __memp_mf_open __P((DB_MPOOL *, const char *, - int, size_t, db_pgno_t, int, DBT *, u_int8_t *, MPOOLFILE **)); +static int __memp_mf_open __P((DB_MPOOL *, + const char *, size_t, db_pgno_t, DB_MPOOL_FINFO *, MPOOLFILE **)); /* * memp_fopen -- * Open a backing file for the memory pool. */ int -memp_fopen(dbmp, path, ftype, - flags, mode, pagesize, lsn_offset, pgcookie, fileid, retp) +memp_fopen(dbmp, path, flags, mode, pagesize, finfop, retp) DB_MPOOL *dbmp; const char *path; - int ftype, flags, mode, lsn_offset; + u_int32_t flags; + int mode; size_t pagesize; - DBT *pgcookie; - u_int8_t *fileid; + DB_MPOOL_FINFO *finfop; DB_MPOOLFILE **retp; { int ret; @@ -59,31 +54,31 @@ memp_fopen(dbmp, path, ftype, return (EINVAL); } - return (__memp_fopen(dbmp, NULL, path, ftype, - flags, mode, pagesize, lsn_offset, pgcookie, fileid, 1, retp)); + return (__memp_fopen(dbmp, + NULL, path, flags, mode, pagesize, 1, finfop, retp)); } /* * __memp_fopen -- * Open a backing file for the memory pool; internal version. 
* - * PUBLIC: int __memp_fopen __P((DB_MPOOL *, MPOOLFILE *, const char *, int, - * PUBLIC: int, int, size_t, int, DBT *, u_int8_t *, int, DB_MPOOLFILE **)); + * PUBLIC: int __memp_fopen __P((DB_MPOOL *, MPOOLFILE *, const char *, + * PUBLIC: u_int32_t, int, size_t, int, DB_MPOOL_FINFO *, DB_MPOOLFILE **)); */ int -__memp_fopen(dbmp, mfp, path, - ftype, flags, mode, pagesize, lsn_offset, pgcookie, fileid, needlock, retp) +__memp_fopen(dbmp, mfp, path, flags, mode, pagesize, needlock, finfop, retp) DB_MPOOL *dbmp; MPOOLFILE *mfp; const char *path; - int ftype, flags, mode, lsn_offset, needlock; + u_int32_t flags; + int mode, needlock; size_t pagesize; - DBT *pgcookie; - u_int8_t *fileid; + DB_MPOOL_FINFO *finfop; DB_MPOOLFILE **retp; { DB_ENV *dbenv; DB_MPOOLFILE *dbmfp; + DB_MPOOL_FINFO finfo; db_pgno_t last_pgno; size_t size; u_int32_t mbytes, bytes; @@ -91,18 +86,34 @@ __memp_fopen(dbmp, mfp, path, u_int8_t idbuf[DB_FILE_ID_LEN]; char *rpath; - /* - * XXX - * If mfp is provided, the following arguments do NOT need to be - * specified: - * lsn_offset - * pgcookie - * fileid - */ dbenv = dbmp->dbenv; ret = 0; rpath = NULL; + /* + * If mfp is provided, we take the DB_MPOOL_FINFO information from + * the mfp. We don't bother initializing everything, because some + * of them are expensive to acquire. If no mfp is provided and the + * finfop argument is NULL, we default the values. + */ + if (finfop == NULL) { + memset(&finfo, 0, sizeof(finfo)); + if (mfp != NULL) { + finfo.ftype = mfp->ftype; + finfo.pgcookie = NULL; + finfo.fileid = NULL; + finfo.lsn_offset = mfp->lsn_off; + finfo.clear_len = mfp->clear_len; + } else { + finfo.ftype = 0; + finfo.pgcookie = NULL; + finfo.fileid = NULL; + finfo.lsn_offset = -1; + finfo.clear_len = 0; + } + finfop = &finfo; + } + /* Allocate and initialize the per-process structure. 
*/ if ((dbmfp = (DB_MPOOLFILE *)__db_calloc(1, sizeof(DB_MPOOLFILE))) == NULL) { @@ -126,11 +137,11 @@ __memp_fopen(dbmp, mfp, path, } else { /* Get the real name for this file and open it. */ if ((ret = __db_appname(dbenv, - DB_APP_DATA, NULL, path, NULL, &rpath)) != 0) + DB_APP_DATA, NULL, path, 0, NULL, &rpath)) != 0) goto err; if ((ret = __db_open(rpath, - LF_ISSET(DB_CREATE | DB_RDONLY), DB_CREATE | DB_RDONLY, - mode, &dbmfp->fd)) != 0) { + LF_ISSET(DB_CREATE | DB_RDONLY), + DB_CREATE | DB_RDONLY, mode, &dbmfp->fd)) != 0) { __db_err(dbenv, "%s: %s", rpath, strerror(ret)); goto err; } @@ -156,12 +167,11 @@ __memp_fopen(dbmp, mfp, path, * don't use timestamps, otherwise there'd be no chance of any * other process joining the party. */ - if (mfp == NULL && fileid == NULL) { + if (finfop->fileid == NULL) { if ((ret = __db_fileid(dbenv, rpath, 0, idbuf)) != 0) goto err; - fileid = idbuf; + finfop->fileid = idbuf; } - FREES(rpath); } /* @@ -173,8 +183,8 @@ __memp_fopen(dbmp, mfp, path, LOCKREGION(dbmp); if (mfp == NULL) - ret = __memp_mf_open(dbmp, path, ftype, - pagesize, last_pgno, lsn_offset, pgcookie, fileid, &mfp); + ret = __memp_mf_open(dbmp, + path, pagesize, last_pgno, finfop, &mfp); else { ++mfp->ref; ret = 0; @@ -218,7 +228,7 @@ __memp_fopen(dbmp, mfp, path, F_CLR(mfp, MP_CAN_MMAP); if (path == NULL) F_CLR(mfp, MP_CAN_MMAP); - if (ftype != 0) + if (finfop->ftype != 0) F_CLR(mfp, MP_CAN_MMAP); if (LF_ISSET(DB_NOMMAP)) F_CLR(mfp, MP_CAN_MMAP); @@ -229,11 +239,14 @@ __memp_fopen(dbmp, mfp, path, dbmfp->addr = NULL; if (F_ISSET(mfp, MP_CAN_MMAP)) { dbmfp->len = size; - if (__db_map(dbmfp->fd, dbmfp->len, 1, 1, &dbmfp->addr) != 0) { + if (__db_mapfile(rpath, + dbmfp->fd, dbmfp->len, 1, &dbmfp->addr) != 0) { dbmfp->addr = NULL; F_CLR(mfp, MP_CAN_MMAP); } } + if (rpath != NULL) + FREES(rpath); LOCKHANDLE(dbmp, dbmp->mutexp); TAILQ_INSERT_TAIL(&dbmp->dbmfq, dbmfp, q); @@ -260,15 +273,12 @@ err: /* * Open an MPOOLFILE. 
*/ static int -__memp_mf_open(dbmp, path, - ftype, pagesize, last_pgno, lsn_offset, pgcookie, fileid, retp) +__memp_mf_open(dbmp, path, pagesize, last_pgno, finfop, retp) DB_MPOOL *dbmp; const char *path; - int ftype, lsn_offset; size_t pagesize; db_pgno_t last_pgno; - DBT *pgcookie; - u_int8_t *fileid; + DB_MPOOL_FINFO *finfop; MPOOLFILE **retp; { MPOOLFILE *mfp; @@ -286,12 +296,13 @@ __memp_mf_open(dbmp, path, mfp != NULL; mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile)) { if (F_ISSET(mfp, MP_TEMP)) continue; - if (!memcmp(fileid, + if (!memcmp(finfop->fileid, R_ADDR(dbmp, mfp->fileid_off), DB_FILE_ID_LEN)) { - if (ftype != mfp->ftype || + if (finfop->clear_len != mfp->clear_len || + finfop->ftype != mfp->ftype || pagesize != mfp->stat.st_pagesize) { __db_err(dbmp->dbenv, - "%s: ftype or pagesize changed", + "%s: ftype, clear length or pagesize changed", path); return (EINVAL); } @@ -311,8 +322,9 @@ __memp_mf_open(dbmp, path, /* Initialize the structure. */ memset(mfp, 0, sizeof(MPOOLFILE)); mfp->ref = 1; - mfp->ftype = ftype; - mfp->lsn_off = lsn_offset; + mfp->ftype = finfop->ftype; + mfp->lsn_off = finfop->lsn_offset; + mfp->clear_len = finfop->clear_len; /* * If the user specifies DB_MPOOL_LAST or DB_MPOOL_NEW on a memp_fget, @@ -320,7 +332,7 @@ __memp_mf_open(dbmp, path, * it away. */ mfp->stat.st_pagesize = pagesize; - mfp->last_pgno = last_pgno; + mfp->orig_last_pgno = mfp->last_pgno = last_pgno; F_SET(mfp, MP_CAN_MMAP); if (ISTEMPORARY) @@ -336,19 +348,19 @@ __memp_mf_open(dbmp, path, if ((ret = __memp_ralloc(dbmp, DB_FILE_ID_LEN, &mfp->fileid_off, &p)) != 0) goto err; - memcpy(p, fileid, DB_FILE_ID_LEN); + memcpy(p, finfop->fileid, DB_FILE_ID_LEN); } /* Copy the page cookie into shared memory. 
*/ - if (pgcookie == NULL || pgcookie->size == 0) { + if (finfop->pgcookie == NULL || finfop->pgcookie->size == 0) { mfp->pgcookie_len = 0; mfp->pgcookie_off = 0; } else { if ((ret = __memp_ralloc(dbmp, - pgcookie->size, &mfp->pgcookie_off, &p)) != 0) + finfop->pgcookie->size, &mfp->pgcookie_off, &p)) != 0) goto err; - memcpy(p, pgcookie->data, pgcookie->size); - mfp->pgcookie_len = pgcookie->size; + memcpy(p, finfop->pgcookie->data, finfop->pgcookie->size); + mfp->pgcookie_len = finfop->pgcookie->size; } /* Prepend the MPOOLFILE to the list of MPOOLFILE's. */ @@ -397,7 +409,7 @@ memp_fclose(dbmfp) /* Discard any mmap information. */ if (dbmfp->addr != NULL && - (ret = __db_unmap(dbmfp->addr, dbmfp->len)) != 0) + (ret = __db_unmapfile(dbmfp->addr, dbmfp->len)) != 0) __db_err(dbmp->dbenv, "%s: %s", __memp_fn(dbmfp), strerror(ret)); @@ -480,13 +492,13 @@ __memp_mf_close(dbmp, dbmfp) SH_TAILQ_REMOVE(&mp->mpfq, mfp, q, __mpoolfile); /* Free the space. */ - __db_shalloc_free(dbmp->addr, mfp); if (mfp->path_off != 0) __db_shalloc_free(dbmp->addr, R_ADDR(dbmp, mfp->path_off)); if (mfp->fileid_off != 0) __db_shalloc_free(dbmp->addr, R_ADDR(dbmp, mfp->fileid_off)); if (mfp->pgcookie_off != 0) __db_shalloc_free(dbmp->addr, R_ADDR(dbmp, mfp->pgcookie_off)); + __db_shalloc_free(dbmp->addr, mfp); ret1: UNLOCKREGION(dbmp); return (0); diff --git a/db2/mp/mp_fput.c b/db2/mp/mp_fput.c index 335ee9ff16..5675493137 100644 --- a/db2/mp/mp_fput.c +++ b/db2/mp/mp_fput.c @@ -1,20 +1,19 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. 
*/ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)mp_fput.c 10.17 (Sleepycat) 12/20/97"; +static const char sccsid[] = "@(#)mp_fput.c 10.22 (Sleepycat) 4/26/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES #include <sys/types.h> #include <errno.h> -#include <stdlib.h> #endif #include "db_int.h" @@ -31,12 +30,11 @@ int memp_fput(dbmfp, pgaddr, flags) DB_MPOOLFILE *dbmfp; void *pgaddr; - int flags; + u_int32_t flags; { BH *bhp; DB_MPOOL *dbmp; MPOOL *mp; - MPOOLFILE *mfp; int wrote, ret; dbmp = dbmfp->dbmp; @@ -71,8 +69,9 @@ memp_fput(dbmfp, pgaddr, flags) /* * If we're mapping the file, there's nothing to do. Because we can - * quit mapping at any time, we have to check on each buffer to see - * if it's in the map region. + * stop mapping the file at any time, we have to check on each buffer + * to see if the address we gave the application was part of the map + * region. */ if (dbmfp->addr != NULL && pgaddr >= dbmfp->addr && (u_int8_t *)pgaddr <= (u_int8_t *)dbmfp->addr + dbmfp->len) @@ -98,36 +97,33 @@ memp_fput(dbmfp, pgaddr, flags) F_SET(bhp, BH_DISCARD); /* - * If more than one reference to the page, we're done. Ignore discard - * flags (for now) and leave it at its position in the LRU chain. The - * rest gets done at last reference close. + * Check for a reference count going to zero. This can happen if the + * application returns a page twice. */ -#ifdef DEBUG if (bhp->ref == 0) { - __db_err(dbmp->dbenv, - "Unpinned page returned: reference count on page %lu went negative.", - (u_long)bhp->pgno); - abort(); + __db_err(dbmp->dbenv, "%s: page %lu: unpinned page returned", + __memp_fn(dbmfp), (u_long)bhp->pgno); + UNLOCKREGION(dbmp); + return (EINVAL); } -#endif + + /* + * If more than one reference to the page, we're done. Ignore the + * discard flags (for now) and leave it at its position in the LRU + * chain. The rest gets done at last reference close. 
+ */ if (--bhp->ref > 0) { UNLOCKREGION(dbmp); return (0); } - /* Move the buffer to the head/tail of the LRU chain. */ - SH_TAILQ_REMOVE(&mp->bhq, bhp, q, __bh); - if (F_ISSET(bhp, BH_DISCARD)) - SH_TAILQ_INSERT_HEAD(&mp->bhq, bhp, q, __bh); - else - SH_TAILQ_INSERT_TAIL(&mp->bhq, bhp, q); - /* - * If this buffer is scheduled for writing because of a checkpoint, - * write it now. If we can't write it, set a flag so that the next - * time the memp_sync function is called we try writing it there, - * as the checkpoint application better be able to write all of the - * files. + * If this buffer is scheduled for writing because of a checkpoint, we + * need to write it (if we marked it dirty), or update the checkpoint + * counters (if we didn't mark it dirty). If we try to write it and + * can't, that's not necessarily an error, but set a flag so that the + * next time the memp_sync function runs we try writing it there, as + * the checkpoint application better be able to write all of the files. */ if (F_ISSET(bhp, BH_WRITE)) if (F_ISSET(bhp, BH_DIRTY)) { @@ -137,12 +133,18 @@ memp_fput(dbmfp, pgaddr, flags) } else { F_CLR(bhp, BH_WRITE); - mfp = R_ADDR(dbmp, bhp->mf_offset); - --mfp->lsn_cnt; - + --dbmfp->mfp->lsn_cnt; --mp->lsn_cnt; } + /* Move the buffer to the head/tail of the LRU chain. */ + SH_TAILQ_REMOVE(&mp->bhq, bhp, q, __bh); + if (F_ISSET(bhp, BH_DISCARD)) + SH_TAILQ_INSERT_HEAD(&mp->bhq, bhp, q, __bh); + else + SH_TAILQ_INSERT_TAIL(&mp->bhq, bhp, q); + + UNLOCKREGION(dbmp); return (0); } diff --git a/db2/mp/mp_fset.c b/db2/mp/mp_fset.c index 2eff7dd74c..3b352aa553 100644 --- a/db2/mp/mp_fset.c +++ b/db2/mp/mp_fset.c @@ -1,13 +1,13 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. 
*/ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)mp_fset.c 10.12 (Sleepycat) 11/26/97"; +static const char sccsid[] = "@(#)mp_fset.c 10.15 (Sleepycat) 4/26/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -30,16 +30,14 @@ int memp_fset(dbmfp, pgaddr, flags) DB_MPOOLFILE *dbmfp; void *pgaddr; - int flags; + u_int32_t flags; { BH *bhp; DB_MPOOL *dbmp; MPOOL *mp; - MPOOLFILE *mfp; int ret; dbmp = dbmfp->dbmp; - mfp = dbmfp->mfp; mp = dbmp->mp; /* Validate arguments. */ diff --git a/db2/mp/mp_open.c b/db2/mp/mp_open.c index ca81f8d6d6..fc985bc521 100644 --- a/db2/mp/mp_open.c +++ b/db2/mp/mp_open.c @@ -1,23 +1,20 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. */ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)mp_open.c 10.16 (Sleepycat) 11/28/97"; +static const char sccsid[] = "@(#)mp_open.c 10.23 (Sleepycat) 5/3/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES #include <sys/types.h> #include <errno.h> -#include <fcntl.h> -#include <stdlib.h> #include <string.h> -#include <unistd.h> #endif #include "db_int.h" @@ -33,13 +30,14 @@ static const char sccsid[] = "@(#)mp_open.c 10.16 (Sleepycat) 11/28/97"; int memp_open(path, flags, mode, dbenv, retp) const char *path; - int flags, mode; + u_int32_t flags; + int mode; DB_ENV *dbenv; DB_MPOOL **retp; { DB_MPOOL *dbmp; size_t cachesize; - int ret; + int is_private, ret; /* Validate arguments. */ #ifdef HAVE_SPINLOCKS @@ -62,15 +60,16 @@ memp_open(path, flags, mode, dbenv, retp) dbmp->dbenv = dbenv; /* Decide if it's possible for anyone else to access the pool. */ - if ((dbenv == NULL && path == NULL) || LF_ISSET(DB_MPOOL_PRIVATE)) - F_SET(dbmp, MP_ISPRIVATE); + is_private = + (dbenv == NULL && path == NULL) || LF_ISSET(DB_MPOOL_PRIVATE); /* * Map in the region. 
We do locking regardless, as portions of it are * implemented in common code (if we put the region in a file, that is). */ F_SET(dbmp, MP_LOCKREGION); - if ((ret = __memp_ropen(dbmp, path, cachesize, mode, flags)) != 0) + if ((ret = __memp_ropen(dbmp, + path, cachesize, mode, is_private, LF_ISSET(DB_CREATE))) != 0) goto err; F_CLR(dbmp, MP_LOCKREGION); @@ -79,7 +78,7 @@ memp_open(path, flags, mode, dbenv, retp) * If it's threaded, then we have to lock both the handles and the * region, and we need to allocate a mutex for that purpose. */ - if (!F_ISSET(dbmp, MP_ISPRIVATE)) + if (!is_private) F_SET(dbmp, MP_LOCKREGION); if (LF_ISSET(DB_THREAD)) { F_SET(dbmp, MP_LOCKHANDLE | MP_LOCKREGION); @@ -135,10 +134,11 @@ memp_close(dbmp) } /* Close the region. */ - if ((t_ret = __memp_rclose(dbmp)) && ret == 0) + if ((t_ret = __db_rdetach(&dbmp->reginfo)) != 0 && ret == 0) ret = t_ret; - /* Discard the structure. */ + if (dbmp->reginfo.path != NULL) + FREES(dbmp->reginfo.path); FREE(dbmp, sizeof(DB_MPOOL)); return (ret); @@ -154,8 +154,19 @@ memp_unlink(path, force, dbenv) int force; DB_ENV *dbenv; { - return (__db_runlink(dbenv, - DB_APP_NONE, path, DB_DEFAULT_MPOOL_FILE, force)); + REGINFO reginfo; + int ret; + + memset(&reginfo, 0, sizeof(reginfo)); + reginfo.dbenv = dbenv; + reginfo.appname = DB_APP_NONE; + if (path != NULL && (reginfo.path = __db_strdup(path)) == NULL) + return (ENOMEM); + reginfo.file = DB_DEFAULT_MPOOL_FILE; + ret = __db_runlink(&reginfo, force); + if (reginfo.path != NULL) + FREES(reginfo.path); + return (ret); } /* diff --git a/db2/mp/mp_pr.c b/db2/mp/mp_pr.c index 13a6c62d35..e83e0f44fa 100644 --- a/db2/mp/mp_pr.c +++ b/db2/mp/mp_pr.c @@ -1,13 +1,13 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved.
*/ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)mp_pr.c 10.21 (Sleepycat) 1/6/98"; +static const char sccsid[] = "@(#)mp_pr.c 10.26 (Sleepycat) 5/23/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -15,20 +15,20 @@ static const char sccsid[] = "@(#)mp_pr.c 10.21 (Sleepycat) 1/6/98"; #include <errno.h> #include <stdio.h> -#include <stdlib.h> #include <string.h> #include <unistd.h> #endif #include "db_int.h" +#include "db_page.h" #include "shqueue.h" #include "db_shash.h" #include "mp.h" +#include "db_auto.h" +#include "db_ext.h" +#include "common_ext.h" -static void __memp_pbh __P((FILE *, DB_MPOOL *, BH *, int)); -static void __memp_pdbmf __P((FILE *, DB_MPOOLFILE *, int)); -static void __memp_pmf __P((FILE *, MPOOLFILE *, int)); -static void __memp_pmp __P((FILE *, DB_MPOOL *, MPOOL *, int)); +static void __memp_pbh __P((DB_MPOOL *, BH *, size_t *, FILE *)); /* * memp_stat -- @@ -64,6 +64,8 @@ memp_stat(dbmp, gspp, fspp, db_malloc) dbmp->mp->rlayout.lock.mutex_set_wait; (*gspp)->st_region_nowait = dbmp->mp->rlayout.lock.mutex_set_nowait; + (*gspp)->st_refcnt = dbmp->mp->rlayout.refcnt; + (*gspp)->st_regsize = dbmp->mp->rlayout.size; UNLOCKREGION(dbmp); } @@ -77,7 +79,8 @@ memp_stat(dbmp, gspp, fspp, db_malloc) for (len = 0, mfp = SH_TAILQ_FIRST(&dbmp->mp->mpfq, __mpoolfile); mfp != NULL; - ++len, mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile)); + ++len, mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile)) + ; UNLOCKREGION(dbmp); @@ -148,174 +151,118 @@ __memp_fns(dbmp, mfp) return ((char *)R_ADDR(dbmp, mfp->path_off)); } +#define FMAP_ENTRIES 200 /* Files we map. */ + +#define MPOOL_DUMP_HASH 0x01 /* Debug hash chains. */ +#define MPOOL_DUMP_LRU 0x02 /* Debug LRU chains. */ +#define MPOOL_DUMP_MEM 0x04 /* Debug region memory. */ +#define MPOOL_DUMP_ALL 0x07 /* Debug all. */ + + /* - * __memp_debug -- + * __memp_dump_region -- * Display MPOOL structures. 
* - * PUBLIC: void __memp_debug __P((DB_MPOOL *, FILE *, int)); + * PUBLIC: void __memp_dump_region __P((DB_MPOOL *, char *, FILE *)); */ void -__memp_debug(dbmp, fp, data) +__memp_dump_region(dbmp, area, fp) DB_MPOOL *dbmp; + char *area; FILE *fp; - int data; { + BH *bhp; + DB_HASHTAB *htabp; DB_MPOOLFILE *dbmfp; - u_long cnt; + MPOOL *mp; + MPOOLFILE *mfp; + size_t bucket, fmap[FMAP_ENTRIES + 1]; + u_int32_t flags; + int cnt; /* Make it easy to call from the debugger. */ if (fp == NULL) fp = stderr; - /* Welcome message. */ - (void)fprintf(fp, "%s\nMpool per-process (%lu) statistics\n", - DB_LINE, (u_long)getpid()); - - if (data) - (void)fprintf(fp, " fd: %d; addr %lx; maddr %lx\n", - dbmp->fd, (u_long)dbmp->addr, (u_long)dbmp->maddr); - - /* Display the DB_MPOOLFILE structures. */ - for (cnt = 0, dbmfp = TAILQ_FIRST(&dbmp->dbmfq); - dbmfp != NULL; ++cnt, dbmfp = TAILQ_NEXT(dbmfp, q)); - (void)fprintf(fp, "%lu process-local files\n", cnt); - for (dbmfp = TAILQ_FIRST(&dbmp->dbmfq); - dbmfp != NULL; dbmfp = TAILQ_NEXT(dbmfp, q)) { - (void)fprintf(fp, "%s\n", __memp_fn(dbmfp)); - __memp_pdbmf(fp, dbmfp, data); - } + for (flags = 0; *area != '\0'; ++area) + switch (*area) { + case 'A': + LF_SET(MPOOL_DUMP_ALL); + break; + case 'h': + LF_SET(MPOOL_DUMP_HASH); + break; + case 'l': + LF_SET(MPOOL_DUMP_LRU); + break; + case 'm': + LF_SET(MPOOL_DUMP_MEM); + break; + } - /* Switch to global statistics. */ - (void)fprintf(fp, "\n%s\nMpool statistics\n", DB_LINE); + LOCKREGION(dbmp); - /* Display the MPOOL structure. */ - __memp_pmp(fp, dbmp, dbmp->mp, data); + mp = dbmp->mp; - /* Flush in case we're debugging. */ - (void)fflush(fp); -} - -/* - * __memp_pdbmf -- - * Display a DB_MPOOLFILE structure. - */ -static void -__memp_pdbmf(fp, dbmfp, data) - FILE *fp; - DB_MPOOLFILE *dbmfp; - int data; -{ - if (!data) - return; - - (void)fprintf(fp, " fd: %d; %s\n", - dbmfp->fd, F_ISSET(dbmfp, MP_READONLY) ? 
"readonly" : "read/write"); -} - -/* - * __memp_pmp -- - * Display the MPOOL structure. - */ -static void -__memp_pmp(fp, dbmp, mp, data) - FILE *fp; - DB_MPOOL *dbmp; - MPOOL *mp; - int data; -{ - BH *bhp; - MPOOLFILE *mfp; - DB_HASHTAB *htabp; - size_t bucket; - int cnt; - const char *sep; - - (void)fprintf(fp, "references: %lu; cachesize: %lu\n", - (u_long)mp->rlayout.refcnt, (u_long)mp->stat.st_cachesize); - (void)fprintf(fp, - " %lu pages created\n", (u_long)mp->stat.st_page_create); - (void)fprintf(fp, - " %lu mmap pages returned\n", (u_long)mp->stat.st_map); - (void)fprintf(fp, " %lu I/O's (%lu read, %lu written)\n", - (u_long)mp->stat.st_page_in + mp->stat.st_page_out, - (u_long)mp->stat.st_page_in, (u_long)mp->stat.st_page_out); - if (mp->stat.st_cache_hit + mp->stat.st_cache_miss != 0) - (void)fprintf(fp, - " %.0f%% cache hit rate (%lu hit, %lu miss)\n", - ((double)mp->stat.st_cache_hit / - (mp->stat.st_cache_hit + mp->stat.st_cache_miss)) * 100, - (u_long)mp->stat.st_cache_hit, - (u_long)mp->stat.st_cache_miss); + /* Display MPOOL structures. */ + (void)fprintf(fp, "%s\nPool (region addr 0x%lx, alloc addr 0x%lx)\n", + DB_LINE, (u_long)dbmp->reginfo.addr, (u_long)dbmp->addr); /* Display the MPOOLFILE structures. */ - for (cnt = 0, mfp = SH_TAILQ_FIRST(&dbmp->mp->mpfq, __mpoolfile); - mfp != NULL; ++cnt, mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile)); - (void)fprintf(fp, "%d total files\n", cnt); - for (cnt = 1, mfp = SH_TAILQ_FIRST(&dbmp->mp->mpfq, __mpoolfile); - mfp != NULL; ++cnt, mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile)) { - (void)fprintf(fp, "file %d\n", cnt); - __memp_pmf(fp, mfp, data); + cnt = 0; + for (mfp = SH_TAILQ_FIRST(&dbmp->mp->mpfq, __mpoolfile); + mfp != NULL; mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile), ++cnt) { + (void)fprintf(fp, "file #%d: %s: %lu references: %s\n", + cnt + 1, __memp_fns(dbmp, mfp), (u_long)mfp->ref, + F_ISSET(mfp, MP_CAN_MMAP) ? 
"mmap" : "read/write"); + if (cnt < FMAP_ENTRIES) + fmap[cnt] = R_OFFSET(dbmp, mfp); } - if (!data) - return; + for (dbmfp = TAILQ_FIRST(&dbmp->dbmfq); + dbmfp != NULL; dbmfp = TAILQ_NEXT(dbmfp, q), ++cnt) { + (void)fprintf(fp, "file #%d: %s: fd: %d: per-process, %s\n", + cnt + 1, __memp_fn(dbmfp), dbmfp->fd, + F_ISSET(dbmfp, MP_READONLY) ? "readonly" : "read/write"); + if (cnt < FMAP_ENTRIES) + fmap[cnt] = R_OFFSET(dbmp, mfp); + } + if (cnt < FMAP_ENTRIES) + fmap[cnt] = INVALID; + else + fmap[FMAP_ENTRIES] = INVALID; /* Display the hash table list of BH's. */ - (void)fprintf(fp, "%s\nHASH table of BH's (%lu buckets):\n", - DB_LINE, (u_long)mp->htab_buckets); - (void)fprintf(fp, - "longest chain searched %lu\n", (u_long)mp->stat.st_hash_longest); - (void)fprintf(fp, "average chain searched %lu (total/calls: %lu/%lu)\n", - (u_long)mp->stat.st_hash_examined / - (mp->stat.st_hash_searches ? mp->stat.st_hash_searches : 1), - (u_long)mp->stat.st_hash_examined, - (u_long)mp->stat.st_hash_searches); - for (htabp = dbmp->htab, - bucket = 0; bucket < mp->htab_buckets; ++htabp, ++bucket) { - if (SH_TAILQ_FIRST(&dbmp->htab[bucket], __bh) != NULL) - (void)fprintf(fp, "%lu:\n", (u_long)bucket); - for (bhp = SH_TAILQ_FIRST(&dbmp->htab[bucket], __bh); - bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, hq, __bh)) - __memp_pbh(fp, dbmp, bhp, data); + if (LF_ISSET(MPOOL_DUMP_HASH)) { + (void)fprintf(fp, + "%s\nBH hash table (%lu hash slots)\npageno, file, ref, address\n", + DB_LINE, (u_long)mp->htab_buckets); + for (htabp = dbmp->htab, + bucket = 0; bucket < mp->htab_buckets; ++htabp, ++bucket) { + if (SH_TAILQ_FIRST(&dbmp->htab[bucket], __bh) != NULL) + (void)fprintf(fp, "%lu:\n", (u_long)bucket); + for (bhp = SH_TAILQ_FIRST(&dbmp->htab[bucket], __bh); + bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, hq, __bh)) + __memp_pbh(dbmp, bhp, fmap, fp); + } } /* Display the LRU list of BH's. 
*/ - (void)fprintf(fp, "LRU list of BH's (pgno/offset):"); - for (sep = "\n ", bhp = SH_TAILQ_FIRST(&dbmp->mp->bhq, __bh); - bhp != NULL; sep = ", ", bhp = SH_TAILQ_NEXT(bhp, q, __bh)) - (void)fprintf(fp, "%s%lu/%lu", sep, - (u_long)bhp->pgno, (u_long)R_OFFSET(dbmp, bhp)); - (void)fprintf(fp, "\n"); -} + if (LF_ISSET(MPOOL_DUMP_LRU)) { + (void)fprintf(fp, "%s\nBH LRU list\n", DB_LINE); + (void)fprintf(fp, "pageno, file, ref, address\n"); + for (bhp = SH_TAILQ_FIRST(&dbmp->mp->bhq, __bh); + bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, q, __bh)) + __memp_pbh(dbmp, bhp, fmap, fp); + } -/* - * __memp_pmf -- - * Display an MPOOLFILE structure. - */ -static void -__memp_pmf(fp, mfp, data) - FILE *fp; - MPOOLFILE *mfp; - int data; -{ - (void)fprintf(fp, " %lu pages created\n", - (u_long)mfp->stat.st_page_create); - (void)fprintf(fp, " %lu I/O's (%lu read, %lu written)\n", - (u_long)mfp->stat.st_page_in + mfp->stat.st_page_out, - (u_long)mfp->stat.st_page_in, (u_long)mfp->stat.st_page_out); - if (mfp->stat.st_cache_hit + mfp->stat.st_cache_miss != 0) - (void)fprintf(fp, - " %.0f%% cache hit rate (%lu hit, %lu miss)\n", - ((double)mfp->stat.st_cache_hit / - (mfp->stat.st_cache_hit + mfp->stat.st_cache_miss)) * 100, - (u_long)mfp->stat.st_cache_hit, - (u_long)mfp->stat.st_cache_miss); - if (!data) - return; - - (void)fprintf(fp, " %d references; %s; pagesize: %lu\n", mfp->ref, - F_ISSET(mfp, MP_CAN_MMAP) ? "mmap" : "read/write", - (u_long)mfp->stat.st_pagesize); + if (LF_ISSET(MPOOL_DUMP_MEM)) + __db_shalloc_dump(dbmp->addr, fp); + + UNLOCKREGION(dbmp); + + /* Flush in case we're debugging. */ + (void)fflush(fp); } /* @@ -323,28 +270,37 @@ __memp_pmf(fp, mfp, data) * Display a BH structure. 
*/ static void -__memp_pbh(fp, dbmp, bhp, data) - FILE *fp; +__memp_pbh(dbmp, bhp, fmap, fp) DB_MPOOL *dbmp; BH *bhp; - int data; + size_t *fmap; + FILE *fp; { - const char *sep; - - if (!data) - return; - - (void)fprintf(fp, " BH @ %lu (mf: %lu): page %lu; ref %lu", - (u_long)R_OFFSET(dbmp, bhp), - (u_long)bhp->mf_offset, (u_long)bhp->pgno, (u_long)bhp->ref); - sep = "; "; - if (F_ISSET(bhp, BH_DIRTY)) { - (void)fprintf(fp, "%sdirty", sep); - sep = ", "; - } - if (F_ISSET(bhp, BH_WRITE)) { - (void)fprintf(fp, "%schk_write", sep); - sep = ", "; - } + static const FN fn[] = { + { BH_CALLPGIN, "callpgin" }, + { BH_DIRTY, "dirty" }, + { BH_DISCARD, "discard" }, + { BH_LOCKED, "locked" }, + { BH_TRASH, "trash" }, + { BH_WRITE, "write" }, + { 0 }, + }; + int i; + + for (i = 0; i < FMAP_ENTRIES; ++i) + if (fmap[i] == INVALID || fmap[i] == bhp->mf_offset) + break; + + if (fmap[i] == INVALID) + (void)fprintf(fp, " %4lu, %lu, %2lu, %lu", + (u_long)bhp->pgno, (u_long)bhp->mf_offset, + (u_long)bhp->ref, (u_long)R_OFFSET(dbmp, bhp)); + else + (void)fprintf(fp, " %4lu, #%d, %2lu, %lu", + (u_long)bhp->pgno, i + 1, + (u_long)bhp->ref, (u_long)R_OFFSET(dbmp, bhp)); + + __db_prflags(bhp->flags, fn, fp); + (void)fprintf(fp, "\n"); } diff --git a/db2/mp/mp_region.c b/db2/mp/mp_region.c index c20e669749..6b92fbdad4 100644 --- a/db2/mp/mp_region.c +++ b/db2/mp/mp_region.c @@ -1,24 +1,20 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. 
*/ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)mp_region.c 10.18 (Sleepycat) 11/29/97"; +static const char sccsid[] = "@(#)mp_region.c 10.30 (Sleepycat) 5/31/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES #include <sys/types.h> -#include <sys/stat.h> #include <errno.h> -#include <fcntl.h> -#include <stdlib.h> #include <string.h> -#include <unistd.h> #endif #include "db_int.h" @@ -86,7 +82,7 @@ alloc: if ((ret = __db_shalloc(dbmp->addr, len, MUTEX_ALIGNMENT, &p)) == 0) { /* * Retry as soon as we've freed up sufficient space. If we - * have to coalesce of memory to satisfy the request, don't + * will have to coalesce memory to satisfy the request, don't * try until it's likely (possible?) that we'll succeed. */ total += fsize = __db_shsizeof(bhp); @@ -179,18 +175,19 @@ retry: /* Find a buffer we can flush; pure LRU. */ * Attach to, and optionally create, the mpool region. * * PUBLIC: int __memp_ropen - * PUBLIC: __P((DB_MPOOL *, const char *, size_t, int, int)); + * PUBLIC: __P((DB_MPOOL *, const char *, size_t, int, int, u_int32_t)); */ int -__memp_ropen(dbmp, path, cachesize, mode, flags) +__memp_ropen(dbmp, path, cachesize, mode, is_private, flags) DB_MPOOL *dbmp; const char *path; size_t cachesize; - int mode, flags; + int mode, is_private; + u_int32_t flags; { MPOOL *mp; size_t rlen; - int fd, newregion, ret, retry_cnt; + int defcache, ret; /* * Unlike other DB subsystems, mpool can't simply grow the region @@ -204,155 +201,107 @@ __memp_ropen(dbmp, path, cachesize, mode, flags) * * Up the user's cachesize by 25% to account for our overhead. */ + defcache = 0; if (cachesize < DB_CACHESIZE_MIN) - if (cachesize == 0) + if (cachesize == 0) { + defcache = 1; cachesize = DB_CACHESIZE_DEF; - else + } else cachesize = DB_CACHESIZE_MIN; rlen = cachesize + cachesize / 4; - /* Map in the region. 
*/ - retry_cnt = newregion = 0; -retry: if (LF_ISSET(DB_CREATE)) { - /* - * If it's a private mpool, use malloc, it's a lot faster than - * instantiating a region. - * - * XXX - * If we're doing locking and don't have spinlocks for this - * architecture, we'd have to instantiate the file, we need - * the file descriptor for locking. However, it should not - * be possible for DB_THREAD to be set if HAVE_SPINLOCKS aren't - * defined. - * - * XXX - * HP-UX won't permit mutexes to live in anything but shared - * memory. So, instantiate the shared mpool region file on - * that architecture, regardless. If this turns out to be a - * performance problem, we could use anonymous memory instead. - */ -#if !defined(__hppa) - if (F_ISSET(dbmp, MP_ISPRIVATE)) - if ((dbmp->maddr = __db_malloc(rlen)) == NULL) - ret = ENOMEM; - else { - F_SET(dbmp, MP_MALLOC); - ret = __db_rinit(dbmp->dbenv, - dbmp->maddr, 0, rlen, 0); - } - else -#endif - ret = __db_rcreate(dbmp->dbenv, DB_APP_NONE, path, - DB_DEFAULT_MPOOL_FILE, mode, rlen, - F_ISSET(dbmp, MP_ISPRIVATE) ? DB_TEMPORARY : 0, - &fd, &dbmp->maddr); - if (ret == 0) { - /* Put the MPOOL structure first in the region. */ - mp = dbmp->maddr; - - SH_TAILQ_INIT(&mp->bhq); - SH_TAILQ_INIT(&mp->bhfq); - SH_TAILQ_INIT(&mp->mpfq); - - /* Initialize the rest of the region as free space. */ - dbmp->addr = (u_int8_t *)dbmp->maddr + sizeof(MPOOL); - __db_shalloc_init(dbmp->addr, rlen - sizeof(MPOOL)); - - /* - * - * Pretend that the cache will be broken up into 4K - * pages, and that we want to keep it under, say, 10 - * pages on each chain. This means a 256MB cache will - * allocate ~6500 offset pairs. - */ - mp->htab_buckets = - __db_tablesize((cachesize / (4 * 1024)) / 10); + /* + * Map in the region. + * + * If it's a private mpool, use malloc, it's a lot faster than + * instantiating a region. 
+ */ + dbmp->reginfo.dbenv = dbmp->dbenv; + dbmp->reginfo.appname = DB_APP_NONE; + if (path == NULL) + dbmp->reginfo.path = NULL; + else + if ((dbmp->reginfo.path = __db_strdup(path)) == NULL) + return (ENOMEM); + dbmp->reginfo.file = DB_DEFAULT_MPOOL_FILE; + dbmp->reginfo.mode = mode; + dbmp->reginfo.size = rlen; + dbmp->reginfo.dbflags = flags; + dbmp->reginfo.flags = 0; + if (defcache) + F_SET(&dbmp->reginfo, REGION_SIZEDEF); - /* Allocate hash table space and initialize it. */ - if ((ret = __db_shalloc(dbmp->addr, - mp->htab_buckets * sizeof(DB_HASHTAB), - 0, &dbmp->htab)) != 0) - goto err; - __db_hashinit(dbmp->htab, mp->htab_buckets); - mp->htab = R_OFFSET(dbmp, dbmp->htab); + /* + * If we're creating a temporary region, don't use any standard + * naming. + */ + if (is_private) { + dbmp->reginfo.appname = DB_APP_TMP; + dbmp->reginfo.file = NULL; + F_SET(&dbmp->reginfo, REGION_PRIVATE); + } - ZERO_LSN(mp->lsn); - mp->lsn_cnt = 0; + if ((ret = __db_rattach(&dbmp->reginfo)) != 0) { + if (dbmp->reginfo.path != NULL) + FREES(dbmp->reginfo.path); + return (ret); + } - memset(&mp->stat, 0, sizeof(mp->stat)); - mp->stat.st_cachesize = cachesize; + /* + * The MPOOL structure is first in the region, the rest of the region + * is free space. + */ + dbmp->mp = dbmp->reginfo.addr; + dbmp->addr = (u_int8_t *)dbmp->mp + sizeof(MPOOL); - mp->flags = 0; + /* Initialize a created region. */ + if (F_ISSET(&dbmp->reginfo, REGION_CREATED)) { + mp = dbmp->mp; + SH_TAILQ_INIT(&mp->bhq); + SH_TAILQ_INIT(&mp->bhfq); + SH_TAILQ_INIT(&mp->mpfq); - newregion = 1; - } else if (ret != EEXIST) - return (ret); - } + __db_shalloc_init(dbmp->addr, rlen - sizeof(MPOOL)); - /* If we didn't or couldn't create the region, try and join it. */ - if (!newregion && - (ret = __db_ropen(dbmp->dbenv, DB_APP_NONE, - path, DB_DEFAULT_MPOOL_FILE, 0, &fd, &dbmp->maddr)) != 0) { /* - * If we failed because the file wasn't available, wait a - * second and try again. 
+ * Assume we want to keep the hash chains with under 10 pages + * on each chain. We don't know the pagesize in advance, and + * it may differ for different files. Use a pagesize of 1K for + * the calculation -- we walk these chains a lot, they should + * be short. */ - if (ret == EAGAIN && ++retry_cnt < 3) { - (void)__db_sleep(1, 0); - goto retry; - } - return (ret); - } + mp->htab_buckets = + __db_tablesize((cachesize / (1 * 1024)) / 10); - /* Set up the common pointers. */ - dbmp->mp = dbmp->maddr; - dbmp->addr = (u_int8_t *)dbmp->maddr + sizeof(MPOOL); + /* Allocate hash table space and initialize it. */ + if ((ret = __db_shalloc(dbmp->addr, + mp->htab_buckets * sizeof(DB_HASHTAB), + 0, &dbmp->htab)) != 0) + goto err; + __db_hashinit(dbmp->htab, mp->htab_buckets); + mp->htab = R_OFFSET(dbmp, dbmp->htab); - /* - * If not already locked, lock the region -- if it's a new region, - * then either __db_rcreate() locked it for us or we malloc'd it - * instead of creating a region, neither of which requires locking - * here. - */ - if (!newregion) - LOCKREGION(dbmp); + ZERO_LSN(mp->lsn); + mp->lsn_cnt = 0; - /* - * Get the hash table address; it's on the shared page, so we have - * to lock first. - */ - dbmp->htab = R_ADDR(dbmp, dbmp->mp->htab); + memset(&mp->stat, 0, sizeof(mp->stat)); + mp->stat.st_cachesize = cachesize; - dbmp->fd = fd; + mp->flags = 0; + } - /* If we locked the region, release it now. */ - if (!F_ISSET(dbmp, MP_MALLOC)) - UNLOCKREGION(dbmp); - return (0); + /* Get the local hash table address. */ + dbmp->htab = R_ADDR(dbmp, dbmp->mp->htab); -err: if (fd != -1) { - dbmp->fd = fd; - (void)__memp_rclose(dbmp); - } + UNLOCKREGION(dbmp); + return (0); - if (newregion) +err: UNLOCKREGION(dbmp); + (void)__db_rdetach(&dbmp->reginfo); + if (F_ISSET(&dbmp->reginfo, REGION_CREATED)) (void)memp_unlink(path, 1, dbmp->dbenv); - return (ret); -} -/* - * __memp_rclose -- - * Close the mpool region. 
- * - * PUBLIC: int __memp_rclose __P((DB_MPOOL *)); - */ -int -__memp_rclose(dbmp) - DB_MPOOL *dbmp; -{ - if (F_ISSET(dbmp, MP_MALLOC)) { - __db_free(dbmp->maddr); - return (0); - } - return (__db_rclose(dbmp->dbenv, dbmp->fd, dbmp->maddr)); + if (dbmp->reginfo.path != NULL) + FREES(dbmp->reginfo.path); + return (ret); } diff --git a/db2/mp/mp_sync.c b/db2/mp/mp_sync.c index 6d16cf3cd4..33218eef1a 100644 --- a/db2/mp/mp_sync.c +++ b/db2/mp/mp_sync.c @@ -1,13 +1,13 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. */ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)mp_sync.c 10.19 (Sleepycat) 12/3/97"; +static const char sccsid[] = "@(#)mp_sync.c 10.25 (Sleepycat) 4/26/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -15,7 +15,6 @@ static const char sccsid[] = "@(#)mp_sync.c 10.19 (Sleepycat) 12/3/97"; #include <errno.h> #include <stdlib.h> -#include <string.h> #endif #include "db_int.h" @@ -25,6 +24,7 @@ static const char sccsid[] = "@(#)mp_sync.c 10.19 (Sleepycat) 12/3/97"; #include "common_ext.h" static int __bhcmp __P((const void *, const void *)); +static int __memp_fsync __P((DB_MPOOLFILE *)); /* * memp_sync -- @@ -145,7 +145,8 @@ memp_sync(dbmp, lsnp) bharray[ar_cnt++] = bhp; } } else - F_CLR(bhp, BH_WRITE); + if (F_ISSET(bhp, BH_WRITE)) + F_CLR(bhp, BH_WRITE); /* If there no buffers we can write immediately, we're done. 
*/ if (ar_cnt == 0) { @@ -235,10 +236,8 @@ int memp_fsync(dbmfp) DB_MPOOLFILE *dbmfp; { - BH *bhp, **bharray; DB_MPOOL *dbmp; - size_t mf_offset; - int ar_cnt, cnt, nalloc, next, pincnt, ret, wrote; + int is_tmp; dbmp = dbmfp->dbmp; @@ -250,14 +249,62 @@ memp_fsync(dbmfp) if (F_ISSET(dbmfp, MP_READONLY)) return (0); - ret = 0; LOCKREGION(dbmp); - if (F_ISSET(dbmfp->mfp, MP_TEMP)) - ret = 1; + is_tmp = F_ISSET(dbmfp->mfp, MP_TEMP); UNLOCKREGION(dbmp); - if (ret) + if (is_tmp) return (0); + return (__memp_fsync(dbmfp)); +} + +/* + * __mp_xxx_fd -- + * Return a file descriptor for DB 1.85 compatibility locking. + * + * PUBLIC: int __mp_xxx_fd __P((DB_MPOOLFILE *, int *)); + */ +int +__mp_xxx_fd(dbmfp, fdp) + DB_MPOOLFILE *dbmfp; + int *fdp; +{ + int ret; + + /* + * This is a truly spectacular layering violation, intended ONLY to + * support compatibility for the DB 1.85 DB->fd call. + * + * Sync the database file to disk, creating the file as necessary. + * + * We skip the MP_READONLY and MP_TEMP tests done by memp_fsync(3). + * The MP_READONLY test isn't interesting because we will either + * already have a file descriptor (we opened the database file for + * reading) or we aren't readonly (we created the database which + * requires write privileges). The MP_TEMP test isn't interesting + * because we want to write to the backing file regardless so that + * we get a file descriptor to return. + */ + ret = dbmfp->fd == -1 ? __memp_fsync(dbmfp) : 0; + + return ((*fdp = dbmfp->fd) == -1 ? ENOENT : ret); +} + +/* + * __memp_fsync -- + * Mpool file internal sync function. + */ +static int +__memp_fsync(dbmfp) + DB_MPOOLFILE *dbmfp; +{ + BH *bhp, **bharray; + DB_MPOOL *dbmp; + size_t mf_offset; + int ar_cnt, cnt, nalloc, next, pincnt, ret, wrote; + + ret = 0; + dbmp = dbmfp->dbmp; mf_offset = R_OFFSET(dbmp, dbmfp->mfp); /* @@ -359,7 +406,6 @@ err: UNLOCKREGION(dbmp); if (ret == 0) return (pincnt == 0 ? 
__db_fsync(dbmfp->fd) : DB_INCOMPLETE); return (ret); - } /* @@ -453,8 +499,8 @@ __bhcmp(p1, p2) { BH *bhp1, *bhp2; - bhp1 = *(BH **)p1; - bhp2 = *(BH **)p2; + bhp1 = *(BH * const *)p1; + bhp2 = *(BH * const *)p2; /* Sort by file (shared memory pool offset). */ if (bhp1->mf_offset < bhp2->mf_offset) diff --git a/db2/mutex/68020.gcc b/db2/mutex/68020.gcc index 9d8be641d8..21410e61d4 100644 --- a/db2/mutex/68020.gcc +++ b/db2/mutex/68020.gcc @@ -1,5 +1,5 @@ /* - * @(#)68020.gcc 10.1 (Sleepycat) 4/12/97 + * @(#)68020.gcc 10.2 (Sleepycat) 2/15/98 * * For gcc/68K, 0 is clear, 1 is set. */ @@ -16,4 +16,3 @@ #define TSL_UNSET(tsl) (*(tsl) = 0) #define TSL_INIT(tsl) TSL_UNSET(tsl) - diff --git a/db2/mutex/mutex.c b/db2/mutex/mutex.c index 6dca323113..de0d0e23fe 100644 --- a/db2/mutex/mutex.c +++ b/db2/mutex/mutex.c @@ -1,14 +1,14 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. */ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)mutex.c 10.32 (Sleepycat) 1/16/98"; +static const char sccsid[] = "@(#)mutex.c 10.48 (Sleepycat) 5/23/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -16,14 +16,12 @@ static const char sccsid[] = "@(#)mutex.c 10.32 (Sleepycat) 1/16/98"; #include <errno.h> #include <fcntl.h> -#include <stdio.h> #include <stdlib.h> #include <string.h> #include <unistd.h> #endif #include "db_int.h" -#include "common_ext.h" #ifdef HAVE_SPINLOCKS @@ -43,13 +41,21 @@ static const char sccsid[] = "@(#)mutex.c 10.32 (Sleepycat) 1/16/98"; * Should we not use MSEM_IF_NOWAIT and let the system block for us? * I've no idea if this will block all threads in the process or not. 
*/ -#define TSL_INIT(x) msem_init(x, MSEM_UNLOCKED) +#define TSL_INIT(x) (msem_init(x, MSEM_UNLOCKED) == NULL) +#define TSL_INIT_ERROR 1 #define TSL_SET(x) (!msem_lock(x, MSEM_IF_NOWAIT)) #define TSL_UNSET(x) msem_unlock(x, 0) #endif +#ifdef HAVE_FUNC_RELIANT +#define TSL_INIT(x) initspin(x, 1) +#define TSL_SET(x) (cspinlock(x) == 0) +#define TSL_UNSET(x) spinunlock(x) +#endif + #ifdef HAVE_FUNC_SGI -#define TSL_INIT(x) init_lock(x) +#define TSL_INIT(x) (init_lock(x) != 0) +#define TSL_INIT_ERROR 1 #define TSL_SET(x) (!acquire_lock(x)) #define TSL_UNSET(x) release_lock(x) #endif @@ -58,7 +64,8 @@ static const char sccsid[] = "@(#)mutex.c 10.32 (Sleepycat) 1/16/98"; /* * Semaphore calls don't work on Solaris 5.5. * - * #define TSL_INIT(x) sema_init(x, 1, USYNC_PROCESS, NULL) + * #define TSL_INIT(x) (sema_init(x, 1, USYNC_PROCESS, NULL) != 0) + * #define TSL_INIT_ERROR 1 * #define TSL_SET(x) (sema_wait(x) == 0) * #define TSL_UNSET(x) sema_post(x) */ @@ -67,6 +74,10 @@ static const char sccsid[] = "@(#)mutex.c 10.32 (Sleepycat) 1/16/98"; #define TSL_UNSET(x) _lock_clear(x) #endif +#ifdef HAVE_ASSEM_PARISC_GCC +#include "parisc.gcc" +#endif + #ifdef HAVE_ASSEM_SCO_CC #include "sco.cc" #endif @@ -85,17 +96,20 @@ static const char sccsid[] = "@(#)mutex.c 10.32 (Sleepycat) 1/16/98"; #include "x86.gcc" #endif -#if defined(_WIN32) -/* DBDB this needs to be byte-aligned!! */ +#ifdef WIN16 +/* Win16 spinlocks are simple because we cannot possibly be preempted. */ #define TSL_INIT(tsl) -#define TSL_SET(tsl) (!InterlockedExchange((PLONG)tsl, 1)) +#define TSL_SET(tsl) (*(tsl) = 1) #define TSL_UNSET(tsl) (*(tsl) = 0) #endif -#ifdef macintosh -/* Mac spinlocks are simple because we cannot possibly be preempted. */ +#if defined(_WIN32) +/* + * XXX + * DBDB this needs to be byte-aligned!! 
+ */ #define TSL_INIT(tsl) -#define TSL_SET(tsl) (*(tsl) = 1) +#define TSL_SET(tsl) (!InterlockedExchange((PLONG)tsl, 1)) #define TSL_UNSET(tsl) (*(tsl) = 0) #endif @@ -105,14 +119,14 @@ static const char sccsid[] = "@(#)mutex.c 10.32 (Sleepycat) 1/16/98"; * __db_mutex_init -- * Initialize a DB mutex structure. * - * PUBLIC: void __db_mutex_init __P((db_mutex_t *, u_int32_t)); + * PUBLIC: int __db_mutex_init __P((db_mutex_t *, u_int32_t)); */ -void +int __db_mutex_init(mp, off) db_mutex_t *mp; u_int32_t off; { -#ifdef DEBUG +#ifdef DIAGNOSTIC if ((ALIGNTYPE)mp & (MUTEX_ALIGNMENT - 1)) { (void)fprintf(stderr, "MUTEX ERROR: mutex NOT %d-byte aligned!\n", @@ -125,11 +139,17 @@ __db_mutex_init(mp, off) #ifdef HAVE_SPINLOCKS COMPQUIET(off, 0); +#ifdef TSL_INIT_ERROR + if (TSL_INIT(&mp->tsl_resource)) + return (errno); +#else TSL_INIT(&mp->tsl_resource); +#endif mp->spins = __os_spin(); #else mp->off = off; #endif + return (0); } #define MS(n) ((n) * 1000) /* Milliseconds to micro-seconds. */ @@ -147,17 +167,25 @@ __db_mutex_lock(mp, fd) int fd; { u_long usecs; - #ifdef HAVE_SPINLOCKS int nspins; +#else + struct flock k_lock; + pid_t mypid; + int locked; +#endif + + if (!DB_GLOBAL(db_mutexlocks)) + return (0); +#ifdef HAVE_SPINLOCKS COMPQUIET(fd, 0); for (usecs = MS(10);;) { /* Try and acquire the uncontested resource lock for N spins. */ for (nspins = mp->spins; nspins > 0; --nspins) if (TSL_SET(&mp->tsl_resource)) { -#ifdef DEBUG +#ifdef DIAGNOSTIC if (mp->pid != 0) { (void)fprintf(stderr, "MUTEX ERROR: __db_mutex_lock: lock currently locked\n"); @@ -182,9 +210,6 @@ __db_mutex_lock(mp, fd) /* NOTREACHED */ #else /* !HAVE_SPINLOCKS */ - struct flock k_lock; - pid_t mypid; - int locked; /* Initialize the lock. 
*/ k_lock.l_whence = SEEK_SET; @@ -246,7 +271,10 @@ __db_mutex_unlock(mp, fd) db_mutex_t *mp; int fd; { -#ifdef DEBUG + if (!DB_GLOBAL(db_mutexlocks)) + return (0); + +#ifdef DIAGNOSTIC if (mp->pid == 0) { (void)fprintf(stderr, "MUTEX ERROR: __db_mutex_unlock: lock already unlocked\n"); @@ -257,7 +285,7 @@ __db_mutex_unlock(mp, fd) #ifdef HAVE_SPINLOCKS COMPQUIET(fd, 0); -#ifdef DEBUG +#ifdef DIAGNOSTIC mp->pid = 0; #endif diff --git a/db2/mutex/parisc.gcc b/db2/mutex/parisc.gcc index e15f6f2dba..2e4540f767 100644 --- a/db2/mutex/parisc.gcc +++ b/db2/mutex/parisc.gcc @@ -1,5 +1,5 @@ -/* - * @(#)parisc.gcc 8.5 (Sleepycat) 1/18/97 +/* + * @(#)parisc.gcc 8.8 (Sleepycat) 6/2/98 * * Copyright (c) 1996-1997, The University of Utah and the Computer Systems * Laboratory at the University of Utah (CSL). All rights reserved. @@ -22,19 +22,15 @@ /* * The PA-RISC has a "load and clear" instead of a "test and set" instruction. - * The 32-bit word used by that instruction must be 16-byte aligned hence we - * allocate 16 bytes for a tsl_t and use the word that is properly aligned. - * We could use the "aligned" attribute in GCC but that doesn't work for stack - * variables. + * The 32-bit word used by that instruction must be 16-byte aligned. We could + * use the "aligned" attribute in GCC but that doesn't work for stack variables. 
*/ #define TSL_SET(tsl) ({ \ - int *__l = (int *)(((int)(tsl)+15)&~15); \ + register tsl_t *__l = (tsl); \ int __r; \ asm volatile("ldcws 0(%1),%0" : "=r" (__r) : "r" (__l)); \ __r & 1; \ }) -#define TSL_UNSET(tsl) ({ \ - int *__l = (int *)(((int)(tsl)+15)&~15); \ - *__l = -1; \ -}) +#define TSL_UNSET(tsl) (*(tsl) = -1) +#define TSL_INIT(tsl) TSL_UNSET(tsl) diff --git a/db2/mutex/parisc.hp b/db2/mutex/parisc.hp index d10807b7f1..bd0e37fc78 100644 --- a/db2/mutex/parisc.hp +++ b/db2/mutex/parisc.hp @@ -1,5 +1,5 @@ -/* - * @(#)parisc.hp 8.5 (Sleepycat) 1/18/97 +/* + * @(#)parisc.hp 8.6 (Sleepycat) 6/2/98 * * Copyright (c) 1996-1997, The University of Utah and the Computer Systems * Laboratory at the University of Utah (CSL). All rights reserved. diff --git a/db2/mutex/sco.cc b/db2/mutex/sco.cc new file mode 100644 index 0000000000..7c165a2072 --- /dev/null +++ b/db2/mutex/sco.cc @@ -0,0 +1,24 @@ +/* + * @(#)x86.uslc + * + * UnixWare has threads in libthread, but OpenServer doesn't (yet). + * + * For cc/x86, 0 is clear, 1 is set. + */ + +#if defined(__USLC__) +asm int +_tsl_set(void *tsl) +{ +%mem tsl + movl tsl, %ecx + movl $1, %eax + lock + xchgb (%ecx),%al + xorl $1,%eax +} +#endif + +#define TSL_SET(tsl) _tsl_set(tsl) +#define TSL_UNSET(tsl) (*(tsl) = 0) +#define TSL_INIT(tsl) TSL_UNSET(tsl) diff --git a/db2/os/os_abs.c b/db2/os/os_abs.c index 872e46d058..d9f4970467 100644 --- a/db2/os/os_abs.c +++ b/db2/os/os_abs.c @@ -1,14 +1,14 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1997 + * Copyright (c) 1997, 1998 * Sleepycat Software. All rights reserved. 
*/ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)os_abs.c 10.7 (Sleepycat) 10/24/97"; +static const char sccsid[] = "@(#)os_abs.c 10.8 (Sleepycat) 4/10/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES diff --git a/db2/os/os_alloc.c b/db2/os/os_alloc.c index 27abffbf0d..35784476c0 100644 --- a/db2/os/os_alloc.c +++ b/db2/os/os_alloc.c @@ -1,26 +1,46 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1997 + * Copyright (c) 1997, 1998 * Sleepycat Software. All rights reserved. */ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)os_alloc.c 10.1 (Sleepycat) 12/1/97"; +static const char sccsid[] = "@(#)os_alloc.c 10.6 (Sleepycat) 5/2/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES #include <sys/types.h> -#include <errno.h> #include <string.h> #endif #include "db_int.h" /* + * __db_strdup -- + * The strdup(3) function for DB. + * + * PUBLIC: char *__db_strdup __P((const char *)); + */ +char * +__db_strdup(str) + const char *str; +{ + size_t len; + char *copy; + + len = strlen(str) + 1; + if ((copy = __db_malloc(len)) == NULL) + return (NULL); + + memcpy(copy, str, len); + return (copy); +} + +/* * XXX * Correct for systems that return NULL when you allocate 0 bytes of memory. * There are several places in DB where we allocate the number of bytes held @@ -28,6 +48,10 @@ static const char sccsid[] = "@(#)os_alloc.c 10.1 (Sleepycat) 12/1/97"; * returns a NULL for that reason (which behavior is permitted by ANSI). We * could make these calls macros on non-Alpha architectures (that's where we * saw the problem), but it's probably not worth the autoconf complexity. + * + * Out of memory. + * We wish to hold the whole sky, + * But we never will. */ /* * __db_calloc -- @@ -42,7 +66,7 @@ __db_calloc(num, size) void *p; size *= num; - if ((p = __db_jump.db_malloc(size == 0 ? 1 : size)) != NULL) + if ((p = __db_jump.j_malloc(size == 0 ? 
1 : size)) != NULL) memset(p, 0, size); return (p); } @@ -57,7 +81,15 @@ void * __db_malloc(size) size_t size; { - return (__db_jump.db_malloc(size == 0 ? 1 : size)); +#ifdef DIAGNOSTIC + void *p; + + p = __db_jump.j_malloc(size == 0 ? 1 : size); + memset(p, 0xff, size == 0 ? 1 : size); + return (p); +#else + return (__db_jump.j_malloc(size == 0 ? 1 : size)); +#endif } /* @@ -71,5 +103,5 @@ __db_realloc(ptr, size) void *ptr; size_t size; { - return (__db_jump.db_realloc(ptr, size == 0 ? 1 : size)); + return (__db_jump.j_realloc(ptr, size == 0 ? 1 : size)); } diff --git a/db2/os/os_config.c b/db2/os/os_config.c index 7a89ba58ab..4150c843e4 100644 --- a/db2/os/os_config.c +++ b/db2/os/os_config.c @@ -1,14 +1,14 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1997 + * Copyright (c) 1997, 1998 * Sleepycat Software. All rights reserved. */ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)os_config.c 10.12 (Sleepycat) 1/8/98"; +static const char sccsid[] = "@(#)os_config.c 10.26 (Sleepycat) 5/23/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -20,22 +20,6 @@ static const char sccsid[] = "@(#)os_config.c 10.12 (Sleepycat) 1/8/98"; #include "db_int.h" /* - * __os_oldwin -- - * Return if Windows 95 (as opposed to Windows NT). - * - * PUBLIC: int __os_oldwin __P((void)); - */ -int -__os_oldwin() -{ -#ifdef _WIN32 - return ((GetVersion() & 0x80000000) != 0); -#else - return (0); -#endif -} - -/* * XXX * We provide our own extern declarations so that we don't collide with * systems that get them wrong, e.g., SunOS. @@ -47,13 +31,20 @@ __os_oldwin() #define imported #endif +/* + * XXX + * HP/UX MPE doesn't have fsync, but you can build one using FCONTROL. 
+ */ +#ifdef __hp3000s900 +#define fsync __mpe_fsync +#endif + imported extern int close __P((int)); imported extern void free __P((void *)); imported extern int fsync __P((int)); imported extern void *malloc __P((size_t)); imported extern int open __P((const char *, int, ...)); imported extern ssize_t read __P((int, void *, size_t)); -imported extern char *strdup __P((const char *)); imported extern void *realloc __P((void *, size_t)); imported extern int unlink __P((const char *)); imported extern ssize_t write __P((int, const void *, size_t)); @@ -63,7 +54,7 @@ imported extern ssize_t write __P((int, const void *, size_t)); * This list of interfaces that applications can replace. In some * cases, the user is permitted to replace the standard ANSI C or * POSIX 1003.1 call, e.g., malloc or read. In others, we provide - * a local interface to the functionality, e.g., __os_map. + * a local interface to the functionality, e.g., __os_ioinfo. */ struct __db_jumptab __db_jump = { close, /* DB_FUNC_CLOSE */ @@ -74,20 +65,26 @@ struct __db_jumptab __db_jump = { fsync, /* DB_FUNC_FSYNC */ __os_ioinfo, /* DB_FUNC_IOINFO */ malloc, /* DB_FUNC_MALLOC */ - __os_map, /* DB_FUNC_MAP */ + NULL, /* DB_FUNC_MAP */ open, /* DB_FUNC_OPEN */ read, /* DB_FUNC_READ */ realloc, /* DB_FUNC_REALLOC */ + NULL, /* DB_FUNC_RUNLINK */ __os_seek, /* DB_FUNC_SEEK */ __os_sleep, /* DB_FUNC_SLEEP */ - strdup, /* DB_FUNC_STRDUP */ unlink, /* DB_FUNC_UNLINK */ - __os_unmap, /* DB_FUNC_UNMAP */ + NULL, /* DB_FUNC_UNMAP */ write, /* DB_FUNC_WRITE */ NULL /* DB_FUNC_YIELD */ }; -int __db_tsl_spins; /* DB_TSL_SPINS */ +DB_GLOBALS __db_global_values = { + 1, /* DB_MUTEXLOCKS */ + 0, /* DB_REGION_ANON, DB_REGION_NAME */ + 0, /* DB_REGION_INIT */ + 0, /* DB_TSL_SPINS */ + 0 /* DB_PAGEYIELD */ +}; /* * db_jump_set -- @@ -99,74 +96,68 @@ db_jump_set(func, which) int which; { switch (which) { - case DB_FUNC_CALLOC: - /* - * XXX - * Obsolete, calloc is no longer called by DB. 
- */ - break; case DB_FUNC_CLOSE: - __db_jump.db_close = (int (*) __P((int)))func; + __db_jump.j_close = (int (*) __P((int)))func; break; case DB_FUNC_DIRFREE: - __db_jump.db_dirfree = (void (*) __P((char **, int)))func; + __db_jump.j_dirfree = (void (*) __P((char **, int)))func; break; case DB_FUNC_DIRLIST: - __db_jump.db_dirlist = + __db_jump.j_dirlist = (int (*) __P((const char *, char ***, int *)))func; break; case DB_FUNC_EXISTS: - __db_jump.db_exists = (int (*) __P((const char *, int *)))func; + __db_jump.j_exists = (int (*) __P((const char *, int *)))func; break; case DB_FUNC_FREE: - __db_jump.db_free = (void (*) __P((void *)))func; + __db_jump.j_free = (void (*) __P((void *)))func; break; case DB_FUNC_FSYNC: - __db_jump.db_fsync = (int (*) __P((int)))func; + __db_jump.j_fsync = (int (*) __P((int)))func; break; case DB_FUNC_IOINFO: - __db_jump.db_ioinfo = (int (*) __P((const char *, + __db_jump.j_ioinfo = (int (*) __P((const char *, int, u_int32_t *, u_int32_t *, u_int32_t *)))func; break; case DB_FUNC_MALLOC: - __db_jump.db_malloc = (void *(*) __P((size_t)))func; + __db_jump.j_malloc = (void *(*) __P((size_t)))func; break; case DB_FUNC_MAP: - __db_jump.db_map = - (int (*) __P((int, size_t, int, int, void **)))func; + __db_jump.j_map = (int (*) + __P((char *, int, size_t, int, int, int, void **)))func; break; case DB_FUNC_OPEN: - __db_jump.db_open = (int (*) __P((const char *, int, ...)))func; + __db_jump.j_open = (int (*) __P((const char *, int, ...)))func; break; case DB_FUNC_READ: - __db_jump.db_read = + __db_jump.j_read = (ssize_t (*) __P((int, void *, size_t)))func; break; case DB_FUNC_REALLOC: - __db_jump.db_realloc = (void *(*) __P((void *, size_t)))func; + __db_jump.j_realloc = (void *(*) __P((void *, size_t)))func; + break; + case DB_FUNC_RUNLINK: + __db_jump.j_runlink = (int (*) __P((char *)))func; break; case DB_FUNC_SEEK: - __db_jump.db_seek = - (int (*) __P((int, size_t, db_pgno_t, u_long, int)))func; + __db_jump.j_seek = (int (*) + __P((int, 
size_t, db_pgno_t, u_int32_t, int, int)))func; break; case DB_FUNC_SLEEP: - __db_jump.db_sleep = (int (*) __P((u_long, u_long)))func; - break; - case DB_FUNC_STRDUP: - __db_jump.db_strdup = (char *(*) __P((const char *)))func; + __db_jump.j_sleep = (int (*) __P((u_long, u_long)))func; break; case DB_FUNC_UNLINK: - __db_jump.db_unlink = (int (*) __P((const char *)))func; + __db_jump.j_unlink = (int (*) __P((const char *)))func; break; case DB_FUNC_UNMAP: - __db_jump.db_unmap = (int (*) __P((void *, size_t)))func; + __db_jump.j_unmap = (int (*) __P((void *, size_t)))func; break; case DB_FUNC_WRITE: - __db_jump.db_write = + __db_jump.j_write = (ssize_t (*) __P((int, const void *, size_t)))func; break; case DB_FUNC_YIELD: - __db_jump.db_yield = (int (*) __P((void)))func; + __db_jump.j_yield = (int (*) __P((void)))func; break; default: return (EINVAL); @@ -182,11 +173,32 @@ int db_value_set(value, which) int value, which; { + int ret; + switch (which) { + case DB_MUTEXLOCKS: + DB_GLOBAL(db_mutexlocks) = value; + break; + case DB_PAGEYIELD: + DB_GLOBAL(db_pageyield) = value; + break; + case DB_REGION_ANON: + if (value != 0 && (ret = __db_mapanon_ok(0)) != 0) + return (ret); + DB_GLOBAL(db_region_anon) = value; + break; + case DB_REGION_INIT: + DB_GLOBAL(db_region_init) = value; + break; + case DB_REGION_NAME: + if (value != 0 && (ret = __db_mapanon_ok(1)) != 0) + return (ret); + DB_GLOBAL(db_region_anon) = value; + break; case DB_TSL_SPINS: if (value <= 0) return (EINVAL); - __db_tsl_spins = value; + DB_GLOBAL(db_tsl_spins) = value; break; default: return (EINVAL); diff --git a/db2/os/os_dir.c b/db2/os/os_dir.c index 10fb8b6739..14a10ad23f 100644 --- a/db2/os/os_dir.c +++ b/db2/os/os_dir.c @@ -1,14 +1,14 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1997 + * Copyright (c) 1997, 1998 * Sleepycat Software. All rights reserved. 
*/ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)os_dir.c 10.13 (Sleepycat) 10/28/97"; +static const char sccsid[] = "@(#)os_dir.c 10.15 (Sleepycat) 4/26/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -32,13 +32,9 @@ static const char sccsid[] = "@(#)os_dir.c 10.13 (Sleepycat) 10/28/97"; #endif #include <errno.h> -#include <stdlib.h> -#include <string.h> -#include <unistd.h> #endif #include "db_int.h" -#include "common_ext.h" /* * __os_dirlist -- diff --git a/db2/os/os_fid.c b/db2/os/os_fid.c index 6820b88786..cf48c01bd8 100644 --- a/db2/os/os_fid.c +++ b/db2/os/os_fid.c @@ -1,14 +1,14 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. */ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)os_fid.c 10.9 (Sleepycat) 10/24/97"; +static const char sccsid[] = "@(#)os_fid.c 10.11 (Sleepycat) 4/26/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -16,7 +16,6 @@ static const char sccsid[] = "@(#)os_fid.c 10.9 (Sleepycat) 10/24/97"; #include <sys/stat.h> #include <errno.h> -#include <stdlib.h> #include <string.h> #include <time.h> #endif diff --git a/db2/os/os_fsync.c b/db2/os/os_fsync.c index 7b001ceeb0..e1f271a75c 100644 --- a/db2/os/os_fsync.c +++ b/db2/os/os_fsync.c @@ -1,14 +1,14 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1997 + * Copyright (c) 1997, 1998 * Sleepycat Software. All rights reserved. */ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)os_fsync.c 10.3 (Sleepycat) 10/25/97"; +static const char sccsid[] = "@(#)os_fsync.c 10.5 (Sleepycat) 4/19/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -32,3 +32,18 @@ __db_fsync(fd) { return (__os_fsync(fd) ? 
errno : 0); } + +#ifdef __hp3000s900 +#include <fcntl.h> + +int +__mpe_fsync(fd) + int fd; +{ + extern FCONTROL(short, short, void *); + + FCONTROL(_MPE_FILENO(fd), 2, NULL); /* Flush the buffers */ + FCONTROL(_MPE_FILENO(fd), 6, NULL); /* Write the EOF */ + return (0); +} +#endif diff --git a/db2/os/os_map.c b/db2/os/os_map.c index b1553188dc..5f0fd790e6 100644 --- a/db2/os/os_map.c +++ b/db2/os/os_map.c @@ -1,47 +1,395 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. */ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)os_map.c 10.7 (Sleepycat) 10/25/97"; +static const char sccsid[] = "@(#)os_map.c 10.19 (Sleepycat) 5/3/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES #include <sys/types.h> +#ifdef HAVE_MMAP #include <sys/mman.h> +#endif + +#ifdef HAVE_SHMGET +#include <sys/ipc.h> +#include <sys/shm.h> +#endif #include <errno.h> +#include <string.h> #endif #include "db_int.h" +#include "common_ext.h" + +#ifdef HAVE_MMAP +static int __os_map __P((char *, int, size_t, int, int, int, void **)); +#endif +#ifdef HAVE_SHMGET +static int __os_shmget __P((char *, REGINFO *)); +#endif /* - * __os_map -- - * Map in some shared memory backed by a file descriptor. + * __db_mapanon_ok -- + * Return if this OS can support anonymous memory regions. + * + * PUBLIC: int __db_mapanon_ok __P((int)); + */ +int +__db_mapanon_ok(need_names) + int need_names; +{ + int ret; + + ret = EINVAL; + + /* + * If we don't have spinlocks, we have to have a file descriptor + * for fcntl(2) locking, which implies using mmap(2) to map in a + * regular file. Theoretically, we could probably find ways to + * get a file descriptor to lock other types of shared regions, + * but I don't see any reason to do so. + * + * If need_names is set, the application wants to share anonymous + * memory among multiple processes, so we have to have a way to + * name it. 
This requires shmget(2), on UNIX systems. + */ +#ifdef HAVE_SPINLOCKS +#ifdef HAVE_SHMGET + ret = 0; +#endif +#ifdef HAVE_MMAP +#ifdef MAP_ANON + if (!need_names) + ret = 0; +#endif +#ifdef MAP_ANONYMOUS + if (!need_names) + ret = 0; +#endif +#else + COMPQUIET(need_names, 0); +#endif /* HAVE_MMAP */ +#endif /* HAVE_SPINLOCKS */ + + return (ret); +} + +/* + * __db_mapinit -- + * Return if shared regions need to be initialized. + * + * PUBLIC: int __db_mapinit __P((void)); + */ +int +__db_mapinit() +{ + /* + * Historically, some systems required that all of the bytes of the + * region be written before it could be mmapped and accessed randomly. + * We have the option of setting REGION_INIT_NEEDED at configuration + * time if we're running on one of those systems. + */ +#ifdef REGION_INIT_NEEDED + return (1); +#else + return (0); +#endif +} + +/* + * __db_mapregion -- + * Attach to a shared memory region. + * + * PUBLIC: int __db_mapregion __P((char *, REGINFO *)); + */ +int +__db_mapregion(path, infop) + char *path; + REGINFO *infop; +{ + int called, ret; + + called = 0; + ret = EINVAL; + + /* If the user replaces the map call, call through their interface. */ + if (__db_jump.j_map != NULL) { + F_SET(infop, REGION_HOLDINGSYS); + return (__db_jump.j_map(path, infop->fd, infop->size, + 1, F_ISSET(infop, REGION_ANONYMOUS), 0, &infop->addr)); + } + + if (F_ISSET(infop, REGION_ANONYMOUS)) { + /* + * !!! + * If we're creating anonymous regions: + * + * If it's private, we use mmap(2). The problem with using + * shmget(2) is that we may be creating a region of which the + * application isn't aware, and if the application crashes + * we'll have no way to remove the system resources for the + * region. + * + * If it's not private, we use the shmget(2) interface if it's + * available, because it allows us to name anonymous memory. + * If shmget(2) isn't available, use the mmap(2) calls. 
+ * + * In the case of anonymous memory, using mmap(2) means the + * memory isn't named and only the single process and its + * threads can access the region. + */ +#ifdef HAVE_MMAP +#ifdef MAP_ANON +#define HAVE_MMAP_ANONYMOUS 1 +#else +#ifdef MAP_ANONYMOUS +#define HAVE_MMAP_ANONYMOUS 1 +#endif +#endif +#endif +#ifdef HAVE_MMAP_ANONYMOUS + if (!called && F_ISSET(infop, REGION_PRIVATE)) { + called = 1; + ret = __os_map(path, + infop->fd, infop->size, 1, 1, 0, &infop->addr); + } +#endif +#ifdef HAVE_SHMGET + if (!called) { + called = 1; + ret = __os_shmget(path, infop); + } +#endif +#ifdef HAVE_MMAP + /* + * If we're trying to join an unnamed anonymous region, fail -- + * that's not possible. + */ + if (!called) { + called = 1; + + if (!F_ISSET(infop, REGION_CREATED)) { + __db_err(infop->dbenv, + "cannot join region in unnamed anonymous memory"); + return (EINVAL); + } + + ret = __os_map(path, + infop->fd, infop->size, 1, 1, 0, &infop->addr); + } +#endif + } else { + /* + * !!! + * If we're creating normal regions, we use the mmap(2) + * interface if it's available because it's POSIX 1003.1 + * standard and we trust it more than we do shmget(2). + */ +#ifdef HAVE_MMAP + if (!called) { + called = 1; + + /* Mmap(2) regions that aren't anonymous can grow. */ + F_SET(infop, REGION_CANGROW); + + ret = __os_map(path, + infop->fd, infop->size, 1, 0, 0, &infop->addr); + } +#endif +#ifdef HAVE_SHMGET + if (!called) { + called = 1; + ret = __os_shmget(path, infop); + } +#endif + } + return (ret); +} + +/* + * __db_unmapregion -- + * Detach from the shared memory region. + * + * PUBLIC: int __db_unmapregion __P((REGINFO *)); + */ +int +__db_unmapregion(infop) + REGINFO *infop; +{ + int called, ret; + + called = 0; + ret = EINVAL; + + if (__db_jump.j_unmap != NULL) + return (__db_jump.j_unmap(infop->addr, infop->size)); + +#ifdef HAVE_SHMGET + if (infop->segid != INVALID_SEGID) { + called = 1; + ret = shmdt(infop->addr) ? 
errno : 0; + } +#endif +#ifdef HAVE_MMAP + if (!called) { + called = 1; + ret = munmap(infop->addr, infop->size) ? errno : 0; + } +#endif + return (ret); +} + +/* + * __db_unlinkregion -- + * Remove the shared memory region. + * + * PUBLIC: int __db_unlinkregion __P((char *, REGINFO *)); + */ +int +__db_unlinkregion(name, infop) + char *name; + REGINFO *infop; +{ + int called, ret; + + called = 0; + ret = EINVAL; + + if (__db_jump.j_runlink != NULL) + return (__db_jump.j_runlink(name)); + +#ifdef HAVE_SHMGET + if (infop->segid != INVALID_SEGID) { + called = 1; + ret = shmctl(infop->segid, IPC_RMID, NULL) ? errno : 0; + } +#else + COMPQUIET(infop, NULL); +#endif +#ifdef HAVE_MMAP + if (!called) { + called = 1; + ret = 0; + } +#endif + return (ret); +} + +/* + * __db_mapfile -- + * Map in a shared memory file. + * + * PUBLIC: int __db_mapfile __P((char *, int, size_t, int, void **)); + */ +int +__db_mapfile(path, fd, len, is_rdonly, addr) + char *path; + int fd, is_rdonly; + size_t len; + void **addr; +{ + if (__db_jump.j_map != NULL) + return (__db_jump.j_map(path, fd, len, 0, 0, is_rdonly, addr)); + +#ifdef HAVE_MMAP + return (__os_map(path, fd, len, 0, 0, is_rdonly, addr)); +#else + return (EINVAL); +#endif +} + +/* + * __db_unmapfile -- + * Unmap the shared memory file. * - * PUBLIC: int __os_map __P((int, size_t, int, int, void **)); + * PUBLIC: int __db_unmapfile __P((void *, size_t)); */ int -__os_map(fd, len, is_private, is_rdonly, addr) - int fd, is_private, is_rdonly; +__db_unmapfile(addr, len) + void *addr; + size_t len; +{ + if (__db_jump.j_unmap != NULL) + return (__db_jump.j_unmap(addr, len)); + +#ifdef HAVE_MMAP + return (munmap(addr, len) ? errno : 0); +#else + return (EINVAL); +#endif +} + +#ifdef HAVE_MMAP +/* + * __os_map -- + * Call the mmap(2) function. 
+ */ +static int +__os_map(path, fd, len, is_region, is_anonymous, is_rdonly, addr) + char *path; + int fd, is_region, is_anonymous, is_rdonly; size_t len; void **addr; { void *p; int flags, prot; - flags = is_private ? MAP_PRIVATE : MAP_SHARED; + COMPQUIET(path, NULL); + + /* + * If it's read-only, it's private, and if it's not, it's shared. + * Don't bother with an additional parameter. + */ + flags = is_rdonly ? MAP_PRIVATE : MAP_SHARED; + + if (is_region && is_anonymous) { + /* + * BSD derived systems use MAP_ANON; Digital Unix and HP/UX + * use MAP_ANONYMOUS. + */ +#ifdef MAP_ANON + flags |= MAP_ANON; +#endif +#ifdef MAP_ANONYMOUS + flags |= MAP_ANONYMOUS; +#endif + fd = -1; + } +#ifdef MAP_FILE + if (!is_region || !is_anonymous) { + /* + * Historically, MAP_FILE was required for mapping regular + * files, even though it was the default. Some systems have + * it, some don't, some that have it set it to 0. + */ + flags |= MAP_FILE; + } +#endif + + /* + * I know of no systems that implement the flag to tell the system + * that the region contains semaphores, but it's not an unreasonable + * thing to do, and has been part of the design since forever. I + * don't think anyone will object, but don't set it for read-only + * files, it doesn't make sense. + */ #ifdef MAP_HASSEMAPHORE - flags |= MAP_HASSEMAPHORE; + if (!is_rdonly) + flags |= MAP_HASSEMAPHORE; #endif + prot = PROT_READ | (is_rdonly ? 0 : PROT_WRITE); -#ifndef MAP_FAILED /* XXX: Mmap(2) failure return. */ + /* MAP_FAILED was not defined in early mmap implementations. */ +#ifndef MAP_FAILED #define MAP_FAILED -1 #endif if ((p = @@ -51,21 +399,67 @@ __os_map(fd, len, is_private, is_rdonly, addr) *addr = p; return (0); } +#endif +#ifdef HAVE_SHMGET /* - * __os_unmap -- - * Release the specified shared memory. - * - * PUBLIC: int __os_unmap __P((void *, size_t)); + * __os_shmget -- + * Call the shmget(2) family of functions. 
*/ -int -__os_unmap(addr, len) - void *addr; - size_t len; +static int +__os_shmget(path, infop) + REGINFO *infop; + char *path; { - /* - * !!! - * The argument len is always the same length as was mapped. - */ - return (munmap(addr, len) ? errno : 0); + key_t key; + int shmflg; + + if (F_ISSET(infop, REGION_CREATED)) { + /* + * The return key from ftok(3) is not guaranteed to be unique. + * The nice thing about the shmget(2) interface is that it + * allows you to name anonymous pieces of memory. The evil + * thing about it is that the name space is separate from the + * filesystem. + */ +#ifdef __hp3000s900 + {char mpe_path[MAXPATHLEN]; + /* + * MPE ftok() is broken as of 5.5pp4. If the file path does + * not start with '/' or '.', then ftok() tries to interpret + * the file path in MPE syntax instead of POSIX HFS syntax. + * The workaround is to prepend "./" to these paths. See HP + * SR 5003416081 for details. + */ + if (*path != '/' && *path != '.') { + if (strlen(path) + strlen("./") + 1 > sizeof(mpe_path)) + return (ENAMETOOLONG); + mpe_path[0] = '.'; + mpe_path[1] = '/'; + (void)strcpy(mpe_path + 2, path); + path = mpe_path; + } + } +#endif + if ((key = ftok(path, 1)) == (key_t)-1) + return (errno); + + shmflg = IPC_CREAT | 0600; + if ((infop->segid = shmget(key, infop->size, shmflg)) == -1) + return (errno); + } + + if ((infop->addr = shmat(infop->segid, NULL, 0)) == (void *)-1) { + /* + * If we're trying to join the region and failing, assume + * that there was a reboot and the region no longer exists. + */ + if (!F_ISSET(infop, REGION_CREATED)) + errno = EAGAIN; + return (errno); + } + + F_SET(infop, REGION_HOLDINGSYS); + return (0); } +#endif diff --git a/db2/os/os_oflags.c b/db2/os/os_oflags.c index 3656eef1c4..388c1c6faa 100644 --- a/db2/os/os_oflags.c +++ b/db2/os/os_oflags.c @@ -1,18 +1,19 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1997 + * Copyright (c) 1997, 1998 * Sleepycat Software. All rights reserved. 
*/ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)os_oflags.c 10.2 (Sleepycat) 10/24/97"; +static const char sccsid[] = "@(#)os_oflags.c 10.6 (Sleepycat) 4/19/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES #include <sys/types.h> +#include <sys/stat.h> #include <fcntl.h> #endif @@ -23,13 +24,13 @@ static const char sccsid[] = "@(#)os_oflags.c 10.2 (Sleepycat) 10/24/97"; * __db_oflags -- * Convert open(2) flags to DB flags. * - * PUBLIC: int __db_oflags __P((int)); + * PUBLIC: u_int32_t __db_oflags __P((int)); */ -int +u_int32_t __db_oflags(oflags) int oflags; { - int dbflags; + u_int32_t dbflags; /* * XXX @@ -46,3 +47,48 @@ __db_oflags(oflags) dbflags |= DB_TRUNCATE; return (dbflags); } + +/* + * __db_omode -- + * Convert a permission string to the correct open(2) flags. + * + * PUBLIC: int __db_omode __P((const char *)); + */ +int +__db_omode(perm) + const char *perm; +{ + int mode; + +#ifndef S_IRUSR +#if defined(_WIN32) || defined(WIN16) +#define S_IRUSR S_IREAD /* R for owner */ +#define S_IWUSR S_IWRITE /* W for owner */ +#define S_IRGRP 0 /* R for group */ +#define S_IWGRP 0 /* W for group */ +#define S_IROTH 0 /* R for other */ +#define S_IWOTH 0 /* W for other */ +#else +#define S_IRUSR 0000400 /* R for owner */ +#define S_IWUSR 0000200 /* W for owner */ +#define S_IRGRP 0000040 /* R for group */ +#define S_IWGRP 0000020 /* W for group */ +#define S_IROTH 0000004 /* R for other */ +#define S_IWOTH 0000002 /* W for other */ +#endif /* _WIN32 || WIN16 */ +#endif + mode = 0; + if (perm[0] == 'r') + mode |= S_IRUSR; + if (perm[1] == 'w') + mode |= S_IWUSR; + if (perm[2] == 'r') + mode |= S_IRGRP; + if (perm[3] == 'w') + mode |= S_IWGRP; + if (perm[4] == 'r') + mode |= S_IROTH; + if (perm[5] == 'w') + mode |= S_IWOTH; + return (mode); +} diff --git a/db2/os/os_open.c b/db2/os/os_open.c index a628765556..e960377ebb 100644 --- a/db2/os/os_open.c +++ b/db2/os/os_open.c @@ -1,14 +1,14 @@ /*- * See the file LICENSE for redistribution 
information. * - * Copyright (c) 1997 + * Copyright (c) 1997, 1998 * Sleepycat Software. All rights reserved. */ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)os_open.c 10.20 (Sleepycat) 11/27/97"; +static const char sccsid[] = "@(#)os_open.c 10.26 (Sleepycat) 5/4/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -25,12 +25,13 @@ static const char sccsid[] = "@(#)os_open.c 10.20 (Sleepycat) 11/27/97"; * __db_open -- * Open a file descriptor. * - * PUBLIC: int __db_open __P((const char *, int, int, int, int *)); + * PUBLIC: int __db_open __P((const char *, u_int32_t, u_int32_t, int, int *)); */ int __db_open(name, arg_flags, ok_flags, mode, fdp) const char *name; - int arg_flags, ok_flags, mode, *fdp; + u_int32_t arg_flags, ok_flags; + int mode, *fdp; { int fd, flags; @@ -54,7 +55,7 @@ __db_open(name, arg_flags, ok_flags, mode, fdp) else flags |= O_RDWR; -#ifdef _WIN32 +#if defined(_WIN32) || defined(WIN16) #ifdef _MSC_VER if (arg_flags & DB_SEQUENTIAL) flags |= _O_SEQUENTIAL; @@ -80,7 +81,7 @@ __db_open(name, arg_flags, ok_flags, mode, fdp) (void)__os_unlink(name); #endif -#if !defined(_WIN32) && !defined(macintosh) +#if !defined(_WIN32) && !defined(WIN16) /* * Deny access to any child process; done for Win32 by O_NOINHERIT, * MacOS has neither child processes nor fd inheritance. diff --git a/db2/os/os_rpath.c b/db2/os/os_rpath.c index 44fd4ec9f4..23867b35ac 100644 --- a/db2/os/os_rpath.c +++ b/db2/os/os_rpath.c @@ -1,14 +1,14 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1997 + * Copyright (c) 1997, 1998 * Sleepycat Software. All rights reserved. 
*/ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)os_rpath.c 10.2 (Sleepycat) 10/24/97"; +static const char sccsid[] = "@(#)os_rpath.c 10.3 (Sleepycat) 4/10/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES diff --git a/db2/os/os_rw.c b/db2/os/os_rw.c index 48f7fdc5b1..7591041981 100644 --- a/db2/os/os_rw.c +++ b/db2/os/os_rw.c @@ -1,14 +1,14 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1997 + * Copyright (c) 1997, 1998 * Sleepycat Software. All rights reserved. */ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)os_rw.c 10.6 (Sleepycat) 10/25/97"; +static const char sccsid[] = "@(#)os_rw.c 10.7 (Sleepycat) 4/10/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES diff --git a/db2/os/os_seek.c b/db2/os/os_seek.c index e27044b626..159425cc27 100644 --- a/db2/os/os_seek.c +++ b/db2/os/os_seek.c @@ -1,14 +1,14 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1997 + * Copyright (c) 1997, 1998 * Sleepycat Software. All rights reserved. */ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)os_seek.c 10.6 (Sleepycat) 10/25/97"; +static const char sccsid[] = "@(#)os_seek.c 10.9 (Sleepycat) 4/19/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -24,19 +24,21 @@ static const char sccsid[] = "@(#)os_seek.c 10.6 (Sleepycat) 10/25/97"; * __os_seek -- * Seek to a page/byte offset in the file. 
* - * PUBLIC: int __os_seek __P((int, size_t, db_pgno_t, u_long, int)); + * PUBLIC: int __os_seek __P((int, size_t, db_pgno_t, u_int32_t, int, int)); */ int -__os_seek(fd, pgsize, pageno, relative, whence) +__os_seek(fd, pgsize, pageno, relative, isrewind, whence) int fd; size_t pgsize; db_pgno_t pageno; - u_long relative; - int whence; + u_int32_t relative; + int isrewind, whence; { off_t offset; - offset = pgsize * pageno + relative; + offset = (off_t)pgsize * pageno + relative; + if (isrewind) + offset = -offset; return (lseek(fd, offset, whence) == -1 ? errno : 0); } diff --git a/db2/os/os_sleep.c b/db2/os/os_sleep.c index 2d2cb71f6d..6a5b91f5c4 100644 --- a/db2/os/os_sleep.c +++ b/db2/os/os_sleep.c @@ -1,14 +1,14 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1997 + * Copyright (c) 1997, 1998 * Sleepycat Software. All rights reserved. */ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)os_sleep.c 10.8 (Sleepycat) 10/25/97"; +static const char sccsid[] = "@(#)os_sleep.c 10.10 (Sleepycat) 4/27/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -42,7 +42,8 @@ __os_sleep(secs, usecs) struct timeval t; /* Don't require that the values be normalized. */ - for (; usecs >= 1000000; ++secs, usecs -= 1000000); + for (; usecs >= 1000000; ++secs, usecs -= 1000000) + ; /* * It's important that we yield the processor here so that other diff --git a/db2/os/os_spin.c b/db2/os/os_spin.c index fb693c2848..2fd21d018b 100644 --- a/db2/os/os_spin.c +++ b/db2/os/os_spin.c @@ -1,14 +1,14 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1997 + * Copyright (c) 1997, 1998 * Sleepycat Software. All rights reserved. 
*/ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)os_spin.c 10.3 (Sleepycat) 11/25/97"; +static const char sccsid[] = "@(#)os_spin.c 10.7 (Sleepycat) 5/20/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -29,28 +29,33 @@ static const char sccsid[] = "@(#)os_spin.c 10.3 (Sleepycat) 11/25/97"; int __os_spin() { - extern int __db_tsl_spins; + static long sys_val; /* If the application specified the spins, use its value. */ - if (__db_tsl_spins != 0) - return (__db_tsl_spins); + if (DB_GLOBAL(db_tsl_spins) != 0) + return (DB_GLOBAL(db_tsl_spins)); + + /* If we've already figured this out, return the value. */ + if (sys_val != 0) + return (sys_val); /* * XXX - * Sysconf: Solaris uses _SC_NPROCESSORS_ONLN to return the number - * of online processors. I don't know if this call is portable or - * not. + * Solaris and Linux use _SC_NPROCESSORS_ONLN to return the number of + * online processors. We don't want to repeatedly call sysconf because + * it's quite expensive (requiring multiple filesystem accesses) under + * Debian Linux. + * + * Spin 50 times per processor -- we have anecdotal evidence that this + * is a reasonable value. */ #if defined(HAVE_SYSCONF) && defined(_SC_NPROCESSORS_ONLN) - { - long sys_val; - - sys_val = sysconf(_SC_NPROCESSORS_ONLN); - if (sys_val > 0) - return (sys_val * 50); - } + if ((sys_val = sysconf(_SC_NPROCESSORS_ONLN)) > 1) + sys_val *= 50; + else + sys_val = 1; +#else + sys_val = 1; #endif - - /* Default to a single processor. */ - return (1); + return (sys_val); } diff --git a/db2/os/os_stat.c b/db2/os/os_stat.c index 73600b6336..e7d3f24174 100644 --- a/db2/os/os_stat.c +++ b/db2/os/os_stat.c @@ -1,14 +1,14 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1997 + * Copyright (c) 1997, 1998 * Sleepycat Software. All rights reserved. 
*/ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)os_stat.c 10.11 (Sleepycat) 1/8/98"; +static const char sccsid[] = "@(#)os_stat.c 10.15 (Sleepycat) 4/27/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -16,11 +16,9 @@ static const char sccsid[] = "@(#)os_stat.c 10.11 (Sleepycat) 1/8/98"; #include <sys/stat.h> #include <errno.h> -#include <string.h> #endif #include "db_int.h" -#include "common_ext.h" /* * __os_exists -- @@ -37,8 +35,17 @@ __os_exists(path, isdirp) if (stat(path, &sb) != 0) return (errno); + +#if !defined(S_ISDIR) || defined(STAT_MACROS_BROKEN) +#if defined(_WIN32) || defined(WIN16) +#define S_ISDIR(m) (_S_IFDIR & (m)) +#else +#define S_ISDIR(m) (((m) & 0170000) == 0040000) +#endif +#endif if (isdirp != NULL) *isdirp = S_ISDIR(sb.st_mode); + return (0); } @@ -69,10 +76,16 @@ __os_ioinfo(path, fd, mbytesp, bytesp, iosizep) if (bytesp != NULL) *bytesp = sb.st_size % MEGABYTE; - /* Return the underlying filesystem blocksize, if available. */ + /* + * Return the underlying filesystem blocksize, if available. + * + * XXX + * Check for a 0 size -- HP's MPE architecture has st_blksize, + * but it's always 0. + */ #ifdef HAVE_ST_BLKSIZE - if (iosizep != NULL) - *iosizep = sb.st_blksize; + if (iosizep != NULL && (*iosizep = sb.st_blksize) == 0) + *iosizep = DB_DEF_IOSIZE; #else if (iosizep != NULL) *iosizep = DB_DEF_IOSIZE; diff --git a/db2/os/os_unlink.c b/db2/os/os_unlink.c index 473ce77d39..3a1fa3ff99 100644 --- a/db2/os/os_unlink.c +++ b/db2/os/os_unlink.c @@ -1,14 +1,14 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1997 + * Copyright (c) 1997, 1998 * Sleepycat Software. All rights reserved. 
*/ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)os_unlink.c 10.4 (Sleepycat) 10/28/97"; +static const char sccsid[] = "@(#)os_unlink.c 10.5 (Sleepycat) 4/10/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES diff --git a/db2/progs/db_archive/db_archive.c b/db2/progs/db_archive/db_archive.c index a9c6c28e70..691824c2ab 100644 --- a/db2/progs/db_archive/db_archive.c +++ b/db2/progs/db_archive/db_archive.c @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. */ @@ -9,9 +9,9 @@ #ifndef lint static const char copyright[] = -"@(#) Copyright (c) 1997\n\ +"@(#) Copyright (c) 1996, 1997, 1998\n\ Sleepycat Software Inc. All rights reserved.\n"; -static const char sccsid[] = "@(#)db_archive.c 10.15 (Sleepycat) 8/27/97"; +static const char sccsid[] = "@(#)db_archive.c 10.17 (Sleepycat) 4/10/98"; #endif #ifndef NO_SYSTEM_INCLUDES @@ -50,7 +50,8 @@ main(argc, argv) extern char *optarg; extern int optind; DB_ENV *dbenv; - int ch, flags, verbose; + u_int32_t flags; + int ch, verbose; char *home, **list; flags = verbose = 0; diff --git a/db2/progs/db_checkpoint/db_checkpoint.c b/db2/progs/db_checkpoint/db_checkpoint.c index 3157a52666..74f95ccce2 100644 --- a/db2/progs/db_checkpoint/db_checkpoint.c +++ b/db2/progs/db_checkpoint/db_checkpoint.c @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. */ @@ -9,9 +9,9 @@ #ifndef lint static const char copyright[] = -"@(#) Copyright (c) 1997\n\ +"@(#) Copyright (c) 1996, 1997, 1998\n\ Sleepycat Software Inc. 
All rights reserved.\n"; -static const char sccsid[] = "@(#)db_checkpoint.c 10.14 (Sleepycat) 1/17/98"; +static const char sccsid[] = "@(#)db_checkpoint.c 10.17 (Sleepycat) 5/3/98"; #endif #ifndef NO_SYSTEM_INCLUDES @@ -37,7 +37,6 @@ static const char sccsid[] = "@(#)db_checkpoint.c 10.14 (Sleepycat) 1/17/98"; #include "common_ext.h" char *check __P((DB_ENV *, long, long)); -int checkpoint __P((DB_ENV *, char *, int)); DB_ENV *db_init __P((char *)); int logpid __P((char *, int)); int main __P((int, char *[])); @@ -58,26 +57,39 @@ main(argc, argv) extern int optind; DB_ENV *dbenv; time_t now; - long kbytes, minutes, seconds; - int ch, eval, verbose; + long argval; + u_int32_t kbytes, minutes, seconds; + int ch, eval, once, verbose; char *home, *logfile; - home = logfile = NULL; + /* + * XXX + * Don't allow a fully unsigned 32-bit number, some compilers get + * upset and require it to be specified in hexadecimal and so on. + */ +#define MAX_UINT32_T 2147483647 + kbytes = minutes = 0; - verbose = 0; - while ((ch = getopt(argc, argv, "h:k:L:p:v")) != EOF) + once = verbose = 0; + home = logfile = NULL; + while ((ch = getopt(argc, argv, "1h:k:L:p:v")) != EOF) switch (ch) { + case '1': + once = 1; + break; case 'h': home = optarg; break; case 'k': - get_long(optarg, 1, LONG_MAX, &kbytes); + get_long(optarg, 1, (long)MAX_UINT32_T, &argval); + kbytes = argval; break; case 'L': logfile = optarg; break; case 'p': - get_long(optarg, 1, LONG_MAX, &minutes); + get_long(optarg, 1, (long)MAX_UINT32_T, &argval); + minutes = argval; break; case 'v': verbose = 1; @@ -92,8 +104,8 @@ main(argc, argv) if (argc != 0) usage(); - if (kbytes == 0 && minutes == 0) { - warnx("at least one of -k and -p must be specified"); + if (once == 0 && kbytes == 0 && minutes == 0) { + warnx("at least one of -1, -k and -p must be specified"); usage(); } @@ -113,8 +125,6 @@ main(argc, argv) eval = 0; seconds = kbytes != 0 ? 
30 : minutes * 60; while (!interrupted) { - (void)__db_sleep(seconds, 0); - if (verbose) { (void)time(&now); printf("checkpoint: %s", ctime(&now)); @@ -134,6 +144,11 @@ main(argc, argv) __db_err(dbenv, "checkpoint: %s", strerror(errno)); break; } + + if (once) + break; + + (void)__db_sleep(seconds, 0); } if (logfile != NULL && logpid(logfile, 0)) @@ -244,6 +259,6 @@ void usage() { (void)fprintf(stderr, - "usage: db_checkpoint [-v] [-h home] [-k kbytes] [-L file] [-p min]\n"); + "usage: db_checkpoint [-1v] [-h home] [-k kbytes] [-L file] [-p min]\n"); exit(1); } diff --git a/db2/progs/db_deadlock/db_deadlock.c b/db2/progs/db_deadlock/db_deadlock.c index 97fa8ca4f6..49a52416dd 100644 --- a/db2/progs/db_deadlock/db_deadlock.c +++ b/db2/progs/db_deadlock/db_deadlock.c @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. */ @@ -9,9 +9,9 @@ #ifndef lint static const char copyright[] = -"@(#) Copyright (c) 1997\n\ +"@(#) Copyright (c) 1996, 1997, 1998\n\ Sleepycat Software Inc. All rights reserved.\n"; -static const char sccsid[] = "@(#)db_deadlock.c 10.17 (Sleepycat) 1/15/98"; +static const char sccsid[] = "@(#)db_deadlock.c 10.19 (Sleepycat) 4/10/98"; #endif #ifndef NO_SYSTEM_INCLUDES @@ -54,13 +54,15 @@ main(argc, argv) u_int32_t atype; time_t now; long usecs; - int ch, flags, verbose; + u_int32_t flags; + int ch, verbose; char *home, *logfile; atype = DB_LOCK_DEFAULT; home = logfile = NULL; usecs = 0; - flags = verbose = 0; + flags = 0; + verbose = 0; while ((ch = getopt(argc, argv, "a:h:L:t:vw")) != EOF) switch (ch) { case 'a': diff --git a/db2/progs/db_dump/db_dump.c b/db2/progs/db_dump/db_dump.c index c09719059b..f532bc2779 100644 --- a/db2/progs/db_dump/db_dump.c +++ b/db2/progs/db_dump/db_dump.c @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. 
* - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. */ @@ -9,9 +9,9 @@ #ifndef lint static const char copyright[] = -"@(#) Copyright (c) 1997\n\ +"@(#) Copyright (c) 1996, 1997, 1998\n\ Sleepycat Software Inc. All rights reserved.\n"; -static const char sccsid[] = "@(#)db_dump.c 10.16 (Sleepycat) 8/27/97"; +static const char sccsid[] = "@(#)db_dump.c 10.19 (Sleepycat) 5/23/98"; #endif #ifndef NO_SYSTEM_INCLUDES @@ -35,8 +35,6 @@ static const char sccsid[] = "@(#)db_dump.c 10.16 (Sleepycat) 8/27/97"; void configure __P((char *)); DB_ENV *db_init __P((char *)); -void dbt_dump __P((DBT *)); -void dbt_print __P((DBT *)); int main __P((int, char *[])); void pheader __P((DB *, int)); void usage __P((void)); @@ -55,11 +53,11 @@ main(argc, argv) DBC *dbcp; DBT key, data; DB_ENV *dbenv; - int ch, dflag, pflag; + int ch, checkprint, dflag; char *home; home = NULL; - dflag = pflag = 0; + checkprint = dflag = 0; while ((ch = getopt(argc, argv, "df:h:p")) != EOF) switch (ch) { case 'd': @@ -73,7 +71,7 @@ main(argc, argv) home = optarg; break; case 'p': - pflag = 1; + checkprint = 1; break; case '?': default: @@ -89,7 +87,7 @@ main(argc, argv) if (home != NULL) errx(1, "the -d and -h options may not both be specified"); - if (pflag) + if (checkprint) errx(1, "the -d and -p options may not both be specified"); } @@ -116,23 +114,19 @@ main(argc, argv) } /* Print out the header. */ - pheader(dbp, pflag); + pheader(dbp, checkprint); /* Print out the key/data pairs. 
*/ memset(&key, 0, sizeof(key)); memset(&data, 0, sizeof(data)); - if (pflag) - while ((errno = dbcp->c_get(dbcp, &key, &data, DB_NEXT)) == 0) { - if (dbp->type != DB_RECNO) - dbt_print(&key); - dbt_print(&data); - } - else - while ((errno = dbcp->c_get(dbcp, &key, &data, DB_NEXT)) == 0) { - if (dbp->type != DB_RECNO) - dbt_dump(&key); - dbt_dump(&data); - } + while ((errno = dbcp->c_get(dbcp, &key, &data, DB_NEXT)) == 0) { + if (dbp->type != DB_RECNO && + (errno = __db_prdbt(&key, checkprint, stdout)) != 0) + break; + if ((errno = __db_prdbt(&data, checkprint, stdout)) != 0) + break; + } + if (errno != DB_NOTFOUND) err(1, "cursor get"); @@ -229,47 +223,6 @@ pheader(dbp, pflag) printf("HEADER=END\n"); } -static char hex[] = "0123456789abcdef"; - -/* - * dbt_dump -- - * Write out a key or data item using byte values. - */ -void -dbt_dump(dbtp) - DBT *dbtp; -{ - u_int32_t len; - u_int8_t *p; - - for (len = dbtp->size, p = dbtp->data; len--; ++p) - (void)printf("%c%c", - hex[(u_int8_t)(*p & 0xf0) >> 4], hex[*p & 0x0f]); - printf("\n"); -} - -/* - * dbt_print -- - * Write out a key or data item using printable characters. - */ -void -dbt_print(dbtp) - DBT *dbtp; -{ - u_int32_t len; - u_int8_t *p; - - for (len = dbtp->size, p = dbtp->data; len--; ++p) - if (isprint(*p)) { - if (*p == '\\') - (void)printf("\\"); - (void)printf("%c", *p); - } else - (void)printf("\\%c%c", - hex[(u_int8_t)(*p & 0xf0) >> 4], hex[*p & 0x0f]); - printf("\n"); -} - /* * usage -- * Display the usage message. diff --git a/db2/progs/db_dump185/db_dump185.c b/db2/progs/db_dump185/db_dump185.c index 5ec7673f1b..17451100f9 100644 --- a/db2/progs/db_dump185/db_dump185.c +++ b/db2/progs/db_dump185/db_dump185.c @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. 
*/ @@ -9,9 +9,9 @@ #ifndef lint static const char copyright[] = -"@(#) Copyright (c) 1997\n\ +"@(#) Copyright (c) 1996, 1997, 1998\n\ Sleepycat Software Inc. All rights reserved.\n"; -static const char sccsid[] = "@(#)db_dump185.c 10.8 (Sleepycat) 9/21/97"; +static const char sccsid[] = "@(#)db_dump185.c 10.10 (Sleepycat) 4/10/98"; #endif #ifndef NO_SYSTEM_INCLUDES @@ -30,7 +30,7 @@ static const char sccsid[] = "@(#)db_dump185.c 10.8 (Sleepycat) 9/21/97"; #include "clib_ext.h" /* Hash Table Information */ -typedef struct hashhdr { /* Disk resident portion */ +typedef struct hashhdr185 { /* Disk resident portion */ int magic; /* Magic NO for hash tables */ int version; /* Version ID */ u_int32_t lorder; /* Byte Order */ @@ -48,11 +48,34 @@ typedef struct hashhdr { /* Disk resident portion */ * table */ int ffactor; /* Fill factor */ int nkeys; /* Number of keys in hash table */ -} HASHHDR; +} HASHHDR185; +typedef struct htab185 { /* Memory resident data structure */ + HASHHDR185 hdr; /* Header */ +} HTAB185; -typedef struct htab { /* Memory resident data structure */ - HASHHDR hdr; /* Header */ -} HTAB; +/* Hash Table Information */ +typedef struct hashhdr186 { /* Disk resident portion */ + int32_t magic; /* Magic NO for hash tables */ + int32_t version; /* Version ID */ + int32_t lorder; /* Byte Order */ + int32_t bsize; /* Bucket/Page Size */ + int32_t bshift; /* Bucket shift */ + int32_t ovfl_point; /* Where overflow pages are being allocated */ + int32_t last_freed; /* Last overflow page freed */ + int32_t max_bucket; /* ID of Maximum bucket in use */ + int32_t high_mask; /* Mask to modulo into entire table */ + int32_t low_mask; /* Mask to modulo into lower half of table */ + int32_t ffactor; /* Fill factor */ + int32_t nkeys; /* Number of keys in hash table */ + int32_t hdrpages; /* Size of table header */ + int32_t h_charkey; /* value of hash(CHARKEY) */ +#define NCACHED 32 /* number of bit maps and spare points */ + int32_t spares[NCACHED];/* spare pages for 
overflow */ + u_int16_t bitmaps[NCACHED]; /* address of overflow page bitmaps */ +} HASHHDR186; +typedef struct htab186 { /* Memory resident data structure */ + HASHHDR186 hdr; /* Header */ +} HTAB186; typedef struct _epgno { u_int32_t pgno; /* the page number */ @@ -149,8 +172,8 @@ typedef struct _btree { u_int32_t flags; } BTREE; -void db_185_btree __P((DB *, int)); -void db_185_hash __P((DB *, int)); +void db_btree __P((DB *, int)); +void db_hash __P((DB *, int)); void dbt_dump __P((DBT *)); void dbt_print __P((DBT *)); int main __P((int, char *[])); @@ -193,9 +216,9 @@ main(argc, argv) if ((dbp = dbopen(argv[0], O_RDONLY, 0, DB_BTREE, NULL)) == NULL) { if ((dbp = dbopen(argv[0], O_RDONLY, 0, DB_HASH, NULL)) == NULL) err(1, "%s", argv[0]); - db_185_hash(dbp, pflag); + db_hash(dbp, pflag); } else - db_185_btree(dbp, pflag); + db_btree(dbp, pflag); /* * !!! @@ -219,36 +242,43 @@ main(argc, argv) } /* - * db_185_hash -- + * db_hash -- * Dump out hash header information. */ void -db_185_hash(dbp, pflag) +db_hash(dbp, pflag) DB *dbp; int pflag; { - HTAB *hashp; - - hashp = dbp->internal; + HTAB185 *hash185p; + HTAB186 *hash186p; printf("format=%s\n", pflag ? "print" : "bytevalue"); printf("type=hash\n"); - printf("h_ffactor=%lu\n", (u_long)hashp->hdr.ffactor); -#ifdef NOT_AVAILABLE_IN_DB_185 - printf("h_nelem=%lu\n", (u_long)hashp->hdr.nelem); -#endif - if (hashp->hdr.lorder != 0) - printf("db_lorder=%lu\n", (u_long)hashp->hdr.lorder); - printf("db_pagesize=%lu\n", (u_long)hashp->hdr.bsize); + + /* DB 1.85 was version 2, DB 1.86 was version 3. 
*/ + hash185p = dbp->internal; + if (hash185p->hdr.version > 2) { + hash186p = dbp->internal; + printf("h_ffactor=%lu\n", (u_long)hash186p->hdr.ffactor); + if (hash186p->hdr.lorder != 0) + printf("db_lorder=%lu\n", (u_long)hash186p->hdr.lorder); + printf("db_pagesize=%lu\n", (u_long)hash186p->hdr.bsize); + } else { + printf("h_ffactor=%lu\n", (u_long)hash185p->hdr.ffactor); + if (hash185p->hdr.lorder != 0) + printf("db_lorder=%lu\n", (u_long)hash185p->hdr.lorder); + printf("db_pagesize=%lu\n", (u_long)hash185p->hdr.bsize); + } printf("HEADER=END\n"); } /* - * db_185_btree -- + * db_btree -- * Dump out btree header information. */ void -db_185_btree(dbp, pflag) +db_btree(dbp, pflag) DB *dbp; int pflag; { diff --git a/db2/progs/db_load/db_load.c b/db2/progs/db_load/db_load.c index afa5730c25..5ac17753f5 100644 --- a/db2/progs/db_load/db_load.c +++ b/db2/progs/db_load/db_load.c @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. */ @@ -9,14 +9,13 @@ #ifndef lint static const char copyright[] = -"@(#) Copyright (c) 1997\n\ +"@(#) Copyright (c) 1996, 1997, 1998\n\ Sleepycat Software Inc. All rights reserved.\n"; -static const char sccsid[] = "@(#)db_load.c 10.15 (Sleepycat) 12/29/97"; +static const char sccsid[] = "@(#)db_load.c 10.20 (Sleepycat) 6/2/98"; #endif #ifndef NO_SYSTEM_INCLUDES #include <sys/types.h> -#include <sys/stat.h> #include <errno.h> #include <limits.h> @@ -27,6 +26,8 @@ static const char sccsid[] = "@(#)db_load.c 10.15 (Sleepycat) 12/29/97"; #endif #include "db_int.h" +#include "db_page.h" +#include "db_am.h" #include "clib_ext.h" void badnum __P((void)); @@ -55,7 +56,8 @@ main(argc, argv) DB_ENV *dbenv; DB_INFO dbinfo; db_recno_t recno; - int ch, no_header, pflag; + u_int32_t db_nooverwrite; + int ch, checkprint, existed, no_header; char **clist, **clp, *home; /* Allocate enough room for configuration arguments. 
*/ @@ -63,9 +65,10 @@ main(argc, argv) err(1, NULL); home = NULL; - no_header = 0; + db_nooverwrite = 0; + existed = checkprint = no_header = 0; argtype = dbtype = DB_UNKNOWN; - while ((ch = getopt(argc, argv, "c:f:h:Tt:")) != EOF) + while ((ch = getopt(argc, argv, "c:f:h:nTt:")) != EOF) switch (ch) { case 'c': *clp++ = optarg; @@ -77,8 +80,11 @@ main(argc, argv) case 'h': home = optarg; break; + case 'n': + db_nooverwrite = DB_NOOVERWRITE; + break; case 'T': - no_header = pflag = 1; + no_header = checkprint = 1; break; case 't': if (strcmp(optarg, "btree") == 0) { @@ -105,18 +111,18 @@ main(argc, argv) if (argc != 1) usage(); - /* Initialize the environment. */ - dbenv = db_init(home); - memset(&dbinfo, 0, sizeof(DB_INFO)); + /* Initialize the environment if the user specified one. */ + dbenv = home == NULL ? NULL : db_init(home); /* * Read the header. If there isn't any header, we're expecting flat - * text, set the pflag appropriately. + * text, set the checkprint flag appropriately. */ + memset(&dbinfo, 0, sizeof(DB_INFO)); if (no_header) dbtype = argtype; else { - rheader(&dbtype, &pflag, &dbinfo); + rheader(&dbtype, &checkprint, &dbinfo); if (argtype != DB_UNKNOWN) { /* Conversion to/from recno is prohibited. */ if ((dbtype == DB_RECNO && argtype != DB_RECNO) || @@ -133,17 +139,20 @@ main(argc, argv) configure(&dbinfo, clist); /* Open the DB file. */ - if ((errno = db_open(argv[0], dbtype, DB_CREATE | DB_TRUNCATE, - S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH, - dbenv, &dbinfo, &dbp)) != 0) + if ((errno = db_open(argv[0], dbtype, DB_CREATE, + __db_omode("rwrwrw"), dbenv, &dbinfo, &dbp)) != 0) err(1, "%s", argv[0]); /* Initialize the key/data pair. 
*/ memset(&key, 0, sizeof(DBT)); - if ((key.data = (void *)malloc(key.ulen = 1024)) == NULL) { - errno = ENOMEM; - err(1, NULL); - } + if (dbtype == DB_RECNO) { + key.data = &recno; + key.size = sizeof(recno); + } else + if ((key.data = (void *)malloc(key.ulen = 1024)) == NULL) { + errno = ENOMEM; + err(1, NULL); + } memset(&data, 0, sizeof(DBT)); if ((data.data = (void *)malloc(data.ulen = 1024)) == NULL) { errno = ENOMEM; @@ -151,22 +160,17 @@ main(argc, argv) } /* Get each key/data pair and add them to the database. */ - if (dbtype == DB_RECNO) { - key.data = &recno; - key.size = sizeof(recno); - for (recno = 1;; ++recno) { - if (pflag) { + for (recno = 1;; ++recno) { + if (dbtype == DB_RECNO) + if (checkprint) { if (dbt_rprint(&data)) break; - } else + } else { if (dbt_rdump(&data)) break; - if ((errno = dbp->put(dbp, NULL, &key, &data, 0)) != 0) - err(1, "%s", argv[0]); - } - } else - for (;;) { - if (pflag) { + } + else + if (checkprint) { if (dbt_rprint(&key)) break; if (dbt_rprint(&data)) @@ -177,13 +181,26 @@ main(argc, argv) if (dbt_rdump(&data)) fmt: err(1, "odd number of key/data pairs"); } - if ((errno = dbp->put(dbp, NULL, &key, &data, 0)) != 0) - err(1, "%s", argv[0]); + switch (errno = + dbp->put(dbp, NULL, &key, &data, db_nooverwrite)) { + case 0: + break; + case DB_KEYEXIST: + existed = 1; + warnx("%s: line %d: key already exists, not loaded:", + argv[0], + dbtype == DB_RECNO ? recno : recno * 2 - 1); + (void)__db_prdbt(&key, checkprint, stderr); + break; + default: + err(1, "%s", argv[0]); + /* NOTREACHED */ } + } if ((errno = dbp->close(dbp, 0)) != 0) err(1, "%s", argv[0]); - return (0); + return (existed ? 1 : 0); } /* @@ -200,13 +217,26 @@ db_init(home) errno = ENOMEM; err(1, NULL); } - dbenv->db_errfile = stderr; - dbenv->db_errpfx = progname; - if ((errno = - db_appinit(home, NULL, dbenv, DB_CREATE | DB_USE_ENVIRON)) != 0) - err(1, "db_appinit"); - return (dbenv); + /* + * The database may be live, try and use the shared regions. 
+ * + * If it works, we're done. Set the error output options so that + * future errors are correctly reported. + */ + if ((errno = db_appinit(home, NULL, dbenv, DB_INIT_LOCK | + DB_INIT_LOG | DB_INIT_MPOOL | DB_INIT_TXN | DB_USE_ENVIRON)) == 0) { + dbenv->db_errfile = stderr; + dbenv->db_errpfx = progname; + return (dbenv); + } + + /* + * If the db_appinit fails, assume the database isn't live, and don't + * bother with an environment. + */ + free(dbenv); + return (NULL); } #define FLAG(name, value, keyword, flag) \ @@ -279,16 +309,16 @@ configure(dbinfop, clp) * Read the header message. */ void -rheader(dbtypep, pflagp, dbinfop) +rheader(dbtypep, checkprintp, dbinfop) DBTYPE *dbtypep; - int *pflagp; + int *checkprintp; DB_INFO *dbinfop; { long lineno, val; char name[256], value[256]; *dbtypep = DB_UNKNOWN; - *pflagp = 0; + *checkprintp = 0; for (lineno = 1;; ++lineno) { /* If we don't see the expected information, it's an error. */ @@ -301,11 +331,11 @@ rheader(dbtypep, pflagp, dbinfop) if (strcmp(name, "format") == 0) { if (strcmp(value, "bytevalue") == 0) { - *pflagp = 0; + *checkprintp = 0; continue; } if (strcmp(value, "print") == 0) { - *pflagp = 1; + *checkprintp = 1; continue; } errx(1, "line %d: unknown format", lineno); @@ -390,39 +420,6 @@ dbt_rprint(dbtp) } /* - * digitize -- - * Convert a character to an integer. - */ -int -digitize(c) - int c; -{ - switch (c) { /* Don't depend on ASCII ordering. */ - case '0': return (0); - case '1': return (1); - case '2': return (2); - case '3': return (3); - case '4': return (4); - case '5': return (5); - case '6': return (6); - case '7': return (7); - case '8': return (8); - case '9': return (9); - case 'a': return (10); - case 'b': return (11); - case 'c': return (12); - case 'd': return (13); - case 'e': return (14); - case 'f': return (15); - } - - err(1, "unexpected hexadecimal value"); - /* NOTREACHED */ - - return (0); -} - -/* * dbt_rdump -- * Read a byte dump line into a DBT structure. 
*/ @@ -459,6 +456,39 @@ dbt_rdump(dbtp) } /* + * digitize -- + * Convert a character to an integer. + */ +int +digitize(c) + int c; +{ + switch (c) { /* Don't depend on ASCII ordering. */ + case '0': return (0); + case '1': return (1); + case '2': return (2); + case '3': return (3); + case '4': return (4); + case '5': return (5); + case '6': return (6); + case '7': return (7); + case '8': return (8); + case '9': return (9); + case 'a': return (10); + case 'b': return (11); + case 'c': return (12); + case 'd': return (13); + case 'e': return (14); + case 'f': return (15); + } + + err(1, "unexpected hexadecimal value"); + /* NOTREACHED */ + + return (0); +} + +/* * badnum -- * Display the bad number message. */ @@ -475,7 +505,8 @@ badnum() void usage() { - (void)fprintf(stderr, -"usage: db_load [-T]\n\t[-c name=value] [-f file] [-h home] [-t btree | hash] db_file\n"); + (void)fprintf(stderr, "%s\n\t%s\n", + "usage: db_load [-nT]", + "[-c name=value] [-f file] [-h home] [-t btree | hash | recno] db_file"); exit(1); } diff --git a/db2/progs/db_printlog/db_printlog.c b/db2/progs/db_printlog/db_printlog.c index 24554bcd14..3b48ad9643 100644 --- a/db2/progs/db_printlog/db_printlog.c +++ b/db2/progs/db_printlog/db_printlog.c @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. */ @@ -9,9 +9,9 @@ #ifndef lint static const char copyright[] = -"@(#) Copyright (c) 1997\n\ +"@(#) Copyright (c) 1996, 1997, 1998\n\ Sleepycat Software Inc. 
All rights reserved.\n"; -static const char sccsid[] = "@(#)db_printlog.c 10.11 (Sleepycat) 1/8/98"; +static const char sccsid[] = "@(#)db_printlog.c 10.12 (Sleepycat) 4/10/98"; #endif #ifndef NO_SYSTEM_INCLUDES diff --git a/db2/progs/db_recover/db_recover.c b/db2/progs/db_recover/db_recover.c index f902fed8c0..a2845725b8 100644 --- a/db2/progs/db_recover/db_recover.c +++ b/db2/progs/db_recover/db_recover.c @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. */ @@ -9,9 +9,9 @@ #ifndef lint static const char copyright[] = -"@(#) Copyright (c) 1997\n\ +"@(#) Copyright (c) 1996, 1997, 1998\n\ Sleepycat Software Inc. All rights reserved.\n"; -static const char sccsid[] = "@(#)db_recover.c 10.17 (Sleepycat) 1/15/98"; +static const char sccsid[] = "@(#)db_recover.c 10.19 (Sleepycat) 4/10/98"; #endif #ifndef NO_SYSTEM_INCLUDES @@ -29,7 +29,7 @@ static const char sccsid[] = "@(#)db_recover.c 10.17 (Sleepycat) 1/15/98"; #include "common_ext.h" #include "clib_ext.h" -DB_ENV *db_init __P((char *, int, int)); +DB_ENV *db_init __P((char *, u_int32_t, int)); int main __P((int, char *[])); void usage __P((void)); @@ -45,7 +45,8 @@ main(argc, argv) extern int optind; DB_ENV *dbenv; time_t now; - int ch, flags, verbose; + u_int32_t flags; + int ch, verbose; char *home; home = NULL; @@ -88,10 +89,11 @@ main(argc, argv) DB_ENV * db_init(home, flags, verbose) char *home; - int flags, verbose; + u_int32_t flags; + int verbose; { DB_ENV *dbenv; - int local_flags; + u_int32_t local_flags; if ((dbenv = (DB_ENV *)calloc(sizeof(DB_ENV), 1)) == NULL) { errno = ENOMEM; diff --git a/db2/progs/db_stat/db_stat.c b/db2/progs/db_stat/db_stat.c index 5295f011a6..f2551805b0 100644 --- a/db2/progs/db_stat/db_stat.c +++ b/db2/progs/db_stat/db_stat.c @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. 
* - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. */ @@ -9,9 +9,9 @@ #ifndef lint static const char copyright[] = -"@(#) Copyright (c) 1997\n\ +"@(#) Copyright (c) 1996, 1997, 1998\n\ Sleepycat Software Inc. All rights reserved.\n"; -static const char sccsid[] = "@(#)db_stat.c 8.30 (Sleepycat) 1/8/98"; +static const char sccsid[] = "@(#)db_stat.c 8.38 (Sleepycat) 5/30/98"; #endif #ifndef NO_SYSTEM_INCLUDES @@ -27,25 +27,35 @@ static const char sccsid[] = "@(#)db_stat.c 8.30 (Sleepycat) 1/8/98"; #endif #include "db_int.h" +#include "shqueue.h" +#include "db_shash.h" +#include "lock.h" +#include "mp.h" #include "clib_ext.h" #undef stat -typedef enum { T_NOTSET, T_DB, T_LOG, T_MPOOL, T_TXN } test_t; +typedef enum { T_NOTSET, T_DB, T_LOCK, T_LOG, T_MPOOL, T_TXN } test_t; +int argcheck __P((char *, const char *)); void btree_stats __P((DB *)); DB_ENV *db_init __P((char *, test_t)); +void dl __P((const char *, u_long)); void hash_stats __P((DB *)); -int main __P((int, char *[])); +int lock_ok __P((char *)); +void lock_stats __P((DB_ENV *)); void log_stats __P((DB_ENV *)); +int main __P((int, char *[])); +int mpool_ok __P((char *)); void mpool_stats __P((DB_ENV *)); void onint __P((int)); void prflags __P((u_int32_t, const FN *)); -void txn_stats __P((DB_ENV *)); int txn_compare __P((const void *, const void *)); +void txn_stats __P((DB_ENV *)); void usage __P((void)); int interrupted; +char *internal; const char *progname = "db_stat"; /* Program name. 
*/ @@ -64,8 +74,16 @@ main(argc, argv) ttype = T_NOTSET; db = home = NULL; - while ((ch = getopt(argc, argv, "d:h:lmt")) != EOF) + while ((ch = getopt(argc, argv, "C:cd:h:lM:mNt")) != EOF) switch (ch) { + case 'C': + ttype = T_LOCK; + if (!argcheck(internal = optarg, "Acflmo")) + usage(); + break; + case 'c': + ttype = T_LOCK; + break; case 'd': db = optarg; ttype = T_DB; @@ -76,9 +94,17 @@ main(argc, argv) case 'l': ttype = T_LOG; break; + case 'M': + ttype = T_MPOOL; + if (!argcheck(internal = optarg, "Ahlm")) + usage(); + break; case 'm': ttype = T_MPOOL; break; + case 'N': + (void)db_value_set(0, DB_MUTEXLOCKS); + break; case 't': ttype = T_TXN; break; @@ -115,6 +141,9 @@ main(argc, argv) } (void)dbp->close(dbp, 0); break; + case T_LOCK: + lock_stats(dbenv); + break; case T_LOG: log_stats(dbenv); break; @@ -168,13 +197,12 @@ btree_stats(dbp) prflags(sp->bt_flags, fn); if (dbp->type == DB_BTREE) { #ifdef NOT_IMPLEMENTED - printf("%lu\tMaximum keys per-page.\n", (u_long)sp->bt_maxkey); + dl("Maximum keys per-page.\n", (u_long)sp->bt_maxkey); #endif - printf("%lu\tMinimum keys per-page.\n", (u_long)sp->bt_minkey); + dl("Minimum keys per-page.\n", (u_long)sp->bt_minkey); } if (dbp->type == DB_RECNO) { - printf("%lu\tFixed-length record size.\n", - (u_long)sp->bt_re_len); + dl("Fixed-length record size.\n", (u_long)sp->bt_re_len); if (isprint(sp->bt_re_pad)) printf("%c\tFixed-length record pad.\n", (int)sp->bt_re_pad); @@ -182,43 +210,38 @@ btree_stats(dbp) printf("0x%x\tFixed-length record pad.\n", (int)sp->bt_re_pad); } - printf("%lu\tUnderlying tree page size.\n", (u_long)sp->bt_pagesize); - printf("%lu\tNumber of levels in the tree.\n", (u_long)sp->bt_levels); - printf("%lu\tNumber of keys in the tree.\n", (u_long)sp->bt_nrecs); - printf("%lu\tNumber of tree internal pages.\n", (u_long)sp->bt_int_pg); - printf("%lu\tNumber of tree leaf pages.\n", (u_long)sp->bt_leaf_pg); - printf("%lu\tNumber of tree duplicate pages.\n", - (u_long)sp->bt_dup_pg); - 
printf("%lu\tNumber of tree overflow pages.\n", - (u_long)sp->bt_over_pg); - printf("%lu\tNumber of pages on the free list.\n", - (u_long)sp->bt_free); - printf("%lu\tNumber of pages freed for reuse.\n", - (u_long)sp->bt_freed); - printf("%lu\tNumber of bytes free in tree internal pages (%.0f%% ff)\n", - (u_long)sp->bt_int_pgfree, - PCT(sp->bt_int_pgfree, sp->bt_int_pg)); - printf("%lu\tNumber of bytes free in tree leaf pages (%.0f%% ff).\n", - (u_long)sp->bt_leaf_pgfree, - PCT(sp->bt_leaf_pgfree, sp->bt_leaf_pg)); -printf("%lu\tNumber of bytes free in tree duplicate pages (%.0f%% ff).\n", - (u_long)sp->bt_dup_pgfree, - PCT(sp->bt_dup_pgfree, sp->bt_dup_pg)); -printf("%lu\tNumber of bytes free in tree overflow pages (%.0f%% ff).\n", - (u_long)sp->bt_over_pgfree, - PCT(sp->bt_over_pgfree, sp->bt_over_pg)); - printf("%lu\tNumber of bytes saved by prefix compression.\n", + dl("Underlying tree page size.\n", (u_long)sp->bt_pagesize); + dl("Number of levels in the tree.\n", (u_long)sp->bt_levels); + dl("Number of keys in the tree.\n", (u_long)sp->bt_nrecs); + dl("Number of tree internal pages.\n", (u_long)sp->bt_int_pg); + dl("Number of tree leaf pages.\n", (u_long)sp->bt_leaf_pg); + dl("Number of tree duplicate pages.\n", (u_long)sp->bt_dup_pg); + dl("Number of tree overflow pages.\n", (u_long)sp->bt_over_pg); + dl("Number of pages on the free list.\n", (u_long)sp->bt_free); + dl("Number of pages freed for reuse.\n", (u_long)sp->bt_freed); + dl("Number of bytes free in tree internal pages", + (u_long)sp->bt_int_pgfree); + printf(" (%.0f%% ff).\n", PCT(sp->bt_int_pgfree, sp->bt_int_pg)); + dl("Number of bytes free in tree leaf pages", + (u_long)sp->bt_leaf_pgfree); + printf(" (%.0f%% ff).\n", PCT(sp->bt_leaf_pgfree, sp->bt_leaf_pg)); + dl("Number of bytes free in tree duplicate pages", + (u_long)sp->bt_dup_pgfree); + printf(" (%.0f%% ff).\n", PCT(sp->bt_dup_pgfree, sp->bt_dup_pg)); + dl("Number of bytes free in tree overflow pages", + (u_long)sp->bt_over_pgfree); + 
printf(" (%.0f%% ff).\n", PCT(sp->bt_over_pgfree, sp->bt_over_pg)); + dl("Number of bytes saved by prefix compression.\n", (u_long)sp->bt_pfxsaved); - printf("%lu\tTotal number of tree page splits.\n", - (u_long)sp->bt_split); - printf("%lu\tNumber of root page splits.\n", (u_long)sp->bt_rootsplit); - printf("%lu\tNumber of fast splits.\n", (u_long)sp->bt_fastsplit); - printf("%lu\tNumber of hits in tree fast-insert code.\n", + dl("Total number of tree page splits.\n", (u_long)sp->bt_split); + dl("Number of root page splits.\n", (u_long)sp->bt_rootsplit); + dl("Number of fast splits.\n", (u_long)sp->bt_fastsplit); + dl("Number of hits in tree fast-insert code.\n", (u_long)sp->bt_cache_hit); - printf("%lu\tNumber of misses in tree fast-insert code.\n", + dl("Number of misses in tree fast-insert code.\n", (u_long)sp->bt_cache_miss); - printf("%lu\tNumber of keys added.\n", (u_long)sp->bt_added); - printf("%lu\tNumber of keys deleted.\n", (u_long)sp->bt_deleted); + dl("Number of keys added.\n", (u_long)sp->bt_added); + dl("Number of keys deleted.\n", (u_long)sp->bt_deleted); } /* @@ -231,10 +254,47 @@ hash_stats(dbp) { COMPQUIET(dbp, NULL); + printf("Hash statistics not currently available.\n"); return; } /* + * lock_stats -- + * Display lock statistics. 
+ */ +void +lock_stats(dbenv) + DB_ENV *dbenv; +{ + DB_LOCK_STAT *sp; + + if (internal != NULL) { + __lock_dump_region(dbenv->lk_info, internal, stdout); + return; + } + + if (lock_stat(dbenv->lk_info, &sp, NULL)) + err(1, NULL); + + printf("%#lx\tLock magic number.\n", (u_long)sp->st_magic); + printf("%lu\tLock version number.\n", (u_long)sp->st_version); + dl("Lock region reference count.\n", (u_long)sp->st_refcnt); + dl("Lock region size.\n", (u_long)sp->st_regsize); + dl("Maximum number of locks.\n", (u_long)sp->st_maxlocks); + dl("Number of lock modes.\n", (u_long)sp->st_nmodes); + dl("Number of lock objects.\n", (u_long)sp->st_numobjs); + dl("Number of lockers.\n", (u_long)sp->st_nlockers); + dl("Number of lock conflicts.\n", (u_long)sp->st_nconflicts); + dl("Number of lock requests.\n", (u_long)sp->st_nrequests); + dl("Number of lock releases.\n", (u_long)sp->st_nreleases); + dl("Number of deadlocks.\n", (u_long)sp->st_ndeadlocks); + dl("The number of region locks granted without waiting.\n", + (u_long)sp->st_region_nowait); + dl("The number of region locks granted after waiting.\n", + (u_long)sp->st_region_wait); +} + +/* * log_stats -- * Display log statistics. 
*/ @@ -249,6 +309,8 @@ log_stats(dbenv) printf("%#lx\tLog magic number.\n", (u_long)sp->st_magic); printf("%lu\tLog version number.\n", (u_long)sp->st_version); + dl("Log region reference count.\n", (u_long)sp->st_refcnt); + dl("Log region size.\n", (u_long)sp->st_regsize); printf("%#o\tLog file mode.\n", sp->st_mode); if (sp->st_lg_max % MEGABYTE == 0) printf("%luMb\tLog file size.\n", @@ -261,13 +323,13 @@ log_stats(dbenv) (u_long)sp->st_w_mbytes, (u_long)sp->st_w_bytes); printf("%luMb\tLog bytes written since last checkpoint (+%lu bytes).\n", (u_long)sp->st_wc_mbytes, (u_long)sp->st_wc_bytes); - printf("%lu\tTotal log file writes.\n", (u_long)sp->st_wcount); - printf("%lu\tTotal log file flushes.\n", (u_long)sp->st_scount); + dl("Total log file writes.\n", (u_long)sp->st_wcount); + dl("Total log file flushes.\n", (u_long)sp->st_scount); printf("%lu\tCurrent log file number.\n", (u_long)sp->st_cur_file); printf("%lu\tCurrent log file offset.\n", (u_long)sp->st_cur_offset); - printf("%lu\tThe number of region locks granted without waiting.\n", + dl("The number of region locks granted without waiting.\n", (u_long)sp->st_region_nowait); - printf("%lu\tThe number of region locks granted after waiting.\n", + dl("The number of region locks granted after waiting.\n", (u_long)sp->st_region_wait); } @@ -282,70 +344,74 @@ mpool_stats(dbenv) DB_MPOOL_FSTAT **fsp; DB_MPOOL_STAT *gsp; + if (internal != NULL) { + __memp_dump_region(dbenv->mp_info, internal, stdout); + return; + } + if (memp_stat(dbenv->mp_info, &gsp, &fsp, NULL)) err(1, NULL); - printf("%lu\tCache size (%luK).\n", - (u_long)gsp->st_cachesize, (u_long)gsp->st_cachesize / 1024); - printf("%lu\tRequested pages found in the cache", - (u_long)gsp->st_cache_hit); + dl("Pool region reference count.\n", (u_long)gsp->st_refcnt); + dl("Pool region size.\n", (u_long)gsp->st_regsize); + dl("Cache size", (u_long)gsp->st_cachesize); + printf(" (%luK).\n", (u_long)gsp->st_cachesize / 1024); + dl("Requested pages found in the 
cache", (u_long)gsp->st_cache_hit); if (gsp->st_cache_hit + gsp->st_cache_miss != 0) printf(" (%.0f%%)", ((double)gsp->st_cache_hit / (gsp->st_cache_hit + gsp->st_cache_miss)) * 100); printf(".\n"); - printf("%lu\tRequested pages mapped into the process' address space.\n", + dl("Requested pages mapped into the process' address space.\n", (u_long)gsp->st_map); - printf("%lu\tRequested pages not found in the cache.\n", + dl("Requested pages not found in the cache.\n", (u_long)gsp->st_cache_miss); - printf("%lu\tPages created in the cache.\n", - (u_long)gsp->st_page_create); - printf("%lu\tPages read into the cache.\n", (u_long)gsp->st_page_in); - printf("%lu\tPages written from the cache to the backing file.\n", + dl("Pages created in the cache.\n", (u_long)gsp->st_page_create); + dl("Pages read into the cache.\n", (u_long)gsp->st_page_in); + dl("Pages written from the cache to the backing file.\n", (u_long)gsp->st_page_out); - printf("%lu\tClean pages forced from the cache.\n", + dl("Clean pages forced from the cache.\n", (u_long)gsp->st_ro_evict); - printf("%lu\tDirty pages forced from the cache.\n", + dl("Dirty pages forced from the cache.\n", (u_long)gsp->st_rw_evict); - printf("%lu\tDirty buffers written by trickle-sync thread.\n", + dl("Dirty buffers written by trickle-sync thread.\n", (u_long)gsp->st_page_trickle); - printf("%lu\tCurrent clean buffer count.\n", + dl("Current clean buffer count.\n", (u_long)gsp->st_page_clean); - printf("%lu\tCurrent dirty buffer count.\n", + dl("Current dirty buffer count.\n", (u_long)gsp->st_page_dirty); - printf("%lu\tNumber of hash buckets used for page location.\n", + dl("Number of hash buckets used for page location.\n", (u_long)gsp->st_hash_buckets); - printf("%lu\tTotal number of times hash chains searched for a page.\n", + dl("Total number of times hash chains searched for a page.\n", (u_long)gsp->st_hash_searches); - printf("%lu\tThe longest hash chain searched for a page.\n", + dl("The longest hash chain searched for 
a page.\n", (u_long)gsp->st_hash_longest); - printf( - "%lu\tTotal number of hash buckets examined for page location.\n", + dl("Total number of hash buckets examined for page location.\n", (u_long)gsp->st_hash_examined); - printf("%lu\tThe number of region locks granted without waiting.\n", + dl("The number of region locks granted without waiting.\n", (u_long)gsp->st_region_nowait); - printf("%lu\tThe number of region locks granted after waiting.\n", + dl("The number of region locks granted after waiting.\n", (u_long)gsp->st_region_wait); for (; fsp != NULL && *fsp != NULL; ++fsp) { printf("%s\n", DB_LINE); printf("%s\n", (*fsp)->file_name); - printf("%lu\tPage size.\n", (u_long)(*fsp)->st_pagesize); - printf("%lu\tRequested pages found in the cache", + dl("Page size.\n", (u_long)(*fsp)->st_pagesize); + dl("Requested pages found in the cache", (u_long)(*fsp)->st_cache_hit); if ((*fsp)->st_cache_hit + (*fsp)->st_cache_miss != 0) printf(" (%.0f%%)", ((double)(*fsp)->st_cache_hit / ((*fsp)->st_cache_hit + (*fsp)->st_cache_miss)) * 100); printf(".\n"); - printf("%lu\tRequested pages mapped into the process' address space.\n", + dl("Requested pages mapped into the process' address space.\n", (u_long)(*fsp)->st_map); - printf("%lu\tRequested pages not found in the cache.\n", + dl("Requested pages not found in the cache.\n", (u_long)(*fsp)->st_cache_miss); - printf("%lu\tPages created in the cache.\n", + dl("Pages created in the cache.\n", (u_long)(*fsp)->st_page_create); - printf("%lu\tPages read into the cache.\n", + dl("Pages read into the cache.\n", (u_long)(*fsp)->st_page_in); - printf("%lu\tPages written from the cache to the backing file.\n", + dl("Pages written from the cache to the backing file.\n", (u_long)(*fsp)->st_page_out); } } @@ -358,46 +424,48 @@ void txn_stats(dbenv) DB_ENV *dbenv; { - DB_TXN_STAT *tstat; + DB_TXN_STAT *sp; u_int32_t i; const char *p; - if (txn_stat(dbenv->tx_info, &tstat, NULL)) + if (txn_stat(dbenv->tx_info, &sp, NULL)) err(1, NULL); - 
p = tstat->st_last_ckp.file == 0 ? + dl("Txn region reference count.\n", (u_long)sp->st_refcnt); + dl("Txn region size.\n", (u_long)sp->st_regsize); + p = sp->st_last_ckp.file == 0 ? "No checkpoint LSN." : "File/offset for last checkpoint LSN."; - printf("%lu/%lu\t%s\n", (u_long)tstat->st_last_ckp.file, - (u_long)tstat->st_last_ckp.offset, p); - p = tstat->st_pending_ckp.file == 0 ? + printf("%lu/%lu\t%s\n", + (u_long)sp->st_last_ckp.file, (u_long)sp->st_last_ckp.offset, p); + p = sp->st_pending_ckp.file == 0 ? "No pending checkpoint LSN." : "File/offset for last pending checkpoint LSN."; printf("%lu/%lu\t%s\n", - (u_long)tstat->st_pending_ckp.file, - (u_long)tstat->st_pending_ckp.offset, p); - if (tstat->st_time_ckp == 0) + (u_long)sp->st_pending_ckp.file, + (u_long)sp->st_pending_ckp.offset, p); + if (sp->st_time_ckp == 0) printf("0\tNo checkpoint timestamp.\n"); else printf("%.24s\tCheckpoint timestamp.\n", - ctime(&tstat->st_time_ckp)); + ctime(&sp->st_time_ckp)); printf("%lx\tLast transaction ID allocated.\n", - (u_long)tstat->st_last_txnid); - printf("%lu\tMaximum number of active transactions.\n", - (u_long)tstat->st_maxtxns); - printf("%lu\tNumber of transactions begun.\n", - (u_long)tstat->st_nbegins); - printf("%lu\tNumber of transactions aborted.\n", - (u_long)tstat->st_naborts); - printf("%lu\tNumber of transactions committed.\n", - (u_long)tstat->st_ncommits); - printf("%lu\tActive transactions.\n", (u_long)tstat->st_nactive); - qsort(tstat->st_txnarray, - tstat->st_nactive, sizeof(tstat->st_txnarray[0]), txn_compare); - for (i = 0; i < tstat->st_nactive; ++i) + (u_long)sp->st_last_txnid); + dl("Maximum number of active transactions.\n", (u_long)sp->st_maxtxns); + dl("Number of transactions begun.\n", (u_long)sp->st_nbegins); + dl("Number of transactions aborted.\n", (u_long)sp->st_naborts); + dl("Number of transactions committed.\n", (u_long)sp->st_ncommits); + dl("The number of region locks granted without waiting.\n", + 
(u_long)sp->st_region_nowait); + dl("The number of region locks granted after waiting.\n", + (u_long)sp->st_region_wait); + dl("Active transactions.\n", (u_long)sp->st_nactive); + qsort(sp->st_txnarray, + sp->st_nactive, sizeof(sp->st_txnarray[0]), txn_compare); + for (i = 0; i < sp->st_nactive; ++i) printf("\tid: %lx; initial LSN file/offest %lu/%lu\n", - (u_long)tstat->st_txnarray[i].txnid, - (u_long)tstat->st_txnarray[i].lsn.file, - (u_long)tstat->st_txnarray[i].lsn.offset); + (u_long)sp->st_txnarray[i].txnid, + (u_long)sp->st_txnarray[i].lsn.file, + (u_long)sp->st_txnarray[i].lsn.offset); } int @@ -417,25 +485,41 @@ txn_compare(a1, b1) } /* + * dl -- + * Display a big value. + */ +void +dl(msg, value) + const char *msg; + u_long value; +{ + /* + * Two formats: if less than 10 million, display as the number, if + * greater than 10 million display as ###M. + */ + if (value < 10000000) + printf("%lu\t%s", value, msg); + else + printf("%luM\t%s", value / 1000000, msg); +} + +/* * prflags -- * Print out flag values. */ void -prflags(flags, fn) +prflags(flags, fnp) u_int32_t flags; - FN const *fn; -{ const FN *fnp; - int found; +{ const char *sep; sep = " "; printf("Flags:"); - for (found = 0, fnp = fn; fnp->mask != 0; ++fnp) + for (; fnp->mask != 0; ++fnp) if (fnp->mask & flags) { printf("%s%s", sep, fnp->name); sep = ", "; - found = 1; } printf("\n"); } @@ -450,7 +534,7 @@ db_init(home, ttype) test_t ttype; { DB_ENV *dbenv; - int flags; + u_int32_t flags; if ((dbenv = (DB_ENV *)malloc(sizeof(DB_ENV))) == NULL) { errno = ENOMEM; @@ -467,13 +551,16 @@ db_init(home, ttype) switch (ttype) { case T_DB: case T_MPOOL: - flags |= DB_INIT_MPOOL; + LF_SET(DB_INIT_MPOOL); + break; + case T_LOCK: + LF_SET(DB_INIT_LOCK); break; case T_LOG: - flags |= DB_INIT_LOG; + LF_SET(DB_INIT_LOG); break; case T_TXN: - flags |= DB_INIT_TXN; + LF_SET(DB_INIT_TXN); break; case T_NOTSET: abort(); @@ -493,7 +580,7 @@ db_init(home, ttype) /* Turn off the DB_INIT_MPOOL flag if it's a database. 
*/ if (ttype == T_DB) - flags &= ~DB_INIT_MPOOL; + LF_CLR(DB_INIT_MPOOL); /* Set the error output options -- this time we want a message. */ memset(dbenv, 0, sizeof(*dbenv)); @@ -508,6 +595,21 @@ db_init(home, ttype) } /* + * argcheck -- + * Return if argument flags are okay. + */ +int +argcheck(arg, ok_args) + char *arg; + const char *ok_args; +{ + for (; *arg != '\0'; ++arg) + if (strchr(ok_args, *arg) == NULL) + return (0); + return (1); +} + +/* * oninit -- * Interrupt signal handler. */ @@ -523,6 +625,7 @@ onint(signo) void usage() { - fprintf(stderr, "usage: db_stat [-mlt] [-d file] [-h home]\n"); + fprintf(stderr, + "usage: db_stat [-clmNt] [-C Acflmo] [-d file] [-h home] [-M Ahlm]\n"); exit (1); } diff --git a/db2/txn/txn.c b/db2/txn/txn.c index 2a2e3da97b..4f3ffd8ed2 100644 --- a/db2/txn/txn.c +++ b/db2/txn/txn.c @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. 
*/ /* @@ -43,27 +43,20 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)txn.c 10.39 (Sleepycat) 1/8/98"; +static const char sccsid[] = "@(#)txn.c 10.58 (Sleepycat) 5/31/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES #include <sys/types.h> -#include <sys/mman.h> -#include <sys/stat.h> #include <errno.h> -#include <fcntl.h> -#include <stddef.h> -#include <stdio.h> -#include <stdlib.h> #include <string.h> #include <time.h> -#include <unistd.h> #endif -#include "shqueue.h" #include "db_int.h" +#include "shqueue.h" #include "db_page.h" #include "db_shash.h" #include "txn.h" @@ -74,9 +67,9 @@ static const char sccsid[] = "@(#)txn.c 10.39 (Sleepycat) 1/8/98"; #include "common_ext.h" static int __txn_check_running __P((const DB_TXN *)); -static int __txn_create __P((DB_ENV *, const char *, u_int)); static int __txn_end __P((DB_TXN *, int)); static int __txn_grow_region __P((DB_TXNMGR *)); +static int __txn_init __P((DB_TXNREGION *)); static int __txn_undo __P((DB_TXN *)); static int __txn_validate_region __P((DB_TXNMGR *)); @@ -85,30 +78,20 @@ static int __txn_validate_region __P((DB_TXNMGR *)); * It assumes that a lock manager and log manager that conform to the db_log(3) * and db_lock(3) interfaces exist. * - * Create and initialize a transaction region in shared memory. + * Initialize a transaction region in shared memory. * Return 0 on success, errno on failure. */ static int -__txn_create(dbenv, path, mode) - DB_ENV *dbenv; - const char *path; - u_int mode; -{ +__txn_init(txn_region) DB_TXNREGION *txn_region; +{ time_t now; - int fd, maxtxns, ret; - maxtxns = dbenv->tx_max != 0 ? dbenv->tx_max : 1000; (void)time(&now); - /* Region may have existed. If it didn't, the open will fail. */ - if ((ret = __db_rcreate(dbenv, DB_APP_NONE, path, DEFAULT_TXN_FILE, - mode, TXN_REGION_SIZE(maxtxns), 0, &fd, &txn_region)) != 0) - return (ret); - + /* maxtxns is already initialized. 
*/ txn_region->magic = DB_TXNMAGIC; txn_region->version = DB_TXNVERSION; - txn_region->maxtxns = maxtxns; txn_region->last_txnid = TXN_MINIMUM; /* XXX If we ever do more types of locking and logging, this changes. */ txn_region->logtype = 0; @@ -118,33 +101,22 @@ __txn_create(dbenv, path, mode) ZERO_LSN(txn_region->pending_ckp); SH_TAILQ_INIT(&txn_region->active_txn); __db_shalloc_init((void *)&txn_region[1], - TXN_REGION_SIZE(maxtxns) - sizeof(DB_TXNREGION)); - - /* Unlock the region. */ - (void)__db_mutex_unlock(&txn_region->hdr.lock, fd); + TXN_REGION_SIZE(txn_region->maxtxns) - sizeof(DB_TXNREGION)); - /* Now unmap and close the region. */ - if ((ret = __db_rclose(dbenv, fd, txn_region)) != 0) { - (void)txn_unlink(path, 1 /* force */, dbenv); - return (ret); - } return (0); } int txn_open(path, flags, mode, dbenv, mgrpp) const char *path; - int flags, mode; + u_int32_t flags; + int mode; DB_ENV *dbenv; DB_TXNMGR **mgrpp; { DB_TXNMGR *tmgrp; - DB_TXNREGION *txn_regionp; - int fd, ret, retry_cnt; - - tmgrp = NULL; - txn_regionp = NULL; - fd = -1; + u_int32_t maxtxns; + int ret; /* Validate arguments. */ if (dbenv == NULL) @@ -157,52 +129,57 @@ txn_open(path, flags, mode, dbenv, mgrpp) if ((ret = __db_fchk(dbenv, "txn_open", flags, OKFLAGS)) != 0) return (ret); - retry_cnt = 0; -retry: if (LF_ISSET(DB_CREATE) && (ret = __txn_create(dbenv, path, mode)) != 0) - if (ret == EAGAIN && ++retry_cnt < 0) { - (void)__db_sleep(1, 0); - goto retry; - } else /* We did not really create the region */ - flags &= ~DB_CREATE; - - retry_cnt = 0; -retry1: if ((ret = __db_ropen(dbenv, DB_APP_NONE, path, DEFAULT_TXN_FILE, - flags & ~(DB_CREATE | DB_THREAD | DB_TXN_NOSYNC), - &fd, &txn_regionp)) != 0) { - if (ret == EAGAIN && ++retry_cnt < 3) { - (void)__db_sleep(1, 0); - goto retry1; - } - goto out; - } - - - /* Check if valid region. 
*/ - if (txn_regionp->magic != DB_TXNMAGIC) { - __db_err(dbenv, "txn_open: Bad magic number"); - ret = EINVAL; - goto out; - } + maxtxns = dbenv->tx_max != 0 ? dbenv->tx_max : 20; /* Now, create the transaction manager structure and set its fields. */ - if ((tmgrp = (DB_TXNMGR *)__db_malloc(sizeof(DB_TXNMGR))) == NULL) { + if ((tmgrp = (DB_TXNMGR *)__db_calloc(1, sizeof(DB_TXNMGR))) == NULL) { __db_err(dbenv, "txn_open: %s", strerror(ENOMEM)); - ret = ENOMEM; - goto out; + return (ENOMEM); } + /* Initialize the transaction manager structure. */ + tmgrp->mutexp = NULL; tmgrp->dbenv = dbenv; tmgrp->recover = dbenv->tx_recover == NULL ? __db_dispatch : dbenv->tx_recover; - tmgrp->region = txn_regionp; - tmgrp->reg_size = txn_regionp->hdr.size; - tmgrp->fd = fd; tmgrp->flags = LF_ISSET(DB_TXN_NOSYNC | DB_THREAD); - tmgrp->mem = &txn_regionp[1]; - tmgrp->mutexp = NULL; TAILQ_INIT(&tmgrp->txn_chain); + + /* Join/create the txn region. */ + tmgrp->reginfo.dbenv = dbenv; + tmgrp->reginfo.appname = DB_APP_NONE; + if (path == NULL) + tmgrp->reginfo.path = NULL; + else + if ((tmgrp->reginfo.path = (char *)__db_strdup(path)) == NULL) + goto err; + tmgrp->reginfo.file = DEFAULT_TXN_FILE; + tmgrp->reginfo.mode = mode; + tmgrp->reginfo.size = TXN_REGION_SIZE(maxtxns); + tmgrp->reginfo.dbflags = flags; + tmgrp->reginfo.addr = NULL; + tmgrp->reginfo.fd = -1; + tmgrp->reginfo.flags = dbenv->tx_max == 0 ? REGION_SIZEDEF : 0; + if ((ret = __db_rattach(&tmgrp->reginfo)) != 0) + goto err; + + /* Fill in region-related fields. */ + tmgrp->region = tmgrp->reginfo.addr; + tmgrp->mem = &tmgrp->region[1]; + + if (F_ISSET(&tmgrp->reginfo, REGION_CREATED)) { + tmgrp->region->maxtxns = maxtxns; + if ((ret = __txn_init(tmgrp->region)) != 0) + goto err; + + } else if (tmgrp->region->magic != DB_TXNMAGIC) { + /* Check if valid region. 
*/ + __db_err(dbenv, "txn_open: Bad magic number"); + ret = EINVAL; + goto err; + } + if (LF_ISSET(DB_THREAD)) { - LOCK_TXNREGION(tmgrp); if ((ret = __db_shalloc(tmgrp->mem, sizeof(db_mutex_t), MUTEX_ALIGNMENT, &tmgrp->mutexp)) == 0) /* @@ -211,25 +188,27 @@ retry1: if ((ret = __db_ropen(dbenv, DB_APP_NONE, path, DEFAULT_TXN_FILE, * to be ignored. We put 0 here as a valid placeholder. */ __db_mutex_init(tmgrp->mutexp, 0); - UNLOCK_TXNREGION(tmgrp); if (ret != 0) - goto out; + goto err; } + + UNLOCK_TXNREGION(tmgrp); *mgrpp = tmgrp; return (0); -out: if (txn_regionp != NULL) - (void)__db_rclose(dbenv, fd, txn_regionp); - if (flags & DB_CREATE) - (void)txn_unlink(path, 1, dbenv); - if (tmgrp != NULL) { - if (tmgrp->mutexp != NULL) { - LOCK_TXNREGION(tmgrp); +err: if (tmgrp->reginfo.addr != NULL) { + if (tmgrp->mutexp != NULL) __db_shalloc_free(tmgrp->mem, tmgrp->mutexp); - UNLOCK_TXNREGION(tmgrp); - } - __db_free(tmgrp); + + UNLOCK_TXNREGION(tmgrp); + (void)__db_rdetach(&tmgrp->reginfo); + if (F_ISSET(&tmgrp->reginfo, REGION_CREATED)) + (void)txn_unlink(path, 1, dbenv); } + + if (tmgrp->reginfo.path != NULL) + FREES(tmgrp->reginfo.path); + FREE(tmgrp, sizeof(*tmgrp)); return (ret); } @@ -244,77 +223,83 @@ txn_begin(tmgrp, parent, txnpp) DB_TXN *parent; DB_TXN **txnpp; { - TXN_DETAIL *txnp; + DB_LSN begin_lsn; DB_TXN *retp; - int id, ret; + TXN_DETAIL *txnp; + size_t off; + u_int32_t id; + int ret; + + txnp = NULL; + *txnpp = NULL; + + if ((retp = (DB_TXN *)__db_malloc(sizeof(DB_TXN))) == NULL) { + __db_err(tmgrp->dbenv, "txn_begin : %s", strerror(ENOMEM)); + return (ENOMEM); + } + + /* + * We do not have to write begin records (and if we do not, then we + * need never write records for read-only transactions). However, + * we do need to find the current LSN so that we can store it in the + * transaction structure, so we can know where to take checkpoints. 
+ */ + if (tmgrp->dbenv->lg_info != NULL && (ret = + log_put(tmgrp->dbenv->lg_info, &begin_lsn, NULL, DB_CURLSN)) != 0) + goto err2; LOCK_TXNREGION(tmgrp); + /* Make sure that last_txnid is not going to wrap around. */ + if (tmgrp->region->last_txnid == TXN_INVALID) { + __db_err(tmgrp->dbenv, "txn_begin: %s %s", + "Transaction ID wrapping.", + "Snapshot your database and start a new log."); + ret = EINVAL; + goto err1; + } + if ((ret = __txn_validate_region(tmgrp)) != 0) - goto err; + goto err1; /* Allocate a new transaction detail structure. */ if ((ret = __db_shalloc(tmgrp->mem, sizeof(TXN_DETAIL), 0, &txnp)) != 0 && ret == ENOMEM && (ret = __txn_grow_region(tmgrp)) == 0) ret = __db_shalloc(tmgrp->mem, sizeof(TXN_DETAIL), 0, &txnp); - if (ret != 0) - goto err; - - /* Make sure that last_txnid is not going to wrap around. */ - if (tmgrp->region->last_txnid == TXN_INVALID) - return (EINVAL); - - if ((retp = (DB_TXN *)__db_malloc(sizeof(DB_TXN))) == NULL) { - __db_err(tmgrp->dbenv, "txn_begin : %s", strerror(ENOMEM)); - ret = ENOMEM; goto err1; - } + + /* Place transaction on active transaction list. */ + SH_TAILQ_INSERT_HEAD(&tmgrp->region->active_txn, + txnp, links, __txn_detail); id = ++tmgrp->region->last_txnid; tmgrp->region->nbegins++; txnp->txnid = id; + txnp->begin_lsn = begin_lsn; ZERO_LSN(txnp->last_lsn); - ZERO_LSN(txnp->begin_lsn); txnp->last_lock = 0; txnp->status = TXN_RUNNING; - SH_TAILQ_INSERT_HEAD(&tmgrp->region->active_txn, - txnp, links, __txn_detail); - + off = (u_int8_t *)txnp - (u_int8_t *)tmgrp->region; UNLOCK_TXNREGION(tmgrp); ZERO_LSN(retp->last_lsn); retp->txnid = id; retp->parent = parent; - retp->off = (u_int8_t *)txnp - (u_int8_t *)tmgrp->region; retp->mgrp = tmgrp; - - if (tmgrp->dbenv->lg_info != NULL && - (ret = __txn_regop_log(tmgrp->dbenv->lg_info, - retp, &txnp->begin_lsn, 0, TXN_BEGIN)) != 0) { - - /* Deallocate transaction. 
*/ - LOCK_TXNREGION(tmgrp); - SH_TAILQ_REMOVE(&tmgrp->region->active_txn, - txnp, links, __txn_detail); - __db_shalloc_free(tmgrp->mem, txnp); - UNLOCK_TXNREGION(tmgrp); - __db_free(retp); - return (ret); - } + retp->off = off; LOCK_TXNTHREAD(tmgrp); TAILQ_INSERT_TAIL(&tmgrp->txn_chain, retp, links); UNLOCK_TXNTHREAD(tmgrp); - *txnpp = retp; + *txnpp = retp; return (0); -err1: - __db_shalloc_free(tmgrp->mem, txnp); -err: - UNLOCK_TXNREGION(tmgrp); +err1: UNLOCK_TXNREGION(tmgrp); + +err2: __db_free(retp); return (ret); } @@ -332,12 +317,15 @@ txn_commit(txnp) if ((ret = __txn_check_running(txnp)) != 0) return (ret); - /* Sync the log. */ + /* + * If there are any log records, write a log record and sync + * the log, else do no log writes. + */ if ((logp = txnp->mgrp->dbenv->lg_info) != NULL && - (ret = __txn_regop_log(logp, - txnp, &txnp->last_lsn, - F_ISSET(txnp->mgrp, DB_TXN_NOSYNC) ? 0 : DB_FLUSH, TXN_COMMIT)) - != 0) + !IS_ZERO_LSN(txnp->last_lsn) && + (ret = __txn_regop_log(logp, txnp, &txnp->last_lsn, + F_ISSET(txnp->mgrp, DB_TXN_NOSYNC) ? 0 : DB_FLUSH, + TXN_COMMIT)) != 0) return (ret); return (__txn_end(txnp, 1)); @@ -371,8 +359,8 @@ int txn_prepare(txnp) DB_TXN *txnp; { - int ret; TXN_DETAIL *tp; + int ret; if ((ret = __txn_check_running(txnp)) != 0) return (ret); @@ -414,21 +402,23 @@ txn_close(tmgrp) DB_TXN *txnp; int ret, t_ret; + ret = 0; + /* * This function had better only be called once per process * (i.e., not per thread), so there should be no synchronization * required. 
*/ - for (ret = 0, txnp = TAILQ_FIRST(&tmgrp->txn_chain); - txnp != TAILQ_END(&tmgrp->txn_chain); - txnp = TAILQ_FIRST(&tmgrp->txn_chain)) { - if ((t_ret = txn_abort(txnp)) != 0 && ret == 0) - ret = t_ret; - } + while ((txnp = + TAILQ_FIRST(&tmgrp->txn_chain)) != TAILQ_END(&tmgrp->txn_chain)) + if ((t_ret = txn_abort(txnp)) != 0) { + __txn_end(txnp, 0); + if (ret == 0) + ret = t_ret; + } - if (tmgrp->dbenv->lg_info && (t_ret = - log_flush(tmgrp->dbenv->lg_info, NULL)) != 0 && - ret == 0) + if (tmgrp->dbenv->lg_info && + (t_ret = log_flush(tmgrp->dbenv->lg_info, NULL)) != 0 && ret == 0) ret = t_ret; if (tmgrp->mutexp != NULL) { @@ -437,12 +427,12 @@ txn_close(tmgrp) UNLOCK_TXNREGION(tmgrp); } - if ((t_ret = __db_rclose(tmgrp->dbenv, tmgrp->fd, tmgrp->region)) != 0 - && ret == 0) + if ((t_ret = __db_rdetach(&tmgrp->reginfo)) != 0 && ret == 0) ret = t_ret; - if (ret == 0) - __db_free(tmgrp); + if (tmgrp->reginfo.path != NULL) + FREES(tmgrp->reginfo.path); + FREE(tmgrp, sizeof(*tmgrp)); return (ret); } @@ -457,8 +447,19 @@ txn_unlink(path, force, dbenv) int force; DB_ENV *dbenv; { - return (__db_runlink(dbenv, - DB_APP_NONE, path, DEFAULT_TXN_FILE, force)); + REGINFO reginfo; + int ret; + + memset(&reginfo, 0, sizeof(reginfo)); + reginfo.dbenv = dbenv; + reginfo.appname = DB_APP_NONE; + if (path != NULL && (reginfo.path = (char *)__db_strdup(path)) == NULL) + return (ENOMEM); + reginfo.file = DEFAULT_TXN_FILE; + ret = __db_runlink(&reginfo, force); + if (reginfo.path != NULL) + FREES(reginfo.path); + return (ret); } /* Internal routines. 
*/ @@ -540,10 +541,10 @@ static int __txn_undo(txnp) DB_TXN *txnp; { - DB_TXNMGR *mgr; - DB_LOG *logp; DBT rdbt; + DB_LOG *logp; DB_LSN key_lsn; + DB_TXNMGR *mgr; int ret; mgr = txnp->mgrp; @@ -594,7 +595,7 @@ __txn_undo(txnp) int txn_checkpoint(mgr, kbytes, minutes) const DB_TXNMGR *mgr; - int kbytes, minutes; + u_int32_t kbytes, minutes; { TXN_DETAIL *txnp; DB_LSN ckp_lsn, last_ckp; @@ -603,10 +604,6 @@ txn_checkpoint(mgr, kbytes, minutes) time_t last_ckp_time, now; int ret; - /* Check usage. */ - if (kbytes < 0 || minutes < 0) - return (EINVAL); - /* * Check if we need to run recovery. */ @@ -678,8 +675,8 @@ do_ckp: if (mgr->dbenv->mp_info != NULL && (ret = memp_sync(mgr->dbenv->mp_info, &ckp_lsn)) != 0) { /* - * ret < 0 means that there are still buffers to flush; - * the checkpoint is not complete. Back off and try again. + * ret == DB_INCOMPLETE means that there are still buffers to + * flush, the checkpoint is not complete. Wait and try again. */ if (ret > 0) __db_err(mgr->dbenv, @@ -711,9 +708,9 @@ do_ckp: } /* - * This is called at every interface to verify if the region - * has changed size, and if so, to remap the region in and - * reset the process pointers. + * __txn_validate_region -- + * Called at every interface to verify if the region has changed size, + * and if so, to remap the region in and reset the process' pointers. */ static int __txn_validate_region(tp) @@ -721,15 +718,15 @@ __txn_validate_region(tp) { int ret; - if (tp->reg_size == tp->region->hdr.size) + if (tp->reginfo.size == tp->region->hdr.size) return (0); - /* Grow the region. */ - if ((ret = __db_rremap(tp->dbenv, tp->region, - tp->reg_size, tp->region->hdr.size, tp->fd, &tp->region)) != 0) + /* Detach/reattach the region. */ + if ((ret = __db_rreattach(&tp->reginfo, tp->region->hdr.size)) != 0) return (ret); - tp->reg_size = tp->region->hdr.size; + /* Reset region information. 
*/ + tp->region = tp->reginfo.addr; tp->mem = &tp->region[1]; return (0); @@ -739,27 +736,26 @@ static int __txn_grow_region(tp) DB_TXNMGR *tp; { - size_t incr; + size_t incr, oldsize; u_int32_t mutex_offset, oldmax; u_int8_t *curaddr; int ret; oldmax = tp->region->maxtxns; incr = oldmax * sizeof(DB_TXN); - mutex_offset = (u_int8_t *)tp->mutexp - (u_int8_t *)tp->region; + mutex_offset = tp->mutexp != NULL ? + (u_int8_t *)tp->mutexp - (u_int8_t *)tp->region : 0; - if ((ret = __db_rgrow(tp->dbenv, tp->fd, incr)) != 0) - return (ret); - - if ((ret = __db_rremap(tp->dbenv, tp->region, - tp->reg_size, tp->reg_size + incr, tp->fd, &tp->region)) != 0) + oldsize = tp->reginfo.size; + if ((ret = __db_rgrow(&tp->reginfo, oldsize + incr)) != 0) return (ret); + tp->region = tp->reginfo.addr; /* Throw the new space on the free list. */ - curaddr = (u_int8_t *)tp->region + tp->reg_size; + curaddr = (u_int8_t *)tp->region + oldsize; tp->mem = &tp->region[1]; - tp->reg_size += incr; - tp->mutexp = (db_mutex_t *)((u_int8_t *)tp->region + mutex_offset); + tp->mutexp = mutex_offset != 0 ? + (db_mutex_t *)((u_int8_t *)tp->region + mutex_offset) : NULL; *((size_t *)curaddr) = incr - sizeof(size_t); curaddr += sizeof(size_t); @@ -826,6 +822,11 @@ txn_stat(mgr, statp, db_malloc) break; } + stats->st_region_wait = mgr->region->hdr.lock.mutex_set_wait; + stats->st_region_nowait = mgr->region->hdr.lock.mutex_set_nowait; + stats->st_refcnt = mgr->region->hdr.refcnt; + stats->st_regsize = mgr->region->hdr.size; + UNLOCK_TXNREGION(mgr); *statp = stats; return (0); diff --git a/db2/txn/txn.src b/db2/txn/txn.src index 40bb63ecb6..04809b69d6 100644 --- a/db2/txn/txn.src +++ b/db2/txn/txn.src @@ -1,14 +1,12 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. 
* - * @(#)txn.src 10.1 (Sleepycat) 4/12/97 - * - * This is the source file used to create the logging functions for the - * transaction system. + * @(#)txn.src 10.3 (Sleepycat) 4/10/98 */ + PREFIX txn /* diff --git a/db2/txn/txn_auto.c b/db2/txn/txn_auto.c index 38627466a8..f03a52991f 100644 --- a/db2/txn/txn_auto.c +++ b/db2/txn/txn_auto.c @@ -15,8 +15,6 @@ #include "db_dispatch.h" #include "txn.h" #include "db_am.h" -#include "common_ext.h" - /* * PUBLIC: int __txn_regop_log * PUBLIC: __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t, @@ -58,7 +56,7 @@ int __txn_regop_log(logp, txnid, ret_lsnp, flags, bp += sizeof(DB_LSN); memcpy(bp, &opcode, sizeof(opcode)); bp += sizeof(opcode); -#ifdef DEBUG +#ifdef DIAGNOSTIC if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size) fprintf(stderr, "Error in log record length"); #endif @@ -74,22 +72,23 @@ int __txn_regop_log(logp, txnid, ret_lsnp, flags, * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); */ int -__txn_regop_print(notused1, dbtp, lsnp, notused3, notused4) +__txn_regop_print(notused1, dbtp, lsnp, notused2, notused3) DB_LOG *notused1; DBT *dbtp; DB_LSN *lsnp; - int notused3; - void *notused4; + int notused2; + void *notused3; { __txn_regop_args *argp; u_int32_t i; - int c, ret; + u_int ch; + int ret; i = 0; - c = 0; + ch = 0; notused1 = NULL; - notused3 = 0; - notused4 = NULL; + notused2 = 0; + notused3 = NULL; if ((ret = __txn_regop_read(dbtp->data, &argp)) != 0) return (ret); @@ -186,7 +185,7 @@ int __txn_ckp_log(logp, txnid, ret_lsnp, flags, else memset(bp, 0, sizeof(*last_ckp)); bp += sizeof(*last_ckp); -#ifdef DEBUG +#ifdef DIAGNOSTIC if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size) fprintf(stderr, "Error in log record length"); #endif @@ -202,22 +201,23 @@ int __txn_ckp_log(logp, txnid, ret_lsnp, flags, * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); */ int -__txn_ckp_print(notused1, dbtp, lsnp, notused3, notused4) +__txn_ckp_print(notused1, dbtp, lsnp, notused2, notused3) DB_LOG 
*notused1; DBT *dbtp; DB_LSN *lsnp; - int notused3; - void *notused4; + int notused2; + void *notused3; { __txn_ckp_args *argp; u_int32_t i; - int c, ret; + u_int ch; + int ret; i = 0; - c = 0; + ch = 0; notused1 = NULL; - notused3 = 0; - notused4 = NULL; + notused2 = 0; + notused3 = NULL; if ((ret = __txn_ckp_read(dbtp->data, &argp)) != 0) return (ret); diff --git a/db2/txn/txn_rec.c b/db2/txn/txn_rec.c index 679cffb567..e53dc5f3b7 100644 --- a/db2/txn/txn_rec.c +++ b/db2/txn/txn_rec.c @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. */ /* @@ -40,24 +40,20 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)txn_rec.c 10.6 (Sleepycat) 10/25/97"; +static const char sccsid[] = "@(#)txn_rec.c 10.11 (Sleepycat) 5/3/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES #include <sys/types.h> -#include <stddef.h> -#include <stdlib.h> -#include <string.h> +#include <errno.h> #endif #include "db_int.h" #include "db_page.h" #include "shqueue.h" #include "txn.h" -#include "db_dispatch.h" #include "db_am.h" -#include "common_ext.h" /* * PUBLIC: int __txn_regop_recover @@ -69,7 +65,7 @@ __txn_regop_recover(logp, dbtp, lsnp, redo, info) DBT *dbtp; DB_LSN *lsnp; int redo; - void *info; + void *info; { __txn_regop_args *argp; int ret; @@ -77,8 +73,8 @@ __txn_regop_recover(logp, dbtp, lsnp, redo, info) #ifdef DEBUG_RECOVER (void)__txn_regop_print(logp, dbtp, lsnp, redo, info); #endif - logp = logp; /* XXX: Shut the compiler up. */ - redo = redo; + COMPQUIET(redo, 0); + COMPQUIET(logp, NULL); if ((ret = __txn_regop_read(dbtp->data, &argp)) != 0) return (ret); @@ -90,10 +86,12 @@ __txn_regop_recover(logp, dbtp, lsnp, redo, info) __db_txnlist_add(info, argp->txnid->txnid); break; case TXN_PREPARE: /* Nothing to do. */ - case TXN_BEGIN: - /* Call find so that we update the maxid. 
*/ + /* Call __db_txnlist_find so that we update the maxid. */ (void)__db_txnlist_find(info, argp->txnid->txnid); break; + default: + ret = EINVAL; + break; } *lsnp = argp->prev_lsn; @@ -118,13 +116,20 @@ __txn_ckp_recover(logp, dbtp, lsnp, redo, info) #ifdef DEBUG_RECOVER __txn_ckp_print(logp, dbtp, lsnp, redo, info); #endif - logp = logp; /* XXX: Shut the compiler up. */ - redo = redo; - info = info; + COMPQUIET(logp, NULL); if ((ret = __txn_ckp_read(dbtp->data, &argp)) != 0) return (ret); + /* + * Check for 'restart' checkpoint record. This occurs when the + * checkpoint lsn is equal to the lsn of the checkpoint record + * and means that we could set the transaction ID back to 1, so + * that we don't exhaust the transaction ID name space. + */ + if (argp->ckp_lsn.file == lsnp->file && + argp->ckp_lsn.offset == lsnp->offset) + __db_txnlist_gen(info, redo ? -1 : 1); *lsnp = argp->last_ckp; __db_free(argp); return (DB_TXN_CKP); |