diff options
Diffstat (limited to 'db2/db')
-rw-r--r-- | db2/db/db.c | 313 | ||||
-rw-r--r-- | db2/db/db.src | 13 | ||||
-rw-r--r-- | db2/db/db_am.c | 430 | ||||
-rw-r--r-- | db2/db/db_auto.c | 299 | ||||
-rw-r--r-- | db2/db/db_dispatch.c | 41 | ||||
-rw-r--r-- | db2/db/db_dup.c | 511 | ||||
-rw-r--r-- | db2/db/db_iface.c | 488 | ||||
-rw-r--r-- | db2/db/db_join.c | 271 | ||||
-rw-r--r-- | db2/db/db_overflow.c | 129 | ||||
-rw-r--r-- | db2/db/db_pr.c | 110 | ||||
-rw-r--r-- | db2/db/db_rec.c | 155 | ||||
-rw-r--r-- | db2/db/db_ret.c | 21 | ||||
-rw-r--r-- | db2/db/db_thread.c | 121 |
13 files changed, 1997 insertions, 905 deletions
diff --git a/db2/db/db.c b/db2/db/db.c index 70c6c5443b..2b4c270324 100644 --- a/db2/db/db.c +++ b/db2/db/db.c @@ -44,7 +44,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)db.c 10.57 (Sleepycat) 5/7/98"; +static const char sccsid[] = "@(#)db.c 10.75 (Sleepycat) 12/3/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -67,9 +67,6 @@ static const char sccsid[] = "@(#)db.c 10.57 (Sleepycat) 5/7/98"; #include "db_am.h" #include "common_ext.h" -static int db_close __P((DB *, u_int32_t)); -static int db_fd __P((DB *, int *)); - /* * If the metadata page has the flag set, set the local flag. If the page * does NOT have the flag set, return EINVAL if the user's dbinfo argument @@ -87,11 +84,6 @@ static int db_fd __P((DB *, int *)); } \ } -#ifdef _LIBC -#define db_open(fname, type, flags, mode, dbenv, dbinfo, dbpp) \ - __nss_db_open(fname, type, flags, mode, dbenv, dbinfo, dbpp) -#endif - /* * db_open -- * Main library interface to the DB access methods. @@ -141,9 +133,10 @@ db_open(fname, type, flags, mode, dbenv, dbinfo, dbpp) /* * Specifying a cachesize to db_open(3), after creating an - * environment, is a common mistake. + * environment with DB_INIT_MPOOL, is a common mistake. */ - if (dbinfo != NULL && dbinfo->db_cachesize != 0) { + if (dbenv->mp_info != NULL && + dbinfo != NULL && dbinfo->db_cachesize != 0) { __db_err(dbenv, "cachesize will be ignored if environment exists"); return (EINVAL); @@ -156,12 +149,16 @@ db_open(fname, type, flags, mode, dbenv, dbinfo, dbpp) real_name = NULL; /* Allocate the DB structure, reference the DB_ENV structure. */ - if ((dbp = (DB *)__db_calloc(1, sizeof(DB))) == NULL) { - __db_err(dbenv, "%s", strerror(ENOMEM)); - return (ENOMEM); - } + if ((ret = __os_calloc(1, sizeof(DB), &dbp)) != 0) + return (ret); dbp->dbenv = dbenv; + /* Random initialization. */ + TAILQ_INIT(&dbp->free_queue); + TAILQ_INIT(&dbp->active_queue); + if ((ret = __db_init_wrapper(dbp)) != 0) + goto err; + /* Convert the db_open(3) flags. */ if (LF_ISSET(DB_RDONLY)) F_SET(dbp, DB_AM_RDONLY); @@ -192,21 +189,16 @@ db_open(fname, type, flags, mode, dbenv, dbinfo, dbpp) } /* - * Always set the master and initialize the queues, so we can - * use these fields without checking the thread bit. - */ - dbp->master = dbp; - LIST_INIT(&dbp->handleq); - LIST_INSERT_HEAD(&dbp->handleq, dbp, links); - TAILQ_INIT(&dbp->curs_queue); - - /* * Set based on the dbenv fields, although no logging or transactions * are possible for temporary files. */ if (dbenv != NULL) { - if (dbenv->lk_info != NULL) - F_SET(dbp, DB_AM_LOCKING); + if (dbenv->lk_info != NULL) { + if (F_ISSET(dbenv, DB_ENV_CDB)) + F_SET(dbp, DB_AM_CDB); + else + F_SET(dbp, DB_AM_LOCKING); + } if (fname != NULL && dbenv->lg_info != NULL) F_SET(dbp, DB_AM_LOGGING); } @@ -215,9 +207,29 @@ db_open(fname, type, flags, mode, dbenv, dbinfo, dbpp) if (dbinfo == NULL) { dbp->pgsize = 0; dbp->db_malloc = NULL; + dbp->dup_compare = NULL; } else { + /* + * We don't want anything that's not a power-of-2, as we rely + * on that for alignment of various types on the pages. + */ + if ((dbp->pgsize = dbinfo->db_pagesize) != 0 && + (u_int32_t)1 << __db_log2(dbp->pgsize) != dbp->pgsize) { + __db_err(dbenv, "page sizes must be a power-of-2"); + goto einval; + } dbp->pgsize = dbinfo->db_pagesize; dbp->db_malloc = dbinfo->db_malloc; + if (F_ISSET(dbinfo, DB_DUPSORT)) { + if (F_ISSET(dbinfo, DB_DUP)) + dbp->dup_compare = dbinfo->dup_compare == NULL ? + __bam_defcmp : dbinfo->dup_compare; + else { + __db_err(dbenv, "DB_DUPSORT requires DB_DUP"); + goto einval; + } + F_CLR(dbinfo, DB_DUPSORT); + } } /* Fill in the default file mode. */ @@ -235,6 +247,7 @@ db_open(fname, type, flags, mode, dbenv, dbinfo, dbpp) default: goto err; } + dbp->byteswapped = F_ISSET(dbp, DB_AM_SWAP) ? 1 : 0; /* * If we have a file name, try and read the first page, figure out @@ -289,7 +302,7 @@ open_retry: if (LF_ISSET(DB_CREATE)) { * sizes, we limit the default pagesize to 16K. */ if (dbp->pgsize == 0) { - if ((ret = __db_ioinfo(real_name, + if ((ret = __os_ioinfo(real_name, fd, NULL, NULL, &iopsize)) != 0) { __db_err(dbenv, "%s: %s", real_name, strerror(ret)); @@ -299,6 +312,14 @@ open_retry: if (LF_ISSET(DB_CREATE)) { iopsize = 512; if (iopsize > 16 * 1024) iopsize = 16 * 1024; + + /* + * Sheer paranoia, but we don't want anything that's + * not a power-of-2, as we rely on that for alignment + * of various types on the pages. + */ + DB_ROUNDOFF(iopsize, 512); + dbp->pgsize = iopsize; F_SET(dbp, DB_AM_PGDEF); } @@ -308,11 +329,11 @@ open_retry: if (LF_ISSET(DB_CREATE)) { * that the meta-data for all access methods fits in 512 * bytes, and that no database will be smaller than that. */ - if ((ret = __db_read(fd, mbuf, sizeof(mbuf), &nr)) != 0) + if ((ret = __os_read(fd, mbuf, sizeof(mbuf), &nr)) != 0) goto err; /* The fd is no longer needed. */ - (void)__db_close(fd); + (void)__os_close(fd); fd = -1; if (nr != sizeof(mbuf)) { @@ -337,7 +358,7 @@ open_retry: if (LF_ISSET(DB_CREATE)) { */ if (retry_cnt++ < 3 && !LF_ISSET(DB_CREATE | DB_TRUNCATE)) { - __db_sleep(1, 0); + __os_sleep(1, 0); goto open_retry; } if (type == DB_UNKNOWN) { @@ -396,7 +417,7 @@ retry: switch (((BTMETA *)mbuf)->magic) { /* Copy the file's unique id. */ need_fileid = 0; - memcpy(dbp->lock.fileid, btm->uid, DB_FILE_ID_LEN); + memcpy(dbp->fileid, btm->uid, DB_FILE_ID_LEN); break; case DB_HASHMAGIC: if (type != DB_HASH && type != DB_UNKNOWN) @@ -425,7 +446,7 @@ retry: switch (((BTMETA *)mbuf)->magic) { /* Copy the file's unique id. */ need_fileid = 0; - memcpy(dbp->lock.fileid, hashm->uid, DB_FILE_ID_LEN); + memcpy(dbp->fileid, hashm->uid, DB_FILE_ID_LEN); break; default: if (swapped) { @@ -489,11 +510,9 @@ empty: /* F_SET(dbp, DB_AM_MLOCAL); if (dbenv == NULL) { - if ((dbp->mp_dbenv = - (DB_ENV *)__db_calloc(sizeof(DB_ENV), 1)) == NULL) { - ret = ENOMEM; + if ((ret = __os_calloc(1, + sizeof(DB_ENV), &dbp->mp_dbenv)) != 0) goto err; - } envp = dbp->mp_dbenv; restore = 0; @@ -554,20 +573,20 @@ empty: /* */ if (need_fileid) { if (fname == NULL) { - memset(dbp->lock.fileid, 0, DB_FILE_ID_LEN); + memset(dbp->fileid, 0, DB_FILE_ID_LEN); if (F_ISSET(dbp, DB_AM_LOCKING) && (ret = lock_id(dbenv->lk_info, - (u_int32_t *)dbp->lock.fileid)) != 0) + (u_int32_t *)dbp->fileid)) != 0) goto err; } else - if ((ret = __db_fileid(dbenv, - real_name, 1, dbp->lock.fileid)) != 0) + if ((ret = __os_fileid(dbenv, + real_name, 1, dbp->fileid)) != 0) goto err; } /* No further use for the real name. */ if (real_name != NULL) - FREES(real_name); + __os_freestr(real_name); real_name = NULL; /* @@ -595,7 +614,7 @@ empty: /* memset(&finfo, 0, sizeof(finfo)); finfo.ftype = ftype; finfo.pgcookie = &pgcookie; - finfo.fileid = dbp->lock.fileid; + finfo.fileid = dbp->fileid; finfo.lsn_offset = 0; finfo.clear_len = DB_PAGE_CLEAR_LEN; if ((ret = memp_fopen(dbp->mp, fname, @@ -605,12 +624,21 @@ empty: /* /* * XXX - * Truly spectacular layering violation. We need a per-thread mutex - * that lives in shared memory (thanks, HP-UX!) and so we acquire a - * pointer to the mpool one. + * We need a per-thread mutex that lives in shared memory -- HP-UX + * can't allocate mutexes in malloc'd memory. Allocate it from the + * shared memory region, since it's the only one that is guaranteed + * to exist. */ - if (F_ISSET(dbp, DB_AM_THREAD)) - dbp->mutexp = dbp->mpf->mutexp; + if (F_ISSET(dbp, DB_AM_THREAD)) { + if ((ret = __memp_reg_alloc(dbp->mp, + sizeof(db_mutex_t), NULL, &dbp->mutexp)) != 0) + goto err; + /* + * Since we only get here if DB_THREAD was specified, we know + * we have spinlocks and no file offset argument is needed. + */ + (void)__db_mutex_init(dbp->mutexp, 0); + } /* Get a log file id. */ if (F_ISSET(dbp, DB_AM_LOGGING) && @@ -618,18 +646,6 @@ empty: /* dbp, fname, type, &dbp->log_fileid)) != 0) goto err; - /* - * Get a locker id for this DB, and build the lock cookie: the first - * db_pgno_t bytes are the page number, the next N bytes are the file - * id. - */ - if (F_ISSET(dbp, DB_AM_LOCKING)) { - if ((ret = lock_id(dbenv->lk_info, &dbp->locker)) != 0) - goto err; - dbp->lock_dbt.size = sizeof(dbp->lock); - dbp->lock_dbt.data = &dbp->lock; - } - /* Call the real open function. */ switch (type) { case DB_BTREE: @@ -639,7 +655,7 @@ empty: /* if (dbinfo != NULL && (ret = __db_fcchk(dbenv, "db_open", dbinfo->flags, DB_DUP, DB_RECNUM)) != 0) goto err; - if ((ret = __bam_open(dbp, type, dbinfo)) != 0) + if ((ret = __bam_open(dbp, dbinfo)) != 0) goto err; break; case DB_HASH: @@ -655,24 +671,20 @@ empty: /* if (dbinfo != NULL && (ret = __db_fchk(dbenv, "db_open", dbinfo->flags, DB_INFO_FLAGS)) != 0) goto err; - if ((ret = __ram_open(dbp, type, dbinfo)) != 0) + if ((ret = __ram_open(dbp, dbinfo)) != 0) goto err; break; default: abort(); } - /* Call a local close routine. */ - dbp->close = db_close; - dbp->fd = db_fd; - *dbpp = dbp; return (0); einval: ret = EINVAL; err: /* Close the file descriptor. */ if (fd != -1) - (void)__db_close(fd); + (void)__os_close(fd); /* Discard the log file id. */ if (dbp->log_fileid != 0) @@ -688,90 +700,60 @@ err: /* Close the file descriptor. */ /* If we allocated a DB_ENV, discard it. */ if (dbp->mp_dbenv != NULL) - FREE(dbp->mp_dbenv, sizeof(DB_ENV)); + __os_free(dbp->mp_dbenv, sizeof(DB_ENV)); if (real_name != NULL) - FREES(real_name); + __os_freestr(real_name); if (dbp != NULL) - FREE(dbp, sizeof(DB)); + __os_free(dbp, sizeof(DB)); return (ret); } -#ifdef _LIBC -# undef db_open -weak_alias (__nss_db_open, db_open) -#endif - /* - * db_close -- + * __db_close -- * Close a DB tree. + * + * PUBLIC: int __db_close __P((DB *, u_int32_t)); */ -static int -db_close(dbp, flags) +int +__db_close(dbp, flags) DB *dbp; u_int32_t flags; { DBC *dbc; - DB *tdbp; int ret, t_ret; + DB_PANIC_CHECK(dbp); + /* Validate arguments. */ - if ((ret = __db_fchk(dbp->dbenv, "db_close", flags, DB_NOSYNC)) != 0) + if ((ret = __db_closechk(dbp, flags)) != 0) return (ret); /* Sync the underlying file. */ - if (!LF_ISSET(DB_NOSYNC) && + if (flags != DB_NOSYNC && (t_ret = dbp->sync(dbp, 0)) != 0 && ret == 0) ret = t_ret; /* - * Call the underlying access method close routine for all the - * cursors and handles. + * Go through the active cursors and call the cursor recycle routine, + * which resolves pending operations and moves the cursors onto the + * free list. Then, walk the free list and call the cursor destroy + * routine. */ - for (tdbp = LIST_FIRST(&dbp->handleq); - tdbp != NULL; tdbp = LIST_NEXT(tdbp, links)) { - while ((dbc = TAILQ_FIRST(&tdbp->curs_queue)) != NULL) - switch (tdbp->type) { - case DB_BTREE: - if ((t_ret = - __bam_c_iclose(tdbp, dbc)) != 0 && ret == 0) - ret = t_ret; - break; - case DB_HASH: - if ((t_ret = - __ham_c_iclose(tdbp, dbc)) != 0 && ret == 0) - ret = t_ret; - break; - case DB_RECNO: - if ((t_ret = - __ram_c_iclose(tdbp, dbc)) != 0 && ret == 0) - ret = t_ret; - break; - default: - abort(); - } - - switch (tdbp->type) { - case DB_BTREE: - if ((t_ret = __bam_close(tdbp)) != 0 && ret == 0) - ret = t_ret; - break; - case DB_HASH: - if ((t_ret = __ham_close(tdbp)) != 0 && ret == 0) - ret = t_ret; - break; - case DB_RECNO: - if ((t_ret = __ram_close(tdbp)) != 0 && ret == 0) - ret = t_ret; - break; - default: - abort(); - } - } + while ((dbc = TAILQ_FIRST(&dbp->active_queue)) != NULL) + if ((t_ret = dbc->c_close(dbc)) != 0 && ret == 0) + ret = t_ret; + while ((dbc = TAILQ_FIRST(&dbp->free_queue)) != NULL) + if ((t_ret = __db_c_destroy(dbc)) != 0 && ret == 0) + ret = t_ret; + + /* Call the access specific close function. */ + if ((t_ret = dbp->am_close(dbp)) != 0 && ret == 0) + ret = t_ret; /* Sync the memory pool. */ - if (!LF_ISSET(DB_NOSYNC) && (t_ret = memp_fsync(dbp->mpf)) != 0 && + if (flags != DB_NOSYNC && (t_ret = memp_fsync(dbp->mpf)) != 0 && t_ret != DB_INCOMPLETE && ret == 0) ret = t_ret; @@ -788,91 +770,12 @@ db_close(dbp, flags) if (F_ISSET(dbp, DB_AM_LOGGING)) (void)log_unregister(dbp->dbenv->lg_info, dbp->log_fileid); - /* Discard the lock cookie for all handles. */ - for (tdbp = LIST_FIRST(&dbp->handleq); - tdbp != NULL; tdbp = LIST_NEXT(tdbp, links)) - if (F_ISSET(tdbp, DB_AM_LOCKING)) { -#ifdef DEBUG - DB_LOCKREQ request; - - /* - * If we're running tests, display any locks currently - * held. It's possible that some applications may hold - * locks for long periods, e.g., conference room locks, - * but the DB tests should never close holding locks. - */ - request.op = DB_LOCK_DUMP; - if ((t_ret = lock_vec(tdbp->dbenv->lk_info, - tdbp->locker, 0, &request, 1, NULL)) != 0 && - ret == 0) - ret = EAGAIN; -#endif - } - /* If we allocated a DB_ENV, discard it. */ if (dbp->mp_dbenv != NULL) - FREE(dbp->mp_dbenv, sizeof(DB_ENV)); + __os_free(dbp->mp_dbenv, sizeof(DB_ENV)); - /* Free all of the DB's. */ - LIST_REMOVE(dbp, links); - while ((tdbp = LIST_FIRST(&dbp->handleq)) != NULL) { - LIST_REMOVE(tdbp, links); - FREE(tdbp, sizeof(*tdbp)); - } - FREE(dbp, sizeof(*dbp)); + /* Free the DB. */ + __os_free(dbp, sizeof(*dbp)); return (ret); } - -/* - * db_fd -- - * Return a file descriptor for flock'ing. - */ -static int -db_fd(dbp, fdp) - DB *dbp; - int *fdp; -{ - /* - * XXX - * Truly spectacular layering violation. - */ - return (__mp_xxx_fd(dbp->mpf, fdp)); -} - -/* - * __db_pgerr -- - * Error when unable to retrieve a specified page. - * - * PUBLIC: int __db_pgerr __P((DB *, db_pgno_t)); - */ -int -__db_pgerr(dbp, pgno) - DB *dbp; - db_pgno_t pgno; -{ - /* - * Three things are certain: - * Death, taxes, and lost data. - * Guess which has occurred. - */ - __db_err(dbp->dbenv, - "unable to create/retrieve page %lu", (u_long)pgno); - return (__db_panic(dbp)); -} - -/* - * __db_pgfmt -- - * Error when a page has the wrong format. - * - * PUBLIC: int __db_pgfmt __P((DB *, db_pgno_t)); - */ -int -__db_pgfmt(dbp, pgno) - DB *dbp; - db_pgno_t pgno; -{ - __db_err(dbp->dbenv, - "page %lu: illegal page type or format", (u_long)pgno); - return (__db_panic(dbp)); -} diff --git a/db2/db/db.src b/db2/db/db.src index 91d8b390a1..26557e10ac 100644 --- a/db2/db/db.src +++ b/db2/db/db.src @@ -4,7 +4,7 @@ * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. * - * @(#)db.src 10.6 (Sleepycat) 4/28/98 + * @(#)db.src 10.8 (Sleepycat) 9/20/98 */ PREFIX db @@ -98,6 +98,7 @@ END /* * relink -- Handles relinking around a page. * + * opcode: indicates if this is an addpage or delete page * pgno: the page being changed. * lsn the page's original lsn. * prev: the previous page. @@ -106,6 +107,7 @@ END * lsn_next: the previous page's original lsn. */ BEGIN relink +ARG opcode u_int32_t lu ARG fileid u_int32_t lu ARG pgno db_pgno_t lu POINTER lsn DB_LSN * lu @@ -148,12 +150,3 @@ DBT key DBT s DBT data DBT s ARG arg_flags u_int32_t lu END - -/* - * noop -- do nothing, but get an LSN. - */ -BEGIN noop -ARG fileid u_int32_t lu -ARG pgno db_pgno_t lu -POINTER prevlsn DB_LSN * lu -END diff --git a/db2/db/db_am.c b/db2/db/db_am.c new file mode 100644 index 0000000000..e02ad57f53 --- /dev/null +++ b/db2/db/db_am.c @@ -0,0 +1,430 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1998 + * Sleepycat Software. All rights reserved. + */ + +#include "config.h" + +#ifndef lint +static const char sccsid[] = "@(#)db_am.c 10.15 (Sleepycat) 12/30/98"; +#endif /* not lint */ + +#ifndef NO_SYSTEM_INCLUDES +#include <sys/types.h> + +#include <errno.h> +#include <stdlib.h> +#include <string.h> +#endif + +#include "db_int.h" +#include "shqueue.h" +#include "db_page.h" +#include "db_shash.h" +#include "mp.h" +#include "btree.h" +#include "hash.h" +#include "db_am.h" +#include "db_ext.h" + +static int __db_c_close __P((DBC *)); +static int __db_cursor __P((DB *, DB_TXN *, DBC **, u_int32_t)); +static int __db_fd __P((DB *, int *)); +static int __db_get __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t)); +static int __db_put __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t)); + +/* + * __db_init_wrapper -- + * Wrapper layer to implement generic DB functions. + * + * PUBLIC: int __db_init_wrapper __P((DB *)); + */ +int +__db_init_wrapper(dbp) + DB *dbp; +{ + dbp->close = __db_close; + dbp->cursor = __db_cursor; + dbp->del = NULL; /* !!! Must be set by access method. */ + dbp->fd = __db_fd; + dbp->get = __db_get; + dbp->join = __db_join; + dbp->put = __db_put; + dbp->stat = NULL; /* !!! Must be set by access method. */ + dbp->sync = __db_sync; + + return (0); +} + +/* + * __db_cursor -- + * Allocate and return a cursor. + */ +static int +__db_cursor(dbp, txn, dbcp, flags) + DB *dbp; + DB_TXN *txn; + DBC **dbcp; + u_int32_t flags; +{ + DBC *dbc, *adbc; + int ret; + db_lockmode_t mode; + u_int32_t op; + + DB_PANIC_CHECK(dbp); + + /* Take one from the free list if it's available. */ + DB_THREAD_LOCK(dbp); + if ((dbc = TAILQ_FIRST(&dbp->free_queue)) != NULL) + TAILQ_REMOVE(&dbp->free_queue, dbc, links); + else { + DB_THREAD_UNLOCK(dbp); + + if ((ret = __os_calloc(1, sizeof(DBC), &dbc)) != 0) + return (ret); + + dbc->dbp = dbp; + dbc->c_close = __db_c_close; + + /* Set up locking information. */ + if (F_ISSET(dbp, DB_AM_LOCKING | DB_AM_CDB)) { + /* + * If we are not threaded, then there is no need to + * create new locker ids. We know that no one else + * is running concurrently using this DB, so we can + * take a peek at any cursors on the active queue. + */ + if (!F_ISSET(dbp, DB_AM_THREAD) && + (adbc = TAILQ_FIRST(&dbp->active_queue)) != NULL) + dbc->lid = adbc->lid; + else + if ((ret = lock_id(dbp->dbenv->lk_info, + &dbc->lid)) != 0) + goto err; + + memcpy(dbc->lock.fileid, dbp->fileid, DB_FILE_ID_LEN); + if (F_ISSET(dbp, DB_AM_CDB)) { + dbc->lock_dbt.size = DB_FILE_ID_LEN; + dbc->lock_dbt.data = dbc->lock.fileid; + } else { + dbc->lock_dbt.size = sizeof(dbc->lock); + dbc->lock_dbt.data = &dbc->lock; + } + } + + switch (dbp->type) { + case DB_BTREE: + case DB_RECNO: + if ((ret = __bam_c_init(dbc)) != 0) + goto err; + break; + case DB_HASH: + if ((ret = __ham_c_init(dbc)) != 0) + goto err; + break; + default: + ret = EINVAL; + goto err; + } + + DB_THREAD_LOCK(dbp); + } + + if ((dbc->txn = txn) == NULL) + dbc->locker = dbc->lid; + else + dbc->locker = txn->txnid; + + TAILQ_INSERT_TAIL(&dbp->active_queue, dbc, links); + DB_THREAD_UNLOCK(dbp); + + /* + * If this is the concurrent DB product, then we do all locking + * in the interface, which is right here. + */ + if (F_ISSET(dbp, DB_AM_CDB)) { + op = LF_ISSET(DB_OPFLAGS_MASK); + mode = (op == DB_WRITELOCK) ? DB_LOCK_WRITE : + (LF_ISSET(DB_RMW) ? DB_LOCK_IWRITE : DB_LOCK_READ); + if ((ret = lock_get(dbp->dbenv->lk_info, dbc->locker, 0, + &dbc->lock_dbt, mode, &dbc->mylock)) != 0) { + (void)__db_c_close(dbc); + return (EAGAIN); + } + if (LF_ISSET(DB_RMW)) + F_SET(dbc, DBC_RMW); + if (op == DB_WRITELOCK) + F_SET(dbc, DBC_WRITER); + } + + *dbcp = dbc; + return (0); + +err: __os_free(dbc, sizeof(*dbc)); + return (ret); +} + +/* + * __db_c_close -- + * Close the cursor (recycle for later use). + */ +static int +__db_c_close(dbc) + DBC *dbc; +{ + DB *dbp; + int ret, t_ret; + + dbp = dbc->dbp; + + DB_PANIC_CHECK(dbp); + + ret = 0; + + /* + * We cannot release the lock until after we've called the + * access method specific routine, since btrees may have pending + * deletes. + */ + + /* Remove the cursor from the active queue. */ + DB_THREAD_LOCK(dbp); + TAILQ_REMOVE(&dbp->active_queue, dbc, links); + DB_THREAD_UNLOCK(dbp); + + /* Call the access specific cursor close routine. */ + if ((t_ret = dbc->c_am_close(dbc)) != 0 && ret == 0) + t_ret = ret; + + /* Release the lock. */ + if (F_ISSET(dbc->dbp, DB_AM_CDB) && dbc->mylock != LOCK_INVALID) { + ret = lock_put(dbc->dbp->dbenv->lk_info, dbc->mylock); + dbc->mylock = LOCK_INVALID; + } + + /* Clean up the cursor. */ + dbc->flags = 0; + +#ifdef DEBUG + /* + * Check for leftover locks, unless we're running with transactions. + * + * If we're running tests, display any locks currently held. It's + * possible that some applications may hold locks for long periods, + * e.g., conference room locks, but the DB tests should never close + * holding locks. + */ + if (F_ISSET(dbp, DB_AM_LOCKING) && dbc->lid == dbc->locker) { + DB_LOCKREQ request; + + request.op = DB_LOCK_DUMP; + if ((t_ret = lock_vec(dbp->dbenv->lk_info, + dbc->locker, 0, &request, 1, NULL)) != 0 && ret == 0) + ret = EAGAIN; + } +#endif + /* Move the cursor to the free queue. */ + DB_THREAD_LOCK(dbp); + TAILQ_INSERT_TAIL(&dbp->free_queue, dbc, links); + DB_THREAD_UNLOCK(dbp); + + return (ret); +} + +#ifdef DEBUG +/* + * __db_cprint -- + * Display the current cursor list. + * + * PUBLIC: int __db_cprint __P((DB *)); + */ +int +__db_cprint(dbp) + DB *dbp; +{ + static const FN fn[] = { + { DBC_RECOVER, "recover" }, + { DBC_RMW, "read-modify-write" }, + { 0 }, + }; + DBC *dbc; + + DB_THREAD_LOCK(dbp); + for (dbc = TAILQ_FIRST(&dbp->active_queue); + dbc != NULL; dbc = TAILQ_NEXT(dbc, links)) { + fprintf(stderr, + "%#0x: dbp: %#0x txn: %#0x lid: %lu locker: %lu", + (u_int)dbc, (u_int)dbc->dbp, (u_int)dbc->txn, + (u_long)dbc->lid, (u_long)dbc->locker); + __db_prflags(dbc->flags, fn, stderr); + fprintf(stderr, "\n"); + } + DB_THREAD_UNLOCK(dbp); + + return (0); +} +#endif /* DEBUG */ + +/* + * __db_c_destroy -- + * Destroy the cursor. + * + * PUBLIC: int __db_c_destroy __P((DBC *)); + */ +int +__db_c_destroy(dbc) + DBC *dbc; +{ + DB *dbp; + int ret; + + dbp = dbc->dbp; + + /* Remove the cursor from the free queue. */ + DB_THREAD_LOCK(dbp); + TAILQ_REMOVE(&dbp->free_queue, dbc, links); + DB_THREAD_UNLOCK(dbp); + + /* Call the access specific cursor destroy routine. */ + ret = dbc->c_am_destroy == NULL ? 0 : dbc->c_am_destroy(dbc); + + /* Free up allocated memory. */ + if (dbc->rkey.data != NULL) + __os_free(dbc->rkey.data, dbc->rkey.ulen); + if (dbc->rdata.data != NULL) + __os_free(dbc->rdata.data, dbc->rdata.ulen); + __os_free(dbc, sizeof(*dbc)); + + return (0); +} + +/* + * db_fd -- + * Return a file descriptor for flock'ing. + */ +static int +__db_fd(dbp, fdp) + DB *dbp; + int *fdp; +{ + DB_PANIC_CHECK(dbp); + + /* + * XXX + * Truly spectacular layering violation. + */ + return (__mp_xxx_fd(dbp->mpf, fdp)); +} + +/* + * __db_get -- + * Return a key/data pair. + */ +static int +__db_get(dbp, txn, key, data, flags) + DB *dbp; + DB_TXN *txn; + DBT *key, *data; + u_int32_t flags; +{ + DBC *dbc; + int ret, t_ret; + + DB_PANIC_CHECK(dbp); + + if ((ret = __db_getchk(dbp, key, data, flags)) != 0) + return (ret); + + if ((ret = dbp->cursor(dbp, txn, &dbc, 0)) != 0) + return (ret); + + DEBUG_LREAD(dbc, txn, "__db_get", key, NULL, flags); + + ret = dbc->c_get(dbc, key, data, + flags == 0 || flags == DB_RMW ? flags | DB_SET : flags); + + if ((t_ret = __db_c_close(dbc)) != 0 && ret == 0) + ret = t_ret; + + return (ret); +} + +/* + * __db_put -- + * Store a key/data pair. + */ +static int +__db_put(dbp, txn, key, data, flags) + DB *dbp; + DB_TXN *txn; + DBT *key, *data; + u_int32_t flags; +{ + DBC *dbc; + DBT tdata; + int ret, t_ret; + + DB_PANIC_CHECK(dbp); + + if ((ret = __db_putchk(dbp, key, data, + flags, F_ISSET(dbp, DB_AM_RDONLY), F_ISSET(dbp, DB_AM_DUP))) != 0) + return (ret); + + if ((ret = dbp->cursor(dbp, txn, &dbc, DB_WRITELOCK)) != 0) + return (ret); + + DEBUG_LWRITE(dbc, txn, "__db_put", key, data, flags); + + if (flags == DB_NOOVERWRITE) { + /* + * Set DB_DBT_USERMEM, this might be a threaded application and + * the flags checking will catch us. We don't want the actual + * data, so request a partial of length 0. + */ + memset(&tdata, 0, sizeof(tdata)); + F_SET(&tdata, DB_DBT_USERMEM | DB_DBT_PARTIAL); + if ((ret = dbc->c_get(dbc, key, &tdata, DB_SET | DB_RMW)) == 0) + ret = DB_KEYEXIST; + else + ret = 0; + } + if (ret == 0) + ret = dbc->c_put(dbc, key, data, DB_KEYLAST); + + if ((t_ret = __db_c_close(dbc)) != 0 && ret == 0) + ret = t_ret; + + return (ret); +} + +/* + * __db_sync -- + * Flush the database cache. + * + * PUBLIC: int __db_sync __P((DB *, u_int32_t)); + */ +int +__db_sync(dbp, flags) + DB *dbp; + u_int32_t flags; +{ + int ret; + + DB_PANIC_CHECK(dbp); + + if ((ret = __db_syncchk(dbp, flags)) != 0) + return (ret); + + /* If it wasn't possible to modify the file, we're done. */ + if (F_ISSET(dbp, DB_AM_INMEM | DB_AM_RDONLY)) + return (0); + + /* Flush any dirty pages from the cache to the backing file. */ + if ((ret = memp_fsync(dbp->mpf)) == DB_INCOMPLETE) + ret = 0; + + return (ret); +} diff --git a/db2/db/db_auto.c b/db2/db/db_auto.c index 5203e0a94c..e3dba23c8b 100644 --- a/db2/db/db_auto.c +++ b/db2/db/db_auto.c @@ -10,7 +10,6 @@ #endif #include "db_int.h" -#include "shqueue.h" #include "db_page.h" #include "db_dispatch.h" #include "db_am.h" @@ -46,8 +45,7 @@ int __db_addrem_log(logp, txnid, ret_lsnp, flags, rectype = DB_db_addrem; txn_num = txnid == NULL ? 0 : txnid->txnid; if (txnid == NULL) { - null_lsn.file = 0; - null_lsn.offset = 0; + ZERO_LSN(null_lsn); lsnp = &null_lsn; } else lsnp = &txnid->last_lsn; @@ -60,8 +58,8 @@ int __db_addrem_log(logp, txnid, ret_lsnp, flags, + sizeof(u_int32_t) + (hdr == NULL ? 0 : hdr->size) + sizeof(u_int32_t) + (dbt == NULL ? 0 : dbt->size) + sizeof(*pagelsn); - if ((logrec.data = (void *)__db_malloc(logrec.size)) == NULL) - return (ENOMEM); + if ((ret = __os_malloc(logrec.size, NULL, &logrec.data)) != 0) + return (ret); bp = logrec.data; memcpy(bp, &rectype, sizeof(rectype)); @@ -112,7 +110,7 @@ int __db_addrem_log(logp, txnid, ret_lsnp, flags, ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags); if (txnid != NULL) txnid->last_lsn = *ret_lsnp; - __db_free(logrec.data); + __os_free(logrec.data, 0); return (ret); } @@ -174,7 +172,7 @@ __db_addrem_print(notused1, dbtp, lsnp, notused2, notused3) printf("\tpagelsn: [%lu][%lu]\n", (u_long)argp->pagelsn.file, (u_long)argp->pagelsn.offset); printf("\n"); - __db_free(argp); + __os_free(argp, 0); return (0); } @@ -188,11 +186,12 @@ __db_addrem_read(recbuf, argpp) { __db_addrem_args *argp; u_int8_t *bp; + int ret; - argp = (__db_addrem_args *)__db_malloc(sizeof(__db_addrem_args) + - sizeof(DB_TXN)); - if (argp == NULL) - return (ENOMEM); + ret = __os_malloc(sizeof(__db_addrem_args) + + sizeof(DB_TXN), NULL, &argp); + if (ret != 0) + return (ret); argp->txnid = (DB_TXN *)&argp[1]; bp = recbuf; memcpy(&argp->type, bp, sizeof(argp->type)); @@ -253,8 +252,7 @@ int __db_split_log(logp, txnid, ret_lsnp, flags, rectype = DB_db_split; txn_num = txnid == NULL ? 0 : txnid->txnid; if (txnid == NULL) { - null_lsn.file = 0; - null_lsn.offset = 0; + ZERO_LSN(null_lsn); lsnp = &null_lsn; } else lsnp = &txnid->last_lsn; @@ -264,8 +262,8 @@ int __db_split_log(logp, txnid, ret_lsnp, flags, + sizeof(pgno) + sizeof(u_int32_t) + (pageimage == NULL ? 0 : pageimage->size) + sizeof(*pagelsn); - if ((logrec.data = (void *)__db_malloc(logrec.size)) == NULL) - return (ENOMEM); + if ((ret = __os_malloc(logrec.size, NULL, &logrec.data)) != 0) + return (ret); bp = logrec.data; memcpy(bp, &rectype, sizeof(rectype)); @@ -302,7 +300,7 @@ int __db_split_log(logp, txnid, ret_lsnp, flags, ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags); if (txnid != NULL) txnid->last_lsn = *ret_lsnp; - __db_free(logrec.data); + __os_free(logrec.data, 0); return (ret); } @@ -353,7 +351,7 @@ __db_split_print(notused1, dbtp, lsnp, notused2, notused3) printf("\tpagelsn: [%lu][%lu]\n", (u_long)argp->pagelsn.file, (u_long)argp->pagelsn.offset); printf("\n"); - __db_free(argp); + __os_free(argp, 0); return (0); } @@ -367,11 +365,12 @@ __db_split_read(recbuf, argpp) { __db_split_args *argp; u_int8_t *bp; + int ret; - argp = (__db_split_args *)__db_malloc(sizeof(__db_split_args) + - sizeof(DB_TXN)); - if (argp == NULL) - return (ENOMEM); + ret = __os_malloc(sizeof(__db_split_args) + + sizeof(DB_TXN), NULL, &argp); + if (ret != 0) + return (ret); argp->txnid = (DB_TXN *)&argp[1]; bp = recbuf; memcpy(&argp->type, bp, sizeof(argp->type)); @@ -430,8 +429,7 @@ int __db_big_log(logp, txnid, ret_lsnp, flags, rectype = DB_db_big; txn_num = txnid == NULL ? 0 : txnid->txnid; if (txnid == NULL) { - null_lsn.file = 0; - null_lsn.offset = 0; + ZERO_LSN(null_lsn); lsnp = &null_lsn; } else lsnp = &txnid->last_lsn; @@ -445,8 +443,8 @@ int __db_big_log(logp, txnid, ret_lsnp, flags, + sizeof(*pagelsn) + sizeof(*prevlsn) + sizeof(*nextlsn); - if ((logrec.data = (void *)__db_malloc(logrec.size)) == NULL) - return (ENOMEM); + if ((ret = __os_malloc(logrec.size, NULL, &logrec.data)) != 0) + return (ret); bp = logrec.data; memcpy(bp, &rectype, sizeof(rectype)); @@ -497,7 +495,7 @@ int __db_big_log(logp, txnid, ret_lsnp, flags, ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags); if (txnid != NULL) txnid->last_lsn = *ret_lsnp; - __db_free(logrec.data); + __os_free(logrec.data, 0); return (ret); } @@ -554,7 +552,7 @@ __db_big_print(notused1, dbtp, lsnp, notused2, notused3) printf("\tnextlsn: [%lu][%lu]\n", (u_long)argp->nextlsn.file, (u_long)argp->nextlsn.offset); printf("\n"); - __db_free(argp); + __os_free(argp, 0); return (0); } @@ -568,11 +566,12 @@ __db_big_read(recbuf, argpp) { __db_big_args *argp; u_int8_t *bp; + int ret; - argp = (__db_big_args *)__db_malloc(sizeof(__db_big_args) + - sizeof(DB_TXN)); - if (argp == NULL) - return (ENOMEM); + ret = __os_malloc(sizeof(__db_big_args) + + sizeof(DB_TXN), NULL, &argp); + if (ret != 0) + return (ret); argp->txnid = (DB_TXN *)&argp[1]; bp = recbuf; memcpy(&argp->type, bp, sizeof(argp->type)); @@ -630,8 +629,7 @@ int __db_ovref_log(logp, txnid, ret_lsnp, flags, rectype = DB_db_ovref; txn_num = txnid == NULL ? 0 : txnid->txnid; if (txnid == NULL) { - null_lsn.file = 0; - null_lsn.offset = 0; + ZERO_LSN(null_lsn); lsnp = &null_lsn; } else lsnp = &txnid->last_lsn; @@ -640,8 +638,8 @@ int __db_ovref_log(logp, txnid, ret_lsnp, flags, + sizeof(pgno) + sizeof(adjust) + sizeof(*lsn); - if ((logrec.data = (void *)__db_malloc(logrec.size)) == NULL) - return (ENOMEM); + if ((ret = __os_malloc(logrec.size, NULL, &logrec.data)) != 0) + return (ret); bp = logrec.data; memcpy(bp, &rectype, sizeof(rectype)); @@ -668,7 +666,7 @@ int __db_ovref_log(logp, txnid, ret_lsnp, flags, ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags); if (txnid != NULL) txnid->last_lsn = *ret_lsnp; - __db_free(logrec.data); + __os_free(logrec.data, 0); return (ret); } @@ -710,7 +708,7 @@ __db_ovref_print(notused1, dbtp, lsnp, notused2, notused3) printf("\tlsn: [%lu][%lu]\n", (u_long)argp->lsn.file, (u_long)argp->lsn.offset); printf("\n"); - __db_free(argp); + __os_free(argp, 0); return (0); } @@ -724,11 +722,12 @@ __db_ovref_read(recbuf, argpp) { __db_ovref_args *argp; u_int8_t *bp; + int ret; - argp = (__db_ovref_args *)__db_malloc(sizeof(__db_ovref_args) + - sizeof(DB_TXN)); - if (argp == NULL) - return (ENOMEM); + ret = __os_malloc(sizeof(__db_ovref_args) + + sizeof(DB_TXN), NULL, &argp); + if (ret != 0) + return (ret); argp->txnid = (DB_TXN *)&argp[1]; bp = recbuf; memcpy(&argp->type, bp, sizeof(argp->type)); @@ -752,16 +751,17 @@ __db_ovref_read(recbuf, argpp) /* * PUBLIC: int __db_relink_log * PUBLIC: __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t, - * PUBLIC: u_int32_t, db_pgno_t, DB_LSN *, db_pgno_t, - * PUBLIC: DB_LSN *, db_pgno_t, DB_LSN *)); + * PUBLIC: u_int32_t, u_int32_t, db_pgno_t, DB_LSN *, + * PUBLIC: db_pgno_t, DB_LSN *, db_pgno_t, DB_LSN *)); */ int __db_relink_log(logp, txnid, ret_lsnp, flags, - fileid, pgno, lsn, prev, lsn_prev, next, - lsn_next) + opcode, fileid, pgno, lsn, prev, lsn_prev, + next, lsn_next) DB_LOG *logp; DB_TXN *txnid; DB_LSN *ret_lsnp; u_int32_t flags; + u_int32_t opcode; u_int32_t fileid; db_pgno_t pgno; DB_LSN * lsn; @@ -779,12 +779,12 @@ int __db_relink_log(logp, txnid, ret_lsnp, flags, rectype = DB_db_relink; txn_num = txnid == NULL ? 0 : txnid->txnid; if (txnid == NULL) { - null_lsn.file = 0; - null_lsn.offset = 0; + ZERO_LSN(null_lsn); lsnp = &null_lsn; } else lsnp = &txnid->last_lsn; logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN) + + sizeof(opcode) + sizeof(fileid) + sizeof(pgno) + sizeof(*lsn) @@ -792,8 +792,8 @@ int __db_relink_log(logp, txnid, ret_lsnp, flags, + sizeof(*lsn_prev) + sizeof(next) + sizeof(*lsn_next); - if ((logrec.data = (void *)__db_malloc(logrec.size)) == NULL) - return (ENOMEM); + if ((ret = __os_malloc(logrec.size, NULL, &logrec.data)) != 0) + return (ret); bp = logrec.data; memcpy(bp, &rectype, sizeof(rectype)); @@ -802,6 +802,8 @@ int __db_relink_log(logp, txnid, ret_lsnp, flags, bp += sizeof(txn_num); memcpy(bp, lsnp, sizeof(DB_LSN)); bp += sizeof(DB_LSN); + memcpy(bp, &opcode, sizeof(opcode)); + bp += sizeof(opcode); memcpy(bp, &fileid, sizeof(fileid)); bp += sizeof(fileid); memcpy(bp, &pgno, sizeof(pgno)); @@ -832,7 +834,7 @@ int __db_relink_log(logp, txnid, ret_lsnp, flags, ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags); if (txnid != NULL) txnid->last_lsn = *ret_lsnp; - __db_free(logrec.data); + __os_free(logrec.data, 0); return (ret); } @@ -868,6 +870,7 @@ __db_relink_print(notused1, dbtp, lsnp, notused2, notused3) (u_long)argp->txnid->txnid, (u_long)argp->prev_lsn.file, (u_long)argp->prev_lsn.offset); + printf("\topcode: %lu\n", (u_long)argp->opcode); printf("\tfileid: %lu\n", (u_long)argp->fileid); printf("\tpgno: %lu\n", (u_long)argp->pgno); printf("\tlsn: [%lu][%lu]\n", @@ -879,7 +882,7 @@ __db_relink_print(notused1, dbtp, lsnp, notused2, notused3) printf("\tlsn_next: [%lu][%lu]\n", (u_long)argp->lsn_next.file, (u_long)argp->lsn_next.offset); printf("\n"); - __db_free(argp); + __os_free(argp, 0); return (0); } @@ -893,11 +896,12 @@ __db_relink_read(recbuf, argpp) { __db_relink_args *argp; u_int8_t *bp; + int ret; - argp = (__db_relink_args *)__db_malloc(sizeof(__db_relink_args) + - sizeof(DB_TXN)); - if (argp == NULL) - return (ENOMEM); + ret = __os_malloc(sizeof(__db_relink_args) + + sizeof(DB_TXN), NULL, &argp); + if (ret != 0) + return (ret); argp->txnid = (DB_TXN *)&argp[1]; bp = recbuf; memcpy(&argp->type, bp, sizeof(argp->type)); @@ -906,6 +910,8 @@ __db_relink_read(recbuf, argpp) bp += sizeof(argp->txnid->txnid); memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN)); bp += sizeof(DB_LSN); + memcpy(&argp->opcode, bp, sizeof(argp->opcode)); + bp += sizeof(argp->opcode); memcpy(&argp->fileid, bp, sizeof(argp->fileid)); bp += sizeof(argp->fileid); memcpy(&argp->pgno, bp, sizeof(argp->pgno)); @@ -951,8 +957,7 @@ int __db_addpage_log(logp, txnid, ret_lsnp, flags, rectype = DB_db_addpage; txn_num = txnid == NULL ? 0 : txnid->txnid; if (txnid == NULL) { - null_lsn.file = 0; - null_lsn.offset = 0; + ZERO_LSN(null_lsn); lsnp = &null_lsn; } else lsnp = &txnid->last_lsn; @@ -962,8 +967,8 @@ int __db_addpage_log(logp, txnid, ret_lsnp, flags, + sizeof(*lsn) + sizeof(nextpgno) + sizeof(*nextlsn); - if ((logrec.data = (void *)__db_malloc(logrec.size)) == NULL) - return (ENOMEM); + if ((ret = __os_malloc(logrec.size, NULL, &logrec.data)) != 0) + return (ret); bp = logrec.data; memcpy(bp, &rectype, sizeof(rectype)); @@ -995,7 +1000,7 @@ int __db_addpage_log(logp, txnid, ret_lsnp, flags, ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags); if (txnid != NULL) txnid->last_lsn = *ret_lsnp; - __db_free(logrec.data); + __os_free(logrec.data, 0); return (ret); } @@ -1039,7 +1044,7 @@ __db_addpage_print(notused1, dbtp, lsnp, notused2, notused3) printf("\tnextlsn: [%lu][%lu]\n", (u_long)argp->nextlsn.file, (u_long)argp->nextlsn.offset); printf("\n"); - __db_free(argp); + __os_free(argp, 0); return (0); } @@ -1053,11 +1058,12 @@ __db_addpage_read(recbuf, argpp) { __db_addpage_args *argp; u_int8_t *bp; + int ret; - argp = (__db_addpage_args *)__db_malloc(sizeof(__db_addpage_args) + - sizeof(DB_TXN)); - if (argp == NULL) - return (ENOMEM); + ret = __os_malloc(sizeof(__db_addpage_args) + + sizeof(DB_TXN), NULL, &argp); + if (ret != 0) + return (ret); argp->txnid = (DB_TXN *)&argp[1]; bp = recbuf; memcpy(&argp->type, bp, sizeof(argp->type)); @@ -1108,8 +1114,7 @@ int __db_debug_log(logp, txnid, ret_lsnp, flags, rectype = DB_db_debug; txn_num = txnid == NULL ? 0 : txnid->txnid; if (txnid == NULL) { - null_lsn.file = 0; - null_lsn.offset = 0; + ZERO_LSN(null_lsn); lsnp = &null_lsn; } else lsnp = &txnid->last_lsn; @@ -1119,8 +1124,8 @@ int __db_debug_log(logp, txnid, ret_lsnp, flags, + sizeof(u_int32_t) + (key == NULL ? 0 : key->size) + sizeof(u_int32_t) + (data == NULL ? 0 : data->size) + sizeof(arg_flags); - if ((logrec.data = (void *)__db_malloc(logrec.size)) == NULL) - return (ENOMEM); + if ((ret = __os_malloc(logrec.size, NULL, &logrec.data)) != 0) + return (ret); bp = logrec.data; memcpy(bp, &rectype, sizeof(rectype)); @@ -1170,7 +1175,7 @@ int __db_debug_log(logp, txnid, ret_lsnp, flags, ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags); if (txnid != NULL) txnid->last_lsn = *ret_lsnp; - __db_free(logrec.data); + __os_free(logrec.data, 0); return (ret); } @@ -1236,7 +1241,7 @@ __db_debug_print(notused1, dbtp, lsnp, notused2, notused3) printf("\n"); printf("\targ_flags: %lu\n", (u_long)argp->arg_flags); printf("\n"); - __db_free(argp); + __os_free(argp, 0); return (0); } @@ -1250,11 +1255,12 @@ __db_debug_read(recbuf, argpp) { __db_debug_args *argp; u_int8_t *bp; + int ret; - argp = (__db_debug_args *)__db_malloc(sizeof(__db_debug_args) + - sizeof(DB_TXN)); - if (argp == NULL) - return (ENOMEM); + ret = __os_malloc(sizeof(__db_debug_args) + + sizeof(DB_TXN), NULL, &argp); + if (ret != 0) + return (ret); argp->txnid = (DB_TXN *)&argp[1]; bp = recbuf; memcpy(&argp->type, bp, sizeof(argp->type)); @@ -1284,143 +1290,6 @@ __db_debug_read(recbuf, argpp) } /* - * PUBLIC: int __db_noop_log - * PUBLIC: __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t, - * PUBLIC: u_int32_t, db_pgno_t, DB_LSN *)); - */ -int __db_noop_log(logp, txnid, ret_lsnp, flags, - fileid, pgno, prevlsn) - DB_LOG *logp; - DB_TXN *txnid; - DB_LSN *ret_lsnp; - u_int32_t flags; - u_int32_t fileid; - db_pgno_t pgno; - DB_LSN * prevlsn; -{ - DBT logrec; - DB_LSN *lsnp, null_lsn; - u_int32_t rectype, txn_num; - int ret; - u_int8_t *bp; - - rectype = DB_db_noop; - txn_num = txnid == NULL ? 0 : txnid->txnid; - if (txnid == NULL) { - null_lsn.file = 0; - null_lsn.offset = 0; - lsnp = &null_lsn; - } else - lsnp = &txnid->last_lsn; - logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN) - + sizeof(fileid) - + sizeof(pgno) - + sizeof(*prevlsn); - if ((logrec.data = (void *)__db_malloc(logrec.size)) == NULL) - return (ENOMEM); - - bp = logrec.data; - memcpy(bp, &rectype, sizeof(rectype)); - bp += sizeof(rectype); - memcpy(bp, &txn_num, sizeof(txn_num)); - bp += sizeof(txn_num); - memcpy(bp, lsnp, sizeof(DB_LSN)); - bp += sizeof(DB_LSN); - memcpy(bp, &fileid, sizeof(fileid)); - bp += sizeof(fileid); - memcpy(bp, &pgno, sizeof(pgno)); - bp += sizeof(pgno); - if (prevlsn != NULL) - memcpy(bp, prevlsn, sizeof(*prevlsn)); - else - memset(bp, 0, sizeof(*prevlsn)); - bp += sizeof(*prevlsn); -#ifdef DIAGNOSTIC - if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size) - fprintf(stderr, "Error in log record length"); -#endif - ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags); - if (txnid != NULL) - txnid->last_lsn = *ret_lsnp; - __db_free(logrec.data); - return (ret); -} - -/* - * PUBLIC: int __db_noop_print - * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); - */ -int -__db_noop_print(notused1, dbtp, lsnp, notused2, notused3) - DB_LOG *notused1; - DBT *dbtp; - DB_LSN *lsnp; - int notused2; - void *notused3; -{ - __db_noop_args *argp; - u_int32_t i; - u_int ch; - int ret; - - i = 0; - ch = 0; - notused1 = NULL; - notused2 = 0; - notused3 = NULL; - - if ((ret = __db_noop_read(dbtp->data, &argp)) != 0) - return (ret); - printf("[%lu][%lu]db_noop: rec: %lu txnid %lx prevlsn [%lu][%lu]\n", - (u_long)lsnp->file, - (u_long)lsnp->offset, - (u_long)argp->type, - (u_long)argp->txnid->txnid, - (u_long)argp->prev_lsn.file, - (u_long)argp->prev_lsn.offset); - printf("\tfileid: %lu\n", (u_long)argp->fileid); - printf("\tpgno: %lu\n", (u_long)argp->pgno); - printf("\tprevlsn: [%lu][%lu]\n", - (u_long)argp->prevlsn.file, (u_long)argp->prevlsn.offset); - printf("\n"); - __db_free(argp); - return (0); -} - -/* - * PUBLIC: int __db_noop_read __P((void *, __db_noop_args **)); - */ -int -__db_noop_read(recbuf, argpp) - void *recbuf; - __db_noop_args **argpp; -{ - __db_noop_args *argp; - u_int8_t *bp; - - argp = (__db_noop_args *)__db_malloc(sizeof(__db_noop_args) + - sizeof(DB_TXN)); - if (argp == NULL) - return (ENOMEM); - argp->txnid = (DB_TXN *)&argp[1]; - bp = recbuf; - memcpy(&argp->type, bp, sizeof(argp->type)); - bp += sizeof(argp->type); - memcpy(&argp->txnid->txnid, bp, sizeof(argp->txnid->txnid)); - bp += sizeof(argp->txnid->txnid); - memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN)); - bp += sizeof(DB_LSN); - memcpy(&argp->fileid, bp, sizeof(argp->fileid)); - bp += sizeof(argp->fileid); - memcpy(&argp->pgno, bp, sizeof(argp->pgno)); - bp += sizeof(argp->pgno); - memcpy(&argp->prevlsn, bp, sizeof(argp->prevlsn)); - bp += sizeof(argp->prevlsn); - *argpp = argp; - return (0); -} - -/* * PUBLIC: int __db_init_print __P((DB_ENV *)); */ int @@ -1450,9 +1319,6 @@ __db_init_print(dbenv) if ((ret = __db_add_recovery(dbenv, __db_debug_print, DB_db_debug)) != 0) return (ret); - if ((ret = __db_add_recovery(dbenv, - __db_noop_print, DB_db_noop)) != 0) - return (ret); return (0); } @@ -1486,9 +1352,6 @@ __db_init_recover(dbenv) if ((ret = __db_add_recovery(dbenv, __db_debug_recover, DB_db_debug)) != 0) return (ret); - if ((ret = __db_add_recovery(dbenv, - __db_noop_recover, DB_db_noop)) != 0) - return (ret); return (0); } diff --git a/db2/db/db_dispatch.c b/db2/db/db_dispatch.c index 8645948614..616d08c3ff 100644 --- a/db2/db/db_dispatch.c +++ b/db2/db/db_dispatch.c @@ -43,13 +43,14 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)db_dispatch.c 10.14 (Sleepycat) 5/3/98"; +static const char sccsid[] = "@(#)db_dispatch.c 10.20 (Sleepycat) 10/10/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES #include <sys/types.h> #include <errno.h> +#include <shqueue.h> #include <stddef.h> #include <stdlib.h> #include <string.h> @@ -61,6 +62,7 @@ static const char sccsid[] = "@(#)db_dispatch.c 10.14 (Sleepycat) 5/3/98"; #include "db_am.h" #include "common_ext.h" #include "log_auto.h" +#include "txn.h" #include "txn_auto.h" /* @@ -148,27 +150,16 @@ __db_add_recovery(dbenv, func, ndx) u_int32_t ndx; { u_int32_t i; + int ret; - /* Check if function is already registered. */ - if (dispatch_table && ndx < dispatch_size && - dispatch_table[ndx] != 0 && dispatch_table[ndx] != func) - return (DB_REGISTERED); + COMPQUIET(dbenv, NULL); /* !!!: not currently used. */ /* Check if we have to grow the table. */ if (ndx >= dispatch_size) { - if (dispatch_table == NULL) - dispatch_table = (int (**) - __P((DB_LOG *, DBT *, DB_LSN *, int, void *))) - __db_malloc(DB_user_BEGIN * sizeof(dispatch_table[0])); - else - dispatch_table = (int (**) - __P((DB_LOG *, DBT *, DB_LSN *, int, void *))) - __db_realloc(dispatch_table, (DB_user_BEGIN + - dispatch_size) * sizeof(dispatch_table[0])); - if (dispatch_table == NULL) { - __db_err(dbenv, "%s", strerror(ENOMEM)); - return (ENOMEM); - } + if ((ret = __os_realloc(&dispatch_table, + (DB_user_BEGIN + dispatch_size) * + sizeof(dispatch_table[0]))) != 0) + return (ret); for (i = dispatch_size, dispatch_size += DB_user_BEGIN; i < dispatch_size; ++i) dispatch_table[i] = NULL; @@ -189,9 +180,10 @@ __db_txnlist_init(retp) void *retp; { DB_TXNHEAD *headp; + int ret; - if ((headp = (DB_TXNHEAD *)__db_malloc(sizeof(DB_TXNHEAD))) == NULL) - return (ENOMEM); + if ((ret = __os_malloc(sizeof(DB_TXNHEAD), NULL, &headp)) != 0) + return (ret); LIST_INIT(&headp->head); headp->maxid = 0; @@ -214,9 +206,10 @@ __db_txnlist_add(listp, txnid) { DB_TXNHEAD *hp; DB_TXNLIST *elp; + int ret; - if ((elp = (DB_TXNLIST *)__db_malloc(sizeof(DB_TXNLIST))) == NULL) - return (ENOMEM); + if ((ret = __os_malloc(sizeof(DB_TXNLIST), NULL, &elp)) != 0) + return (ret); elp->txnid = txnid; hp = (DB_TXNHEAD *)listp; @@ -269,9 +262,9 @@ __db_txnlist_end(listp) hp = (DB_TXNHEAD *)listp; while ((p = LIST_FIRST(&hp->head)) != LIST_END(&hp->head)) { LIST_REMOVE(p, links); - __db_free(p); + __os_free(p, 0); } - __db_free(listp); + __os_free(listp, sizeof(DB_TXNHEAD)); } /* diff --git a/db2/db/db_dup.c b/db2/db/db_dup.c index 6379fc1729..2673bbcd61 100644 --- a/db2/db/db_dup.c +++ b/db2/db/db_dup.c @@ -8,7 +8,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)db_dup.c 10.18 (Sleepycat) 5/31/98"; +static const char sccsid[] = "@(#)db_dup.c 10.35 (Sleepycat) 12/2/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -23,25 +23,25 @@ static const char sccsid[] = "@(#)db_dup.c 10.18 (Sleepycat) 5/31/98"; #include "btree.h" #include "db_am.h" -static int __db_addpage __P((DB *, - PAGE **, db_indx_t *, int (*)(DB *, u_int32_t, PAGE **))); -static int __db_dsplit __P((DB *, - PAGE **, db_indx_t *, u_int32_t, int (*)(DB *, u_int32_t, PAGE **))); +static int __db_addpage __P((DBC *, + PAGE **, db_indx_t *, int (*)(DBC *, u_int32_t, PAGE **))); +static int __db_dsplit __P((DBC *, + PAGE **, db_indx_t *, u_int32_t, int (*)(DBC *, u_int32_t, PAGE **))); /* * __db_dput -- * Put a duplicate item onto a duplicate page at the given index. * - * PUBLIC: int __db_dput __P((DB *, - * PUBLIC: DBT *, PAGE **, db_indx_t *, int (*)(DB *, u_int32_t, PAGE **))); + * PUBLIC: int __db_dput __P((DBC *, DBT *, + * PUBLIC: PAGE **, db_indx_t *, int (*)(DBC *, u_int32_t, PAGE **))); */ int -__db_dput(dbp, dbt, pp, indxp, newfunc) - DB *dbp; +__db_dput(dbc, dbt, pp, indxp, newfunc) + DBC *dbc; DBT *dbt; PAGE **pp; db_indx_t *indxp; - int (*newfunc) __P((DB *, u_int32_t, PAGE **)); + int (*newfunc) __P((DBC *, u_int32_t, PAGE **)); { BOVERFLOW bo; DBT *data_dbtp, hdr_dbt, *hdr_dbtp; @@ -54,10 +54,12 @@ __db_dput(dbp, dbt, pp, indxp, newfunc) * We need some access method independent threshold for when we put * a duplicate item onto an overflow page. */ - if (dbt->size > 0.25 * dbp->pgsize) { - if ((ret = __db_poff(dbp, dbt, &pgno, newfunc)) != 0) + if (dbt->size > 0.25 * dbc->dbp->pgsize) { + if ((ret = __db_poff(dbc, dbt, &pgno, newfunc)) != 0) return (ret); + UMRW(bo.unused1); B_TSET(bo.type, B_OVERFLOW, 0); + UMRW(bo.unused2); bo.tlen = dbt->size; bo.pgno = pgno; hdr_dbt.data = &bo; @@ -75,11 +77,14 @@ __db_dput(dbp, dbt, pp, indxp, newfunc) pagep = *pp; if (size > P_FREESPACE(pagep)) { if (*indxp == NUM_ENT(*pp) && NEXT_PGNO(*pp) == PGNO_INVALID) - ret = __db_addpage(dbp, pp, indxp, newfunc); + ret = __db_addpage(dbc, pp, indxp, newfunc); else - ret = __db_dsplit(dbp, pp, indxp, isize, newfunc); + ret = __db_dsplit(dbc, pp, indxp, isize, newfunc); if (ret != 0) - /* XXX: Pages not returned to free list. */ + /* + * XXX + * Pages not returned to free list. + */ return (ret); pagep = *pp; } @@ -88,11 +93,11 @@ __db_dput(dbp, dbt, pp, indxp, newfunc) * Now, pagep references the page on which to insert and indx is the * the location to insert. */ - if ((ret = __db_pitem(dbp, + if ((ret = __db_pitem(dbc, pagep, (u_int32_t)*indxp, isize, hdr_dbtp, data_dbtp)) != 0) return (ret); - (void)memp_fset(dbp->mpf, pagep, DB_MPOOL_DIRTY); + (void)memp_fset(dbc->dbp->mpf, pagep, DB_MPOOL_DIRTY); return (0); } @@ -100,15 +105,15 @@ __db_dput(dbp, dbt, pp, indxp, newfunc) * __db_drem -- * Remove a duplicate at the given index on the given page. * - * PUBLIC: int __db_drem __P((DB *, - * PUBLIC: PAGE **, u_int32_t, int (*)(DB *, PAGE *))); + * PUBLIC: int __db_drem __P((DBC *, + * PUBLIC: PAGE **, u_int32_t, int (*)(DBC *, PAGE *))); */ int -__db_drem(dbp, pp, indx, freefunc) - DB *dbp; +__db_drem(dbc, pp, indx, freefunc) + DBC *dbc; PAGE **pp; u_int32_t indx; - int (*freefunc) __P((DB *, PAGE *)); + int (*freefunc) __P((DBC *, PAGE *)); { PAGE *pagep; int ret; @@ -117,12 +122,12 @@ __db_drem(dbp, pp, indx, freefunc) /* Check if we are freeing a big item. */ if (B_TYPE(GET_BKEYDATA(pagep, indx)->type) == B_OVERFLOW) { - if ((ret = __db_doff(dbp, + if ((ret = __db_doff(dbc, GET_BOVERFLOW(pagep, indx)->pgno, freefunc)) != 0) return (ret); - ret = __db_ditem(dbp, pagep, indx, BOVERFLOW_SIZE); + ret = __db_ditem(dbc, pagep, indx, BOVERFLOW_SIZE); } else - ret = __db_ditem(dbp, pagep, indx, + ret = __db_ditem(dbc, pagep, indx, BKEYDATA_SIZE(GET_BKEYDATA(pagep, indx)->len)); if (ret != 0) return (ret); @@ -137,12 +142,12 @@ __db_drem(dbp, pp, indx, freefunc) * !!! * __db_relink will set the dirty bit for us. */ - if ((ret = __db_relink(dbp, pagep, pp, 0)) != 0) + if ((ret = __db_relink(dbc, DB_REM_PAGE, pagep, pp, 0)) != 0) return (ret); - if ((ret = freefunc(dbp, pagep)) != 0) + if ((ret = freefunc(dbc, pagep)) != 0) return (ret); } else - (void)memp_fset(dbp->mpf, pagep, DB_MPOOL_DIRTY); + (void)memp_fset(dbc->dbp->mpf, pagep, DB_MPOOL_DIRTY); return (0); } @@ -151,32 +156,41 @@ __db_drem(dbp, pp, indx, freefunc) * __db_dend -- * Find the last page in a set of offpage duplicates. * - * PUBLIC: int __db_dend __P((DB *, db_pgno_t, PAGE **)); + * PUBLIC: int __db_dend __P((DBC *, db_pgno_t, PAGE **)); */ int -__db_dend(dbp, pgno, pagep) - DB *dbp; +__db_dend(dbc, pgno, pp) + DBC *dbc; db_pgno_t pgno; - PAGE **pagep; + PAGE **pp; { + DB *dbp; PAGE *h; int ret; + dbp = dbc->dbp; + /* * This implements DB_KEYLAST. The last page is returned in pp; pgno * should be the page number of the first page of the duplicate chain. + * + * *pp may be non-NULL -- if given a valid page use it. */ + if (*pp != NULL) + goto started; for (;;) { - if ((ret = memp_fget(dbp->mpf, &pgno, 0, &h)) != 0) { + if ((ret = memp_fget(dbp->mpf, &pgno, 0, pp)) != 0) { (void)__db_pgerr(dbp, pgno); return (ret); } +started: h = *pp; + if ((pgno = NEXT_PGNO(h)) == PGNO_INVALID) break; - (void)memp_fput(dbp->mpf, h, 0); - } - *pagep = h; + if ((ret = memp_fput(dbp->mpf, h, 0)) != 0) + return (ret); + } return (0); } @@ -191,41 +205,44 @@ __db_dend(dbp, pgno, pagep) * the page on which the insert should happen, not yet put. */ static int -__db_dsplit(dbp, hp, indxp, size, newfunc) - DB *dbp; +__db_dsplit(dbc, hp, indxp, size, newfunc) + DBC *dbc; PAGE **hp; db_indx_t *indxp; u_int32_t size; - int (*newfunc) __P((DB *, u_int32_t, PAGE **)); + int (*newfunc) __P((DBC *, u_int32_t, PAGE **)); { PAGE *h, *np, *tp; BKEYDATA *bk; DBT page_dbt; + DB *dbp; + size_t pgsize; db_indx_t halfbytes, i, indx, lastsum, nindex, oindex, s, sum; - int did_indx, ret; + int did_indx, ret, t_ret; h = *hp; indx = *indxp; + ret = 0; + dbp = dbc->dbp; + pgsize = dbp->pgsize; /* Create a temporary page to do compaction onto. */ - if ((tp = (PAGE *)__db_malloc(dbp->pgsize)) == NULL) - return (ENOMEM); -#ifdef DIAGNOSTIC - memset(tp, 0xff, dbp->pgsize); -#endif + if ((ret = __os_malloc(pgsize, NULL, &tp)) != 0) + return (ret); + /* Create new page for the split. */ - if ((ret = newfunc(dbp, P_DUPLICATE, &np)) != 0) { - FREE(tp, dbp->pgsize); + if ((ret = newfunc(dbc, P_DUPLICATE, &np)) != 0) { + __os_free(tp, pgsize); return (ret); } - P_INIT(np, dbp->pgsize, PGNO(np), PGNO(h), NEXT_PGNO(h), 0, + P_INIT(np, pgsize, PGNO(np), PGNO(h), NEXT_PGNO(h), 0, P_DUPLICATE); - P_INIT(tp, dbp->pgsize, PGNO(h), PREV_PGNO(h), PGNO(np), 0, + P_INIT(tp, pgsize, PGNO(h), PREV_PGNO(h), PGNO(np), 0, P_DUPLICATE); /* Figure out the split point */ - halfbytes = (dbp->pgsize - HOFFSET(h)) / 2; + halfbytes = (pgsize - HOFFSET(h)) / 2; did_indx = 0; for (sum = 0, lastsum = 0, i = 0; i < NUM_ENT(h); i++) { if (i == indx) { @@ -237,7 +254,6 @@ __db_dsplit(dbp, hp, indxp, size, newfunc) (db_indx_t)(sum - halfbytes)) { *hp = np; *indxp = 0; - i--; } else *indxp = i; break; @@ -252,29 +268,28 @@ __db_dsplit(dbp, hp, indxp, size, newfunc) if (lastsum < halfbytes && sum >= halfbytes) { /* We've crossed the halfway point. */ - if ((db_indx_t)(halfbytes - lastsum) < - (db_indx_t)(sum - halfbytes)) - i--; + if ((db_indx_t)(sum - halfbytes) < + (db_indx_t)(halfbytes - lastsum)) + i++; break; } } - /* * Check if we have set the return values of the index pointer and * page pointer. */ if (!did_indx) { *hp = np; - *indxp = indx - i - 1; + *indxp = indx - i; } - if (DB_LOGGING(dbp)) { + if (DB_LOGGING(dbc)) { page_dbt.size = dbp->pgsize; page_dbt.data = h; if ((ret = __db_split_log(dbp->dbenv->lg_info, - dbp->txn, &LSN(h), 0, DB_SPLITOLD, dbp->log_fileid, + dbc->txn, &LSN(h), 0, DB_SPLITOLD, dbp->log_fileid, PGNO(h), &page_dbt, &LSN(h))) != 0) { - FREE(tp, dbp->pgsize); + __os_free(tp, pgsize); return (ret); } LSN(tp) = LSN(h); @@ -283,12 +298,12 @@ __db_dsplit(dbp, hp, indxp, size, newfunc) /* * If it's a btree, adjust the cursors. * - * i is the index of the last element to stay on the page. + * i is the index of the first element to move onto the new page. */ - if (dbp->type == DB_BTREE || dbp->type == DB_RECNO) - __bam_ca_split(dbp, PGNO(h), PGNO(h), PGNO(np), i + 1, 0); + if (dbp->type == DB_BTREE) + __bam_ca_split(dbp, PGNO(h), PGNO(h), PGNO(np), i, 0); - for (nindex = 0, oindex = i + 1; oindex < NUM_ENT(h); oindex++) { + for (nindex = 0, oindex = i; oindex < NUM_ENT(h); oindex++) { bk = GET_BKEYDATA(h, oindex); if (B_TYPE(bk->type) == B_KEYDATA) s = BKEYDATA_SIZE(bk->len); @@ -304,7 +319,7 @@ __db_dsplit(dbp, hp, indxp, size, newfunc) * Now do data compaction by copying the remaining stuff onto the * temporary page and then copying it back to the real page. */ - for (nindex = 0, oindex = 0; oindex <= i; oindex++) { + for (nindex = 0, oindex = 0; oindex < i; oindex++) { bk = GET_BKEYDATA(h, oindex); if (B_TYPE(bk->type) == B_KEYDATA) s = BKEYDATA_SIZE(bk->len); @@ -324,59 +339,73 @@ __db_dsplit(dbp, hp, indxp, size, newfunc) */ memcpy(h, tp, LOFFSET(tp)); memcpy((u_int8_t *)h + HOFFSET(tp), - (u_int8_t *)tp + HOFFSET(tp), dbp->pgsize - HOFFSET(tp)); - FREE(tp, dbp->pgsize); + (u_int8_t *)tp + HOFFSET(tp), pgsize - HOFFSET(tp)); + __os_free(tp, pgsize); - if (DB_LOGGING(dbp)) { - page_dbt.size = dbp->pgsize; + if (DB_LOGGING(dbc)) { + /* + * XXX + * If either of these fails, are we leaving pages pinned? + * Yes, but it seems like this happens in error case. + */ + page_dbt.size = pgsize; page_dbt.data = h; if ((ret = __db_split_log(dbp->dbenv->lg_info, - dbp->txn, &LSN(h), 0, DB_SPLITNEW, dbp->log_fileid, + dbc->txn, &LSN(h), 0, DB_SPLITNEW, dbp->log_fileid, PGNO(h), &page_dbt, &LSN(h))) != 0) return (ret); - page_dbt.size = dbp->pgsize; + page_dbt.size = pgsize; page_dbt.data = np; if ((ret = __db_split_log(dbp->dbenv->lg_info, - dbp->txn, &LSN(np), 0, DB_SPLITNEW, dbp->log_fileid, + dbc->txn, &LSN(np), 0, DB_SPLITNEW, dbp->log_fileid, PGNO(np), &page_dbt, &LSN(np))) != 0) return (ret); } /* + * Finally, if there was a next page after the page being + * split, fix its prev pointer. + */ + if (np->next_pgno != PGNO_INVALID) + ret = __db_relink(dbc, DB_ADD_PAGE, np, NULL, 1); + + /* * Figure out if the location we're interested in is on the new * page, and if so, reset the callers' pointer. Push the other * page back to the store. */ if (*hp == h) - ret = memp_fput(dbp->mpf, np, DB_MPOOL_DIRTY); + t_ret = memp_fput(dbp->mpf, np, DB_MPOOL_DIRTY); else - ret = memp_fput(dbp->mpf, h, DB_MPOOL_DIRTY); + t_ret = memp_fput(dbp->mpf, h, DB_MPOOL_DIRTY); - return (ret); + return (ret != 0 ? ret : t_ret); } /* * __db_ditem -- * Remove an item from a page. * - * PUBLIC: int __db_ditem __P((DB *, PAGE *, u_int32_t, u_int32_t)); + * PUBLIC: int __db_ditem __P((DBC *, PAGE *, u_int32_t, u_int32_t)); */ int -__db_ditem(dbp, pagep, indx, nbytes) - DB *dbp; +__db_ditem(dbc, pagep, indx, nbytes) + DBC *dbc; PAGE *pagep; u_int32_t indx, nbytes; { + DB *dbp; DBT ldbt; db_indx_t cnt, offset; int ret; u_int8_t *from; - if (DB_LOGGING(dbp)) { + dbp = dbc->dbp; + if (DB_LOGGING(dbc)) { ldbt.data = P_ENTRY(pagep, indx); ldbt.size = nbytes; - if ((ret = __db_addrem_log(dbp->dbenv->lg_info, dbp->txn, + if ((ret = __db_addrem_log(dbp->dbenv->lg_info, dbc->txn, &LSN(pagep), 0, DB_REM_DUP, dbp->log_fileid, PGNO(pagep), (u_int32_t)indx, nbytes, &ldbt, NULL, &LSN(pagep))) != 0) return (ret); @@ -413,7 +442,7 @@ __db_ditem(dbp, pagep, indx, nbytes) sizeof(db_indx_t) * (NUM_ENT(pagep) - indx)); /* If it's a btree, adjust the cursors. */ - if (dbp->type == DB_BTREE || dbp->type == DB_RECNO) + if (dbp->type == DB_BTREE) __bam_ca_di(dbp, PGNO(pagep), indx, -1); return (0); @@ -424,16 +453,17 @@ __db_ditem(dbp, pagep, indx, nbytes) * Put an item on a page. * * PUBLIC: int __db_pitem - * PUBLIC: __P((DB *, PAGE *, u_int32_t, u_int32_t, DBT *, DBT *)); + * PUBLIC: __P((DBC *, PAGE *, u_int32_t, u_int32_t, DBT *, DBT *)); */ int -__db_pitem(dbp, pagep, indx, nbytes, hdr, data) - DB *dbp; +__db_pitem(dbc, pagep, indx, nbytes, hdr, data) + DBC *dbc; PAGE *pagep; u_int32_t indx; u_int32_t nbytes; DBT *hdr, *data; { + DB *dbp; BKEYDATA bk; DBT thdr; int ret; @@ -456,8 +486,9 @@ __db_pitem(dbp, pagep, indx, nbytes, hdr, data) * the passed in header sizes must be adjusted for the structure's * placeholder for the trailing variable-length data field. */ - if (DB_LOGGING(dbp)) - if ((ret = __db_addrem_log(dbp->dbenv->lg_info, dbp->txn, + dbp = dbc->dbp; + if (DB_LOGGING(dbc)) + if ((ret = __db_addrem_log(dbp->dbenv->lg_info, dbc->txn, &LSN(pagep), 0, DB_ADD_DUP, dbp->log_fileid, PGNO(pagep), (u_int32_t)indx, nbytes, hdr, data, &LSN(pagep))) != 0) return (ret); @@ -485,7 +516,7 @@ __db_pitem(dbp, pagep, indx, nbytes, hdr, data) memcpy(p + hdr->size, data->data, data->size); /* If it's a btree, adjust the cursors. */ - if (dbp->type == DB_BTREE || dbp->type == DB_RECNO) + if (dbp->type == DB_BTREE) __bam_ca_di(dbp, PGNO(pagep), indx, 1); return (0); @@ -495,14 +526,16 @@ __db_pitem(dbp, pagep, indx, nbytes, hdr, data) * __db_relink -- * Relink around a deleted page. * - * PUBLIC: int __db_relink __P((DB *, PAGE *, PAGE **, int)); + * PUBLIC: int __db_relink __P((DBC *, u_int32_t, PAGE *, PAGE **, int)); */ int -__db_relink(dbp, pagep, new_next, needlock) - DB *dbp; +__db_relink(dbc, add_rem, pagep, new_next, needlock) + DBC *dbc; + u_int32_t add_rem; PAGE *pagep, **new_next; int needlock; { + DB *dbp; PAGE *np, *pp; DB_LOCK npl, ppl; DB_LSN *nlsnp, *plsnp; @@ -512,10 +545,15 @@ __db_relink(dbp, pagep, new_next, needlock) np = pp = NULL; npl = ppl = LOCK_INVALID; nlsnp = plsnp = NULL; + dbp = dbc->dbp; - /* Retrieve and lock the two pages. */ + /* + * Retrieve and lock the one/two pages. For a remove, we may need + * two pages (the before and after). For an add, we only need one + * because, the split took care of the prev. + */ if (pagep->next_pgno != PGNO_INVALID) { - if (needlock && (ret = __bam_lget(dbp, + if (needlock && (ret = __bam_lget(dbc, 0, pagep->next_pgno, DB_LOCK_WRITE, &npl)) != 0) goto err; if ((ret = memp_fget(dbp->mpf, @@ -525,8 +563,8 @@ __db_relink(dbp, pagep, new_next, needlock) } nlsnp = &np->lsn; } - if (pagep->prev_pgno != PGNO_INVALID) { - if (needlock && (ret = __bam_lget(dbp, + if (add_rem == DB_REM_PAGE && pagep->prev_pgno != PGNO_INVALID) { + if (needlock && (ret = __bam_lget(dbc, 0, pagep->prev_pgno, DB_LOCK_WRITE, &ppl)) != 0) goto err; if ((ret = memp_fget(dbp->mpf, @@ -538,9 +576,10 @@ __db_relink(dbp, pagep, new_next, needlock) } /* Log the change. */ - if (DB_LOGGING(dbp)) { - if ((ret = __db_relink_log(dbp->dbenv->lg_info, dbp->txn, - &pagep->lsn, 0, dbp->log_fileid, pagep->pgno, &pagep->lsn, + if (DB_LOGGING(dbc)) { + if ((ret = __db_relink_log(dbp->dbenv->lg_info, dbc->txn, + &pagep->lsn, 0, add_rem, dbp->log_fileid, + pagep->pgno, &pagep->lsn, pagep->prev_pgno, plsnp, pagep->next_pgno, nlsnp)) != 0) goto err; if (np != NULL) @@ -558,7 +597,10 @@ __db_relink(dbp, pagep, new_next, needlock) * set to NULL. */ if (np != NULL) { - np->prev_pgno = pagep->prev_pgno; + if (add_rem == DB_ADD_PAGE) + np->prev_pgno = pagep->pgno; + else + np->prev_pgno = pagep->prev_pgno; if (new_next == NULL) ret = memp_fput(dbp->mpf, np, DB_MPOOL_DIRTY); else { @@ -568,7 +610,7 @@ __db_relink(dbp, pagep, new_next, needlock) if (ret != 0) goto err; if (needlock) - (void)__bam_lput(dbp, npl); + (void)__bam_lput(dbc, npl); } else if (new_next != NULL) *new_next = NULL; @@ -577,18 +619,18 @@ __db_relink(dbp, pagep, new_next, needlock) if ((ret = memp_fput(dbp->mpf, pp, DB_MPOOL_DIRTY)) != 0) goto err; if (needlock) - (void)__bam_lput(dbp, ppl); + (void)__bam_lput(dbc, ppl); } return (0); err: if (np != NULL) (void)memp_fput(dbp->mpf, np, 0); if (needlock && npl != LOCK_INVALID) - (void)__bam_lput(dbp, npl); + (void)__bam_lput(dbc, npl); if (pp != NULL) (void)memp_fput(dbp->mpf, pp, 0); if (needlock && ppl != LOCK_INVALID) - (void)__bam_lput(dbp, ppl); + (void)__bam_lput(dbc, ppl); return (ret); } @@ -596,34 +638,37 @@ err: if (np != NULL) * __db_ddup -- * Delete an offpage chain of duplicates. * - * PUBLIC: int __db_ddup __P((DB *, db_pgno_t, int (*)(DB *, PAGE *))); + * PUBLIC: int __db_ddup __P((DBC *, db_pgno_t, int (*)(DBC *, PAGE *))); */ int -__db_ddup(dbp, pgno, freefunc) - DB *dbp; +__db_ddup(dbc, pgno, freefunc) + DBC *dbc; db_pgno_t pgno; - int (*freefunc) __P((DB *, PAGE *)); + int (*freefunc) __P((DBC *, PAGE *)); { + DB *dbp; PAGE *pagep; DBT tmp_dbt; int ret; + dbp = dbc->dbp; do { if ((ret = memp_fget(dbp->mpf, &pgno, 0, &pagep)) != 0) { (void)__db_pgerr(dbp, pgno); return (ret); } - if (DB_LOGGING(dbp)) { + if (DB_LOGGING(dbc)) { tmp_dbt.data = pagep; tmp_dbt.size = dbp->pgsize; - if ((ret = __db_split_log(dbp->dbenv->lg_info, dbp->txn, - &LSN(pagep), 0, DB_SPLITOLD, dbp->log_fileid, - PGNO(pagep), &tmp_dbt, &LSN(pagep))) != 0) + if ((ret = __db_split_log(dbp->dbenv->lg_info, + dbc->txn, &LSN(pagep), 0, DB_SPLITOLD, + dbp->log_fileid, PGNO(pagep), &tmp_dbt, + &LSN(pagep))) != 0) return (ret); } pgno = pagep->next_pgno; - if ((ret = freefunc(dbp, pagep)) != 0) + if ((ret = freefunc(dbc, pagep)) != 0) return (ret); } while (pgno != PGNO_INVALID); @@ -636,21 +681,23 @@ __db_ddup(dbp, pgno, freefunc) * current page. */ static int -__db_addpage(dbp, hp, indxp, newfunc) - DB *dbp; +__db_addpage(dbc, hp, indxp, newfunc) + DBC *dbc; PAGE **hp; db_indx_t *indxp; - int (*newfunc) __P((DB *, u_int32_t, PAGE **)); + int (*newfunc) __P((DBC *, u_int32_t, PAGE **)); { + DB *dbp; PAGE *newpage; int ret; - if ((ret = newfunc(dbp, P_DUPLICATE, &newpage)) != 0) + dbp = dbc->dbp; + if ((ret = newfunc(dbc, P_DUPLICATE, &newpage)) != 0) return (ret); - if (DB_LOGGING(dbp)) { + if (DB_LOGGING(dbc)) { if ((ret = __db_addpage_log(dbp->dbenv->lg_info, - dbp->txn, &LSN(*hp), 0, dbp->log_fileid, + dbc->txn, &LSN(*hp), 0, dbp->log_fileid, PGNO(*hp), &LSN(*hp), PGNO(newpage), &LSN(newpage))) != 0) { return (ret); } @@ -666,3 +713,235 @@ __db_addpage(dbp, hp, indxp, newfunc) *indxp = 0; return (0); } + +/* + * __db_dsearch -- + * Search a set of duplicates for the proper position for a new duplicate. + * + * + pgno is the page number of the page on which to begin searching. + * Since we can continue duplicate searches, it might not be the first + * page. + * + * + If we are continuing a search, then *pp may be non-NULL in which + * case we do not have to retrieve the page. + * + * + If we are continuing a search, then *indxp contains the first + * on pgno of where we should begin the search. + * + * NOTE: if there is no comparison function, then continuing is + * meaningless, and *pp should always be NULL and *indxp will be + * ignored. + * + * 3 return values:: + * + * + pp is the returned page pointer of where this element should go. + * + indxp is the returned index on that page + * + cmpp is the returned final comparison result. + * + * PUBLIC: int __db_dsearch __P((DBC *, + * PUBLIC: int, DBT *, db_pgno_t, db_indx_t *, PAGE **, int *)); + */ +int +__db_dsearch(dbc, is_insert, dbt, pgno, indxp, pp, cmpp) + DBC *dbc; + int is_insert, *cmpp; + DBT *dbt; + db_pgno_t pgno; + db_indx_t *indxp; + PAGE **pp; +{ + DB *dbp; + PAGE *h; + db_indx_t base, indx, lim, save_indx; + db_pgno_t save_pgno; + int ret; + + dbp = dbc->dbp; + + if (dbp->dup_compare == NULL) { + /* + * We may have been given a valid page, but we may not be + * able to use it. The problem is that the application is + * doing a join and we're trying to continue the search, + * but since the items aren't sorted, we can't. Discard + * the page if it's not the one we're going to start with + * anyway. + */ + if (*pp != NULL && (*pp)->pgno != pgno) { + if ((ret = memp_fput(dbp->mpf, *pp, 0)) != 0) + return (ret); + *pp = NULL; + } + + /* + * If no duplicate function is specified, just go to the end + * of the duplicate set. + */ + if (is_insert) { + if ((ret = __db_dend(dbc, pgno, pp)) != 0) + return (ret); + *indxp = NUM_ENT(*pp); + return (0); + } + + /* + * We are looking for a specific duplicate, so do a linear + * search. + */ + if (*pp != NULL) + goto nocmp_started; + for (;;) { + if ((ret = memp_fget(dbp->mpf, &pgno, 0, pp)) != 0) + goto pg_err; +nocmp_started: h = *pp; + + for (*indxp = 0; *indxp < NUM_ENT(h); ++*indxp) { + if ((*cmpp = __bam_cmp(dbp, + dbt, h, *indxp, __bam_defcmp)) != 0) + continue; + /* + * The duplicate may have already been deleted, + * if it's a btree page, in which case we skip + * it. + */ + if (dbp->type == DB_BTREE && + B_DISSET(GET_BKEYDATA(h, *indxp)->type)) + continue; + + return (0); + } + + if ((pgno = h->next_pgno) == PGNO_INVALID) + break; + + if ((ret = memp_fput(dbp->mpf, h, 0)) != 0) + return (ret); + } + *cmpp = 1; /* We didn't succeed... */ + return (0); + } + + /* + * We have a comparison routine, i.e., the duplicates are sorted. + * Walk through the chain of duplicates, checking the last entry + * on each page to decide if it's the page we want to search. + * + * *pp may be non-NULL -- if we were given a valid page (e.g., are + * in mid-search), then use the provided page. + */ + if (*pp != NULL) + goto cmp_started; + for (;;) { + if ((ret = memp_fget(dbp->mpf, &pgno, 0, pp)) != 0) + goto pg_err; +cmp_started: h = *pp; + + if ((pgno = h->next_pgno) == PGNO_INVALID || __bam_cmp(dbp, + dbt, h, h->entries - 1, dbp->dup_compare) <= 0) + break; + /* + * Even when continuing a search, make sure we don't skip + * entries on a new page + */ + *indxp = 0; + + if ((ret = memp_fput(dbp->mpf, h, 0)) != 0) + return (ret); + } + + /* Next, do a binary search on the page. */ + base = F_ISSET(dbc, DBC_CONTINUE) ? *indxp : 0; + for (lim = NUM_ENT(h) - base; lim != 0; lim >>= 1) { + indx = base + (lim >> 1); + if ((*cmpp = __bam_cmp(dbp, + dbt, h, indx, dbp->dup_compare)) == 0) { + *indxp = indx; + + if (dbp->type != DB_BTREE || + !B_DISSET(GET_BKEYDATA(h, *indxp)->type)) + return (0); + goto check_delete; + } + if (*cmpp > 0) { + base = indx + 1; + lim--; + } + } + + /* + * Base references the smallest index larger than the supplied DBT's + * data item, potentially both 0 and NUM_ENT. + */ + *indxp = base; + return (0); + +check_delete: + /* + * The duplicate may have already been deleted, if it's a btree page, + * in which case we wander around, hoping to find an entry that hasn't + * been deleted. First, wander in a forwardly direction. + */ + save_pgno = (*pp)->pgno; + save_indx = *indxp; + for (++*indxp;;) { + for (; *indxp < NUM_ENT(h); ++*indxp) { + if ((*cmpp = __bam_cmp(dbp, + dbt, h, *indxp, dbp->dup_compare)) != 0) + goto check_delete_rev; + + if (!B_DISSET(GET_BKEYDATA(h, *indxp)->type)) + return (0); + } + if ((pgno = h->next_pgno) == PGNO_INVALID) + break; + + if ((ret = memp_fput(dbp->mpf, h, 0)) != 0) + return (ret); + + if ((ret = memp_fget(dbp->mpf, &pgno, 0, pp)) != 0) + goto pg_err; + h = *pp; + + *indxp = 0; + } + +check_delete_rev: + /* Go back to where we started, and wander in a backwardly direction. */ + if (h->pgno != save_pgno) { + if ((ret = memp_fput(dbp->mpf, h, 0)) != 0) + return (ret); + if ((ret = memp_fget(dbp->mpf, &save_pgno, 0, pp)) != 0) + goto pg_err; + h = *pp; + } + + for (;;) { + while (*indxp > 0) { + --*indxp; + if ((*cmpp = __bam_cmp(dbp, + dbt, h, *indxp, dbp->dup_compare)) != 0) + goto check_delete_fail; + + if (!B_DISSET(GET_BKEYDATA(h, *indxp)->type)) + return (0); + } + if ((pgno = h->prev_pgno) == PGNO_INVALID) + break; + + if ((ret = memp_fput(dbp->mpf, h, 0)) != 0) + return (ret); + + if ((ret = memp_fget(dbp->mpf, &pgno, 0, pp)) != 0) + goto pg_err; + h = *pp; + + *indxp = NUM_ENT(h); + } + +check_delete_fail: + *cmpp = 1; /* We didn't succeed... */ + return (0); + +pg_err: __db_pgerr(dbp, pgno); + return (ret); +} diff --git a/db2/db/db_iface.c b/db2/db/db_iface.c new file mode 100644 index 0000000000..4ebf3ba019 --- /dev/null +++ b/db2/db/db_iface.c @@ -0,0 +1,488 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 1997, 1998 + * Sleepycat Software. All rights reserved. + */ + +#include "config.h" + +#ifndef lint +static const char sccsid[] = "@(#)db_iface.c 10.40 (Sleepycat) 12/19/98"; +#endif /* not lint */ + +#ifndef NO_SYSTEM_INCLUDES +#include <sys/types.h> + +#include <errno.h> +#endif + +#include "db_int.h" +#include "db_page.h" +#include "db_auto.h" +#include "db_ext.h" +#include "common_ext.h" + +static int __db_keyempty __P((const DB_ENV *)); +static int __db_rdonly __P((const DB_ENV *, const char *)); +static int __dbt_ferr __P((const DB *, const char *, const DBT *, int)); + +/* + * __db_cdelchk -- + * Common cursor delete argument checking routine. + * + * PUBLIC: int __db_cdelchk __P((const DB *, u_int32_t, int, int)); + */ +int +__db_cdelchk(dbp, flags, isrdonly, isvalid) + const DB *dbp; + u_int32_t flags; + int isrdonly, isvalid; +{ + /* Check for changes to a read-only tree. */ + if (isrdonly) + return (__db_rdonly(dbp->dbenv, "c_del")); + + /* Check for invalid function flags. */ + switch (flags) { + case 0: + break; + default: + return (__db_ferr(dbp->dbenv, "DBcursor->c_del", 0)); + } + + /* + * The cursor must be initialized, return -1 for an invalid cursor, + * otherwise 0. + */ + return (isvalid ? 0 : EINVAL); +} + +/* + * __db_cgetchk -- + * Common cursor get argument checking routine. + * + * PUBLIC: int __db_cgetchk __P((const DB *, DBT *, DBT *, u_int32_t, int)); + */ +int +__db_cgetchk(dbp, key, data, flags, isvalid) + const DB *dbp; + DBT *key, *data; + u_int32_t flags; + int isvalid; +{ + int key_einval, key_flags, ret; + + key_einval = key_flags = 0; + + /* Check for invalid function flags. */ + LF_CLR(DB_RMW); + switch (flags) { + case DB_NEXT_DUP: + if (dbp->type == DB_RECNO) + goto err; + /* FALLTHROUGH */ + case DB_CURRENT: + case DB_FIRST: + case DB_LAST: + case DB_NEXT: + case DB_PREV: + key_flags = 1; + break; + case DB_GET_BOTH: + case DB_SET_RANGE: + key_einval = key_flags = 1; + break; + case DB_SET: + key_einval = 1; + break; + case DB_GET_RECNO: + if (!F_ISSET(dbp, DB_BT_RECNUM)) + goto err; + break; + case DB_SET_RECNO: + if (!F_ISSET(dbp, DB_BT_RECNUM)) + goto err; + key_einval = key_flags = 1; + break; + default: +err: return (__db_ferr(dbp->dbenv, "DBcursor->c_get", 0)); + } + + /* Check for invalid key/data flags. */ + if ((ret = __dbt_ferr(dbp, "key", key, 0)) != 0) + return (ret); + if ((ret = __dbt_ferr(dbp, "data", data, 0)) != 0) + return (ret); + + /* Check for missing keys. */ + if (key_einval && (key->data == NULL || key->size == 0)) + return (__db_keyempty(dbp->dbenv)); + + /* + * The cursor must be initialized for DB_CURRENT, return -1 for an + * invalid cursor, otherwise 0. + */ + return (isvalid || flags != DB_CURRENT ? 0 : EINVAL); +} + +/* + * __db_cputchk -- + * Common cursor put argument checking routine. + * + * PUBLIC: int __db_cputchk __P((const DB *, + * PUBLIC: const DBT *, DBT *, u_int32_t, int, int)); + */ +int +__db_cputchk(dbp, key, data, flags, isrdonly, isvalid) + const DB *dbp; + const DBT *key; + DBT *data; + u_int32_t flags; + int isrdonly, isvalid; +{ + int key_einval, key_flags, ret; + + key_einval = key_flags = 0; + + /* Check for changes to a read-only tree. */ + if (isrdonly) + return (__db_rdonly(dbp->dbenv, "c_put")); + + /* Check for invalid function flags. */ + switch (flags) { + case DB_AFTER: + case DB_BEFORE: + if (dbp->dup_compare != NULL) + goto err; + if (dbp->type == DB_RECNO && !F_ISSET(dbp, DB_RE_RENUMBER)) + goto err; + if (dbp->type != DB_RECNO && !F_ISSET(dbp, DB_AM_DUP)) + goto err; + break; + case DB_CURRENT: + /* + * If there is a comparison function, doing a DB_CURRENT + * must not change the part of the data item that is used + * for the comparison. + */ + break; + case DB_KEYFIRST: + case DB_KEYLAST: + if (dbp->type == DB_RECNO) + goto err; + key_einval = key_flags = 1; + break; + default: +err: return (__db_ferr(dbp->dbenv, "DBcursor->c_put", 0)); + } + + /* Check for invalid key/data flags. */ + if (key_flags && (ret = __dbt_ferr(dbp, "key", key, 0)) != 0) + return (ret); + if ((ret = __dbt_ferr(dbp, "data", data, 0)) != 0) + return (ret); + + /* Check for missing keys. */ + if (key_einval && (key->data == NULL || key->size == 0)) + return (__db_keyempty(dbp->dbenv)); + + /* + * The cursor must be initialized for anything other than DB_KEYFIRST + * and DB_KEYLAST, return -1 for an invalid cursor, otherwise 0. + */ + return (isvalid || + flags == DB_KEYFIRST || flags == DB_KEYLAST ? 0 : EINVAL); +} + +/* + * __db_closechk -- + * DB->close flag check. + * + * PUBLIC: int __db_closechk __P((const DB *, u_int32_t)); + */ +int +__db_closechk(dbp, flags) + const DB *dbp; + u_int32_t flags; +{ + /* Check for invalid function flags. */ + if (flags != 0 && flags != DB_NOSYNC) + return (__db_ferr(dbp->dbenv, "DB->close", 0)); + + return (0); +} + +/* + * __db_delchk -- + * Common delete argument checking routine. + * + * PUBLIC: int __db_delchk __P((const DB *, DBT *, u_int32_t, int)); + */ +int +__db_delchk(dbp, key, flags, isrdonly) + const DB *dbp; + DBT *key; + u_int32_t flags; + int isrdonly; +{ + /* Check for changes to a read-only tree. */ + if (isrdonly) + return (__db_rdonly(dbp->dbenv, "delete")); + + /* Check for invalid function flags. */ + switch (flags) { + case 0: + break; + default: + return (__db_ferr(dbp->dbenv, "DB->del", 0)); + } + + /* Check for missing keys. */ + if (key->data == NULL || key->size == 0) + return (__db_keyempty(dbp->dbenv)); + + return (0); +} + +/* + * __db_getchk -- + * Common get argument checking routine. + * + * PUBLIC: int __db_getchk __P((const DB *, const DBT *, DBT *, u_int32_t)); + */ +int +__db_getchk(dbp, key, data, flags) + const DB *dbp; + const DBT *key; + DBT *data; + u_int32_t flags; +{ + int ret; + + /* Check for invalid function flags. */ + LF_CLR(DB_RMW); + switch (flags) { + case 0: + case DB_GET_BOTH: + break; + case DB_SET_RECNO: + if (!F_ISSET(dbp, DB_BT_RECNUM)) + goto err; + break; + default: +err: return (__db_ferr(dbp->dbenv, "DB->get", 0)); + } + + /* Check for invalid key/data flags. */ + if ((ret = __dbt_ferr(dbp, "key", key, flags == DB_SET_RECNO)) != 0) + return (ret); + if ((ret = __dbt_ferr(dbp, "data", data, 1)) != 0) + return (ret); + + /* Check for missing keys. */ + if (key->data == NULL || key->size == 0) + return (__db_keyempty(dbp->dbenv)); + + return (0); +} + +/* + * __db_joinchk -- + * Common join argument checking routine. + * + * PUBLIC: int __db_joinchk __P((const DB *, u_int32_t)); + */ +int +__db_joinchk(dbp, flags) + const DB *dbp; + u_int32_t flags; +{ + if (flags != 0) + return (__db_ferr(dbp->dbenv, "DB->join", 0)); + + return (0); +} + +/* + * __db_putchk -- + * Common put argument checking routine. + * + * PUBLIC: int __db_putchk + * PUBLIC: __P((const DB *, DBT *, const DBT *, u_int32_t, int, int)); + */ +int +__db_putchk(dbp, key, data, flags, isrdonly, isdup) + const DB *dbp; + DBT *key; + const DBT *data; + u_int32_t flags; + int isrdonly, isdup; +{ + int ret; + + /* Check for changes to a read-only tree. */ + if (isrdonly) + return (__db_rdonly(dbp->dbenv, "put")); + + /* Check for invalid function flags. */ + switch (flags) { + case 0: + case DB_NOOVERWRITE: + break; + case DB_APPEND: + if (dbp->type != DB_RECNO) + goto err; + break; + default: +err: return (__db_ferr(dbp->dbenv, "DB->put", 0)); + } + + /* Check for invalid key/data flags. */ + if ((ret = __dbt_ferr(dbp, "key", key, 0)) != 0) + return (ret); + if ((ret = __dbt_ferr(dbp, "data", data, 0)) != 0) + return (ret); + + /* Check for missing keys. */ + if (key->data == NULL || key->size == 0) + return (__db_keyempty(dbp->dbenv)); + + /* Check for partial puts in the presence of duplicates. */ + if (isdup && F_ISSET(data, DB_DBT_PARTIAL)) { + __db_err(dbp->dbenv, +"a partial put in the presence of duplicates requires a cursor operation"); + return (EINVAL); + } + + return (0); +} + +/* + * __db_statchk -- + * Common stat argument checking routine. + * + * PUBLIC: int __db_statchk __P((const DB *, u_int32_t)); + */ +int +__db_statchk(dbp, flags) + const DB *dbp; + u_int32_t flags; +{ + /* Check for invalid function flags. */ + switch (flags) { + case 0: + break; + case DB_RECORDCOUNT: + if (dbp->type == DB_RECNO) + break; + if (dbp->type == DB_BTREE && F_ISSET(dbp, DB_BT_RECNUM)) + break; + goto err; + default: +err: return (__db_ferr(dbp->dbenv, "DB->stat", 0)); + } + + return (0); +} + +/* + * __db_syncchk -- + * Common sync argument checking routine. + * + * PUBLIC: int __db_syncchk __P((const DB *, u_int32_t)); + */ +int +__db_syncchk(dbp, flags) + const DB *dbp; + u_int32_t flags; +{ + /* Check for invalid function flags. */ + switch (flags) { + case 0: + break; + default: + return (__db_ferr(dbp->dbenv, "DB->sync", 0)); + } + + return (0); +} + +/* + * __dbt_ferr -- + * Check a DBT for flag errors. + */ +static int +__dbt_ferr(dbp, name, dbt, check_thread) + const DB *dbp; + const char *name; + const DBT *dbt; + int check_thread; +{ + int ret; + + /* + * Check for invalid DBT flags. We allow any of the flags to be + * specified to any DB or DBcursor call so that applications can + * set DB_DBT_MALLOC when retrieving a data item from a secondary + * database and then specify that same DBT as a key to a primary + * database, without having to clear flags. + */ + if ((ret = __db_fchk(dbp->dbenv, name, dbt->flags, + DB_DBT_MALLOC | DB_DBT_USERMEM | DB_DBT_PARTIAL)) != 0) + return (ret); + if ((ret = __db_fcchk(dbp->dbenv, name, + dbt->flags, DB_DBT_MALLOC, DB_DBT_USERMEM)) != 0) + return (ret); + + if (check_thread && F_ISSET(dbp, DB_AM_THREAD) && + !F_ISSET(dbt, DB_DBT_MALLOC | DB_DBT_USERMEM)) { + __db_err(dbp->dbenv, + "missing flag thread flag for %s DBT", name); + return (EINVAL); + } + return (0); +} + +/* + * __db_eopnotsup -- + * Common operation not supported message. + * + * PUBLIC: int __db_eopnotsup __P((const DB_ENV *)); + */ +int +__db_eopnotsup(dbenv) + const DB_ENV *dbenv; +{ + __db_err(dbenv, "operation not supported"); +#ifdef EOPNOTSUPP + return (EOPNOTSUPP); +#else + return (EINVAL); +#endif +} + +/* + * __db_keyempty -- + * Common missing or empty key value message. + */ +static int +__db_keyempty(dbenv) + const DB_ENV *dbenv; +{ + __db_err(dbenv, "missing or empty key value specified"); + return (EINVAL); +} + +/* + * __db_rdonly -- + * Common readonly message. + */ +static int +__db_rdonly(dbenv, name) + const DB_ENV *dbenv; + const char *name; +{ + __db_err(dbenv, "%s: attempt to modify a read-only tree", name); + return (EACCES); +} diff --git a/db2/db/db_join.c b/db2/db/db_join.c new file mode 100644 index 0000000000..a4051c20b0 --- /dev/null +++ b/db2/db/db_join.c @@ -0,0 +1,271 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1998 + * Sleepycat Software. All rights reserved. + */ + +#include "config.h" + +#ifndef lint +static const char sccsid[] = "@(#)db_join.c 10.10 (Sleepycat) 10/9/98"; +#endif /* not lint */ + +#ifndef NO_SYSTEM_INCLUDES +#include <sys/types.h> + +#include <errno.h> +#include <string.h> +#endif + +#include "db_int.h" +#include "db_page.h" +#include "db_join.h" +#include "db_am.h" +#include "common_ext.h" + +static int __db_join_close __P((DBC *)); +static int __db_join_del __P((DBC *, u_int32_t)); +static int __db_join_get __P((DBC *, DBT *, DBT *, u_int32_t)); +static int __db_join_put __P((DBC *, DBT *, DBT *, u_int32_t)); + +/* + * This is the duplicate-assisted join functionality. Right now we're + * going to write it such that we return one item at a time, although + * I think we may need to optimize it to return them all at once. + * It should be easier to get it working this way, and I believe that + * changing it should be fairly straightforward. + * + * XXX + * Right now we do not maintain the number of duplicates so we do + * not optimize the join. If the caller does, then best performance + * will be achieved by putting the cursor with the smallest cardinality + * first. + * + * The first cursor moves sequentially through the duplicate set while + * the others search explicitly for the duplicate in question. + * + */ + +/* + * __db_join -- + * This is the interface to the duplicate-assisted join functionality. + * In the same way that cursors mark a position in a database, a cursor + * can mark a position in a join. While most cursors are created by the + * cursor method of a DB, join cursors are created through an explicit + * call to DB->join. + * + * The curslist is an array of existing, intialized cursors and primary + * is the DB of the primary file. The data item that joins all the + * cursors in the curslist is used as the key into the primary and that + * key and data are returned. When no more items are left in the join + * set, the c_next operation off the join cursor will return DB_NOTFOUND. + * + * PUBLIC: int __db_join __P((DB *, DBC **, u_int32_t, DBC **)); + */ +int +__db_join(primary, curslist, flags, dbcp) + DB *primary; + DBC **curslist, **dbcp; + u_int32_t flags; +{ + DBC *dbc; + JOIN_CURSOR *jc; + int i, ret; + + DB_PANIC_CHECK(primary); + + if ((ret = __db_joinchk(primary, flags)) != 0) + return (ret); + + if (curslist == NULL || curslist[0] == NULL) + return (EINVAL); + + dbc = NULL; + jc = NULL; + + if ((ret = __os_calloc(1, sizeof(DBC), &dbc)) != 0) + goto err; + + if ((ret = __os_calloc(1, sizeof(JOIN_CURSOR), &jc)) != 0) + goto err; + + if ((ret = __os_malloc(256, NULL, &jc->j_key.data)) != 0) + goto err; + jc->j_key.ulen = 256; + F_SET(&jc->j_key, DB_DBT_USERMEM); + + for (jc->j_curslist = curslist; + *jc->j_curslist != NULL; jc->j_curslist++) + ; + if ((ret = __os_calloc((jc->j_curslist - curslist + 1), + sizeof(DBC *), &jc->j_curslist)) != 0) + goto err; + for (i = 0; curslist[i] != NULL; i++) { + if (i != 0) + F_SET(curslist[i], DBC_KEYSET); + jc->j_curslist[i] = curslist[i]; + } + + dbc->c_close = __db_join_close; + dbc->c_del = __db_join_del; + dbc->c_get = __db_join_get; + dbc->c_put = __db_join_put; + dbc->internal = jc; + dbc->dbp = primary; + jc->j_init = 1; + jc->j_primary = primary; + + *dbcp = dbc; + + return (0); + +err: if (jc != NULL) { + if (jc->j_curslist != NULL) + __os_free(jc->j_curslist, + (jc->j_curslist - curslist + 1) * sizeof(DBC *)); + __os_free(jc, sizeof(JOIN_CURSOR)); + } + if (dbc != NULL) + __os_free(dbc, sizeof(DBC)); + return (ret); +} + +static int +__db_join_put(dbc, key, data, flags) + DBC *dbc; + DBT *key; + DBT *data; + u_int32_t flags; +{ + DB_PANIC_CHECK(dbc->dbp); + + COMPQUIET(key, NULL); + COMPQUIET(data, NULL); + COMPQUIET(flags, 0); + return (EINVAL); +} + +static int +__db_join_del(dbc, flags) + DBC *dbc; + u_int32_t flags; +{ + DB_PANIC_CHECK(dbc->dbp); + + COMPQUIET(flags, 0); + return (EINVAL); +} + +static int +__db_join_get(dbc, key, data, flags) + DBC *dbc; + DBT *key, *data; + u_int32_t flags; +{ + DB *dbp; + DBC **cpp; + JOIN_CURSOR *jc; + int ret; + u_int32_t operation; + + dbp = dbc->dbp; + + DB_PANIC_CHECK(dbp); + + operation = LF_ISSET(DB_OPFLAGS_MASK); + if (operation != 0 && operation != DB_JOIN_ITEM) + return (__db_ferr(dbp->dbenv, "DBcursor->c_get", 0)); + + LF_CLR(DB_OPFLAGS_MASK); + if ((ret = + __db_fchk(dbp->dbenv, "DBcursor->c_get", flags, DB_RMW)) != 0) + return (ret); + + jc = (JOIN_CURSOR *)dbc->internal; +retry: + ret = jc->j_curslist[0]->c_get(jc->j_curslist[0], + &jc->j_key, key, jc->j_init ? DB_CURRENT : DB_NEXT_DUP); + + if (ret == ENOMEM) { + jc->j_key.ulen <<= 1; + if ((ret = __os_realloc(&jc->j_key.data, jc->j_key.ulen)) != 0) + return (ret); + goto retry; + } + if (ret != 0) + return (ret); + + jc->j_init = 0; + do { + /* + * We have the first element; now look for it in the + * other cursors. + */ + for (cpp = jc->j_curslist + 1; *cpp != NULL; cpp++) { +retry2: if ((ret = ((*cpp)->c_get)(*cpp, + &jc->j_key, key, DB_GET_BOTH)) == DB_NOTFOUND) + break; + if (ret == ENOMEM) { + jc->j_key.ulen <<= 1; + if ((ret = __os_realloc(&jc->j_key.data, + jc->j_key.ulen)) != 0) + return (ret); + goto retry2; + } + if (F_ISSET(*cpp, DBC_KEYSET)) { + F_CLR(*cpp, DBC_KEYSET); + F_SET(*cpp, DBC_CONTINUE); + } + } + + /* + * If we got out of here with ret != 0, then we failed to + * find the duplicate in one of the files, so we go on to + * the next item in the outermost relation. If ret was + * equal to 0, then we've got something to return. + */ + if (ret == 0) + break; + } while ((ret = jc->j_curslist[0]->c_get(jc->j_curslist[0], + &jc->j_key, key, DB_NEXT_DUP)) == 0); + + /* + * If ret != 0 here, we've exhausted the first file. Otherwise, + * key and data are set and we need to do the lookup on the + * primary. + */ + if (ret != 0) + return (ret); + + if (operation == DB_JOIN_ITEM) + return (0); + else + return ((jc->j_primary->get)(jc->j_primary, + jc->j_curslist[0]->txn, key, data, 0)); +} + +static int +__db_join_close(dbc) + DBC *dbc; +{ + JOIN_CURSOR *jc; + int i; + + DB_PANIC_CHECK(dbc->dbp); + + jc = (JOIN_CURSOR *)dbc->internal; + + /* + * Clear the optimization flag in the cursors. + */ + for (i = 0; jc->j_curslist[i] != NULL; i++) + F_CLR(jc->j_curslist[i], DBC_CONTINUE | DBC_KEYSET); + + __os_free(jc->j_curslist, 0); + __os_free(jc->j_key.data, jc->j_key.ulen); + __os_free(jc, sizeof(JOIN_CURSOR)); + __os_free(dbc, sizeof(DBC)); + + return (0); +} diff --git a/db2/db/db_overflow.c b/db2/db/db_overflow.c index d28740dcbe..0efcc9de7f 100644 --- a/db2/db/db_overflow.c +++ b/db2/db/db_overflow.c @@ -47,7 +47,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)db_overflow.c 10.11 (Sleepycat) 5/7/98"; +static const char sccsid[] = "@(#)db_overflow.c 10.21 (Sleepycat) 9/27/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -60,6 +60,7 @@ static const char sccsid[] = "@(#)db_overflow.c 10.11 (Sleepycat) 5/7/98"; #include "db_int.h" #include "db_page.h" #include "db_am.h" +#include "common_ext.h" /* * Big key/data code. @@ -106,29 +107,20 @@ __db_goff(dbp, dbt, tlen, pgno, bpp, bpsz) needed = tlen; } - /* - * Allocate any necessary memory. - * - * XXX: Never allocate 0 bytes; - */ + /* Allocate any necessary memory. */ if (F_ISSET(dbt, DB_DBT_USERMEM)) { if (needed > dbt->ulen) { dbt->size = needed; return (ENOMEM); } } else if (F_ISSET(dbt, DB_DBT_MALLOC)) { - dbt->data = dbp->db_malloc == NULL ? - (void *)__db_malloc(needed + 1) : - (void *)dbp->db_malloc(needed + 1); - if (dbt->data == NULL) - return (ENOMEM); + if ((ret = + __os_malloc(needed, dbp->db_malloc, &dbt->data)) != 0) + return (ret); } else if (*bpsz == 0 || *bpsz < needed) { - *bpp = (*bpp == NULL ? - (void *)__db_malloc(needed + 1) : - (void *)__db_realloc(*bpp, needed + 1)); - if (*bpp == NULL) - return (ENOMEM); - *bpsz = needed + 1; + if ((ret = __os_realloc(bpp, needed)) != 0) + return (ret); + *bpsz = needed; dbt->data = *bpp; } else dbt->data = *bpp; @@ -168,16 +160,17 @@ __db_goff(dbp, dbt, tlen, pgno, bpp, bpsz) * __db_poff -- * Put an offpage item. * - * PUBLIC: int __db_poff __P((DB *, const DBT *, db_pgno_t *, - * PUBLIC: int (*)(DB *, u_int32_t, PAGE **))); + * PUBLIC: int __db_poff __P((DBC *, const DBT *, db_pgno_t *, + * PUBLIC: int (*)(DBC *, u_int32_t, PAGE **))); */ int -__db_poff(dbp, dbt, pgnop, newfunc) - DB *dbp; +__db_poff(dbc, dbt, pgnop, newfunc) + DBC *dbc; const DBT *dbt; db_pgno_t *pgnop; - int (*newfunc) __P((DB *, u_int32_t, PAGE **)); + int (*newfunc) __P((DBC *, u_int32_t, PAGE **)); { + DB *dbp; PAGE *pagep, *lastp; DB_LSN new_lsn, null_lsn; DBT tmp_dbt; @@ -191,6 +184,7 @@ __db_poff(dbp, dbt, pgnop, newfunc) * number of bytes we get for pages we fill completely with a single * item. */ + dbp = dbc->dbp; pagespace = P_MAXSPACE(dbp->pgsize); lastp = NULL; @@ -208,13 +202,13 @@ __db_poff(dbp, dbt, pgnop, newfunc) * the item onto the page. If sz is less than pagespace, we * have a partial record. */ - if ((ret = newfunc(dbp, P_OVERFLOW, &pagep)) != 0) + if ((ret = newfunc(dbc, P_OVERFLOW, &pagep)) != 0) return (ret); - if (DB_LOGGING(dbp)) { + if (DB_LOGGING(dbc)) { tmp_dbt.data = p; tmp_dbt.size = pagespace; ZERO_LSN(null_lsn); - if ((ret = __db_big_log(dbp->dbenv->lg_info, dbp->txn, + if ((ret = __db_big_log(dbp->dbenv->lg_info, dbc->txn, &new_lsn, 0, DB_ADD_BIG, dbp->log_fileid, PGNO(pagep), lastp ? PGNO(lastp) : PGNO_INVALID, PGNO_INVALID, &tmp_dbt, &LSN(pagep), @@ -256,24 +250,26 @@ __db_poff(dbp, dbt, pgnop, newfunc) * __db_ovref -- * Increment/decrement the reference count on an overflow page. * - * PUBLIC: int __db_ovref __P((DB *, db_pgno_t, int32_t)); + * PUBLIC: int __db_ovref __P((DBC *, db_pgno_t, int32_t)); */ int -__db_ovref(dbp, pgno, adjust) - DB *dbp; +__db_ovref(dbc, pgno, adjust) + DBC *dbc; db_pgno_t pgno; int32_t adjust; { + DB *dbp; PAGE *h; int ret; + dbp = dbc->dbp; if ((ret = memp_fget(dbp->mpf, &pgno, 0, &h)) != 0) { (void)__db_pgerr(dbp, pgno); return (ret); } - if (DB_LOGGING(dbp)) - if ((ret = __db_ovref_log(dbp->dbenv->lg_info, dbp->txn, + if (DB_LOGGING(dbc)) + if ((ret = __db_ovref_log(dbp->dbenv->lg_info, dbc->txn, &LSN(h), 0, dbp->log_fileid, h->pgno, adjust, &LSN(h))) != 0) return (ret); @@ -287,19 +283,21 @@ __db_ovref(dbp, pgno, adjust) * __db_doff -- * Delete an offpage chain of overflow pages. * - * PUBLIC: int __db_doff __P((DB *, db_pgno_t, int (*)(DB *, PAGE *))); + * PUBLIC: int __db_doff __P((DBC *, db_pgno_t, int (*)(DBC *, PAGE *))); */ int -__db_doff(dbp, pgno, freefunc) - DB *dbp; +__db_doff(dbc, pgno, freefunc) + DBC *dbc; db_pgno_t pgno; - int (*freefunc) __P((DB *, PAGE *)); + int (*freefunc) __P((DBC *, PAGE *)); { + DB *dbp; PAGE *pagep; DB_LSN null_lsn; DBT tmp_dbt; int ret; + dbp = dbc->dbp; do { if ((ret = memp_fget(dbp->mpf, &pgno, 0, &pagep)) != 0) { (void)__db_pgerr(dbp, pgno); @@ -312,21 +310,21 @@ __db_doff(dbp, pgno, freefunc) */ if (TYPE(pagep) == P_OVERFLOW && OV_REF(pagep) > 1) { (void)memp_fput(dbp->mpf, pagep, 0); - return (__db_ovref(dbp, pgno, -1)); + return (__db_ovref(dbc, pgno, -1)); } - if (DB_LOGGING(dbp)) { + if (DB_LOGGING(dbc)) { tmp_dbt.data = (u_int8_t *)pagep + P_OVERHEAD; tmp_dbt.size = OV_LEN(pagep); ZERO_LSN(null_lsn); - if ((ret = __db_big_log(dbp->dbenv->lg_info, dbp->txn, + if ((ret = __db_big_log(dbp->dbenv->lg_info, dbc->txn, &LSN(pagep), 0, DB_REM_BIG, dbp->log_fileid, PGNO(pagep), PREV_PGNO(pagep), NEXT_PGNO(pagep), &tmp_dbt, &LSN(pagep), &null_lsn, &null_lsn)) != 0) return (ret); } pgno = pagep->next_pgno; - if ((ret = freefunc(dbp, pagep)) != 0) + if ((ret = freefunc(dbc, pagep)) != 0) return (ret); } while (pgno != PGNO_INVALID); @@ -339,44 +337,71 @@ __db_doff(dbp, pgno, freefunc) * * Given a starting page number and a key, return <0, 0, >0 to indicate if the * key on the page is less than, equal to or greater than the key specified. + * We optimize this by doing chunk at a time comparison unless the user has + * specified a comparison function. In this case, we need to materialize + * the entire object and call their comparison routine. * - * PUBLIC: int __db_moff __P((DB *, const DBT *, db_pgno_t)); + * PUBLIC: int __db_moff __P((DB *, const DBT *, db_pgno_t, u_int32_t, + * PUBLIC: int (*)(const DBT *, const DBT *), int *)); */ int -__db_moff(dbp, dbt, pgno) +__db_moff(dbp, dbt, pgno, tlen, cmpfunc, cmpp) DB *dbp; const DBT *dbt; db_pgno_t pgno; + u_int32_t tlen; + int (*cmpfunc) __P((const DBT *, const DBT *)), *cmpp; { PAGE *pagep; - u_int32_t cmp_bytes, key_left; + DBT local_dbt; + void *buf; + u_int32_t bufsize, cmp_bytes, key_left; u_int8_t *p1, *p2; int ret; + /* + * If there is a user-specified comparison function, build a + * contiguous copy of the key, and call it. + */ + if (cmpfunc != NULL) { + memset(&local_dbt, 0, sizeof(local_dbt)); + buf = NULL; + bufsize = 0; + + if ((ret = __db_goff(dbp, + &local_dbt, tlen, pgno, &buf, &bufsize)) != 0) + return (ret); + *cmpp = cmpfunc(&local_dbt, dbt); + __os_free(buf, bufsize); + return (0); + } + /* While there are both keys to compare. */ - for (ret = 0, p1 = dbt->data, + for (*cmpp = 0, p1 = dbt->data, key_left = dbt->size; key_left > 0 && pgno != PGNO_INVALID;) { - if (memp_fget(dbp->mpf, &pgno, 0, &pagep) != 0) { - (void)__db_pgerr(dbp, pgno); - return (0); /* No system error return. */ - } + if ((ret = memp_fget(dbp->mpf, &pgno, 0, &pagep)) != 0) + return (ret); cmp_bytes = OV_LEN(pagep) < key_left ? OV_LEN(pagep) : key_left; key_left -= cmp_bytes; for (p2 = (u_int8_t *)pagep + P_OVERHEAD; cmp_bytes-- > 0; ++p1, ++p2) if (*p1 != *p2) { - ret = (long)*p1 - (long)*p2; + *cmpp = (long)*p1 - (long)*p2; break; } pgno = NEXT_PGNO(pagep); - (void)memp_fput(dbp->mpf, pagep, 0); - if (ret != 0) + if ((ret = memp_fput(dbp->mpf, pagep, 0)) != 0) return (ret); + if (*cmpp != 0) + return (0); } if (key_left > 0) /* DBT is longer than page key. */ - return (-1); - if (pgno != PGNO_INVALID) /* DBT is shorter than page key. */ - return (1); + *cmpp = -1; + else if (pgno != PGNO_INVALID) /* DBT is shorter than page key. */ + *cmpp = 1; + else + *cmpp = 0; + return (0); } diff --git a/db2/db/db_pr.c b/db2/db/db_pr.c index a294cdd135..7f4364c6e1 100644 --- a/db2/db/db_pr.c +++ b/db2/db/db_pr.c @@ -8,7 +8,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)db_pr.c 10.29 (Sleepycat) 5/23/98"; +static const char sccsid[] = "@(#)db_pr.c 10.40 (Sleepycat) 11/22/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -126,11 +126,10 @@ __db_prdb(dbp) { DB_AM_MLOCAL, "local mpool" }, { DB_AM_PGDEF, "default page size" }, { DB_AM_RDONLY, "read-only" }, - { DB_AM_RECOVER, "recover" }, { DB_AM_SWAP, "needswap" }, { DB_AM_THREAD, "thread" }, - { DB_BT_RECNUM, "btree:records" }, - { DB_HS_DIRTYMETA, "hash:dirty-meta" }, + { DB_BT_RECNUM, "btree:recnum" }, + { DB_DBM_ERROR, "dbm/ndbm error" }, { DB_RE_DELIMITER, "recno:delimiter" }, { DB_RE_FIXEDLEN, "recno:fixed-length" }, { DB_RE_PAD, "recno:pad" }, @@ -178,42 +177,55 @@ __db_prbtree(dbp) static const FN mfn[] = { { BTM_DUP, "duplicates" }, { BTM_RECNO, "recno" }, - { BTM_RECNUM, "btree:records" }, + { BTM_RECNUM, "btree:recnum" }, { BTM_FIXEDLEN, "recno:fixed-length" }, { BTM_RENUMBER, "recno:renumber" }, { 0 }, }; + DBC *dbc; BTMETA *mp; BTREE *t; - EPG *epg; FILE *fp; PAGE *h; RECNO *rp; db_pgno_t i; - int ret; + int cnt, ret; + const char *sep; t = dbp->internal; fp = __db_prinit(NULL); + if ((ret = dbp->cursor(dbp, NULL, &dbc, 0)) != 0) + return (ret); (void)fprintf(fp, "%s\nOn-page metadata:\n", DB_LINE); i = PGNO_METADATA; - if ((ret = __bam_pget(dbp, (PAGE **)&mp, &i, 0)) != 0) + if ((ret = memp_fget(dbp->mpf, &i, 0, (PAGE **)&mp)) != 0) { + (void)dbc->c_close(dbc); return (ret); + } + fprintf(fp, "lsn.file: %lu lsn.offset: %lu\n", + (u_long)LSN(mp).file, (u_long)LSN(mp).offset); (void)fprintf(fp, "magic %#lx\n", (u_long)mp->magic); (void)fprintf(fp, "version %#lx\n", (u_long)mp->version); (void)fprintf(fp, "pagesize %lu\n", (u_long)mp->pagesize); (void)fprintf(fp, "maxkey: %lu minkey: %lu\n", (u_long)mp->maxkey, (u_long)mp->minkey); - (void)fprintf(fp, "free %lu", (u_long)mp->free); - for (i = mp->free; i != PGNO_INVALID;) { - if ((ret = __bam_pget(dbp, &h, &i, 0)) != 0) + (void)fprintf(fp, "free list: %lu", (u_long)mp->free); + for (i = mp->free, cnt = 0, sep = ", "; i != PGNO_INVALID;) { + if ((ret = memp_fget(dbp->mpf, &i, 0, &h)) != 0) return (ret); i = h->next_pgno; (void)memp_fput(dbp->mpf, h, 0); - (void)fprintf(fp, ", %lu", (u_long)i); + (void)fprintf(fp, "%s%lu", sep, (u_long)i); + if (++cnt % 10 == 0) { + (void)fprintf(fp, "\n"); + cnt = 0; + sep = ""; + } else + sep = ", "; } (void)fprintf(fp, "\n"); @@ -227,7 +239,7 @@ __db_prbtree(dbp) (u_long)t->bt_maxkey, (u_long)t->bt_minkey); (void)fprintf(fp, "bt_compare: %#lx bt_prefix: %#lx\n", (u_long)t->bt_compare, (u_long)t->bt_prefix); - if ((rp = t->bt_recno) != NULL) { + if ((rp = t->recno) != NULL) { (void)fprintf(fp, "re_delim: %#lx re_pad: %#lx re_len: %lu re_source: %s\n", (u_long)rp->re_delim, (u_long)rp->re_pad, @@ -238,13 +250,9 @@ __db_prbtree(dbp) (u_long)rp->re_cmap, (u_long)rp->re_smap, (u_long)rp->re_emap, (u_long)rp->re_msize); } - (void)fprintf(fp, "stack:"); - for (epg = t->bt_stack; epg < t->bt_sp; ++epg) - (void)fprintf(fp, " %lu", (u_long)epg->page->pgno); - (void)fprintf(fp, "\n"); (void)fprintf(fp, "ovflsize: %lu\n", (u_long)t->bt_ovflsize); (void)fflush(fp); - return (0); + return (dbc->c_close(dbc)); } /* @@ -258,51 +266,50 @@ __db_prhash(dbp) DB *dbp; { FILE *fp; - HTAB *t; + DBC *dbc; + HASH_CURSOR *hcp; int i, put_page, ret; db_pgno_t pgno; - t = dbp->internal; - fp = __db_prinit(NULL); + if ((ret = dbp->cursor(dbp, NULL, &dbc, 0)) != 0) + return (ret); + hcp = (HASH_CURSOR *)dbc->internal; - fprintf(fp, "\thash_accesses %lu\n", (u_long)t->hash_accesses); - fprintf(fp, "\thash_collisions %lu\n", (u_long)t->hash_collisions); - fprintf(fp, "\thash_expansions %lu\n", (u_long)t->hash_expansions); - fprintf(fp, "\thash_overflows %lu\n", (u_long)t->hash_overflows); - fprintf(fp, "\thash_bigpages %lu\n", (u_long)t->hash_bigpages); - fprintf(fp, "\n"); - - if (t->hdr == NULL) { + /* + * In this case, hcp->hdr will never be null, if we decide + * to pass dbc's to this routine instead, then it could be. + */ + if (hcp->hdr == NULL) { pgno = PGNO_METADATA; - if ((ret = memp_fget(dbp->mpf, &pgno, 0, &t->hdr)) != 0) + if ((ret = memp_fget(dbp->mpf, &pgno, 0, &hcp->hdr)) != 0) return (ret); put_page = 1; } else put_page = 0; - fprintf(fp, "\tmagic %#lx\n", (u_long)t->hdr->magic); - fprintf(fp, "\tversion %lu\n", (u_long)t->hdr->version); - fprintf(fp, "\tpagesize %lu\n", (u_long)t->hdr->pagesize); - fprintf(fp, "\tovfl_point %lu\n", (u_long)t->hdr->ovfl_point); - fprintf(fp, "\tlast_freed %lu\n", (u_long)t->hdr->last_freed); - fprintf(fp, "\tmax_bucket %lu\n", (u_long)t->hdr->max_bucket); - fprintf(fp, "\thigh_mask %#lx\n", (u_long)t->hdr->high_mask); - fprintf(fp, "\tlow_mask %#lx\n", (u_long)t->hdr->low_mask); - fprintf(fp, "\tffactor %lu\n", (u_long)t->hdr->ffactor); - fprintf(fp, "\tnelem %lu\n", (u_long)t->hdr->nelem); - fprintf(fp, "\th_charkey %#lx\n", (u_long)t->hdr->h_charkey); + fprintf(fp, "\tmagic %#lx\n", (u_long)hcp->hdr->magic); + fprintf(fp, "\tversion %lu\n", (u_long)hcp->hdr->version); + fprintf(fp, "\tpagesize %lu\n", (u_long)hcp->hdr->pagesize); + fprintf(fp, "\tovfl_point %lu\n", (u_long)hcp->hdr->ovfl_point); + fprintf(fp, "\tlast_freed %lu\n", (u_long)hcp->hdr->last_freed); + fprintf(fp, "\tmax_bucket %lu\n", (u_long)hcp->hdr->max_bucket); + fprintf(fp, "\thigh_mask %#lx\n", (u_long)hcp->hdr->high_mask); + fprintf(fp, "\tlow_mask %#lx\n", (u_long)hcp->hdr->low_mask); + fprintf(fp, "\tffactor %lu\n", (u_long)hcp->hdr->ffactor); + fprintf(fp, "\tnelem %lu\n", (u_long)hcp->hdr->nelem); + fprintf(fp, "\th_charkey %#lx\n", (u_long)hcp->hdr->h_charkey); for (i = 0; i < NCACHED; i++) - fprintf(fp, "%lu ", (u_long)t->hdr->spares[i]); + fprintf(fp, "%lu ", (u_long)hcp->hdr->spares[i]); fprintf(fp, "\n"); (void)fflush(fp); if (put_page) { - (void)memp_fput(dbp->mpf, (PAGE *)t->hdr, 0); - t->hdr = NULL; + (void)memp_fput(dbp->mpf, (PAGE *)hcp->hdr, 0); + hcp->hdr = NULL; } - return (0); + return (dbc->c_close(dbc)); } /* @@ -318,22 +325,18 @@ __db_prtree(mpf, all) { PAGE *h; db_pgno_t i; - int ret, t_ret; if (set_psize == PSIZE_BOUNDARY) __db_psize(mpf); - ret = 0; for (i = PGNO_ROOT;; ++i) { - if ((ret = memp_fget(mpf, &i, 0, &h)) != 0) + if (memp_fget(mpf, &i, 0, &h) != 0) break; - if (TYPE(h) != P_INVALID) - if ((t_ret = __db_prpage(h, all)) != 0 && ret == 0) - ret = t_ret; + (void)__db_prpage(h, all); (void)memp_fput(mpf, h, 0); } (void)fflush(__db_prinit(NULL)); - return (ret); + return (0); } /* @@ -425,8 +428,7 @@ __db_prpage(h, all) (TYPE(h) == P_LRECNO && h->pgno == PGNO_ROOT)) fprintf(fp, " total records: %4lu", (u_long)RE_NREC(h)); fprintf(fp, "\n"); - if (TYPE(h) == P_LBTREE || TYPE(h) == P_LRECNO || - TYPE(h) == P_DUPLICATE || TYPE(h) == P_OVERFLOW) + if (TYPE(h) != P_IBTREE && TYPE(h) != P_IRECNO) fprintf(fp, " prev: %4lu next: %4lu", (u_long)PREV_PGNO(h), (u_long)NEXT_PGNO(h)); if (TYPE(h) == P_IBTREE || TYPE(h) == P_LBTREE) diff --git a/db2/db/db_rec.c b/db2/db/db_rec.c index 1ef6f18e61..7f577b5855 100644 --- a/db2/db/db_rec.c +++ b/db2/db/db_rec.c @@ -8,7 +8,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)db_rec.c 10.16 (Sleepycat) 4/28/98"; +static const char sccsid[] = "@(#)db_rec.c 10.19 (Sleepycat) 9/27/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -40,7 +40,8 @@ __db_addrem_recover(logp, dbtp, lsnp, redo, info) void *info; { __db_addrem_args *argp; - DB *file_dbp, *mdbp; + DB *file_dbp; + DBC *dbc; DB_MPOOLFILE *mpf; PAGE *pagep; u_int32_t change; @@ -57,9 +58,7 @@ __db_addrem_recover(logp, dbtp, lsnp, redo, info) * would not have to undo anything. In this case, * don't bother creating a page. */ - *lsnp = argp->prev_lsn; - ret = 0; - goto out; + goto done; } else if ((ret = memp_fget(mpf, &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) @@ -73,7 +72,7 @@ __db_addrem_recover(logp, dbtp, lsnp, redo, info) (cmp_n == 0 && !redo && argp->opcode == DB_REM_DUP)) { /* Need to redo an add, or undo a delete. */ - if ((ret = __db_pitem(file_dbp, pagep, argp->indx, argp->nbytes, + if ((ret = __db_pitem(dbc, pagep, argp->indx, argp->nbytes, argp->hdr.size == 0 ? NULL : &argp->hdr, argp->dbt.size == 0 ? NULL : &argp->dbt)) != 0) goto out; @@ -83,7 +82,7 @@ __db_addrem_recover(logp, dbtp, lsnp, redo, info) } else if ((cmp_n == 0 && !redo && argp->opcode == DB_ADD_DUP) || (cmp_p == 0 && redo && argp->opcode == DB_REM_DUP)) { /* Need to undo an add, or redo a delete. */ - if ((ret = __db_ditem(file_dbp, + if ((ret = __db_ditem(dbc, pagep, argp->indx, argp->nbytes)) != 0) goto out; change = DB_MPOOL_DIRTY; @@ -96,8 +95,11 @@ __db_addrem_recover(logp, dbtp, lsnp, redo, info) LSN(pagep) = argp->pagelsn; } - if ((ret = memp_fput(mpf, pagep, change)) == 0) - *lsnp = argp->prev_lsn; + if ((ret = memp_fput(mpf, pagep, change)) != 0) + goto out; + +done: *lsnp = argp->prev_lsn; + ret = 0; out: REC_CLOSE; } @@ -114,7 +116,8 @@ __db_split_recover(logp, dbtp, lsnp, redo, info) void *info; { __db_split_args *argp; - DB *file_dbp, *mdbp; + DB *file_dbp; + DBC *dbc; DB_MPOOLFILE *mpf; PAGE *pagep; int change, cmp_n, cmp_p, ret; @@ -130,9 +133,7 @@ __db_split_recover(logp, dbtp, lsnp, redo, info) * would not have to undo anything. In this case, * don't bother creating a page. */ - *lsnp = argp->prev_lsn; - ret = 0; - goto out; + goto done; } else if ((ret = memp_fget(mpf, &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) @@ -169,8 +170,11 @@ __db_split_recover(logp, dbtp, lsnp, redo, info) LSN(pagep) = argp->pagelsn; change = DB_MPOOL_DIRTY; } - if ((ret = memp_fput(mpf, pagep, change)) == 0) - *lsnp = argp->prev_lsn; + if ((ret = memp_fput(mpf, pagep, change)) != 0) + goto out; + +done: *lsnp = argp->prev_lsn; + ret = 0; out: REC_CLOSE; } @@ -187,7 +191,8 @@ __db_big_recover(logp, dbtp, lsnp, redo, info) void *info; { __db_big_args *argp; - DB *file_dbp, *mdbp; + DB *file_dbp; + DBC *dbc; DB_MPOOLFILE *mpf; PAGE *pagep; u_int32_t change; @@ -209,7 +214,7 @@ __db_big_recover(logp, dbtp, lsnp, redo, info) } else if ((ret = memp_fget(mpf, &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) - goto out; + goto out; } /* @@ -299,9 +304,7 @@ npage: if (argp->next_pgno != PGNO_INVALID) { * so we would not have to undo anything. In * this case, don't bother creating a page. */ - *lsnp = argp->prev_lsn; - ret = 0; - goto out; + goto done; } else if ((ret = memp_fget(mpf, &argp->next_pgno, DB_MPOOL_CREATE, &pagep)) != 0) @@ -323,7 +326,8 @@ npage: if (argp->next_pgno != PGNO_INVALID) { goto out; } - *lsnp = argp->prev_lsn; +done: *lsnp = argp->prev_lsn; + ret = 0; out: REC_CLOSE; } @@ -343,7 +347,8 @@ __db_ovref_recover(logp, dbtp, lsnp, redo, info) void *info; { __db_ovref_args *argp; - DB *file_dbp, *mdbp; + DB *file_dbp; + DBC *dbc; DB_MPOOLFILE *mpf; PAGE *pagep; int modified, ret; @@ -370,8 +375,11 @@ __db_ovref_recover(logp, dbtp, lsnp, redo, info) pagep->lsn = argp->lsn; modified = 1; } - if ((ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) == 0) - *lsnp = argp->prev_lsn; + if ((ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0) + goto out; + +done: *lsnp = argp->prev_lsn; + ret = 0; out: REC_CLOSE; } @@ -392,17 +400,20 @@ __db_relink_recover(logp, dbtp, lsnp, redo, info) void *info; { __db_relink_args *argp; - DB *file_dbp, *mdbp; + DB *file_dbp; + DBC *dbc; DB_MPOOLFILE *mpf; PAGE *pagep; - int modified, ret; + int cmp_n, cmp_p, modified, ret; REC_PRINT(__db_relink_print); REC_INTRO(__db_relink_read); /* - * There are three pages we need to check -- the page, and the - * previous and next pages, if they existed. + * There are up to three pages we need to check -- the page, and the + * previous and next pages, if they existed. For a page add operation, + * the current page is the result of a split and is being recovered + * elsewhere, so all we need do is recover the next page. */ if ((ret = memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) { if (redo) { @@ -411,6 +422,9 @@ __db_relink_recover(logp, dbtp, lsnp, redo, info) } goto next; } + if (argp->opcode == DB_ADD_PAGE) + goto next; + modified = 0; if (log_compare(&LSN(pagep), &argp->lsn) == 0 && redo) { /* Redo the relink. */ @@ -424,10 +438,8 @@ __db_relink_recover(logp, dbtp, lsnp, redo, info) pagep->lsn = argp->lsn; modified = 1; } - if ((ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0) { - (void)__db_panic(file_dbp); + if ((ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0) goto out; - } next: if ((ret = memp_fget(mpf, &argp->next, 0, &pagep)) != 0) { if (redo) { @@ -437,23 +449,27 @@ next: if ((ret = memp_fget(mpf, &argp->next, 0, &pagep)) != 0) { goto prev; } modified = 0; - if (log_compare(&LSN(pagep), &argp->lsn_next) == 0 && redo) { - /* Redo the relink. */ + cmp_n = log_compare(lsnp, &LSN(pagep)); + cmp_p = log_compare(&LSN(pagep), &argp->lsn_next); + if ((argp->opcode == DB_REM_PAGE && cmp_p == 0 && redo) || + (argp->opcode == DB_ADD_PAGE && cmp_n == 0 && !redo)) { + /* Redo the remove or undo the add. */ pagep->prev_pgno = argp->prev; pagep->lsn = *lsnp; modified = 1; - } else if (log_compare(lsnp, &LSN(pagep)) == 0 && !redo) { - /* Undo the relink. */ + } else if ((argp->opcode == DB_REM_PAGE && cmp_n == 0 && !redo) || + (argp->opcode == DB_ADD_PAGE && cmp_p == 0 && redo)) { + /* Undo the remove or redo the add. */ pagep->prev_pgno = argp->pgno; pagep->lsn = argp->lsn_next; modified = 1; } - if ((ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0) { - (void)__db_panic(file_dbp); + if ((ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0) goto out; - } + if (argp->opcode == DB_ADD_PAGE) + goto done; prev: if ((ret = memp_fget(mpf, &argp->prev, 0, &pagep)) != 0) { if (redo) { @@ -476,10 +492,8 @@ prev: if ((ret = memp_fget(mpf, &argp->prev, 0, &pagep)) != 0) { pagep->lsn = argp->lsn_prev; modified = 1; } - if ((ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0) { - (void) __db_panic(file_dbp); + if ((ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0) goto out; - } done: *lsnp = argp->prev_lsn; ret = 0; @@ -500,7 +514,8 @@ __db_addpage_recover(logp, dbtp, lsnp, redo, info) void *info; { __db_addpage_args *argp; - DB *file_dbp, *mdbp; + DB *file_dbp; + DBC *dbc; DB_MPOOLFILE *mpf; PAGE *pagep; u_int32_t change; @@ -541,8 +556,7 @@ __db_addpage_recover(logp, dbtp, lsnp, redo, info) * would not have to undo anything. In this case, * don't bother creating a page. */ - ret = 0; - goto out; + goto done; } else if ((ret = memp_fget(mpf, &argp->nextpgno, DB_MPOOL_CREATE, &pagep)) != 0) @@ -563,11 +577,13 @@ __db_addpage_recover(logp, dbtp, lsnp, redo, info) LSN(pagep) = argp->nextlsn; change = DB_MPOOL_DIRTY; } - ret = memp_fput(mpf, pagep, change); + if ((ret = memp_fput(mpf, pagep, change)) != 0) + goto out; + +done: *lsnp = argp->prev_lsn; + ret = 0; -out: if (ret == 0) - *lsnp = argp->prev_lsn; - REC_CLOSE; +out: REC_CLOSE; } /* @@ -598,46 +614,3 @@ __db_debug_recover(logp, dbtp, lsnp, redo, info) REC_NOOP_CLOSE; } - -/* - * __db_noop_recover -- - * Recovery function for noop. - * - * PUBLIC: int __db_noop_recover __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); - */ -int -__db_noop_recover(logp, dbtp, lsnp, redo, info) - DB_LOG *logp; - DBT *dbtp; - DB_LSN *lsnp; - int redo; - void *info; -{ - __db_noop_args *argp; - DB *file_dbp, *mdbp; - DB_MPOOLFILE *mpf; - PAGE *pagep; - u_int32_t change; - int cmp_n, cmp_p, ret; - - REC_PRINT(__db_noop_print); - REC_INTRO(__db_noop_read); - - if ((ret = memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) - goto out; - - cmp_n = log_compare(lsnp, &LSN(pagep)); - cmp_p = log_compare(&LSN(pagep), &argp->prevlsn); - change = 0; - if (cmp_p == 0 && redo) { - LSN(pagep) = *lsnp; - change = DB_MPOOL_DIRTY; - } else if (cmp_n == 0 && !redo) { - LSN(pagep) = argp->prevlsn; - change = DB_MPOOL_DIRTY; - } - *lsnp = argp->prev_lsn; - ret = memp_fput(mpf, pagep, change); - -out: REC_CLOSE; -} diff --git a/db2/db/db_ret.c b/db2/db/db_ret.c index 9d9b599ad6..9f0d0ecf8d 100644 --- a/db2/db/db_ret.c +++ b/db2/db/db_ret.c @@ -8,7 +8,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)db_ret.c 10.13 (Sleepycat) 5/7/98"; +static const char sccsid[] = "@(#)db_ret.c 10.16 (Sleepycat) 10/4/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -93,6 +93,8 @@ __db_retcopy(dbt, data, len, memp, memsize, db_malloc) u_int32_t *memsize; void *(*db_malloc) __P((size_t)); { + int ret; + /* If returning a partial record, reset the length. */ if (F_ISSET(dbt, DB_DBT_PARTIAL)) { data = (u_int8_t *)data + dbt->doff; @@ -120,9 +122,6 @@ __db_retcopy(dbt, data, len, memp, memsize, db_malloc) * guarantees consistency, i.e., the application can always free memory * without concern as to how many bytes of the record were requested. * - * XXX - * Never allocate 0 bytes, it's known to make malloc/realloc unhappy. - * * Use the memory specified by the application: DB_DBT_USERMEM. * * !!! @@ -130,11 +129,8 @@ __db_retcopy(dbt, data, len, memp, memsize, db_malloc) * memory pointer is allowed to be NULL. */ if (F_ISSET(dbt, DB_DBT_MALLOC)) { - dbt->data = db_malloc == NULL ? - (void *)__db_malloc(len) : - (void *)db_malloc(len + 1); - if (dbt->data == NULL) - return (ENOMEM); + if ((ret = __os_malloc(len, db_malloc, &dbt->data)) != 0) + return (ret); } else if (F_ISSET(dbt, DB_DBT_USERMEM)) { if (len != 0 && (dbt->data == NULL || dbt->ulen < len)) return (ENOMEM); @@ -142,12 +138,9 @@ __db_retcopy(dbt, data, len, memp, memsize, db_malloc) return (EINVAL); } else { if (len != 0 && (*memsize == 0 || *memsize < len)) { - *memp = *memp == NULL ? - (void *)__db_malloc(len) : - (void *)__db_realloc(*memp, len); - if (*memp == NULL) { + if ((ret = __os_realloc(memp, len)) != 0) { *memsize = 0; - return (ENOMEM); + return (ret); } *memsize = len; } diff --git a/db2/db/db_thread.c b/db2/db/db_thread.c deleted file mode 100644 index 73e2a51286..0000000000 --- a/db2/db/db_thread.c +++ /dev/null @@ -1,121 +0,0 @@ -/*- - * See the file LICENSE for redistribution information. - * - * Copyright (c) 1996, 1997, 1998 - * Sleepycat Software. All rights reserved. - */ - -#include "config.h" - -#ifndef lint -static const char sccsid[] = "@(#)db_thread.c 8.15 (Sleepycat) 4/26/98"; -#endif /* not lint */ - -#ifndef NO_SYSTEM_INCLUDES -#include <sys/types.h> - -#include <errno.h> -#include <string.h> -#endif - -#include "db_int.h" -#include "db_page.h" -#include "db_am.h" - -static int __db_getlockid __P((DB *, DB *)); - -/* - * __db_gethandle -- - * Called by db access method routines when the DB_THREAD flag is set. - * This routine returns a handle, either an existing handle from the - * chain of handles, or creating one if necessary. - * - * PUBLIC: int __db_gethandle __P((DB *, int (*)(DB *, DB *), DB **)); - */ -int -__db_gethandle(dbp, am_func, dbpp) - DB *dbp, **dbpp; - int (*am_func) __P((DB *, DB *)); -{ - DB *ret_dbp; - int ret, t_ret; - - if ((ret = __db_mutex_lock((db_mutex_t *)dbp->mutexp, -1)) != 0) - return (ret); - - if ((ret_dbp = LIST_FIRST(&dbp->handleq)) != NULL) - /* Simply take one off the list. */ - LIST_REMOVE(ret_dbp, links); - else { - /* Allocate a new handle. */ - if ((ret_dbp = (DB *)__db_malloc(sizeof(*dbp))) == NULL) { - ret = ENOMEM; - goto err; - } - memcpy(ret_dbp, dbp, sizeof(*dbp)); - ret_dbp->internal = NULL; - TAILQ_INIT(&ret_dbp->curs_queue); - - /* Set the locker, the lock structure and the lock DBT. */ - if ((ret = __db_getlockid(dbp, ret_dbp)) != 0) - goto err; - - /* Finally, call the access method specific dup function. */ - if ((ret = am_func(dbp, ret_dbp)) != 0) - goto err; - } - - *dbpp = ret_dbp; - - if (0) { -err: if (ret_dbp != NULL) - FREE(ret_dbp, sizeof(*ret_dbp)); - } - if ((t_ret = - __db_mutex_unlock((db_mutex_t *)dbp->mutexp, -1)) != 0 && ret == 0) - ret = t_ret; - return (ret); -} - -/* - * __db_puthandle -- - * Return a DB handle to the pool for later use. - * - * PUBLIC: int __db_puthandle __P((DB *)); - */ -int -__db_puthandle(dbp) - DB *dbp; -{ - DB *master; - int ret; - - master = dbp->master; - if ((ret = __db_mutex_lock((db_mutex_t *)master->mutexp, -1)) != 0) - return (ret); - - LIST_INSERT_HEAD(&master->handleq, dbp, links); - - return (__db_mutex_unlock((db_mutex_t *)master->mutexp, -1)); -} - -/* - * __db_getlockid -- - * Create a new locker ID and copy the file lock information from - * the old DB into the new one. - */ -static int -__db_getlockid(dbp, new_dbp) - DB *dbp, *new_dbp; -{ - int ret; - - if (F_ISSET(dbp, DB_AM_LOCKING)) { - if ((ret = lock_id(dbp->dbenv->lk_info, &new_dbp->locker)) != 0) - return (ret); - memcpy(new_dbp->lock.fileid, dbp->lock.fileid, DB_FILE_ID_LEN); - new_dbp->lock_dbt.size = sizeof(new_dbp->lock); - new_dbp->lock_dbt.data = &new_dbp->lock; - } - return (0); -} |