diff options
Diffstat (limited to 'db2/db')
-rw-r--r-- | db2/db/db.c | 818 | ||||
-rw-r--r-- | db2/db/db.src | 154 | ||||
-rw-r--r-- | db2/db/db_auto.c | 1462 | ||||
-rw-r--r-- | db2/db/db_conv.c | 219 | ||||
-rw-r--r-- | db2/db/db_dispatch.c | 270 | ||||
-rw-r--r-- | db2/db/db_dup.c | 680 | ||||
-rw-r--r-- | db2/db/db_overflow.c | 383 | ||||
-rw-r--r-- | db2/db/db_pr.c | 785 | ||||
-rw-r--r-- | db2/db/db_rec.c | 623 | ||||
-rw-r--r-- | db2/db/db_ret.c | 149 | ||||
-rw-r--r-- | db2/db/db_thread.c | 125 |
11 files changed, 5668 insertions, 0 deletions
diff --git a/db2/db/db.c b/db2/db/db.c new file mode 100644 index 0000000000..df3a9d2d21 --- /dev/null +++ b/db2/db/db.c @@ -0,0 +1,818 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 1997 + * Sleepycat Software. All rights reserved. + */ +/* + * Copyright (c) 1990, 1993, 1994, 1995, 1996 + * Keith Bostic. All rights reserved. + */ +/* + * Copyright (c) 1990, 1993, 1994, 1995 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "config.h" + +#ifndef lint +static const char sccsid[] = "@(#)db.c 10.37 (Sleepycat) 8/23/97"; +#endif /* not lint */ + +#ifndef NO_SYSTEM_INCLUDES +#include <sys/types.h> +#include <sys/stat.h> + +#include <errno.h> +#include <fcntl.h> +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#endif + +#include "db_int.h" +#include "shqueue.h" +#include "db_page.h" +#include "db_shash.h" +#include "db_swap.h" +#include "btree.h" +#include "hash.h" +#include "mp.h" +#include "db_am.h" +#include "common_ext.h" + +static int db_close __P((DB *, int)); +static int db_fd __P((DB *, int *)); + +/* + * If the metadata page has the flag set, set the local flag. If the page + * does NOT have the flag set, return EINVAL if the user's dbinfo argument + * caused us to already set the local flag. + */ +#define DBINFO_FCHK(dbp, fn, meta_flags, m_name, dbp_name) { \ + if ((meta_flags) & (m_name)) \ + F_SET(dbp, dbp_name); \ + else \ + if (F_ISSET(dbp, dbp_name)) { \ + __db_err(dbenv, \ + "%s: %s specified in dbinfo argument but not set in file", \ + fname, fn); \ + goto einval; \ + } \ +} + +/* + * db_open -- + * Main library interface to the DB access methods. 
+ */ +int +db_open(fname, type, flags, mode, dbenv, dbinfo, dbpp) + const char *fname; + DBTYPE type; + int flags, mode; + DB_ENV *dbenv; + DB_INFO *dbinfo; + DB **dbpp; +{ + BTMETA *btm; + DB *dbp; + DBT pgcookie; + DB_ENV *envp, t_dbenv; + DB_PGINFO pginfo; + HASHHDR *hashm; + off_t io; + size_t cachesize; + ssize_t nr; + int fd, ftype, need_fileid, restore, ret, retry_cnt, swapped; + char *real_name, mbuf[512]; + + /* Validate arguments. */ +#ifdef HAVE_SPINLOCKS +#define OKFLAGS (DB_CREATE | DB_NOMMAP | DB_RDONLY | DB_THREAD | DB_TRUNCATE) +#else +#define OKFLAGS (DB_CREATE | DB_NOMMAP | DB_RDONLY | DB_TRUNCATE) +#endif + if ((ret = __db_fchk(dbenv, "db_open", flags, OKFLAGS)) != 0) + return (ret); + + /* Initialize for error return. */ + fd = -1; + need_fileid = 1; + real_name = NULL; + + /* Allocate the DB structure, reference the DB_ENV structure. */ + if ((dbp = (DB *)calloc(1, sizeof(DB))) == NULL) { + __db_err(dbenv, "%s", strerror(ENOMEM)); + return (ENOMEM); + } + dbp->dbenv = dbenv; + + /* Convert the dbinfo flags. */ + if (dbinfo != NULL) { + /* + * !!! + * We can't check for illegal flags until we know what type + * of open we're doing. + */ + if (F_ISSET(dbinfo, DB_DELIMITER)) + F_SET(dbp, DB_RE_DELIMITER); + if (F_ISSET(dbinfo, DB_DUP)) + F_SET(dbp, DB_AM_DUP); + if (F_ISSET(dbinfo, DB_FIXEDLEN)) + F_SET(dbp, DB_RE_FIXEDLEN); + if (F_ISSET(dbinfo, DB_PAD)) + F_SET(dbp, DB_RE_PAD); + if (F_ISSET(dbinfo, DB_RECNUM)) + F_SET(dbp, DB_BT_RECNUM); + if (F_ISSET(dbinfo, DB_RENUMBER)) + F_SET(dbp, DB_RE_RENUMBER); + if (F_ISSET(dbinfo, DB_SNAPSHOT)) + F_SET(dbp, DB_RE_SNAPSHOT); + } + + /* Set based on the open(2) flags. */ + if (LF_ISSET(DB_RDONLY)) + F_SET(dbp, DB_AM_RDONLY); + + /* Check threading fields. 
*/ + if (LF_ISSET(DB_THREAD)) { + if ((dbp->mutex = + (db_mutex_t *)malloc(sizeof(db_mutex_t))) == NULL) { + __db_err(dbenv, "%s", strerror(ENOMEM)); + ret = ENOMEM; + goto err; + } + __db_mutex_init(dbp->mutex, 0); + + F_SET(dbp, DB_AM_THREAD); + } + + /* + * Always set the master and initialize the queues, so we can + * use these fields without checking the thread bit. + */ + dbp->master = dbp; + LIST_INIT(&dbp->handleq); + LIST_INSERT_HEAD(&dbp->handleq, dbp, links); + TAILQ_INIT(&dbp->curs_queue); + + /* + * Set based on the dbenv fields, although no logging or transactions + * are possible for temporary files. + */ + if (dbp->dbenv != NULL) { + if (dbenv->lk_info != NULL) + F_SET(dbp, DB_AM_LOCKING); + if (fname != NULL && dbenv->lg_info != NULL) + F_SET(dbp, DB_AM_LOGGING); + } + + /* Set the common fields. */ + if (dbinfo == NULL) { + dbp->pgsize = 0; + dbp->db_malloc = NULL; + } else { + dbp->pgsize = dbinfo->db_pagesize; + dbp->db_malloc = dbinfo->db_malloc; + } + + /* Fill in the default file mode. */ + if (mode == 0) + mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP; + + /* Check if the user wants us to swap byte order. */ + if (dbinfo != NULL) + switch (ret = __db_byteorder(dbenv, dbinfo->db_lorder)) { + case 0: + break; + case DB_SWAPBYTES: + F_SET(dbp, DB_AM_SWAP); + break; + default: + goto err; + } + + /* + * If we have a file name, try and read the first page, figure out + * what type of file it is, and initialize everything we can based + * on that file's meta-data page. + * + * XXX + * We don't actually expect zero-length strings as arguments. We + * do the check, permitting them, because scripting languages, e.g., + * the Tcl test suite, doesn't know anything about passing NULL's. + */ + if (fname != NULL && fname[0] != '\0') { + /* Get the real file name. */ + if ((ret = __db_appname(dbenv, + DB_APP_DATA, NULL, fname, NULL, &real_name)) != 0) + goto err; + + /* + * Open the backing file. 
We need to make sure that multiple + * processes attempting to create the file at the same time + * are properly ordered so that only one of them creates the + * "unique" file id, so we open it O_EXCL and O_CREAT so two + * simultaneous attempts to create the region will return + * failure in one of the attempts. If we're one of the ones + * that fail, we simply retry without the O_CREAT flag, which + * will require that the meta-data page exist. + */ +#undef OKFLAGS +#define OKFLAGS \ + DB_CREATE | DB_NOMMAP | DB_RDONLY | DB_THREAD | DB_TRUNCATE + retry_cnt = 0; +open_retry: if (LF_ISSET(DB_CREATE)) { + if ((ret = __db_fdopen(real_name, flags | DB_EXCL, + OKFLAGS | DB_EXCL, mode, &fd)) != 0) + if (ret == EEXIST) { + LF_CLR(DB_CREATE); + goto open_retry; + } else { + __db_err(dbenv, + "%s: %s", fname, strerror(ret)); + goto err; + } + } else + if ((ret = __db_fdopen(real_name, + flags, OKFLAGS, mode, &fd)) != 0) { + __db_err(dbenv, "%s: %s", fname, strerror(ret)); + goto err; + } + + /* + * Use the optimum I/O size as the pagesize if a pagesize not + * specified. Some filesystems have 64K as their optimum I/O + * size, but as that results in impossibly large default cache + * sizes, we limit the default pagesize to 16K. + */ + if (dbp->pgsize == 0) { + if ((ret = __db_stat(dbp->dbenv, + real_name, fd, NULL, &io)) != 0) + goto err; + if (io < 512) + io = 512; + if (io > 16 * 1024) + io = 16 * 1024; + dbp->pgsize = io; + F_SET(dbp, DB_AM_PGDEF); + } + + /* + * Try and read the first disk sector -- this code assumes + * that the meta-data for all access methods fits in 512 + * bytes, and that no database will be smaller than that. + */ + if ((ret = __db_read(fd, mbuf, sizeof(mbuf), &nr)) != 0) + goto err; + + /* The fd is no longer needed. 
*/ + (void)__db_close(fd); + fd = -1; + + if (nr != sizeof(mbuf)) { + if (nr != 0) { + __db_err(dbenv, + "%s: unexpected file format", fname); + goto einval; + } + /* + * The only way we can reach here with the DB_CREATE + * flag set is if we created the file. If we didn't + * create the file, there's a chance that someone else + * is busily doing so. Sleep and give them a chance, + * because we need the metadata page their going to + * write. + */ + if (!LF_ISSET(DB_CREATE) && retry_cnt++ < 3) { + __db_sleep(1, 0); + goto open_retry; + } + if (type == DB_UNKNOWN) { + __db_err(dbenv, + "%s: DBTYPE of unknown with empty file", + fname); + goto einval; + } + goto empty; + } + + /* + * A found file overrides some user information. We'll check + * for possible error conditions based on conflicts between + * the file and the user's arguments below. + */ + swapped = 0; + F_CLR(dbp, DB_AM_SWAP); + +retry: switch (((BTMETA *)mbuf)->magic) { + case DB_BTREEMAGIC: + if (type != DB_BTREE && + type != DB_RECNO && type != DB_UNKNOWN) + goto einval; + + btm = (BTMETA *)mbuf; + if (swapped && (ret = __bam_mswap((PAGE *)btm)) != 0) + goto err; + + if (btm->version < DB_BTREEOLDVER || + btm->version > DB_BTREEVERSION) { + __db_err(dbenv, + "%s: unsupported btree version number %lu", + fname, (u_long)btm->version); + goto einval; + } + dbp->pgsize = btm->pagesize; + F_CLR(dbp, DB_AM_PGDEF); + + if ((ret = __db_fchk(dbenv, + "db_open", btm->flags, BTM_MASK)) != 0) + goto err; + DBINFO_FCHK(dbp, "DB_DUP", + btm->flags, BTM_DUP, DB_AM_DUP); + if (F_ISSET(btm, BTM_RECNO)) { + DBINFO_FCHK(dbp, "DB_FIXEDLEN", + btm->flags, BTM_FIXEDLEN, DB_RE_FIXEDLEN); + DBINFO_FCHK(dbp, "DB_RENUMBER", + btm->flags, BTM_RENUMBER, DB_RE_RENUMBER); + type = DB_RECNO; + } else { + DBINFO_FCHK(dbp, "DB_RECNUM", + btm->flags, BTM_RECNUM, DB_BT_RECNUM); + type = DB_BTREE; + } + + /* Copy the file's unique id. 
*/ + need_fileid = 0; + memcpy(dbp->lock.fileid, btm->uid, DB_FILE_ID_LEN); + break; + case DB_HASHMAGIC: + if (type != DB_HASH && type != DB_UNKNOWN) + goto einval; + + hashm = (HASHHDR *)mbuf; + if (swapped && (ret = __ham_mswap((PAGE *)hashm)) != 0) + goto err; + + if (hashm->version < DB_HASHOLDVER || + hashm->version > DB_HASHVERSION) { + __db_err(dbenv, + "%s: unsupported hash version number %lu", + fname, hashm->version); + goto einval; + } + dbp->pgsize = hashm->pagesize; + F_CLR(dbp, DB_AM_PGDEF); + + if ((ret = __db_fchk(dbenv, + "db_open", hashm->flags, DB_HASH_DUP)) != 0) + goto err; + DBINFO_FCHK(dbp, "DB_DUP", + hashm->flags, DB_HASH_DUP, DB_AM_DUP); + type = DB_HASH; + + /* Copy the file's unique id. */ + need_fileid = 0; + memcpy(dbp->lock.fileid, hashm->uid, DB_FILE_ID_LEN); + break; + default: + if (swapped) { + __db_err(dbenv, "unrecognized file type"); + goto einval; + } + M_32_SWAP(((BTMETA *)mbuf)->magic); + F_SET(dbp, DB_AM_SWAP); + + swapped = 1; + goto retry; + } + } else { + fname = real_name = NULL; + + if (type == DB_UNKNOWN) { + __db_err(dbenv, + "DBTYPE of unknown without existing file"); + goto einval; + } + F_SET(dbp, DB_AM_INMEM); + } + +empty: /* + * By the time we get here we've either set the type or we're taking + * it from the user. + */ + dbp->type = type; + + /* + * Set the page size to the best value for I/O to this file. Don't + * overflow the page offset type. The page size must be db_indx_t + * aligned and >= MIN_PAGE_SIZE. + * + * XXX + * Should we be checking for a page size that's not a multiple of 512? + */ + if (dbp->pgsize == 0) { + F_SET(dbp, DB_AM_PGDEF); + dbp->pgsize = 8 * 1024; + } + if (dbp->pgsize < DB_MIN_PGSIZE || + dbp->pgsize > DB_MAX_PGSIZE || + dbp->pgsize & (sizeof(db_indx_t) - 1)) { + __db_err(dbenv, "illegal page size"); + goto einval; + } + + /* + * Set and/or correct the cache size; must be a multiple of the + * page size. 
+ */ + if (dbinfo == NULL || dbinfo->db_cachesize == 0) + cachesize = dbp->pgsize * DB_MINCACHE; + else { + cachesize = dbinfo->db_cachesize; + if (cachesize & (dbp->pgsize - 1)) + cachesize += (~cachesize & (dbp->pgsize - 1)) + 1; + if (cachesize < dbp->pgsize * DB_MINCACHE) + cachesize = dbp->pgsize * DB_MINCACHE; + if (cachesize < 20 * 1024) + cachesize = 20 * 1024; + } + + /* + * If no mpool supplied by the application, attach to a local, + * created buffer pool. + * + * XXX + * If the user has a DB_ENV structure, we have to use a temporary + * one so that we don't step on their values. If the user doesn't, + * we have to create one, and keep it around until the call to the + * memp_close() function. This is all so the mpool functions get + * the error stuff right. + */ + if (dbenv == NULL || dbenv->mp_info == NULL) { + F_SET(dbp, DB_AM_MLOCAL); + + if (dbenv == NULL) { + if ((dbp->mp_dbenv = + (DB_ENV *)calloc(sizeof(DB_ENV), 1)) == NULL) { + ret = ENOMEM; + goto err; + } + + envp = dbp->mp_dbenv; + restore = 0; + } else { + t_dbenv = *dbenv; + + envp = dbenv; + restore = 1; + } + envp->mp_size = cachesize; + F_SET(envp, DB_MPOOL_PRIVATE); + if ((ret = memp_open(NULL, + DB_CREATE, S_IRUSR | S_IWUSR, envp, &dbp->mp)) != 0) + goto err; + if (restore) + *dbenv = t_dbenv; + } else + dbp->mp = dbenv->mp_info; + + /* Register DB's pgin/pgout functions. */ + if ((ret = memp_register(dbp->mp, + DB_FTYPE_BTREE, __bam_pgin, __bam_pgout)) != 0) + goto err; + if ((ret = memp_register(dbp->mp, + DB_FTYPE_HASH, __ham_pgin, __ham_pgout)) != 0) + goto err; + + /* + * If we don't already have one, get a unique file ID. If the file + * is a temporary file, then we have to create a unique file ID -- + * no backing file will be created until the mpool cache is filled + * forcing it to go to disk. 
The created ID must never match any + * potential real file ID -- we know it won't because real file IDs + * contain a time stamp after the dev/ino pair, and we're simply + * storing a 4-byte locker ID. + * + * XXX + * Store the file id in the locker structure -- we can get it from + * there as necessary, and it saves having two copies. + */ + if (need_fileid) + if (fname == NULL) { + memset(dbp->lock.fileid, 0, DB_FILE_ID_LEN); + if (F_ISSET(dbp, DB_AM_LOCKING) && + (ret = lock_id(dbenv->lk_info, + (u_int32_t *)dbp->lock.fileid)) != 0) + goto err; + } else + if ((ret = __db_fileid(dbenv, + real_name, 1, dbp->lock.fileid)) != 0) + goto err; + + /* No further use for the real name. */ + if (real_name != NULL) + FREES(real_name); + real_name = NULL; + + /* + * Open a backing file in the memory pool. + * + * If we need to process the file's pages on I/O, set the file type. + * If it's a hash file, always call pgin and pgout routines. This + * means that hash files can never be mapped into process memory. If + * it's a btree file and requires swapping, we need to page the file + * in and out. This has to be right -- we can't mmap files that are + * being paged in and out. + */ + if (type == DB_HASH) + ftype = DB_FTYPE_HASH; + else + ftype = F_ISSET(dbp, DB_AM_SWAP) ? DB_FTYPE_BTREE : 0; + pginfo.db_pagesize = dbp->pgsize; + pginfo.needswap = F_ISSET(dbp, DB_AM_SWAP); + pgcookie.data = &pginfo; + pgcookie.size = sizeof(DB_PGINFO); + + if ((ret = memp_fopen(dbp->mp, fname, ftype, + F_ISSET(dbp, DB_AM_RDONLY) ? DB_RDONLY : 0, 0, dbp->pgsize, + 0, &pgcookie, dbp->lock.fileid, &dbp->mpf)) != 0) + goto err; + + /* Get a log file id. */ + if (F_ISSET(dbp, DB_AM_LOGGING) && + (ret = log_register(dbenv->lg_info, + dbp, fname, type, &dbp->log_fileid)) != 0) + goto err; + + /* + * Get a locker id for this DB, and build the lock cookie: the first + * db_pgno_t bytes are the page number, the next N bytes are the file + * id. 
+ */ + if (F_ISSET(dbp, DB_AM_LOCKING)) { + if ((ret = lock_id(dbenv->lk_info, &dbp->locker)) != 0) + goto err; + dbp->lock_dbt.size = sizeof(dbp->lock); + dbp->lock_dbt.data = &dbp->lock; + } + + /* Call the real open function. */ + switch (type) { + case DB_BTREE: + if (dbinfo != NULL && (ret = __db_fchk(dbenv, + "db_open", dbinfo->flags, DB_RECNUM | DB_DUP)) != 0) + goto err; + if (dbinfo != NULL && (ret = __db_fcchk(dbenv, + "db_open", dbinfo->flags, DB_DUP, DB_RECNUM)) != 0) + goto err; + if ((ret = __bam_open(dbp, type, dbinfo)) != 0) + goto err; + break; + case DB_HASH: + if (dbinfo != NULL && (ret = __db_fchk(dbenv, + "db_open", dbinfo->flags, DB_DUP)) != 0) + goto err; + if ((ret = __ham_open(dbp, dbinfo)) != 0) + goto err; + break; + case DB_RECNO: +#define DB_INFO_FLAGS \ + (DB_DELIMITER | DB_FIXEDLEN | DB_PAD | DB_RENUMBER | DB_SNAPSHOT) + if (dbinfo != NULL && (ret = __db_fchk(dbenv, + "db_open", dbinfo->flags, DB_INFO_FLAGS)) != 0) + goto err; + if ((ret = __ram_open(dbp, type, dbinfo)) != 0) + goto err; + break; + default: + abort(); + } + + /* Call a local close routine. */ + dbp->close = db_close; + dbp->fd = db_fd; + + *dbpp = dbp; + return (0); + +einval: ret = EINVAL; +err: /* Close the file descriptor. */ + if (fd != -1) + (void)__db_close(fd); + + /* Discard the log file id. */ + if (dbp->log_fileid != 0) + (void)log_unregister(dbenv->lg_info, dbp->log_fileid); + + /* Close the memory pool file. */ + if (dbp->mpf != NULL) + (void)memp_fclose(dbp->mpf); + + /* If the memory pool was local, close it. */ + if (F_ISSET(dbp, DB_AM_MLOCAL) && dbp->mp != NULL) + (void)memp_close(dbp->mp); + + /* If we allocated a DB_ENV, discard it. */ + if (dbp->mp_dbenv != NULL) + FREE(dbp->mp_dbenv, sizeof(DB_ENV)); + + if (real_name != NULL) + FREES(real_name); + if (dbp != NULL) + FREE(dbp, sizeof(DB)); + + return (ret); +} + +/* + * db_close -- + * Close a DB tree. 
+ */ +static int +db_close(dbp, flags) + DB *dbp; + int flags; +{ + DBC *dbc; + DB *tdbp; + int ret, t_ret; + + ret = 0; + + /* Sync the underlying file, unless the caller passed DB_NOSYNC. */ + if (!LF_ISSET(DB_NOSYNC) && + (t_ret = dbp->sync(dbp, 0)) != 0 && ret == 0) + ret = t_ret; + + /* + * Call the underlying access method close routine for all the + * cursors and handles. Only the first error seen is kept in ret; + * later failures do not stop the teardown. + */ + for (tdbp = LIST_FIRST(&dbp->handleq); + tdbp != NULL; tdbp = LIST_NEXT(tdbp, links)) { + + while ((dbc = TAILQ_FIRST(&tdbp->curs_queue)) != NULL) + if ((t_ret = dbc->c_close(dbc)) != 0 && ret == 0) + ret = t_ret; + + switch (tdbp->type) { + case DB_BTREE: + if ((t_ret = __bam_close(tdbp)) != 0 && ret == 0) + ret = t_ret; + break; + case DB_HASH: + if ((t_ret = __ham_close(tdbp)) != 0 && ret == 0) + ret = t_ret; + break; + case DB_RECNO: + if ((t_ret = __ram_close(tdbp)) != 0 && ret == 0) + ret = t_ret; + break; + default: + abort(); + } + + } + + /* + * Sync the memory pool. + * NOTE(review): this runs even when DB_NOSYNC was passed -- confirm + * that is intended. + */ + if ((t_ret = memp_fsync(dbp->mpf)) != 0 && ret == 0) + ret = t_ret; + + /* Close the memory pool file. */ + if ((t_ret = memp_fclose(dbp->mpf)) != 0 && ret == 0) + ret = t_ret; + + /* If the memory pool was local, close it. */ + if (F_ISSET(dbp, DB_AM_MLOCAL) && + (t_ret = memp_close(dbp->mp)) != 0 && ret == 0) + ret = t_ret; + + /* Discard the mutex. */ + if (dbp->mutex != NULL) + FREE(dbp->mutex, sizeof(db_mutex_t)); + + /* Discard the log file id. */ + if (F_ISSET(dbp, DB_AM_LOGGING)) + (void)log_unregister(dbp->dbenv->lg_info, dbp->log_fileid); + + /* + * Discard the lock cookie for all handles. Outside DEBUG builds the + * loop body is empty; in DEBUG builds it dumps the locks still held. + */ + for (tdbp = LIST_FIRST(&dbp->handleq); + tdbp != NULL; tdbp = LIST_NEXT(tdbp, links)) + if (F_ISSET(tdbp, DB_AM_LOCKING)) { +#ifdef DEBUG + DB_LOCKREQ request; + + /* + * If we're running tests, display any locks currently + * held. It's possible that some applications may hold + * locks for long periods, e.g., conference room locks, + * but the DB tests should never close holding locks. 
+ */ + request.op = DB_LOCK_DUMP; + if ((t_ret = lock_vec(tdbp->dbenv->lk_info, + tdbp->locker, 0, &request, 1, NULL)) != 0 && + ret == 0) + ret = EAGAIN; +#endif + } + + /* If we allocated a DB_ENV, discard it. */ + if (dbp->mp_dbenv != NULL) + FREE(dbp->mp_dbenv, sizeof(DB_ENV)); + + /* + * Free all of the DB's: unlink this handle, free whatever is left on + * the handle queue, then free this handle itself. + */ + LIST_REMOVE(dbp, links); + while ((tdbp = LIST_FIRST(&dbp->handleq)) != NULL) { + LIST_REMOVE(tdbp, links); + FREE(tdbp, sizeof(*tdbp)); + } + FREE(dbp, sizeof(*dbp)); + + return (ret); +} + +/* + * db_fd -- + * Return a file descriptor for flock'ing. + * + * Stores the descriptor in *fdp and returns 0. Returns ENOENT for an + * in-memory database, or when the memory pool file's descriptor is + * still -1. + */ +static int +db_fd(dbp, fdp) + DB *dbp; + int *fdp; +{ + /* In-memory database can't have a file descriptor. */ + if (F_ISSET(dbp, DB_AM_INMEM)) + return (ENOENT); + + /* + * XXX + * Truly spectacular layering violation. As we don't open the + * underlying file until we need it, it may not be initialized. + */ + if ((*fdp = dbp->mpf->fd) == -1) + return (ENOENT); + return (0); +} + +/* + * __db_pgerr -- + * Error when unable to retrieve a specified page. Reports the page + * number through __db_err and returns the result of __db_panic. + * + * PUBLIC: int __db_pgerr __P((DB *, db_pgno_t)); + */ +int +__db_pgerr(dbp, pgno) + DB *dbp; + db_pgno_t pgno; +{ + __db_err(dbp->dbenv, + "unable to create/retrieve page %lu", (u_long)pgno); + return (__db_panic(dbp)); +} + +/* + * __db_pgfmt -- + * Error when a page has the wrong format. Reports the page number + * through __db_err and returns the result of __db_panic. + * + * PUBLIC: int __db_pgfmt __P((DB *, db_pgno_t)); + */ +int +__db_pgfmt(dbp, pgno) + DB *dbp; + db_pgno_t pgno; +{ + __db_err(dbp->dbenv, + "page %lu: illegal page type or format", (u_long)pgno); + return (__db_panic(dbp)); +} diff --git a/db2/db/db.src b/db2/db/db.src new file mode 100644 index 0000000000..a3e2f7b75c --- /dev/null +++ b/db2/db/db.src @@ -0,0 +1,154 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 1997 + * Sleepycat Software. All rights reserved. + * @(#)db.src 10.3 (Sleepycat) 8/18/97 + */ +#include "config.h" + +PREFIX db + +/* + * addrem -- Add or remove an entry from a duplicate page. 
+ * + * opcode: identifies if this is an add or delete. + * fileid: file identifier of the file being modified. + * pgno: duplicate page number. + * indx: location at which to insert or delete. + * nbytes: number of bytes added/removed to/from the page. + * hdr: header for the data item. + * dbt: data that is deleted or is to be added. + * pagelsn: former lsn of the page. + * + * If the hdr was NULL, then the dbt is a regular B_KEYDATA. + * If the dbt was NULL, then the hdr is a complete item to be + * pasted on the page. + */ +BEGIN addrem +ARG opcode u_int32_t lu +ARG fileid u_int32_t lu +ARG pgno db_pgno_t lu +ARG indx u_int32_t lu +ARG nbytes size_t lu +DBT hdr DBT s +DBT dbt DBT s +POINTER pagelsn DB_LSN * lu +END + +/* + * split -- Handles the split of a duplicate page. + * + * opcode: defines whether we are splitting from or splitting onto. + * fileid: file identifier of the file being modified. + * pgno: page number being split. + * pageimage: entire page contents. + * pagelsn: former lsn of the page. + */ +BEGIN split +ARG opcode u_int32_t lu +ARG fileid u_int32_t lu +ARG pgno db_pgno_t lu +DBT pageimage DBT s +POINTER pagelsn DB_LSN * lu +END + +/* + * big -- Handles addition and deletion of big key/data items. + * + * opcode: identifies get/put. + * fileid: file identifier of the file being modified. + * pgno: page onto which data is being added/removed. + * prev_pgno: the page before the one we are logging. + * next_pgno: the page after the one we are logging. + * dbt: data being written onto the page. + * pagelsn: former lsn of pgno. + * prevlsn: former lsn of the prev_pgno. + * nextlsn: former lsn of the next_pgno. This is not currently used, but + * may be used later if we actually do overwrites of big key/ + * data items in place. 
+ */ +BEGIN big +ARG opcode u_int32_t lu +ARG fileid u_int32_t lu +ARG pgno db_pgno_t lu +ARG prev_pgno db_pgno_t lu +ARG next_pgno db_pgno_t lu +DBT dbt DBT s +POINTER pagelsn DB_LSN * lu +POINTER prevlsn DB_LSN * lu +POINTER nextlsn DB_LSN * lu +END + +/* + * ovref -- Handles increment of overflow page reference count. + * + * fileid: identifies the file being modified. + * pgno: page number being incremented. + * lsn: the page's original lsn. + */ +BEGIN ovref +ARG fileid u_int32_t lu +ARG pgno db_pgno_t lu +POINTER lsn DB_LSN * lu +END + +/* + * relink -- Handles relinking around a page. + * + * fileid: identifies the file being changed. + * pgno: the page being changed. + * lsn: the page's original lsn. + * prev: the previous page. + * lsn_prev: the previous page's original lsn. + * next: the next page. + * lsn_next: the next page's original lsn. + */ +BEGIN relink +ARG fileid u_int32_t lu +ARG pgno db_pgno_t lu +POINTER lsn DB_LSN * lu +ARG prev db_pgno_t lu +POINTER lsn_prev DB_LSN * lu +ARG next db_pgno_t lu +POINTER lsn_next DB_LSN * lu +END + +/* + * Addpage -- Handles adding a new duplicate page onto the end of + * an existing duplicate page. + * fileid: identifies the file being changed. + * pgno: page number to which a new page is being added. + * lsn: lsn of pgno. + * nextpgno: new page number being added. + * nextlsn: lsn of nextpgno. + */ +BEGIN addpage +ARG fileid u_int32_t lu +ARG pgno db_pgno_t lu +POINTER lsn DB_LSN * lu +ARG nextpgno db_pgno_t lu +POINTER nextlsn DB_LSN * lu +END + +/* + * Debug -- log an operation upon entering an access method. + * op: Operation (cursor, c_close, c_get, c_put, c_del, + * get, put, delete). + * fileid: identifies the file being acted upon. + * key: key parameter + * data: data parameter + * arg_flags: flags parameter + */ +BEGIN debug +DBT op DBT s +ARG fileid u_int32_t lu +DBT key DBT s +DBT data DBT s +ARG arg_flags u_int32_t lu +END + +/* + * noop -- do nothing, but get an LSN. 
+ */ +BEGIN noop +END diff --git a/db2/db/db_auto.c b/db2/db/db_auto.c new file mode 100644 index 0000000000..4684f1a39f --- /dev/null +++ b/db2/db/db_auto.c @@ -0,0 +1,1462 @@ +/* Do not edit: automatically built by dist/db_gen.sh. */ +#include "config.h" + +#ifndef NO_SYSTEM_INCLUDES +#include <ctype.h> +#include <errno.h> +#include <stddef.h> +#include <stdlib.h> +#include <string.h> +#endif + +#include "db_int.h" +#include "shqueue.h" +#include "db_page.h" +#include "db_dispatch.h" +#include "db_am.h" +#include "common_ext.h" + +/* + * PUBLIC: int __db_addrem_log + * PUBLIC: __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t, + * PUBLIC: u_int32_t, u_int32_t, db_pgno_t, u_int32_t, + * PUBLIC: size_t, DBT *, DBT *, DB_LSN *)); + */ +/* + * __db_addrem_log -- + * Marshal a db_addrem record (record type, transaction id, the + * transaction's previous LSN, then the arguments in declaration + * order) into a malloc'd buffer and hand it to log_put. A NULL hdr + * or dbt is logged as a zero-length item and a NULL pagelsn as a + * zeroed LSN. Returns ENOMEM or log_put's result. + * + * NOTE(review): this file is marked as generated; the template in + * dist/db_gen.sh presumably needs the same ret == 0 guard -- confirm. + */ +int __db_addrem_log(logp, txnid, ret_lsnp, flags, + opcode, fileid, pgno, indx, nbytes, hdr, + dbt, pagelsn) + DB_LOG *logp; + DB_TXN *txnid; + DB_LSN *ret_lsnp; + u_int32_t flags; + u_int32_t opcode; + u_int32_t fileid; + db_pgno_t pgno; + u_int32_t indx; + size_t nbytes; + DBT *hdr; + DBT *dbt; + DB_LSN * pagelsn; +{ + DBT logrec; + DB_LSN *lsnp, null_lsn; + u_int32_t zero; + u_int32_t rectype, txn_num; + int ret; + u_int8_t *bp; + + rectype = DB_db_addrem; + txn_num = txnid == NULL ? 0 : txnid->txnid; + if (txnid == NULL) { + null_lsn.file = 0; + null_lsn.offset = 0; + lsnp = &null_lsn; + } else + lsnp = &txnid->last_lsn; + logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN) + + sizeof(opcode) + + sizeof(fileid) + + sizeof(pgno) + + sizeof(indx) + + sizeof(nbytes) + + sizeof(u_int32_t) + (hdr == NULL ? 0 : hdr->size) + + sizeof(u_int32_t) + (dbt == NULL ? 
0 : dbt->size) + + sizeof(*pagelsn); + if ((logrec.data = (void *)malloc(logrec.size)) == NULL) + return (ENOMEM); + + bp = logrec.data; + memcpy(bp, &rectype, sizeof(rectype)); + bp += sizeof(rectype); + memcpy(bp, &txn_num, sizeof(txn_num)); + bp += sizeof(txn_num); + memcpy(bp, lsnp, sizeof(DB_LSN)); + bp += sizeof(DB_LSN); + memcpy(bp, &opcode, sizeof(opcode)); + bp += sizeof(opcode); + memcpy(bp, &fileid, sizeof(fileid)); + bp += sizeof(fileid); + memcpy(bp, &pgno, sizeof(pgno)); + bp += sizeof(pgno); + memcpy(bp, &indx, sizeof(indx)); + bp += sizeof(indx); + memcpy(bp, &nbytes, sizeof(nbytes)); + bp += sizeof(nbytes); + if (hdr == NULL) { + zero = 0; + memcpy(bp, &zero, sizeof(u_int32_t)); + bp += sizeof(u_int32_t); + } else { + memcpy(bp, &hdr->size, sizeof(hdr->size)); + bp += sizeof(hdr->size); + memcpy(bp, hdr->data, hdr->size); + bp += hdr->size; + } + if (dbt == NULL) { + zero = 0; + memcpy(bp, &zero, sizeof(u_int32_t)); + bp += sizeof(u_int32_t); + } else { + memcpy(bp, &dbt->size, sizeof(dbt->size)); + bp += sizeof(dbt->size); + memcpy(bp, dbt->data, dbt->size); + bp += dbt->size; + } + if (pagelsn != NULL) + memcpy(bp, pagelsn, sizeof(*pagelsn)); + else + memset(bp, 0, sizeof(*pagelsn)); + bp += sizeof(*pagelsn); +#ifdef DEBUG + if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size) + fprintf(stderr, "Error in log record length"); +#endif + ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags); + /* + * Only chain the new LSN into the transaction if the record was + * actually written; after a failed log_put *ret_lsnp is not a + * valid LSN. + */ + if (ret == 0 && txnid != NULL) + txnid->last_lsn = *ret_lsnp; + free(logrec.data); + return (ret); +} + +/* + * PUBLIC: int __db_addrem_print + * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); + */ +/* + * __db_addrem_print -- + * Decode the db_addrem record in dbtp and print it to stdout; lsnp + * supplies the record's own LSN for the heading and the three + * notused arguments are ignored. Returns 0, or the error from + * __db_addrem_read. + */ + +int +__db_addrem_print(notused1, dbtp, lsnp, notused3, notused4) + DB_LOG *notused1; + DBT *dbtp; + DB_LSN *lsnp; + int notused3; + void *notused4; +{ + __db_addrem_args *argp; + u_int32_t i; + int c, ret; + + i = 0; + c = 0; + notused1 = NULL; + notused3 = 0; + notused4 = NULL; + + if((ret = __db_addrem_read(dbtp->data, &argp)) != 0) + return (ret); + 
printf("[%lu][%lu]db_addrem: rec: %lu txnid %lx prevlsn [%lu][%lu]\n", + (u_long)lsnp->file, + (u_long)lsnp->offset, + (u_long)argp->type, + (u_long)argp->txnid->txnid, + (u_long)argp->prev_lsn.file, + (u_long)argp->prev_lsn.offset); + printf("\topcode: %lu\n", (u_long)argp->opcode); + printf("\tfileid: %lu\n", (u_long)argp->fileid); + printf("\tpgno: %lu\n", (u_long)argp->pgno); + printf("\tindx: %lu\n", (u_long)argp->indx); + printf("\tnbytes: %lu\n", (u_long)argp->nbytes); + printf("\thdr: "); + for (i = 0; i < argp->hdr.size; i++) { + /* Read bytes as unsigned: isprint() is undefined for negative values. */ + c = ((u_int8_t *)argp->hdr.data)[i]; + if (isprint(c) || c == 0xa) + putchar(c); + else + printf("%#x ", c); + } + printf("\n"); + printf("\tdbt: "); + for (i = 0; i < argp->dbt.size; i++) { + /* Read bytes as unsigned: isprint() is undefined for negative values. */ + c = ((u_int8_t *)argp->dbt.data)[i]; + if (isprint(c) || c == 0xa) + putchar(c); + else + printf("%#x ", c); + } + printf("\n"); + printf("\tpagelsn: [%lu][%lu]\n", + (u_long)argp->pagelsn.file, (u_long)argp->pagelsn.offset); + printf("\n"); + free(argp); + return (0); +} + +/* + * PUBLIC: int __db_addrem_read __P((void *, __db_addrem_args **)); + */ +/* + * __db_addrem_read -- + * Unpack a db_addrem record from recbuf into a freshly malloc'd + * __db_addrem_args, with a DB_TXN stub placed directly after it for + * the transaction id. hdr.data and dbt.data point into recbuf + * rather than being copied, so recbuf must outlive the result. + * Returns ENOMEM or 0; the caller frees *argpp. + */ +int +__db_addrem_read(recbuf, argpp) + void *recbuf; + __db_addrem_args **argpp; +{ + __db_addrem_args *argp; + u_int8_t *bp; + + argp = (__db_addrem_args *)malloc(sizeof(__db_addrem_args) + + sizeof(DB_TXN)); + if (argp == NULL) + return (ENOMEM); + argp->txnid = (DB_TXN *)&argp[1]; + bp = recbuf; + memcpy(&argp->type, bp, sizeof(argp->type)); + bp += sizeof(argp->type); + memcpy(&argp->txnid->txnid, bp, sizeof(argp->txnid->txnid)); + bp += sizeof(argp->txnid->txnid); + memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN)); + bp += sizeof(DB_LSN); + memcpy(&argp->opcode, bp, sizeof(argp->opcode)); + bp += sizeof(argp->opcode); + memcpy(&argp->fileid, bp, sizeof(argp->fileid)); + bp += sizeof(argp->fileid); + memcpy(&argp->pgno, bp, sizeof(argp->pgno)); + bp += sizeof(argp->pgno); + memcpy(&argp->indx, bp, sizeof(argp->indx)); + bp += sizeof(argp->indx); + memcpy(&argp->nbytes, bp, sizeof(argp->nbytes)); + bp += 
sizeof(argp->nbytes); + memcpy(&argp->hdr.size, bp, sizeof(u_int32_t)); + bp += sizeof(u_int32_t); + argp->hdr.data = bp; + bp += argp->hdr.size; + memcpy(&argp->dbt.size, bp, sizeof(u_int32_t)); + bp += sizeof(u_int32_t); + argp->dbt.data = bp; + bp += argp->dbt.size; + memcpy(&argp->pagelsn, bp, sizeof(argp->pagelsn)); + bp += sizeof(argp->pagelsn); + *argpp = argp; + return (0); +} + +/* + * PUBLIC: int __db_split_log + * PUBLIC: __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t, + * PUBLIC: u_int32_t, u_int32_t, db_pgno_t, DBT *, + * PUBLIC: DB_LSN *)); + */ +int __db_split_log(logp, txnid, ret_lsnp, flags, + opcode, fileid, pgno, pageimage, pagelsn) + DB_LOG *logp; + DB_TXN *txnid; + DB_LSN *ret_lsnp; + u_int32_t flags; + u_int32_t opcode; + u_int32_t fileid; + db_pgno_t pgno; + DBT *pageimage; + DB_LSN * pagelsn; +{ + DBT logrec; + DB_LSN *lsnp, null_lsn; + u_int32_t zero; + u_int32_t rectype, txn_num; + int ret; + u_int8_t *bp; + + rectype = DB_db_split; + txn_num = txnid == NULL ? 0 : txnid->txnid; + if (txnid == NULL) { + null_lsn.file = 0; + null_lsn.offset = 0; + lsnp = &null_lsn; + } else + lsnp = &txnid->last_lsn; + logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN) + + sizeof(opcode) + + sizeof(fileid) + + sizeof(pgno) + + sizeof(u_int32_t) + (pageimage == NULL ? 
0 : pageimage->size) + + sizeof(*pagelsn); + if ((logrec.data = (void *)malloc(logrec.size)) == NULL) + return (ENOMEM); + + bp = logrec.data; + memcpy(bp, &rectype, sizeof(rectype)); + bp += sizeof(rectype); + memcpy(bp, &txn_num, sizeof(txn_num)); + bp += sizeof(txn_num); + memcpy(bp, lsnp, sizeof(DB_LSN)); + bp += sizeof(DB_LSN); + memcpy(bp, &opcode, sizeof(opcode)); + bp += sizeof(opcode); + memcpy(bp, &fileid, sizeof(fileid)); + bp += sizeof(fileid); + memcpy(bp, &pgno, sizeof(pgno)); + bp += sizeof(pgno); + if (pageimage == NULL) { + zero = 0; + memcpy(bp, &zero, sizeof(u_int32_t)); + bp += sizeof(u_int32_t); + } else { + memcpy(bp, &pageimage->size, sizeof(pageimage->size)); + bp += sizeof(pageimage->size); + memcpy(bp, pageimage->data, pageimage->size); + bp += pageimage->size; + } + if (pagelsn != NULL) + memcpy(bp, pagelsn, sizeof(*pagelsn)); + else + memset(bp, 0, sizeof(*pagelsn)); + bp += sizeof(*pagelsn); +#ifdef DEBUG + if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size) + fprintf(stderr, "Error in log record length"); +#endif + ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags); + if (txnid != NULL) + txnid->last_lsn = *ret_lsnp; + free(logrec.data); + return (ret); +} + +/* + * PUBLIC: int __db_split_print + * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); + */ + +int +__db_split_print(notused1, dbtp, lsnp, notused3, notused4) + DB_LOG *notused1; + DBT *dbtp; + DB_LSN *lsnp; + int notused3; + void *notused4; +{ + __db_split_args *argp; + u_int32_t i; + int c, ret; + + i = 0; + c = 0; + notused1 = NULL; + notused3 = 0; + notused4 = NULL; + + if((ret = __db_split_read(dbtp->data, &argp)) != 0) + return (ret); + printf("[%lu][%lu]db_split: rec: %lu txnid %lx prevlsn [%lu][%lu]\n", + (u_long)lsnp->file, + (u_long)lsnp->offset, + (u_long)argp->type, + (u_long)argp->txnid->txnid, + (u_long)argp->prev_lsn.file, + (u_long)argp->prev_lsn.offset); + printf("\topcode: %lu\n", (u_long)argp->opcode); + printf("\tfileid: %lu\n", 
(u_long)argp->fileid); + printf("\tpgno: %lu\n", (u_long)argp->pgno); + printf("\tpageimage: "); + for (i = 0; i < argp->pageimage.size; i++) { + c = ((char *)argp->pageimage.data)[i]; + if (isprint(c) || c == 0xa) + putchar(c); + else + printf("%#x ", c); + } + printf("\n"); + printf("\tpagelsn: [%lu][%lu]\n", + (u_long)argp->pagelsn.file, (u_long)argp->pagelsn.offset); + printf("\n"); + free(argp); + return (0); +} + +/* + * PUBLIC: int __db_split_read __P((void *, __db_split_args **)); + */ +int +__db_split_read(recbuf, argpp) + void *recbuf; + __db_split_args **argpp; +{ + __db_split_args *argp; + u_int8_t *bp; + + argp = (__db_split_args *)malloc(sizeof(__db_split_args) + + sizeof(DB_TXN)); + if (argp == NULL) + return (ENOMEM); + argp->txnid = (DB_TXN *)&argp[1]; + bp = recbuf; + memcpy(&argp->type, bp, sizeof(argp->type)); + bp += sizeof(argp->type); + memcpy(&argp->txnid->txnid, bp, sizeof(argp->txnid->txnid)); + bp += sizeof(argp->txnid->txnid); + memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN)); + bp += sizeof(DB_LSN); + memcpy(&argp->opcode, bp, sizeof(argp->opcode)); + bp += sizeof(argp->opcode); + memcpy(&argp->fileid, bp, sizeof(argp->fileid)); + bp += sizeof(argp->fileid); + memcpy(&argp->pgno, bp, sizeof(argp->pgno)); + bp += sizeof(argp->pgno); + memcpy(&argp->pageimage.size, bp, sizeof(u_int32_t)); + bp += sizeof(u_int32_t); + argp->pageimage.data = bp; + bp += argp->pageimage.size; + memcpy(&argp->pagelsn, bp, sizeof(argp->pagelsn)); + bp += sizeof(argp->pagelsn); + *argpp = argp; + return (0); +} + +/* + * PUBLIC: int __db_big_log + * PUBLIC: __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t, + * PUBLIC: u_int32_t, u_int32_t, db_pgno_t, db_pgno_t, + * PUBLIC: db_pgno_t, DBT *, DB_LSN *, DB_LSN *, + * PUBLIC: DB_LSN *)); + */ +int __db_big_log(logp, txnid, ret_lsnp, flags, + opcode, fileid, pgno, prev_pgno, next_pgno, dbt, + pagelsn, prevlsn, nextlsn) + DB_LOG *logp; + DB_TXN *txnid; + DB_LSN *ret_lsnp; + u_int32_t flags; + u_int32_t opcode; + u_int32_t 
fileid; + db_pgno_t pgno; + db_pgno_t prev_pgno; + db_pgno_t next_pgno; + DBT *dbt; + DB_LSN * pagelsn; + DB_LSN * prevlsn; + DB_LSN * nextlsn; +{ + DBT logrec; + DB_LSN *lsnp, null_lsn; + u_int32_t zero; + u_int32_t rectype, txn_num; + int ret; + u_int8_t *bp; + + rectype = DB_db_big; + txn_num = txnid == NULL ? 0 : txnid->txnid; + if (txnid == NULL) { + null_lsn.file = 0; + null_lsn.offset = 0; + lsnp = &null_lsn; + } else + lsnp = &txnid->last_lsn; + logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN) + + sizeof(opcode) + + sizeof(fileid) + + sizeof(pgno) + + sizeof(prev_pgno) + + sizeof(next_pgno) + + sizeof(u_int32_t) + (dbt == NULL ? 0 : dbt->size) + + sizeof(*pagelsn) + + sizeof(*prevlsn) + + sizeof(*nextlsn); + if ((logrec.data = (void *)malloc(logrec.size)) == NULL) + return (ENOMEM); + + bp = logrec.data; + memcpy(bp, &rectype, sizeof(rectype)); + bp += sizeof(rectype); + memcpy(bp, &txn_num, sizeof(txn_num)); + bp += sizeof(txn_num); + memcpy(bp, lsnp, sizeof(DB_LSN)); + bp += sizeof(DB_LSN); + memcpy(bp, &opcode, sizeof(opcode)); + bp += sizeof(opcode); + memcpy(bp, &fileid, sizeof(fileid)); + bp += sizeof(fileid); + memcpy(bp, &pgno, sizeof(pgno)); + bp += sizeof(pgno); + memcpy(bp, &prev_pgno, sizeof(prev_pgno)); + bp += sizeof(prev_pgno); + memcpy(bp, &next_pgno, sizeof(next_pgno)); + bp += sizeof(next_pgno); + if (dbt == NULL) { + zero = 0; + memcpy(bp, &zero, sizeof(u_int32_t)); + bp += sizeof(u_int32_t); + } else { + memcpy(bp, &dbt->size, sizeof(dbt->size)); + bp += sizeof(dbt->size); + memcpy(bp, dbt->data, dbt->size); + bp += dbt->size; + } + if (pagelsn != NULL) + memcpy(bp, pagelsn, sizeof(*pagelsn)); + else + memset(bp, 0, sizeof(*pagelsn)); + bp += sizeof(*pagelsn); + if (prevlsn != NULL) + memcpy(bp, prevlsn, sizeof(*prevlsn)); + else + memset(bp, 0, sizeof(*prevlsn)); + bp += sizeof(*prevlsn); + if (nextlsn != NULL) + memcpy(bp, nextlsn, sizeof(*nextlsn)); + else + memset(bp, 0, sizeof(*nextlsn)); + bp += sizeof(*nextlsn); 
+#ifdef DEBUG + if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size) + fprintf(stderr, "Error in log record length"); +#endif + ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags); + if (txnid != NULL) + txnid->last_lsn = *ret_lsnp; + free(logrec.data); + return (ret); +} + +/* + * PUBLIC: int __db_big_print + * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); + */ + +int +__db_big_print(notused1, dbtp, lsnp, notused3, notused4) + DB_LOG *notused1; + DBT *dbtp; + DB_LSN *lsnp; + int notused3; + void *notused4; +{ + __db_big_args *argp; + u_int32_t i; + int c, ret; + + i = 0; + c = 0; + notused1 = NULL; + notused3 = 0; + notused4 = NULL; + + if((ret = __db_big_read(dbtp->data, &argp)) != 0) + return (ret); + printf("[%lu][%lu]db_big: rec: %lu txnid %lx prevlsn [%lu][%lu]\n", + (u_long)lsnp->file, + (u_long)lsnp->offset, + (u_long)argp->type, + (u_long)argp->txnid->txnid, + (u_long)argp->prev_lsn.file, + (u_long)argp->prev_lsn.offset); + printf("\topcode: %lu\n", (u_long)argp->opcode); + printf("\tfileid: %lu\n", (u_long)argp->fileid); + printf("\tpgno: %lu\n", (u_long)argp->pgno); + printf("\tprev_pgno: %lu\n", (u_long)argp->prev_pgno); + printf("\tnext_pgno: %lu\n", (u_long)argp->next_pgno); + printf("\tdbt: "); + for (i = 0; i < argp->dbt.size; i++) { + c = ((char *)argp->dbt.data)[i]; + if (isprint(c) || c == 0xa) + putchar(c); + else + printf("%#x ", c); + } + printf("\n"); + printf("\tpagelsn: [%lu][%lu]\n", + (u_long)argp->pagelsn.file, (u_long)argp->pagelsn.offset); + printf("\tprevlsn: [%lu][%lu]\n", + (u_long)argp->prevlsn.file, (u_long)argp->prevlsn.offset); + printf("\tnextlsn: [%lu][%lu]\n", + (u_long)argp->nextlsn.file, (u_long)argp->nextlsn.offset); + printf("\n"); + free(argp); + return (0); +} + +/* + * PUBLIC: int __db_big_read __P((void *, __db_big_args **)); + */ +int +__db_big_read(recbuf, argpp) + void *recbuf; + __db_big_args **argpp; +{ + __db_big_args *argp; + u_int8_t *bp; + + argp = (__db_big_args 
*)malloc(sizeof(__db_big_args) + + sizeof(DB_TXN)); + if (argp == NULL) + return (ENOMEM); + argp->txnid = (DB_TXN *)&argp[1]; + bp = recbuf; + memcpy(&argp->type, bp, sizeof(argp->type)); + bp += sizeof(argp->type); + memcpy(&argp->txnid->txnid, bp, sizeof(argp->txnid->txnid)); + bp += sizeof(argp->txnid->txnid); + memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN)); + bp += sizeof(DB_LSN); + memcpy(&argp->opcode, bp, sizeof(argp->opcode)); + bp += sizeof(argp->opcode); + memcpy(&argp->fileid, bp, sizeof(argp->fileid)); + bp += sizeof(argp->fileid); + memcpy(&argp->pgno, bp, sizeof(argp->pgno)); + bp += sizeof(argp->pgno); + memcpy(&argp->prev_pgno, bp, sizeof(argp->prev_pgno)); + bp += sizeof(argp->prev_pgno); + memcpy(&argp->next_pgno, bp, sizeof(argp->next_pgno)); + bp += sizeof(argp->next_pgno); + memcpy(&argp->dbt.size, bp, sizeof(u_int32_t)); + bp += sizeof(u_int32_t); + argp->dbt.data = bp; + bp += argp->dbt.size; + memcpy(&argp->pagelsn, bp, sizeof(argp->pagelsn)); + bp += sizeof(argp->pagelsn); + memcpy(&argp->prevlsn, bp, sizeof(argp->prevlsn)); + bp += sizeof(argp->prevlsn); + memcpy(&argp->nextlsn, bp, sizeof(argp->nextlsn)); + bp += sizeof(argp->nextlsn); + *argpp = argp; + return (0); +} + +/* + * PUBLIC: int __db_ovref_log + * PUBLIC: __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t, + * PUBLIC: u_int32_t, db_pgno_t, DB_LSN *)); + */ +int __db_ovref_log(logp, txnid, ret_lsnp, flags, + fileid, pgno, lsn) + DB_LOG *logp; + DB_TXN *txnid; + DB_LSN *ret_lsnp; + u_int32_t flags; + u_int32_t fileid; + db_pgno_t pgno; + DB_LSN * lsn; +{ + DBT logrec; + DB_LSN *lsnp, null_lsn; + u_int32_t rectype, txn_num; + int ret; + u_int8_t *bp; + + rectype = DB_db_ovref; + txn_num = txnid == NULL ? 
0 : txnid->txnid; + if (txnid == NULL) { + null_lsn.file = 0; + null_lsn.offset = 0; + lsnp = &null_lsn; + } else + lsnp = &txnid->last_lsn; + logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN) + + sizeof(fileid) + + sizeof(pgno) + + sizeof(*lsn); + if ((logrec.data = (void *)malloc(logrec.size)) == NULL) + return (ENOMEM); + + bp = logrec.data; + memcpy(bp, &rectype, sizeof(rectype)); + bp += sizeof(rectype); + memcpy(bp, &txn_num, sizeof(txn_num)); + bp += sizeof(txn_num); + memcpy(bp, lsnp, sizeof(DB_LSN)); + bp += sizeof(DB_LSN); + memcpy(bp, &fileid, sizeof(fileid)); + bp += sizeof(fileid); + memcpy(bp, &pgno, sizeof(pgno)); + bp += sizeof(pgno); + if (lsn != NULL) + memcpy(bp, lsn, sizeof(*lsn)); + else + memset(bp, 0, sizeof(*lsn)); + bp += sizeof(*lsn); +#ifdef DEBUG + if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size) + fprintf(stderr, "Error in log record length"); +#endif + ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags); + if (txnid != NULL) + txnid->last_lsn = *ret_lsnp; + free(logrec.data); + return (ret); +} + +/* + * PUBLIC: int __db_ovref_print + * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); + */ + +int +__db_ovref_print(notused1, dbtp, lsnp, notused3, notused4) + DB_LOG *notused1; + DBT *dbtp; + DB_LSN *lsnp; + int notused3; + void *notused4; +{ + __db_ovref_args *argp; + u_int32_t i; + int c, ret; + + i = 0; + c = 0; + notused1 = NULL; + notused3 = 0; + notused4 = NULL; + + if((ret = __db_ovref_read(dbtp->data, &argp)) != 0) + return (ret); + printf("[%lu][%lu]db_ovref: rec: %lu txnid %lx prevlsn [%lu][%lu]\n", + (u_long)lsnp->file, + (u_long)lsnp->offset, + (u_long)argp->type, + (u_long)argp->txnid->txnid, + (u_long)argp->prev_lsn.file, + (u_long)argp->prev_lsn.offset); + printf("\tfileid: %lu\n", (u_long)argp->fileid); + printf("\tpgno: %lu\n", (u_long)argp->pgno); + printf("\tlsn: [%lu][%lu]\n", + (u_long)argp->lsn.file, (u_long)argp->lsn.offset); + printf("\n"); + free(argp); + return (0); +} + +/* + * 
PUBLIC: int __db_ovref_read __P((void *, __db_ovref_args **)); + */ +int +__db_ovref_read(recbuf, argpp) + void *recbuf; + __db_ovref_args **argpp; +{ + __db_ovref_args *argp; + u_int8_t *bp; + + argp = (__db_ovref_args *)malloc(sizeof(__db_ovref_args) + + sizeof(DB_TXN)); + if (argp == NULL) + return (ENOMEM); + argp->txnid = (DB_TXN *)&argp[1]; + bp = recbuf; + memcpy(&argp->type, bp, sizeof(argp->type)); + bp += sizeof(argp->type); + memcpy(&argp->txnid->txnid, bp, sizeof(argp->txnid->txnid)); + bp += sizeof(argp->txnid->txnid); + memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN)); + bp += sizeof(DB_LSN); + memcpy(&argp->fileid, bp, sizeof(argp->fileid)); + bp += sizeof(argp->fileid); + memcpy(&argp->pgno, bp, sizeof(argp->pgno)); + bp += sizeof(argp->pgno); + memcpy(&argp->lsn, bp, sizeof(argp->lsn)); + bp += sizeof(argp->lsn); + *argpp = argp; + return (0); +} + +/* + * PUBLIC: int __db_relink_log + * PUBLIC: __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t, + * PUBLIC: u_int32_t, db_pgno_t, DB_LSN *, db_pgno_t, + * PUBLIC: DB_LSN *, db_pgno_t, DB_LSN *)); + */ +int __db_relink_log(logp, txnid, ret_lsnp, flags, + fileid, pgno, lsn, prev, lsn_prev, next, + lsn_next) + DB_LOG *logp; + DB_TXN *txnid; + DB_LSN *ret_lsnp; + u_int32_t flags; + u_int32_t fileid; + db_pgno_t pgno; + DB_LSN * lsn; + db_pgno_t prev; + DB_LSN * lsn_prev; + db_pgno_t next; + DB_LSN * lsn_next; +{ + DBT logrec; + DB_LSN *lsnp, null_lsn; + u_int32_t rectype, txn_num; + int ret; + u_int8_t *bp; + + rectype = DB_db_relink; + txn_num = txnid == NULL ? 
0 : txnid->txnid; + if (txnid == NULL) { + null_lsn.file = 0; + null_lsn.offset = 0; + lsnp = &null_lsn; + } else + lsnp = &txnid->last_lsn; + logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN) + + sizeof(fileid) + + sizeof(pgno) + + sizeof(*lsn) + + sizeof(prev) + + sizeof(*lsn_prev) + + sizeof(next) + + sizeof(*lsn_next); + if ((logrec.data = (void *)malloc(logrec.size)) == NULL) + return (ENOMEM); + + bp = logrec.data; + memcpy(bp, &rectype, sizeof(rectype)); + bp += sizeof(rectype); + memcpy(bp, &txn_num, sizeof(txn_num)); + bp += sizeof(txn_num); + memcpy(bp, lsnp, sizeof(DB_LSN)); + bp += sizeof(DB_LSN); + memcpy(bp, &fileid, sizeof(fileid)); + bp += sizeof(fileid); + memcpy(bp, &pgno, sizeof(pgno)); + bp += sizeof(pgno); + if (lsn != NULL) + memcpy(bp, lsn, sizeof(*lsn)); + else + memset(bp, 0, sizeof(*lsn)); + bp += sizeof(*lsn); + memcpy(bp, &prev, sizeof(prev)); + bp += sizeof(prev); + if (lsn_prev != NULL) + memcpy(bp, lsn_prev, sizeof(*lsn_prev)); + else + memset(bp, 0, sizeof(*lsn_prev)); + bp += sizeof(*lsn_prev); + memcpy(bp, &next, sizeof(next)); + bp += sizeof(next); + if (lsn_next != NULL) + memcpy(bp, lsn_next, sizeof(*lsn_next)); + else + memset(bp, 0, sizeof(*lsn_next)); + bp += sizeof(*lsn_next); +#ifdef DEBUG + if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size) + fprintf(stderr, "Error in log record length"); +#endif + ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags); + if (txnid != NULL) + txnid->last_lsn = *ret_lsnp; + free(logrec.data); + return (ret); +} + +/* + * PUBLIC: int __db_relink_print + * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); + */ + +int +__db_relink_print(notused1, dbtp, lsnp, notused3, notused4) + DB_LOG *notused1; + DBT *dbtp; + DB_LSN *lsnp; + int notused3; + void *notused4; +{ + __db_relink_args *argp; + u_int32_t i; + int c, ret; + + i = 0; + c = 0; + notused1 = NULL; + notused3 = 0; + notused4 = NULL; + + if((ret = __db_relink_read(dbtp->data, &argp)) != 0) + return (ret); + 
printf("[%lu][%lu]db_relink: rec: %lu txnid %lx prevlsn [%lu][%lu]\n", + (u_long)lsnp->file, + (u_long)lsnp->offset, + (u_long)argp->type, + (u_long)argp->txnid->txnid, + (u_long)argp->prev_lsn.file, + (u_long)argp->prev_lsn.offset); + printf("\tfileid: %lu\n", (u_long)argp->fileid); + printf("\tpgno: %lu\n", (u_long)argp->pgno); + printf("\tlsn: [%lu][%lu]\n", + (u_long)argp->lsn.file, (u_long)argp->lsn.offset); + printf("\tprev: %lu\n", (u_long)argp->prev); + printf("\tlsn_prev: [%lu][%lu]\n", + (u_long)argp->lsn_prev.file, (u_long)argp->lsn_prev.offset); + printf("\tnext: %lu\n", (u_long)argp->next); + printf("\tlsn_next: [%lu][%lu]\n", + (u_long)argp->lsn_next.file, (u_long)argp->lsn_next.offset); + printf("\n"); + free(argp); + return (0); +} + +/* + * PUBLIC: int __db_relink_read __P((void *, __db_relink_args **)); + */ +int +__db_relink_read(recbuf, argpp) + void *recbuf; + __db_relink_args **argpp; +{ + __db_relink_args *argp; + u_int8_t *bp; + + argp = (__db_relink_args *)malloc(sizeof(__db_relink_args) + + sizeof(DB_TXN)); + if (argp == NULL) + return (ENOMEM); + argp->txnid = (DB_TXN *)&argp[1]; + bp = recbuf; + memcpy(&argp->type, bp, sizeof(argp->type)); + bp += sizeof(argp->type); + memcpy(&argp->txnid->txnid, bp, sizeof(argp->txnid->txnid)); + bp += sizeof(argp->txnid->txnid); + memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN)); + bp += sizeof(DB_LSN); + memcpy(&argp->fileid, bp, sizeof(argp->fileid)); + bp += sizeof(argp->fileid); + memcpy(&argp->pgno, bp, sizeof(argp->pgno)); + bp += sizeof(argp->pgno); + memcpy(&argp->lsn, bp, sizeof(argp->lsn)); + bp += sizeof(argp->lsn); + memcpy(&argp->prev, bp, sizeof(argp->prev)); + bp += sizeof(argp->prev); + memcpy(&argp->lsn_prev, bp, sizeof(argp->lsn_prev)); + bp += sizeof(argp->lsn_prev); + memcpy(&argp->next, bp, sizeof(argp->next)); + bp += sizeof(argp->next); + memcpy(&argp->lsn_next, bp, sizeof(argp->lsn_next)); + bp += sizeof(argp->lsn_next); + *argpp = argp; + return (0); +} + +/* + * PUBLIC: int 
__db_addpage_log + * PUBLIC: __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t, + * PUBLIC: u_int32_t, db_pgno_t, DB_LSN *, db_pgno_t, + * PUBLIC: DB_LSN *)); + */ +int __db_addpage_log(logp, txnid, ret_lsnp, flags, + fileid, pgno, lsn, nextpgno, nextlsn) + DB_LOG *logp; + DB_TXN *txnid; + DB_LSN *ret_lsnp; + u_int32_t flags; + u_int32_t fileid; + db_pgno_t pgno; + DB_LSN * lsn; + db_pgno_t nextpgno; + DB_LSN * nextlsn; +{ + DBT logrec; + DB_LSN *lsnp, null_lsn; + u_int32_t rectype, txn_num; + int ret; + u_int8_t *bp; + + rectype = DB_db_addpage; + txn_num = txnid == NULL ? 0 : txnid->txnid; + if (txnid == NULL) { + null_lsn.file = 0; + null_lsn.offset = 0; + lsnp = &null_lsn; + } else + lsnp = &txnid->last_lsn; + logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN) + + sizeof(fileid) + + sizeof(pgno) + + sizeof(*lsn) + + sizeof(nextpgno) + + sizeof(*nextlsn); + if ((logrec.data = (void *)malloc(logrec.size)) == NULL) + return (ENOMEM); + + bp = logrec.data; + memcpy(bp, &rectype, sizeof(rectype)); + bp += sizeof(rectype); + memcpy(bp, &txn_num, sizeof(txn_num)); + bp += sizeof(txn_num); + memcpy(bp, lsnp, sizeof(DB_LSN)); + bp += sizeof(DB_LSN); + memcpy(bp, &fileid, sizeof(fileid)); + bp += sizeof(fileid); + memcpy(bp, &pgno, sizeof(pgno)); + bp += sizeof(pgno); + if (lsn != NULL) + memcpy(bp, lsn, sizeof(*lsn)); + else + memset(bp, 0, sizeof(*lsn)); + bp += sizeof(*lsn); + memcpy(bp, &nextpgno, sizeof(nextpgno)); + bp += sizeof(nextpgno); + if (nextlsn != NULL) + memcpy(bp, nextlsn, sizeof(*nextlsn)); + else + memset(bp, 0, sizeof(*nextlsn)); + bp += sizeof(*nextlsn); +#ifdef DEBUG + if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size) + fprintf(stderr, "Error in log record length"); +#endif + ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags); + if (txnid != NULL) + txnid->last_lsn = *ret_lsnp; + free(logrec.data); + return (ret); +} + +/* + * PUBLIC: int __db_addpage_print + * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); + */ + +int 
+__db_addpage_print(notused1, dbtp, lsnp, notused3, notused4) + DB_LOG *notused1; + DBT *dbtp; + DB_LSN *lsnp; + int notused3; + void *notused4; +{ + __db_addpage_args *argp; + u_int32_t i; + int c, ret; + + i = 0; + c = 0; + notused1 = NULL; + notused3 = 0; + notused4 = NULL; + + if((ret = __db_addpage_read(dbtp->data, &argp)) != 0) + return (ret); + printf("[%lu][%lu]db_addpage: rec: %lu txnid %lx prevlsn [%lu][%lu]\n", + (u_long)lsnp->file, + (u_long)lsnp->offset, + (u_long)argp->type, + (u_long)argp->txnid->txnid, + (u_long)argp->prev_lsn.file, + (u_long)argp->prev_lsn.offset); + printf("\tfileid: %lu\n", (u_long)argp->fileid); + printf("\tpgno: %lu\n", (u_long)argp->pgno); + printf("\tlsn: [%lu][%lu]\n", + (u_long)argp->lsn.file, (u_long)argp->lsn.offset); + printf("\tnextpgno: %lu\n", (u_long)argp->nextpgno); + printf("\tnextlsn: [%lu][%lu]\n", + (u_long)argp->nextlsn.file, (u_long)argp->nextlsn.offset); + printf("\n"); + free(argp); + return (0); +} + +/* + * PUBLIC: int __db_addpage_read __P((void *, __db_addpage_args **)); + */ +int +__db_addpage_read(recbuf, argpp) + void *recbuf; + __db_addpage_args **argpp; +{ + __db_addpage_args *argp; + u_int8_t *bp; + + argp = (__db_addpage_args *)malloc(sizeof(__db_addpage_args) + + sizeof(DB_TXN)); + if (argp == NULL) + return (ENOMEM); + argp->txnid = (DB_TXN *)&argp[1]; + bp = recbuf; + memcpy(&argp->type, bp, sizeof(argp->type)); + bp += sizeof(argp->type); + memcpy(&argp->txnid->txnid, bp, sizeof(argp->txnid->txnid)); + bp += sizeof(argp->txnid->txnid); + memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN)); + bp += sizeof(DB_LSN); + memcpy(&argp->fileid, bp, sizeof(argp->fileid)); + bp += sizeof(argp->fileid); + memcpy(&argp->pgno, bp, sizeof(argp->pgno)); + bp += sizeof(argp->pgno); + memcpy(&argp->lsn, bp, sizeof(argp->lsn)); + bp += sizeof(argp->lsn); + memcpy(&argp->nextpgno, bp, sizeof(argp->nextpgno)); + bp += sizeof(argp->nextpgno); + memcpy(&argp->nextlsn, bp, sizeof(argp->nextlsn)); + bp += 
sizeof(argp->nextlsn); + *argpp = argp; + return (0); +} + +/* + * PUBLIC: int __db_debug_log + * PUBLIC: __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t, + * PUBLIC: DBT *, u_int32_t, DBT *, DBT *, + * PUBLIC: u_int32_t)); + */ +int __db_debug_log(logp, txnid, ret_lsnp, flags, + op, fileid, key, data, arg_flags) + DB_LOG *logp; + DB_TXN *txnid; + DB_LSN *ret_lsnp; + u_int32_t flags; + DBT *op; + u_int32_t fileid; + DBT *key; + DBT *data; + u_int32_t arg_flags; +{ + DBT logrec; + DB_LSN *lsnp, null_lsn; + u_int32_t zero; + u_int32_t rectype, txn_num; + int ret; + u_int8_t *bp; + + rectype = DB_db_debug; + txn_num = txnid == NULL ? 0 : txnid->txnid; + if (txnid == NULL) { + null_lsn.file = 0; + null_lsn.offset = 0; + lsnp = &null_lsn; + } else + lsnp = &txnid->last_lsn; + logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN) + + sizeof(u_int32_t) + (op == NULL ? 0 : op->size) + + sizeof(fileid) + + sizeof(u_int32_t) + (key == NULL ? 0 : key->size) + + sizeof(u_int32_t) + (data == NULL ? 
0 : data->size) + + sizeof(arg_flags); + if ((logrec.data = (void *)malloc(logrec.size)) == NULL) + return (ENOMEM); + + bp = logrec.data; + memcpy(bp, &rectype, sizeof(rectype)); + bp += sizeof(rectype); + memcpy(bp, &txn_num, sizeof(txn_num)); + bp += sizeof(txn_num); + memcpy(bp, lsnp, sizeof(DB_LSN)); + bp += sizeof(DB_LSN); + if (op == NULL) { + zero = 0; + memcpy(bp, &zero, sizeof(u_int32_t)); + bp += sizeof(u_int32_t); + } else { + memcpy(bp, &op->size, sizeof(op->size)); + bp += sizeof(op->size); + memcpy(bp, op->data, op->size); + bp += op->size; + } + memcpy(bp, &fileid, sizeof(fileid)); + bp += sizeof(fileid); + if (key == NULL) { + zero = 0; + memcpy(bp, &zero, sizeof(u_int32_t)); + bp += sizeof(u_int32_t); + } else { + memcpy(bp, &key->size, sizeof(key->size)); + bp += sizeof(key->size); + memcpy(bp, key->data, key->size); + bp += key->size; + } + if (data == NULL) { + zero = 0; + memcpy(bp, &zero, sizeof(u_int32_t)); + bp += sizeof(u_int32_t); + } else { + memcpy(bp, &data->size, sizeof(data->size)); + bp += sizeof(data->size); + memcpy(bp, data->data, data->size); + bp += data->size; + } + memcpy(bp, &arg_flags, sizeof(arg_flags)); + bp += sizeof(arg_flags); +#ifdef DEBUG + if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size) + fprintf(stderr, "Error in log record length"); +#endif + ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags); + if (txnid != NULL) + txnid->last_lsn = *ret_lsnp; + free(logrec.data); + return (ret); +} + +/* + * PUBLIC: int __db_debug_print + * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); + */ + +int +__db_debug_print(notused1, dbtp, lsnp, notused3, notused4) + DB_LOG *notused1; + DBT *dbtp; + DB_LSN *lsnp; + int notused3; + void *notused4; +{ + __db_debug_args *argp; + u_int32_t i; + int c, ret; + + i = 0; + c = 0; + notused1 = NULL; + notused3 = 0; + notused4 = NULL; + + if((ret = __db_debug_read(dbtp->data, &argp)) != 0) + return (ret); + printf("[%lu][%lu]db_debug: rec: %lu txnid %lx prevlsn 
[%lu][%lu]\n", + (u_long)lsnp->file, + (u_long)lsnp->offset, + (u_long)argp->type, + (u_long)argp->txnid->txnid, + (u_long)argp->prev_lsn.file, + (u_long)argp->prev_lsn.offset); + printf("\top: "); + for (i = 0; i < argp->op.size; i++) { + c = ((char *)argp->op.data)[i]; + if (isprint(c) || c == 0xa) + putchar(c); + else + printf("%#x ", c); + } + printf("\n"); + printf("\tfileid: %lu\n", (u_long)argp->fileid); + printf("\tkey: "); + for (i = 0; i < argp->key.size; i++) { + c = ((char *)argp->key.data)[i]; + if (isprint(c) || c == 0xa) + putchar(c); + else + printf("%#x ", c); + } + printf("\n"); + printf("\tdata: "); + for (i = 0; i < argp->data.size; i++) { + c = ((char *)argp->data.data)[i]; + if (isprint(c) || c == 0xa) + putchar(c); + else + printf("%#x ", c); + } + printf("\n"); + printf("\targ_flags: %lu\n", (u_long)argp->arg_flags); + printf("\n"); + free(argp); + return (0); +} + +/* + * PUBLIC: int __db_debug_read __P((void *, __db_debug_args **)); + */ +int +__db_debug_read(recbuf, argpp) + void *recbuf; + __db_debug_args **argpp; +{ + __db_debug_args *argp; + u_int8_t *bp; + + argp = (__db_debug_args *)malloc(sizeof(__db_debug_args) + + sizeof(DB_TXN)); + if (argp == NULL) + return (ENOMEM); + argp->txnid = (DB_TXN *)&argp[1]; + bp = recbuf; + memcpy(&argp->type, bp, sizeof(argp->type)); + bp += sizeof(argp->type); + memcpy(&argp->txnid->txnid, bp, sizeof(argp->txnid->txnid)); + bp += sizeof(argp->txnid->txnid); + memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN)); + bp += sizeof(DB_LSN); + memcpy(&argp->op.size, bp, sizeof(u_int32_t)); + bp += sizeof(u_int32_t); + argp->op.data = bp; + bp += argp->op.size; + memcpy(&argp->fileid, bp, sizeof(argp->fileid)); + bp += sizeof(argp->fileid); + memcpy(&argp->key.size, bp, sizeof(u_int32_t)); + bp += sizeof(u_int32_t); + argp->key.data = bp; + bp += argp->key.size; + memcpy(&argp->data.size, bp, sizeof(u_int32_t)); + bp += sizeof(u_int32_t); + argp->data.data = bp; + bp += argp->data.size; + memcpy(&argp->arg_flags, 
bp, sizeof(argp->arg_flags)); + bp += sizeof(argp->arg_flags); + *argpp = argp; + return (0); +} + +/* + * PUBLIC: int __db_noop_log + * PUBLIC: __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t)); + */ +int __db_noop_log(logp, txnid, ret_lsnp, flags) + DB_LOG *logp; + DB_TXN *txnid; + DB_LSN *ret_lsnp; + u_int32_t flags; +{ + DBT logrec; + DB_LSN *lsnp, null_lsn; + u_int32_t rectype, txn_num; + int ret; + u_int8_t *bp; + + rectype = DB_db_noop; + txn_num = txnid == NULL ? 0 : txnid->txnid; + if (txnid == NULL) { + null_lsn.file = 0; + null_lsn.offset = 0; + lsnp = &null_lsn; + } else + lsnp = &txnid->last_lsn; + logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN); + if ((logrec.data = (void *)malloc(logrec.size)) == NULL) + return (ENOMEM); + + bp = logrec.data; + memcpy(bp, &rectype, sizeof(rectype)); + bp += sizeof(rectype); + memcpy(bp, &txn_num, sizeof(txn_num)); + bp += sizeof(txn_num); + memcpy(bp, lsnp, sizeof(DB_LSN)); + bp += sizeof(DB_LSN); +#ifdef DEBUG + if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size) + fprintf(stderr, "Error in log record length"); +#endif + ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags); + if (txnid != NULL) + txnid->last_lsn = *ret_lsnp; + free(logrec.data); + return (ret); +} + +/* + * PUBLIC: int __db_noop_print + * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); + */ + +int +__db_noop_print(notused1, dbtp, lsnp, notused3, notused4) + DB_LOG *notused1; + DBT *dbtp; + DB_LSN *lsnp; + int notused3; + void *notused4; +{ + __db_noop_args *argp; + u_int32_t i; + int c, ret; + + i = 0; + c = 0; + notused1 = NULL; + notused3 = 0; + notused4 = NULL; + + if((ret = __db_noop_read(dbtp->data, &argp)) != 0) + return (ret); + printf("[%lu][%lu]db_noop: rec: %lu txnid %lx prevlsn [%lu][%lu]\n", + (u_long)lsnp->file, + (u_long)lsnp->offset, + (u_long)argp->type, + (u_long)argp->txnid->txnid, + (u_long)argp->prev_lsn.file, + (u_long)argp->prev_lsn.offset); + printf("\n"); + free(argp); + return (0); +} + +/* + * 
PUBLIC: int __db_noop_read __P((void *, __db_noop_args **)); + */ +int +__db_noop_read(recbuf, argpp) + void *recbuf; + __db_noop_args **argpp; +{ + __db_noop_args *argp; + u_int8_t *bp; + + argp = (__db_noop_args *)malloc(sizeof(__db_noop_args) + + sizeof(DB_TXN)); + if (argp == NULL) + return (ENOMEM); + argp->txnid = (DB_TXN *)&argp[1]; + bp = recbuf; + memcpy(&argp->type, bp, sizeof(argp->type)); + bp += sizeof(argp->type); + memcpy(&argp->txnid->txnid, bp, sizeof(argp->txnid->txnid)); + bp += sizeof(argp->txnid->txnid); + memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN)); + bp += sizeof(DB_LSN); + *argpp = argp; + return (0); +} + +/* + * PUBLIC: int __db_init_print __P((DB_ENV *)); + */ +int +__db_init_print(dbenv) + DB_ENV *dbenv; +{ + int ret; + + if ((ret = __db_add_recovery(dbenv, + __db_addrem_print, DB_db_addrem)) != 0) + return (ret); + if ((ret = __db_add_recovery(dbenv, + __db_split_print, DB_db_split)) != 0) + return (ret); + if ((ret = __db_add_recovery(dbenv, + __db_big_print, DB_db_big)) != 0) + return (ret); + if ((ret = __db_add_recovery(dbenv, + __db_ovref_print, DB_db_ovref)) != 0) + return (ret); + if ((ret = __db_add_recovery(dbenv, + __db_relink_print, DB_db_relink)) != 0) + return (ret); + if ((ret = __db_add_recovery(dbenv, + __db_addpage_print, DB_db_addpage)) != 0) + return (ret); + if ((ret = __db_add_recovery(dbenv, + __db_debug_print, DB_db_debug)) != 0) + return (ret); + if ((ret = __db_add_recovery(dbenv, + __db_noop_print, DB_db_noop)) != 0) + return (ret); + return (0); +} + +/* + * PUBLIC: int __db_init_recover __P((DB_ENV *)); + */ +int +__db_init_recover(dbenv) + DB_ENV *dbenv; +{ + int ret; + + if ((ret = __db_add_recovery(dbenv, + __db_addrem_recover, DB_db_addrem)) != 0) + return (ret); + if ((ret = __db_add_recovery(dbenv, + __db_split_recover, DB_db_split)) != 0) + return (ret); + if ((ret = __db_add_recovery(dbenv, + __db_big_recover, DB_db_big)) != 0) + return (ret); + if ((ret = __db_add_recovery(dbenv, + 
__db_ovref_recover, DB_db_ovref)) != 0) + return (ret); + if ((ret = __db_add_recovery(dbenv, + __db_relink_recover, DB_db_relink)) != 0) + return (ret); + if ((ret = __db_add_recovery(dbenv, + __db_addpage_recover, DB_db_addpage)) != 0) + return (ret); + if ((ret = __db_add_recovery(dbenv, + __db_debug_recover, DB_db_debug)) != 0) + return (ret); + if ((ret = __db_add_recovery(dbenv, + __db_noop_recover, DB_db_noop)) != 0) + return (ret); + return (0); +} + diff --git a/db2/db/db_conv.c b/db2/db/db_conv.c new file mode 100644 index 0000000000..39527c6804 --- /dev/null +++ b/db2/db/db_conv.c @@ -0,0 +1,219 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 1997 + * Sleepycat Software. All rights reserved. + */ +/* + * Copyright (c) 1990, 1993, 1994, 1995, 1996 + * Keith Bostic. All rights reserved. + */ +/* + * Copyright (c) 1990, 1993, 1994, 1995 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)db_conv.c 10.4 (Sleepycat) 8/15/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <errno.h>
+#endif
+
+#include "db_int.h"
+#include "db_page.h"
+#include "db_swap.h"
+#include "db_am.h"
+
+static int __db_convert __P((db_pgno_t, void *, int));
+
+/*
+ * __db_pgin, __db_pgout --
+ *	Page-in and page-out conversion entry points.  Both hand the page
+ *	to __db_convert, with the pgin flag set to 1 and 0 respectively.
+ *
+ * PUBLIC: int __db_pgin __P((db_pgno_t, void *));
+ * PUBLIC: int __db_pgout __P((db_pgno_t, void *));
+ */
+int
+__db_pgin(pg, pp)
+	db_pgno_t pg;
+	void *pp;
+{
+	return (__db_convert(pg, pp, 1));
+}
+
+int
+__db_pgout(pg, pp)
+	db_pgno_t pg;
+	void *pp;
+{
+	return (__db_convert(pg, pp, 0));
+}
+
+/*
+ * __db_convert --
+ *	Actually convert a page: byte-swap the page header, the index
+ *	array and the multi-byte fields of each item, in place.
+ *
+ *	The header fields and each inp[] slot are used to locate the items,
+ *	so they must be in host order while the items are visited.  On
+ *	page-in they are therefore swapped before use, on page-out after.
+ *
+ *	Returns 0 on success, EINVAL if the page type is not recognized.
+ */
+static int
+__db_convert(pg, pp, pgin)
+	db_pgno_t pg;			/* Unused, but left for the future. */
+	void *pp;
+	int pgin;
+{
+	BINTERNAL *bi;
+	BKEYDATA *bk;
+	BOVERFLOW *bo;
+	HKEYDATA *hk;
+	PAGE *h;
+	RINTERNAL *ri;
+	db_indx_t i;
+	u_int8_t *p;
+
+	h = pp;
+	if (pgin) {
+		/* Swap the header information before it is used below. */
+		M_32_SWAP(h->lsn.file);
+		M_32_SWAP(h->lsn.offset);
+		M_32_SWAP(h->pgno);
+		M_32_SWAP(h->prev_pgno);
+		M_32_SWAP(h->next_pgno);
+		M_16_SWAP(h->entries);
+		M_16_SWAP(h->hf_offset);
+	}
+
+	switch (h->type) {
+	case P_HASH:
+		for (i = 0; i < NUM_ENT(h); i++) {
+			if (pgin)
+				M_16_SWAP(h->inp[i]);
+
+			hk = GET_HKEYDATA(h, i);
+			switch (hk->type) {
+			case H_KEYDATA:
+				break;
+			case H_DUPLICATE:
+			case H_OFFPAGE:
+				/* Step over the type byte and one more byte. */
+				p = (u_int8_t *)hk + sizeof(u_int8_t);
+				++p;
+				SWAP32(p);			/* tlen */
+				SWAP32(p);			/* pgno */
+				SWAP16(p);			/* offset */
+				SWAP16(p);			/* len */
+				break;
+			}
+
+			if (!pgin)
+				M_16_SWAP(h->inp[i]);
+		}
+		break;
+	case P_LBTREE:
+	case P_LRECNO:
+	case P_DUPLICATE:
+		for (i = 0; i < NUM_ENT(h); i++) {
+			if (pgin)
+				M_16_SWAP(h->inp[i]);
+
+			bk = GET_BKEYDATA(h, i);
+			switch (bk->type) {
+			case B_KEYDATA:
+				M_16_SWAP(bk->len);
+				break;
+			case B_DUPLICATE:
+			case B_OVERFLOW:
+				bo = (BOVERFLOW *)bk;
+				M_32_SWAP(bo->tlen);
+				M_32_SWAP(bo->pgno);
+				break;
+			}
+
+			if (!pgin)
+				M_16_SWAP(h->inp[i]);
+		}
+		break;
+	case P_IBTREE:
+		for (i = 0; i < NUM_ENT(h); i++) {
+			if (pgin)
+				M_16_SWAP(h->inp[i]);
+
+			bi = GET_BINTERNAL(h, i);
+			switch (bi->type) {
+			case B_KEYDATA:
+				M_16_SWAP(bi->len);
+				M_32_SWAP(bi->pgno);
+				M_32_SWAP(bi->nrecs);
+				break;
+			case B_DUPLICATE:
+			case B_OVERFLOW:
+				bo = (BOVERFLOW *)bi;
+				M_32_SWAP(bo->tlen);
+				M_32_SWAP(bo->pgno);
+				break;
+			}
+
+			if (!pgin)
+				M_16_SWAP(h->inp[i]);
+		}
+		break;
+	case P_IRECNO:
+		for (i = 0; i < NUM_ENT(h); i++) {
+			if (pgin)
+				M_16_SWAP(h->inp[i]);
+
+			ri = GET_RINTERNAL(h, i);
+			M_32_SWAP(ri->pgno);
+			M_32_SWAP(ri->nrecs);
+
+			if (!pgin)
+				M_16_SWAP(h->inp[i]);
+		}
+		/*
+		 * Terminate the case explicitly rather than relying on a
+		 * fall through into the "nothing to do" cases below.
+		 */
+		break;
+	case P_OVERFLOW:
+	case P_INVALID:
+		/* Nothing to do. */
+		break;
+	default:
+		return (EINVAL);
+	}
+
+	if (!pgin) {
+		/* Swap the header information. */
+		M_32_SWAP(h->lsn.file);
+		M_32_SWAP(h->lsn.offset);
+		M_32_SWAP(h->pgno);
+		M_32_SWAP(h->prev_pgno);
+		M_32_SWAP(h->next_pgno);
+		M_16_SWAP(h->entries);
+		M_16_SWAP(h->hf_offset);
+	}
+	return (0);
+}
diff --git a/db2/db/db_dispatch.c b/db2/db/db_dispatch.c
new file mode 100644
index 0000000000..3d7b162d75
--- /dev/null
+++ b/db2/db/db_dispatch.c
@@ -0,0 +1,270 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *	Sleepycat Software. All rights reserved.
+ */
+/*
+ * Copyright (c) 1995, 1996
+ *	The President and Fellows of Harvard University. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Margo Seltzer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)db_dispatch.c 10.5 (Sleepycat) 7/2/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#endif
+
+#include "db_int.h"
+#include "db_page.h"
+#include "db_dispatch.h"
+#include "db_am.h"
+#include "common_ext.h"
+
+/*
+ * Data structures to manage the DB dispatch table.  The dispatch table
+ * is a dynamically allocated array of pointers to dispatch functions,
+ * indexed by log record type.  The dispatch_size is the number of entries
+ * possible in the current dispatch table; unregistered entries are NULL.
+ */
+static int (**dispatch_table) __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+static u_int32_t dispatch_size = 0;
+
+/*
+ * __db_dispatch --
+ *
+ * This is the transaction dispatch function used by the db access methods.
+ * It is designed to handle the record format used by all the access
+ * methods (the one automatically generated by the db_{h,log,read}.sh
+ * scripts in the tools directory).  An application using a different
+ * recovery paradigm will supply a different dispatch function to txn_open.
+ *
+ * The record type and transaction id are read from the first two 32-bit
+ * words of the record.  Returns the result of the registered routine when
+ * one is called, 0 when the record is skipped in the current pass, and
+ * EINVAL if the record is too short to hold those two words or if a
+ * routine has to be called but none is registered for the record type.
+ *
+ * PUBLIC: int __db_dispatch __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+ */
+int
+__db_dispatch(logp, db, lsnp, redo, info)
+	DB_LOG *logp;		/* The log file. */
+	DBT *db;		/* The log record upon which to dispatch. */
+	DB_LSN *lsnp;		/* The lsn of the record being dispatched. */
+	int redo;		/* Redo this op (or undo it). */
+	void *info;
+{
+	int (*func) __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+	u_int32_t rectype, txnid;
+
+	/* Never read past the end of a truncated record. */
+	if (db->size < sizeof(rectype) + sizeof(txnid))
+		return (EINVAL);
+
+	memcpy(&rectype, db->data, sizeof(rectype));
+	memcpy(&txnid, (u_int8_t *)db->data + sizeof(rectype), sizeof(txnid));
+
+	/*
+	 * Work out which mode, if any, the registered routine has to be
+	 * called in; every path that falls out of the switch dispatches.
+	 */
+	switch (redo) {
+	case TXN_REDO:
+	case TXN_UNDO:
+		break;
+	case TXN_OPENFILES:
+		if (rectype >= DB_txn_BEGIN)
+			return (0);
+		break;
+	case TXN_BACKWARD_ROLL:
+		/*
+		 * Running full recovery in the backward pass.  If we've
+		 * seen this txnid before and added it to our commit list,
+		 * then we do nothing during this pass.  If we've never
+		 * seen it, then we call the appropriate recovery routine
+		 * in "abort mode".
+		 */
+		if (__db_txnlist_find(info, txnid) != DB_NOTFOUND)
+			return (0);
+		redo = TXN_UNDO;
+		break;
+	case TXN_FORWARD_ROLL:
+		/*
+		 * In the forward pass, if we haven't seen the transaction,
+		 * do nothing, else recover it.
+		 */
+		if (__db_txnlist_find(info, txnid) == DB_NOTFOUND)
+			return (0);
+		redo = TXN_REDO;
+		break;
+	default:
+		abort();
+	}
+
+	/*
+	 * The record type comes straight from the log: refuse to index
+	 * past the table or to call through an unregistered (NULL) slot.
+	 */
+	func = NULL;
+	if (dispatch_table != NULL && rectype < dispatch_size)
+		func = dispatch_table[rectype];
+	if (func == NULL)
+		return (EINVAL);
+
+	return (func(logp, db, lsnp, redo, info));
+}
+
+/*
+ * __db_add_recovery --
+ *	Register func as the dispatch routine for log record type ndx,
+ *	growing the dispatch table as needed.  Returns DB_REGISTERED if a
+ *	different routine is already registered for ndx, EINVAL if ndx is
+ *	too large for the table size to be computed, ENOMEM if the table
+ *	cannot be grown (the existing table is left intact), 0 on success.
+ *
+ * PUBLIC: int __db_add_recovery __P((DB_ENV *,
+ * PUBLIC:    int (*)(DB_LOG *, DBT *, DB_LSN *, int, void *), u_int32_t));
+ */
+int
+__db_add_recovery(dbenv, func, ndx)
+	DB_ENV *dbenv;
+	int (*func) __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+	u_int32_t ndx;
+{
+	int (**table) __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+	u_int32_t i, nsize;
+
+	/* Check if function is already registered. */
+	if (dispatch_table != NULL && ndx < dispatch_size &&
+	    dispatch_table[ndx] != NULL && dispatch_table[ndx] != func)
+		return (DB_REGISTERED);
+
+	/* Check if we have to grow the table. */
+	if (ndx >= dispatch_size) {
+		/*
+		 * Grow to the smallest multiple of DB_user_BEGIN that
+		 * holds ndx; a single fixed-size step is not enough when
+		 * ndx lies further out than that.
+		 */
+		nsize = (ndx / DB_user_BEGIN + 1) * DB_user_BEGIN;
+		if (nsize <= ndx)		/* Arithmetic wrapped. */
+			return (EINVAL);
+
+		/*
+		 * Keep the old pointer until the allocation has succeeded,
+		 * so that a failure neither leaks the table nor leaves a
+		 * NULL table behind a non-zero dispatch_size.
+		 */
+		if (dispatch_table == NULL)
+			table = (int (**)
+			    __P((DB_LOG *, DBT *, DB_LSN *, int, void *)))
+			    malloc(nsize * sizeof(dispatch_table[0]));
+		else
+			table = (int (**)
+			    __P((DB_LOG *, DBT *, DB_LSN *, int, void *)))
+			    realloc(dispatch_table,
+			    nsize * sizeof(dispatch_table[0]));
+		if (table == NULL) {
+			__db_err(dbenv, "%s", strerror(ENOMEM));
+			return (ENOMEM);
+		}
+		dispatch_table = table;
+
+		/* The new slots start out unregistered. */
+		for (i = dispatch_size; i < nsize; ++i)
+			dispatch_table[i] = NULL;
+		dispatch_size = nsize;
+	}
+
+	dispatch_table[ndx] = func;
+	return (0);
+}
+
+/*
+ * __db_txnlist_init --
+ *	Initialize transaction linked list.  On success the new, empty
+ *	list head is stored through retp, which is treated as a pointer to
+ *	a pointer.  Returns ENOMEM if the head cannot be allocated.
+ *
+ * PUBLIC: int __db_txnlist_init __P((void *));
+ */
+int
+__db_txnlist_init(retp)
+	void *retp;
+{
+	__db_txnhead *headp;
+
+	if ((headp =
+	    (struct __db_txnhead *)malloc(sizeof(struct __db_txnhead))) == NULL)
+		return (ENOMEM);
+
+	LIST_INIT(&headp->head);
+	headp->maxid = 0;
+
+	*(void **)retp = headp;
+	return (0);
+}
+
+/*
+ * __db_txnlist_add --
+ *	Add an element to our transaction linked list, and raise the
+ *	list's maxid if txnid is larger.  Returns ENOMEM if the element
+ *	cannot be allocated, 0 otherwise.
+ *
+ * PUBLIC: int __db_txnlist_add __P((void *, u_int32_t));
+ */
+int
+__db_txnlist_add(listp, txnid)
+	void *listp;
+	u_int32_t txnid;
+{
+	__db_txnhead *hp;
+	__db_txnlist *elp;
+
+	if ((elp = (__db_txnlist *)malloc(sizeof(__db_txnlist))) == NULL)
+		return (ENOMEM);
+
+	elp->txnid = txnid;
+	hp = (struct __db_txnhead *)listp;
+	LIST_INSERT_HEAD(&hp->head, elp, links);
+	if (txnid > hp->maxid)
+		hp->maxid = txnid;
+
+	return (0);
+}
+
+/*
+ * __db_txnlist_find --
+ *	Checks to see if txnid is in the txnid list, returns 0 if found,
+ *	DB_NOTFOUND if not found.
+ *
+ * PUBLIC: int __db_txnlist_find __P((void *, u_int32_t));
+ */
+int
+__db_txnlist_find(listp, txnid)
+	void *listp;
+	u_int32_t txnid;
+{
+	__db_txnlist *p;
+	__db_txnhead *hp;
+
+	/* No list at all: nothing can be on it. */
+	if ((hp = (struct __db_txnhead *)listp) == NULL)
+		return (DB_NOTFOUND);
+
+	/*
+	 * An id above the largest one recorded cannot be on the list.
+	 * Note the side effect: the lookup raises maxid to that id.
+	 */
+	if (hp->maxid < txnid) {
+		hp->maxid = txnid;
+		return (DB_NOTFOUND);
+	}
+
+	/* A match is reported as 0, a miss as DB_NOTFOUND. */
+	for (p = hp->head.lh_first; p != NULL; p = p->links.le_next)
+		if (p->txnid == txnid)
+			return (0);
+
+	return (DB_NOTFOUND);
+}
+
+#ifdef DEBUG
+/*
+ * __db_txnlist_print --
+ *	Debugging aid: print the list's maxid and every txnid on it.
+ */
+void
+__db_txnlist_print(listp)
+	void *listp;
+{
+	__db_txnlist *p;
+	__db_txnhead *hp;
+
+	/* Tolerate a missing list, as __db_txnlist_find does. */
+	if ((hp = (struct __db_txnhead *)listp) == NULL)
+		return;
+
+	printf("Maxid: %lu\n", (u_long)hp->maxid);
+	for (p = hp->head.lh_first; p != NULL; p = p->links.le_next)
+		printf("TXNID: %lu\n", (u_long)p->txnid);
+}
+#endif
diff --git a/db2/db/db_dup.c b/db2/db/db_dup.c
new file mode 100644
index 0000000000..8d364d518e
--- /dev/null
+++ b/db2/db/db_dup.c
@@ -0,0 +1,680 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *	Sleepycat Software. All rights reserved.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)db_dup.c 10.8 (Sleepycat) 7/20/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#endif
+
+#include "db_int.h"
+#include "db_page.h"
+#include "db_swap.h"
+#include "btree.h"
+#include "db_am.h"
+#include "common_ext.h"
+
+static int __db_addpage __P((DB *,
+    PAGE **, db_indx_t *, int (*)(DB *, u_int32_t, PAGE **)));
+static int __db_dsplit __P((DB *,
+    PAGE **, db_indx_t *, u_int32_t, int (*)(DB *, u_int32_t, PAGE **)));
+
+/*
+ * __db_dput --
+ *	Put a duplicate item onto a duplicate page at the given index.
+ *
+ * PUBLIC: int __db_dput __P((DB *,
+ * PUBLIC:    DBT *, PAGE **, db_indx_t *, int (*)(DB *, u_int32_t, PAGE **)));
+ */
+int
+__db_dput(dbp, dbt, pp, indxp, newfunc)
+	DB *dbp;
+	DBT *dbt;
+	PAGE **pp;
+	db_indx_t *indxp;
+	int (*newfunc) __P((DB *, u_int32_t, PAGE **));
+{
+	BOVERFLOW bo;
+	DBT *data_dbtp, hdr_dbt, *hdr_dbtp;
+	PAGE *pagep;
+	db_indx_t size, isize;
+	db_pgno_t pgno;
+	int ret;
+
+	/*
+	 * We need some access method independent threshold for when we put
+	 * a duplicate item onto an overflow page: anything larger than a
+	 * quarter of a page goes off-page.  For an integral size, the test
+	 * "size > pgsize / 4" with integer division is exactly the test
+	 * "size > 0.25 * pgsize", without the floating point arithmetic.
+	 */
+	if (dbt->size > dbp->pgsize / 4) {
+		if ((ret = __db_poff(dbp, dbt, &pgno, newfunc)) != 0)
+			return (ret);
+		/* Only the overflow reference goes onto the page. */
+		bo.deleted = 0;
+		bo.type = B_OVERFLOW;
+		bo.tlen = dbt->size;
+		bo.pgno = pgno;
+		hdr_dbt.data = &bo;
+		hdr_dbt.size = isize = BOVERFLOW_SIZE;
+		hdr_dbtp = &hdr_dbt;
+		size = BOVERFLOW_PSIZE;
+		data_dbtp = NULL;
+	} else {
+		size = BKEYDATA_PSIZE(dbt->size);
+		isize = BKEYDATA_SIZE(dbt->size);
+		hdr_dbtp = NULL;
+		data_dbtp = dbt;
+	}
+
+	/*
+	 * If the item doesn't fit, either chain a new page (when appending
+	 * at the very end of the chain) or split the current one.  Both
+	 * update *pp and *indxp to the place where the insert must happen.
+	 */
+	pagep = *pp;
+	if (size > P_FREESPACE(pagep)) {
+		if (*indxp == NUM_ENT(*pp) && NEXT_PGNO(*pp) == PGNO_INVALID)
+			ret = __db_addpage(dbp, pp, indxp, newfunc);
+		else
+			ret = __db_dsplit(dbp, pp, indxp, isize, newfunc);
+		if (ret != 0)
+			/* XXX: Pages not returned to free list. */
+			return (ret);
+		pagep = *pp;
+	}
+
+	/*
+	 * Now, pagep references the page on which to insert and indx is
+	 * the location to insert.
+	 */
+	if ((ret = __db_pitem(dbp,
+	    pagep, (u_int32_t)*indxp, isize, hdr_dbtp, data_dbtp)) != 0)
+		return (ret);
+
+	(void)memp_fset(dbp->mpf, pagep, DB_MPOOL_DIRTY);
+	return (0);
+}
+
+/*
+ * __db_drem --
+ *	Remove a duplicate at the given index on the given page.  If the
+ *	item is an overflow reference, its overflow chain is deleted too.
+ *
+ * PUBLIC: int __db_drem __P((DB *,
+ * PUBLIC:    PAGE **, u_int32_t, int (*)(DB *, PAGE *)));
+ */
+int
+__db_drem(dbp, pp, indx, freefunc)
+	DB *dbp;
+	PAGE **pp;
+	u_int32_t indx;
+	int (*freefunc) __P((DB *, PAGE *));
+{
+	PAGE *pagep;
+	int ret;
+
+	pagep = *pp;
+
+	/* Check if we are freeing a big item. */
+	if (GET_BKEYDATA(pagep, indx)->type == B_OVERFLOW) {
+		if ((ret = __db_doff(dbp,
+		    GET_BOVERFLOW(pagep, indx)->pgno, freefunc)) != 0)
+			return (ret);
+		ret = __db_ditem(dbp, pagep, indx, BOVERFLOW_SIZE);
+	} else
+		ret = __db_ditem(dbp, pagep, indx,
+		    BKEYDATA_SIZE(GET_BKEYDATA(pagep, indx)->len));
+	if (ret != 0)
+		return (ret);
+
+	if (NUM_ENT(pagep) == 0) {
+		/*
+		 * If the page is emptied, then the page is freed and the pp
+		 * parameter is set to reference the next, locked page in the
+		 * duplicate chain, if one exists.  If there was no such page,
+		 * then it is set to NULL.
+		 *
+		 * !!!
+		 * __db_relink will set the dirty bit for us.
+		 */
+		if ((ret = __db_relink(dbp, pagep, pp, 0)) != 0)
+			return (ret);
+		if ((ret = freefunc(dbp, pagep)) != 0)
+			return (ret);
+	} else
+		(void)memp_fset(dbp->mpf, pagep, DB_MPOOL_DIRTY);
+
+	return (0);
+}
+
+/*
+ * __db_dend --
+ *	Find the last page in a set of offpage duplicates.
+ *
+ * PUBLIC: int __db_dend __P((DB *, db_pgno_t, PAGE **));
+ */
+int
+__db_dend(dbp, pgno, pagep)
+	DB *dbp;
+	db_pgno_t pgno;
+	PAGE **pagep;
+{
+	PAGE *h;
+	int ret;
+
+	/*
+	 * This implements DB_KEYLAST.  The last page is returned in pagep,
+	 * still held by this function's memp_fget; pgno should be the page
+	 * number of the first page of the duplicate chain.
+	 */
+	for (;;) {
+		if ((ret = memp_fget(dbp->mpf, &pgno, 0, &h)) != 0) {
+			(void)__db_pgerr(dbp, pgno);
+			return (ret);
+		}
+		if ((pgno = NEXT_PGNO(h)) == PGNO_INVALID)
+			break;
+		/* Not the last page: give it back and move on. */
+		(void)memp_fput(dbp->mpf, h, 0);
+	}
+
+	*pagep = h;
+	return (0);
+}
+
+/*
+ * __db_dsplit --
+ *	Split a page of duplicates, calculating the split point based
+ *	on an element of size "size" being added at "*indxp".
+ *	On entry hp contains a pointer to the page-pointer of the original
+ *	page. On exit, it returns a pointer to the page containing "*indxp"
+ *	and "indxp" has been modified to reflect the index on the new page
+ *	where the element should be added. The function returns with
+ *	the page on which the insert should happen, not yet put.
+ */
+static int
+__db_dsplit(dbp, hp, indxp, size, newfunc)
+	DB *dbp;
+	PAGE **hp;
+	db_indx_t *indxp;
+	u_int32_t size;
+	int (*newfunc) __P((DB *, u_int32_t, PAGE **));
+{
+	PAGE *h, *np, *tp;
+	BKEYDATA *bk;
+	DBT page_dbt;
+	db_indx_t indx, nindex, oindex, sum;
+	db_indx_t halfbytes, i, lastsum;
+	int did_indx, ret, s;
+
+	h = *hp;
+	indx = *indxp;
+
+	/* Create a temporary page to do compaction onto. */
+	if ((tp = (PAGE *)malloc(dbp->pgsize)) == NULL)
+		return (ENOMEM);
+#ifdef DEBUG
+	memset(tp, 0xff, dbp->pgsize);
+#endif
+	/* Create new page for the split. */
+	if ((ret = newfunc(dbp, P_DUPLICATE, &np)) != 0) {
+		FREE(tp, dbp->pgsize);
+		return (ret);
+	}
+
+	/* The new page follows h; tp stands in for the compacted h. */
+	P_INIT(np, dbp->pgsize, PGNO(np), PGNO(h), NEXT_PGNO(h), 0,
+	    P_DUPLICATE);
+	P_INIT(tp, dbp->pgsize, PGNO(h), PREV_PGNO(h), PGNO(np), 0,
+	    P_DUPLICATE);
+
+	/* Figure out the split point */
+	halfbytes = (dbp->pgsize - HOFFSET(h)) / 2;
+	did_indx = 0;
+	for (sum = 0, lastsum = 0, i = 0; i < NUM_ENT(h); i++) {
+		if (i == indx) {
+			sum += size;
+			if (lastsum < halfbytes && sum >= halfbytes) {
+				/*
+				 * We've crossed the halfway point, and the
+				 * return values are settled right here.  Say
+				 * so, otherwise the fix-up after the loop
+				 * overwrites them; when the item stays on
+				 * the old page it would compute the index
+				 * "indx - i - 1", which is -1.
+				 */
+				if ((db_indx_t)(halfbytes - lastsum) <
+				    (db_indx_t)(sum - halfbytes)) {
+					*hp = np;
+					*indxp = 0;
+					i--;
+				} else
+					*indxp = i;
+				did_indx = 1;
+				break;
+			}
+			*indxp = i;
+			lastsum = sum;
+			did_indx = 1;
+		}
+		if (GET_BKEYDATA(h, i)->type == B_KEYDATA)
+			sum += BKEYDATA_SIZE(GET_BKEYDATA(h, i)->len);
+		else
+			sum += BOVERFLOW_SIZE;
+
+		if (lastsum < halfbytes && sum >= halfbytes) {
+			/* We've crossed the halfway point. */
+			if ((db_indx_t)(halfbytes - lastsum) <
+			    (db_indx_t)(sum - halfbytes))
+				i--;
+			break;
+		}
+	}
+
+	/*
+	 * Check if we have set the return values of the index pointer and
+	 * page pointer.  If not, the insertion point lies beyond the split
+	 * point, i.e., on the new page.
+	 */
+	if (!did_indx) {
+		*hp = np;
+		*indxp = indx - i - 1;
+	}
+
+	if (DB_LOGGING(dbp)) {
+		page_dbt.size = dbp->pgsize;
+		page_dbt.data = h;
+		if ((ret = __db_split_log(dbp->dbenv->lg_info,
+		    dbp->txn, &LSN(h), 0, DB_SPLITOLD, dbp->log_fileid,
+		    PGNO(h), &page_dbt, &LSN(h))) != 0) {
+			FREE(tp, dbp->pgsize);
+			return (ret);
+		}
+		LSN(tp) = LSN(h);
+	}
+
+	/*
+	 * If it's a btree, adjust the cursors.
+	 *
+	 * i is the index of the last element to stay on the page.
+	 */
+	if (dbp->type == DB_BTREE || dbp->type == DB_RECNO)
+		__bam_ca_split(dbp, PGNO(h), PGNO(h), PGNO(np), i + 1, 0);
+
+	/* Move everything after the split point onto the new page. */
+	for (nindex = 0, oindex = i + 1; oindex < NUM_ENT(h); oindex++) {
+		bk = GET_BKEYDATA(h, oindex);
+		if (bk->type == B_KEYDATA)
+			s = BKEYDATA_SIZE(bk->len);
+		else
+			s = BOVERFLOW_SIZE;
+
+		np->inp[nindex++] = HOFFSET(np) -= s;
+		memcpy((u_int8_t *)np + HOFFSET(np), bk, s);
+		NUM_ENT(np)++;
+	}
+
+	/*
+	 * Now do data compaction by copying the remaining stuff onto the
+	 * temporary page and then copying it back to the real page.
+	 */
+	for (nindex = 0, oindex = 0; oindex <= i; oindex++) {
+		bk = GET_BKEYDATA(h, oindex);
+		if (bk->type == B_KEYDATA)
+			s = BKEYDATA_SIZE(bk->len);
+		else
+			s = BOVERFLOW_SIZE;
+
+		tp->inp[nindex++] = HOFFSET(tp) -= s;
+		memcpy((u_int8_t *)tp + HOFFSET(tp), bk, s);
+		NUM_ENT(tp)++;
+	}
+
+	/*
+	 * This page (the temporary) should be only half full, so we do two
+	 * memcpy's, one for the top of the page and one for the bottom of
+	 * the page.  This way we avoid copying the middle which should be
+	 * about half a page.
+	 */
+	memcpy(h, tp, LOFFSET(tp));
+	memcpy((u_int8_t *)h + HOFFSET(tp),
+	    (u_int8_t *)tp + HOFFSET(tp), dbp->pgsize - HOFFSET(tp));
+	FREE(tp, dbp->pgsize);
+
+	if (DB_LOGGING(dbp)) {
+		page_dbt.size = dbp->pgsize;
+		page_dbt.data = h;
+		if ((ret = __db_split_log(dbp->dbenv->lg_info,
+		    dbp->txn, &LSN(h), 0, DB_SPLITNEW, dbp->log_fileid,
+		    PGNO(h), &page_dbt, &LSN(h))) != 0)
+			return (ret);
+
+		page_dbt.size = dbp->pgsize;
+		page_dbt.data = np;
+		if ((ret = __db_split_log(dbp->dbenv->lg_info,
+		    dbp->txn, &LSN(np), 0, DB_SPLITNEW, dbp->log_fileid,
+		    PGNO(np), &page_dbt, &LSN(np))) != 0)
+			return (ret);
+	}
+
+	/*
+	 * Figure out if the location we're interested in is on the new
+	 * page, and if so, reset the callers' pointer.  Push the other
+	 * page back to the store.
+	 */
+	if (*hp == h)
+		ret = memp_fput(dbp->mpf, np, DB_MPOOL_DIRTY);
+	else
+		ret = memp_fput(dbp->mpf, h, DB_MPOOL_DIRTY);
+
+	return (ret);
+}
+
+/*
+ * __db_ditem --
+ *	Remove an item from a page.  The removal is logged first when
+ *	logging is configured; nbytes is the on-page size of the item.
+ *	This routine does not mark the page dirty itself.
+ *
+ * PUBLIC: int __db_ditem __P((DB *, PAGE *, int, u_int32_t));
+ */
+int
+__db_ditem(dbp, pagep, indx, nbytes)
+	DB *dbp;
+	PAGE *pagep;
+	int indx;
+	u_int32_t nbytes;
+{
+	DBT ldbt;
+	db_indx_t cnt, offset;
+	int ret;
+	u_int8_t *from;
+
+	if (DB_LOGGING(dbp)) {
+		ldbt.data = P_ENTRY(pagep, indx);
+		ldbt.size = nbytes;
+		if ((ret = __db_addrem_log(dbp->dbenv->lg_info, dbp->txn,
+		    &LSN(pagep), 0, DB_REM_DUP, dbp->log_fileid, PGNO(pagep),
+		    (u_int32_t)indx, nbytes, &ldbt, NULL, &LSN(pagep))) != 0)
+			return (ret);
+	}
+
+	/*
+	 * If there's only a single item on the page, we don't have to
+	 * work hard.
+	 */
+	if (NUM_ENT(pagep) == 1) {
+		NUM_ENT(pagep) = 0;
+		HOFFSET(pagep) = dbp->pgsize;
+		return (0);
+	}
+
+	/*
+	 * Pack the remaining key/data items at the end of the page.  Use
+	 * memmove(3), the regions may overlap.
+	 */
+	from = (u_int8_t *)pagep + HOFFSET(pagep);
+	memmove(from + nbytes, from, pagep->inp[indx] - HOFFSET(pagep));
+	HOFFSET(pagep) += nbytes;
+
+	/* Adjust the indices' offsets. */
+	offset = pagep->inp[indx];
+	for (cnt = 0; cnt < NUM_ENT(pagep); ++cnt)
+		if (pagep->inp[cnt] < offset)
+			pagep->inp[cnt] += nbytes;
+
+	/* Shift the indices down. */
+	--NUM_ENT(pagep);
+	if (indx != NUM_ENT(pagep))
+		memmove(&pagep->inp[indx], &pagep->inp[indx + 1],
+		    sizeof(db_indx_t) * (NUM_ENT(pagep) - indx));
+
+	/* If it's a btree, adjust the cursors. */
+	if (dbp->type == DB_BTREE || dbp->type == DB_RECNO)
+		__bam_ca_di(dbp, PGNO(pagep), indx, -1);
+
+	return (0);
+}
+
+/*
+ * __db_pitem --
+ *	Put an item on a page.
+ *
+ * PUBLIC: int __db_pitem
+ * PUBLIC:     __P((DB *, PAGE *, u_int32_t, u_int32_t, DBT *, DBT *));
+ */
+int
+__db_pitem(dbp, pagep, indx, nbytes, hdr, data)
+	DB *dbp;
+	PAGE *pagep;
+	u_int32_t indx;
+	u_int32_t nbytes;
+	DBT *hdr, *data;
+{
+	BKEYDATA bk;
+	DBT thdr;
+	int ret;
+	u_int8_t *p;
+
+	/*
+	 * Put a single item onto a page.  The logic figuring out where to
+	 * insert and whether it fits is handled in the caller.  All we do
+	 * here is manage the page shuffling.  We cheat a little bit in that
+	 * we don't want to copy the dbt on a normal put twice.  If hdr is
+	 * NULL, we create a BKEYDATA structure on the page, otherwise, just
+	 * copy the caller's information onto the page.
+	 *
+	 * This routine is also used to put entries onto the page where the
+	 * entry is pre-built, e.g., during recovery.  In this case, the hdr
+	 * will point to the entry, and the data argument will be NULL.
+	 *
+	 * !!!
+	 * There's a tremendous potential for off-by-one errors here, since
+	 * the passed in header sizes must be adjusted for the structure's
+	 * placeholder for the trailing variable-length data field.
+	 */
+	if (DB_LOGGING(dbp))
+		if ((ret = __db_addrem_log(dbp->dbenv->lg_info, dbp->txn,
+		    &LSN(pagep), 0, DB_ADD_DUP, dbp->log_fileid, PGNO(pagep),
+		    (u_int32_t)indx, nbytes, hdr, data, &LSN(pagep))) != 0)
+			return (ret);
+
+	if (hdr == NULL) {
+		bk.deleted = 0;
+		bk.type = B_KEYDATA;
+		bk.len = data == NULL ? 0 : data->size;
+
+		thdr.data = &bk;
+		thdr.size = SSZA(BKEYDATA, data);
+		hdr = &thdr;
+	}
+
+	/* Adjust the index table, then put the item on the page. */
+	if (indx != NUM_ENT(pagep))
+		memmove(&pagep->inp[indx + 1], &pagep->inp[indx],
+		    sizeof(db_indx_t) * (NUM_ENT(pagep) - indx));
+	HOFFSET(pagep) -= nbytes;
+	pagep->inp[indx] = HOFFSET(pagep);
+	++NUM_ENT(pagep);
+
+	p = P_ENTRY(pagep, indx);
+	memcpy(p, hdr->data, hdr->size);
+	if (data != NULL)
+		memcpy(p + hdr->size, data->data, data->size);
+
+	/* If it's a btree, adjust the cursors. */
+	if (dbp->type == DB_BTREE || dbp->type == DB_RECNO)
+		__bam_ca_di(dbp, PGNO(pagep), indx, 1);
+
+	return (0);
+}
+
+/*
+ * __db_relink --
+ *	Relink around a deleted page: make the neighbours of pagep point
+ *	at each other.  If needlock is set the neighbours are write-locked
+ *	while they are changed.  If new_next is not NULL it is set to the
+ *	(still held) page following pagep, or to NULL if there is none.
+ *
+ * PUBLIC: int __db_relink __P((DB *, PAGE *, PAGE **, int));
+ */
+int
+__db_relink(dbp, pagep, new_next, needlock)
+	DB *dbp;
+	PAGE *pagep, **new_next;
+	int needlock;
+{
+	PAGE *np, *pp;
+	DB_LOCK npl, ppl;
+	DB_LSN *nlsnp, *plsnp;
+	int ret;
+
+	ret = 0;
+	np = pp = NULL;
+	npl = ppl = LOCK_INVALID;
+	nlsnp = plsnp = NULL;
+
+	/* Retrieve and lock the two pages. */
+	if (pagep->next_pgno != PGNO_INVALID) {
+		if (needlock && (ret = __bam_lget(dbp,
+		    0, pagep->next_pgno, DB_LOCK_WRITE, &npl)) != 0)
+			goto err;
+		if ((ret = memp_fget(dbp->mpf,
+		    &pagep->next_pgno, 0, &np)) != 0) {
+			(void)__db_pgerr(dbp, pagep->next_pgno);
+			goto err;
+		}
+		nlsnp = &np->lsn;
+	}
+	if (pagep->prev_pgno != PGNO_INVALID) {
+		if (needlock && (ret = __bam_lget(dbp,
+		    0, pagep->prev_pgno, DB_LOCK_WRITE, &ppl)) != 0)
+			goto err;
+		if ((ret = memp_fget(dbp->mpf,
+		    &pagep->prev_pgno, 0, &pp)) != 0) {
+			/* Report the page that actually failed. */
+			(void)__db_pgerr(dbp, pagep->prev_pgno);
+			goto err;
+		}
+		plsnp = &pp->lsn;
+	}
+
+	/* Log the change. */
+	if (DB_LOGGING(dbp)) {
+		if ((ret = __db_relink_log(dbp->dbenv->lg_info, dbp->txn,
+		    &pagep->lsn, 0, dbp->log_fileid, pagep->pgno, &pagep->lsn,
+		    pagep->prev_pgno, plsnp, pagep->next_pgno, nlsnp)) != 0)
+			goto err;
+		if (np != NULL)
+			np->lsn = pagep->lsn;
+		if (pp != NULL)
+			pp->lsn = pagep->lsn;
+	}
+
+	/*
+	 * Modify and release the two pages.
+	 *
+	 * !!!
+	 * The parameter new_next gets set to the page following the page we
+	 * are removing.  If there is no following page, then new_next gets
+	 * set to NULL.
+	 */
+	if (np != NULL) {
+		np->prev_pgno = pagep->prev_pgno;
+		if (new_next == NULL)
+			ret = memp_fput(dbp->mpf, np, DB_MPOOL_DIRTY);
+		else {
+			*new_next = np;
+			ret = memp_fset(dbp->mpf, np, DB_MPOOL_DIRTY);
+		}
+		if (ret != 0)
+			goto err;
+
+		/*
+		 * Forget what has been given back, so that a failure
+		 * further down can't release the page or the lock a
+		 * second time in the error path.
+		 */
+		if (new_next == NULL)
+			np = NULL;
+		if (needlock) {
+			(void)__bam_lput(dbp, npl);
+			npl = LOCK_INVALID;
+		}
+	} else if (new_next != NULL)
+		*new_next = NULL;
+
+	if (pp != NULL) {
+		pp->next_pgno = pagep->next_pgno;
+		if ((ret = memp_fput(dbp->mpf, pp, DB_MPOOL_DIRTY)) != 0)
+			goto err;
+		if (needlock)
+			(void)__bam_lput(dbp, ppl);
+	}
+	return (0);
+
+err:	if (np != NULL)
+		(void)memp_fput(dbp->mpf, np, 0);
+	if (needlock && npl != LOCK_INVALID)
+		(void)__bam_lput(dbp, npl);
+	if (pp != NULL)
+		(void)memp_fput(dbp->mpf, pp, 0);
+	if (needlock && ppl != LOCK_INVALID)
+		(void)__bam_lput(dbp, ppl);
+	return (ret);
+}
+
+/*
+ * __db_ddup --
+ *	Delete an offpage chain of duplicates.
+ *
+ * PUBLIC: int __db_ddup __P((DB *, db_pgno_t, int (*)(DB *, PAGE *)));
+ */
+int
+__db_ddup(dbp, pgno, freefunc)
+	DB *dbp;
+	db_pgno_t pgno;
+	int (*freefunc) __P((DB *, PAGE *));
+{
+	PAGE *pagep;
+	DBT tmp_dbt;
+	int ret;
+
+	/* Walk the chain from pgno, logging and then freeing each page. */
+	do {
+		if ((ret = memp_fget(dbp->mpf, &pgno, 0, &pagep)) != 0) {
+			(void)__db_pgerr(dbp, pgno);
+			return (ret);
+		}
+
+		if (DB_LOGGING(dbp)) {
+			tmp_dbt.data = pagep;
+			tmp_dbt.size = dbp->pgsize;
+			if ((ret = __db_split_log(dbp->dbenv->lg_info, dbp->txn,
+			    &LSN(pagep), 0, DB_SPLITOLD, dbp->log_fileid,
+			    PGNO(pagep), &tmp_dbt, &LSN(pagep))) != 0) {
+				/* Don't leave the page pinned behind us. */
+				(void)memp_fput(dbp->mpf, pagep, 0);
+				return (ret);
+			}
+		}
+
+		/* Pick up the link before the page goes away. */
+		pgno = pagep->next_pgno;
+		if ((ret = freefunc(dbp, pagep)) != 0)
+			return (ret);
+	} while (pgno != PGNO_INVALID);
+
+	return (0);
+}
+
+/*
+ * __db_addpage --
+ *	Create a new page and link it onto the next_pgno field of the
+ *	current page.  On success the current page has been put, *hp
+ *	references the new page and *indxp is 0.
+ */
+static int
+__db_addpage(dbp, hp, indxp, newfunc)
+	DB *dbp;
+	PAGE **hp;
+	db_indx_t *indxp;
+	int (*newfunc) __P((DB *, u_int32_t, PAGE **));
+{
+	PAGE *newpage;
+	int ret;
+
+	if ((ret = newfunc(dbp, P_DUPLICATE, &newpage)) != 0)
+		return (ret);
+
+	if (DB_LOGGING(dbp)) {
+		if ((ret = __db_addpage_log(dbp->dbenv->lg_info,
+		    dbp->txn, &LSN(*hp), 0, dbp->log_fileid,
+		    PGNO(*hp), &LSN(*hp), PGNO(newpage), &LSN(newpage))) != 0) {
+			/*
+			 * The caller never sees the new page, so give up
+			 * our reference to it rather than leaving it pinned.
+			 * XXX: The page is not returned to the free list.
+			 */
+			(void)memp_fput(dbp->mpf, newpage, 0);
+			return (ret);
+		}
+		LSN(newpage) = LSN(*hp);
+	}
+
+	PREV_PGNO(newpage) = PGNO(*hp);
+	NEXT_PGNO(*hp) = PGNO(newpage);
+
+	if ((ret = memp_fput(dbp->mpf, *hp, DB_MPOOL_DIRTY)) != 0)
+		return (ret);
+	*hp = newpage;
+	*indxp = 0;
+	return (0);
+}
diff --git a/db2/db/db_overflow.c b/db2/db/db_overflow.c
new file mode 100644
index 0000000000..2340e9e358
--- /dev/null
+++ b/db2/db/db_overflow.c
@@ -0,0 +1,383 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *	Sleepycat Software. All rights reserved.
+ */
+/*
+ * Copyright (c) 1990, 1993, 1994, 1995, 1996
+ *	Keith Bostic. All rights reserved.
+ */ +/* + * Copyright (c) 1990, 1993, 1994, 1995 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Mike Olson. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)db_overflow.c 10.4 (Sleepycat) 7/2/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#endif
+
+#include "db_int.h"
+#include "db_page.h"
+#include "db_am.h"
+#include "common_ext.h"
+
+/*
+ * Big key/data code.
+ *
+ * Big key and data entries are stored on linked lists of pages.  The initial
+ * reference is a structure with the total length of the item and the page
+ * number where it begins.  Each entry in the linked list contains a pointer
+ * to the next page of data, and so on.
+ */
+
+/*
+ * __db_goff --
+ *	Get an offpage item: copy the item of total length tlen, whose
+ *	chain starts at page pgno, into the DBT.  For a partial get only
+ *	the requested range is copied, cut short at the end of the item.
+ *	Returns ENOMEM if the user's buffer is too small (dbt->size then
+ *	holds the size needed) or if memory cannot be allocated.
+ *
+ * PUBLIC: int __db_goff __P((DB *, DBT *,
+ * PUBLIC:     u_int32_t, db_pgno_t, void **, u_int32_t *));
+ */
+int
+__db_goff(dbp, dbt, tlen, pgno, bpp, bpsz)
+	DB *dbp;
+	DBT *dbt;
+	u_int32_t tlen;
+	db_pgno_t pgno;
+	void **bpp;
+	u_int32_t *bpsz;
+{
+	PAGE *h;
+	db_indx_t bytes;
+	int ret;
+	u_int32_t curoff, needed, start;
+	u_int8_t *p, *src;
+	void *nbuf;
+
+	/*
+	 * Check if the buffer is big enough; if it is not and we are
+	 * allowed to malloc space, then we'll malloc it.  If we are
+	 * not (DB_DBT_USERMEM), then we'll set the dbt and return
+	 * appropriately.
+	 *
+	 * A partial get can't return more than the item holds beyond
+	 * the starting offset, so clamp the request to that; otherwise
+	 * dbt->size would claim bytes that were never copied.
+	 */
+	if (F_ISSET(dbt, DB_DBT_PARTIAL)) {
+		start = dbt->doff;
+		if (start > tlen)
+			needed = 0;
+		else if (dbt->dlen > tlen - start)
+			needed = tlen - start;
+		else
+			needed = dbt->dlen;
+	} else {
+		start = 0;
+		needed = tlen;
+	}
+
+	/*
+	 * Allocate any necessary memory.
+	 *
+	 * XXX: Never allocate 0 bytes;
+	 */
+	if (F_ISSET(dbt, DB_DBT_USERMEM)) {
+		if (needed > dbt->ulen) {
+			dbt->size = needed;
+			return (ENOMEM);
+		}
+	} else if (F_ISSET(dbt, DB_DBT_MALLOC)) {
+		dbt->data = dbp->db_malloc == NULL ?
+		    (void *)malloc(needed + 1) :
+		    (void *)dbp->db_malloc(needed + 1);
+		if (dbt->data == NULL)
+			return (ENOMEM);
+	} else if (*bpsz == 0 || *bpsz < needed) {
+		/*
+		 * Grow the caller's buffer.  Keep the old pointer until
+		 * the allocation has succeeded, so that a failure leaves
+		 * *bpp and *bpsz describing the buffer that still exists.
+		 */
+		nbuf = (*bpp == NULL ?
+		    (void *)malloc(needed + 1) :
+		    (void *)realloc(*bpp, needed + 1));
+		if (nbuf == NULL)
+			return (ENOMEM);
+		*bpp = nbuf;
+		*bpsz = needed + 1;
+		dbt->data = *bpp;
+	} else
+		dbt->data = *bpp;
+
+	/*
+	 * Step through the linked list of pages, copying the data on each
+	 * one into the buffer.  Never copy more than the total data length.
+	 */
+	dbt->size = needed;
+	for (curoff = 0, p = dbt->data; pgno != PGNO_INVALID && needed > 0;) {
+		if ((ret = memp_fget(dbp->mpf, &pgno, 0, &h)) != 0) {
+			(void)__db_pgerr(dbp, pgno);
+			return (ret);
+		}
+		/* Check if we need any bytes from this page. */
+		if (curoff + OV_LEN(h) >= start) {
+			src = (u_int8_t *)h + P_OVERHEAD;
+			bytes = OV_LEN(h);
+			/* Skip the part of the page before the start. */
+			if (start > curoff) {
+				src += start - curoff;
+				bytes -= start - curoff;
+			}
+			if (bytes > needed)
+				bytes = needed;
+			memcpy(p, src, bytes);
+			p += bytes;
+			needed -= bytes;
+		}
+		curoff += OV_LEN(h);
+		pgno = h->next_pgno;
+		(void)memp_fput(dbp->mpf, h, 0);
+	}
+	return (0);
+}
+
+/*
+ * __db_poff --
+ *	Put an offpage item: copy the DBT onto a newly allocated chain of
+ *	overflow pages and return the first page number in *pgnop.  For a
+ *	zero-length item no page is allocated and *pgnop is set to
+ *	PGNO_INVALID.
+ *
+ * PUBLIC: int __db_poff __P((DB *, const DBT *, db_pgno_t *,
+ * PUBLIC:     int (*)(DB *, u_int32_t, PAGE **)));
+ */
+int
+__db_poff(dbp, dbt, pgnop, newfunc)
+	DB *dbp;
+	const DBT *dbt;
+	db_pgno_t *pgnop;
+	int (*newfunc) __P((DB *, u_int32_t, PAGE **));
+{
+	PAGE *pagep, *lastp;
+	DB_LSN new_lsn, null_lsn;
+	DBT tmp_dbt;
+	db_indx_t pagespace;
+	u_int32_t sz;
+	u_int8_t *p;
+	int ret;
+
+	/*
+	 * Allocate pages and copy the key/data item into them.  Calculate the
+	 * number of bytes we get for pages we fill completely with a single
+	 * item.
+	 */
+	pagespace = P_MAXSPACE(dbp->pgsize);
+
+	lastp = NULL;
+	for (p = dbt->data,
+	    sz = dbt->size; sz > 0; p += pagespace, sz -= pagespace) {
+		/*
+		 * Reduce pagespace so we terminate the loop correctly and
+		 * don't copy too much data.
+		 */
+		if (sz < pagespace)
+			pagespace = sz;
+
+		/*
+		 * Allocate and initialize a new page and copy all or part of
+		 * the item onto the page.  If sz is less than pagespace, we
+		 * have a partial record.
+		 */
+		if ((ret = newfunc(dbp, P_OVERFLOW, &pagep)) != 0) {
+			pagep = NULL;
+			goto err;
+		}
+		if (DB_LOGGING(dbp)) {
+			tmp_dbt.data = p;
+			tmp_dbt.size = pagespace;
+			ZERO_LSN(null_lsn);
+			if ((ret = __db_big_log(dbp->dbenv->lg_info, dbp->txn,
+			    &new_lsn, 0, DB_ADD_BIG, dbp->log_fileid,
+			    PGNO(pagep), lastp ? PGNO(lastp) : PGNO_INVALID,
+			    PGNO_INVALID, &tmp_dbt, &LSN(pagep),
+			    lastp == NULL ? &null_lsn : &LSN(lastp),
+			    &null_lsn)) != 0)
+				goto err;
+
+			/* Move lsn onto page. */
+			if (lastp)
+				LSN(lastp) = new_lsn;
+			LSN(pagep) = new_lsn;
+		}
+
+		P_INIT(pagep, dbp->pgsize,
+		    PGNO(pagep), PGNO_INVALID, PGNO_INVALID, 0, P_OVERFLOW);
+		OV_LEN(pagep) = pagespace;
+		OV_REF(pagep) = 1;
+		memcpy((u_int8_t *)pagep + P_OVERHEAD, p, pagespace);
+
+		/*
+		 * If this is the first entry, update the user's info.
+		 * Otherwise, update the entry on the last page filled
+		 * in and release that page.
+		 */
+		if (lastp == NULL)
+			*pgnop = PGNO(pagep);
+		else {
+			lastp->next_pgno = PGNO(pagep);
+			pagep->prev_pgno = PGNO(lastp);
+			(void)memp_fput(dbp->mpf, lastp, DB_MPOOL_DIRTY);
+		}
+		lastp = pagep;
+	}
+
+	/* A zero-length item has no pages: there is nothing to release. */
+	if (lastp == NULL) {
+		*pgnop = PGNO_INVALID;
+		return (0);
+	}
+	(void)memp_fput(dbp->mpf, lastp, DB_MPOOL_DIRTY);
+	return (0);
+
+	/*
+	 * Give up our references to the pages still held, so that they
+	 * aren't left pinned.  The last page filled in has been written
+	 * to, so it goes back dirty.
+	 * XXX: The pages already allocated are not returned to the free list.
+	 */
+err:	if (pagep != NULL)
+		(void)memp_fput(dbp->mpf, pagep, 0);
+	if (lastp != NULL)
+		(void)memp_fput(dbp->mpf, lastp, DB_MPOOL_DIRTY);
+	return (ret);
+}
+
+/*
+ * __db_ioff --
+ *	Increment the reference count on an overflow page.
+ *
+ * PUBLIC: int __db_ioff __P((DB *, db_pgno_t));
+ */
+int
+__db_ioff(dbp, pgno)
+	DB *dbp;
+	db_pgno_t pgno;
+{
+	PAGE *h;
+	int ret;
+
+	if ((ret = memp_fget(dbp->mpf, &pgno, 0, &h)) != 0) {
+		(void)__db_pgerr(dbp, pgno);
+		return (ret);
+	}
+
+	/*
+	 * Log before touching the count, so that a logging failure leaves
+	 * the count alone, and give the page back instead of leaving it
+	 * pinned.
+	 */
+	if (DB_LOGGING(dbp) && (ret = __db_ovref_log(dbp->dbenv->lg_info,
+	    dbp->txn, &LSN(h), 0, dbp->log_fileid, h->pgno, &LSN(h))) != 0) {
+		(void)memp_fput(dbp->mpf, h, 0);
+		return (ret);
+	}
+
+	++OV_REF(h);
+
+	(void)memp_fput(dbp->mpf, h, DB_MPOOL_DIRTY);
+	return (0);
+}
+
+/*
+ * __db_doff --
+ *	Delete an offpage chain of overflow pages.
+ * + * PUBLIC: int __db_doff __P((DB *, db_pgno_t, int (*)(DB *, PAGE *))); + */ +int +__db_doff(dbp, pgno, freefunc) + DB *dbp; + db_pgno_t pgno; + int (*freefunc) __P((DB *, PAGE *)); +{ + PAGE *pagep; + DB_LSN null_lsn; + DBT tmp_dbt; + int ret; + + do { + if ((ret = memp_fget(dbp->mpf, &pgno, 0, &pagep)) != 0) { + (void)__db_pgerr(dbp, pgno); + return (ret); + } + + /* + * If it's an overflow page and it's referenced by more than + * one key/data item, decrement the reference count and return. + */ + if (TYPE(pagep) == P_OVERFLOW && OV_REF(pagep) > 1) { + --OV_REF(pagep); + (void)memp_fput(dbp->mpf, pagep, DB_MPOOL_DIRTY); + return (0); + } + + if (DB_LOGGING(dbp)) { + tmp_dbt.data = (u_int8_t *)pagep + P_OVERHEAD; + tmp_dbt.size = OV_LEN(pagep); + ZERO_LSN(null_lsn); + if ((ret = __db_big_log(dbp->dbenv->lg_info, dbp->txn, + &LSN(pagep), 0, DB_REM_BIG, dbp->log_fileid, + PGNO(pagep), PREV_PGNO(pagep), NEXT_PGNO(pagep), + &tmp_dbt, &LSN(pagep), &null_lsn, &null_lsn)) != 0) + return (ret); + } + pgno = pagep->next_pgno; + if ((ret = freefunc(dbp, pagep)) != 0) + return (ret); + } while (pgno != PGNO_INVALID); + + return (0); +} + +/* + * __db_moff -- + * Match on overflow pages. + * + * Given a starting page number and a key, return <0, 0, >0 to indicate if the + * key on the page is less than, equal to or greater than the key specified. + * + * PUBLIC: int __db_moff __P((DB *, const DBT *, db_pgno_t)); + */ +int +__db_moff(dbp, dbt, pgno) + DB *dbp; + const DBT *dbt; + db_pgno_t pgno; +{ + PAGE *pagep; + u_int32_t cmp_bytes, key_left; + int ret; + u_int8_t *p1, *p2; + + /* While there are both keys to compare. */ + for (ret = 0, p1 = dbt->data, + key_left = dbt->size; key_left > 0 && pgno != PGNO_INVALID;) { + if (memp_fget(dbp->mpf, &pgno, 0, &pagep) != 0) { + (void)__db_pgerr(dbp, pgno); + return (0); /* No system error return. */ + } + + cmp_bytes = OV_LEN(pagep) < key_left ? 
OV_LEN(pagep) : key_left; + key_left -= cmp_bytes; + for (p2 = + (u_int8_t *)pagep + P_OVERHEAD; cmp_bytes-- > 0; ++p1, ++p2) + if (*p1 != *p2) { + ret = (long)*p1 - (long)*p2; + break; + } + pgno = NEXT_PGNO(pagep); + (void)memp_fput(dbp->mpf, pagep, 0); + if (ret != 0) + return (ret); + } + if (key_left > 0) /* DBT is longer than page key. */ + return (-1); + if (pgno != PGNO_INVALID) /* DBT is shorter than page key. */ + return (1); + return (0); +} diff --git a/db2/db/db_pr.c b/db2/db/db_pr.c new file mode 100644 index 0000000000..c103b10e4f --- /dev/null +++ b/db2/db/db_pr.c @@ -0,0 +1,785 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 1997 + * Sleepycat Software. All rights reserved. + */ + +#include "config.h" + +#ifndef lint +static const char sccsid[] = "@(#)db_pr.c 10.14 (Sleepycat) 8/17/97"; +#endif /* not lint */ + +#ifndef NO_SYSTEM_INCLUDES +#include <sys/types.h> + +#include <ctype.h> +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#endif + +#include "db_int.h" +#include "db_page.h" +#include "btree.h" +#include "hash.h" +#include "db_am.h" + +static void __db_proff __P((void *)); +static void __db_psize __P((DB_MPOOLFILE *)); + +/* + * __db_loadme -- + * Force loading of this file. + * + * PUBLIC: void __db_loadme __P((void)); + */ +void +__db_loadme() +{ + getpid(); +} + +static FILE *set_fp; + +/* + * 64K is the maximum page size, so by default we check for offsets + * larger than that, and, where possible, we refine the test. + */ +#define PSIZE_BOUNDARY (64 * 1024 + 1) +static size_t set_psize = PSIZE_BOUNDARY; + +/* + * __db_prinit -- + * Initialize tree printing routines. + * + * PUBLIC: FILE *__db_prinit __P((FILE *)); + */ +FILE * +__db_prinit(fp) + FILE *fp; +{ + if (set_fp == NULL) + set_fp = fp == NULL ? stdout : fp; + return (set_fp); +} + +/* + * __db_dump -- + * Dump the tree to a file. 
+ * + * PUBLIC: int __db_dump __P((DB *, char *, int)); + */ +int +__db_dump(dbp, name, all) + DB *dbp; + char *name; + int all; +{ + FILE *fp, *save_fp; + + save_fp = NULL; /* XXX: Shut the compiler up. */ + + if (set_psize == PSIZE_BOUNDARY) + __db_psize(dbp->mpf); + + if (name != NULL) { + if ((fp = fopen(name, "w")) == NULL) + return (errno); + save_fp = set_fp; + set_fp = fp; + } else + fp = __db_prinit(NULL); + + (void)__db_prdb(dbp); + if (dbp->type == DB_HASH) + (void)__db_prhash(dbp); + else + (void)__db_prbtree(dbp); + fprintf(fp, "%s\n", DB_LINE); + __db_prtree(dbp->mpf, all); + + if (name != NULL) { + (void)fclose(fp); + set_fp = save_fp; + } + return (0); +} + +/* + * __db_prdb -- + * Print out the DB structure information. + * + * PUBLIC: int __db_prdb __P((DB *)); + */ +int +__db_prdb(dbp) + DB *dbp; +{ + static const FN fn[] = { + { DB_AM_DUP, "duplicates" }, + { DB_AM_INMEM, "in-memory" }, + { DB_AM_LOCKING, "locking" }, + { DB_AM_LOGGING, "logging" }, + { DB_AM_MLOCAL, "local mpool" }, + { DB_AM_PGDEF, "default page size" }, + { DB_AM_RDONLY, "read-only" }, + { DB_AM_RECOVER, "recover" }, + { DB_AM_SWAP, "needswap" }, + { DB_AM_THREAD, "thread" }, + { DB_BT_RECNUM, "btree:records" }, + { DB_HS_DIRTYMETA, "hash:dirty-meta" }, + { DB_RE_DELIMITER, "recno:delimiter" }, + { DB_RE_FIXEDLEN, "recno:fixed-length" }, + { DB_RE_PAD, "recno:pad" }, + { DB_RE_RENUMBER, "recno:renumber" }, + { DB_RE_SNAPSHOT, "recno:snapshot" }, + { 0 }, + }; + FILE *fp; + const char *t; + + fp = __db_prinit(NULL); + + switch (dbp->type) { + case DB_BTREE: + t = "btree"; + break; + case DB_HASH: + t = "hash"; + break; + case DB_RECNO: + t = "recno"; + break; + default: + t = "UNKNOWN"; + break; + } + + fprintf(fp, "%s ", t); + __db_prflags(dbp->flags, fn); + fprintf(fp, "\n"); + + return (0); +} + +/* + * __db_prbtree -- + * Print out the btree internal information. 
+ * + * PUBLIC: int __db_prbtree __P((DB *)); + */ +int +__db_prbtree(dbp) + DB *dbp; +{ + static const FN mfn[] = { + { BTM_DUP, "duplicates" }, + { BTM_RECNO, "recno" }, + { 0 }, + }; + BTMETA *mp; + BTREE *t; + DB_LOCK lock; + EPG *sp; + FILE *fp; + RECNO *rp; + db_pgno_t i; + int ret; + + t = dbp->internal; + fp = __db_prinit(NULL); + + (void)fprintf(fp, "%s\nOn-page metadata:\n", DB_LINE); + i = PGNO_METADATA; + if ((ret = __bam_lget(dbp, 0, PGNO_METADATA, DB_LOCK_READ, &lock)) != 0) + return (ret); + + if ((ret = __bam_pget(dbp, (PAGE **)&mp, &i, 0)) != 0) + return (ret); + + (void)fprintf(fp, "magic %#lx\n", (u_long)mp->magic); + (void)fprintf(fp, "version %lu\n", (u_long)mp->version); + (void)fprintf(fp, "pagesize %lu\n", (u_long)mp->pagesize); + (void)fprintf(fp, "maxkey: %lu minkey: %lu\n", + (u_long)mp->maxkey, (u_long)mp->minkey); + (void)fprintf(fp, "free %lu\n", (u_long)mp->free); + (void)fprintf(fp, "flags %lu", (u_long)mp->flags); + __db_prflags(mp->flags, mfn); + (void)fprintf(fp, "\n"); + (void)memp_fput(dbp->mpf, mp, 0); + (void)__bam_lput(dbp, lock); + + (void)fprintf(fp, "%s\nDB_INFO:\n", DB_LINE); + (void)fprintf(fp, "bt_maxkey: %lu bt_minkey: %lu\n", + (u_long)t->bt_maxkey, (u_long)t->bt_minkey); + (void)fprintf(fp, "bt_compare: %#lx bt_prefix: %#lx\n", + (u_long)t->bt_compare, (u_long)t->bt_prefix); + if ((rp = t->bt_recno) != NULL) { + (void)fprintf(fp, + "re_delim: %#lx re_pad: %#lx re_len: %lu re_source: %s\n", + (u_long)rp->re_delim, (u_long)rp->re_pad, + (u_long)rp->re_len, + rp->re_source == NULL ? 
"" : rp->re_source); + (void)fprintf(fp, + "cmap: %#lx smap: %#lx emap: %#lx msize: %lu\n", + (u_long)rp->re_cmap, (u_long)rp->re_smap, + (u_long)rp->re_emap, (u_long)rp->re_msize); + } + (void)fprintf(fp, "stack:"); + for (sp = t->bt_stack; sp < t->bt_sp; ++sp) + (void)fprintf(fp, " %lu", (u_long)sp->page->pgno); + (void)fprintf(fp, "\n"); + (void)fprintf(fp, "ovflsize: %lu\n", (u_long)t->bt_ovflsize); + (void)fflush(fp); + return (0); +} + +/* + * __db_prhash -- + * Print out the hash internal information. + * + * PUBLIC: int __db_prhash __P((DB *)); + */ +int +__db_prhash(dbp) + DB *dbp; +{ + FILE *fp; + HTAB *t; + int i, put_page, ret; + db_pgno_t pgno; + + t = dbp->internal; + + fp = __db_prinit(NULL); + + fprintf(fp, "\thash_accesses %lu\n", (u_long)t->hash_accesses); + fprintf(fp, "\thash_collisions %lu\n", (u_long)t->hash_collisions); + fprintf(fp, "\thash_expansions %lu\n", (u_long)t->hash_expansions); + fprintf(fp, "\thash_overflows %lu\n", (u_long)t->hash_overflows); + fprintf(fp, "\thash_bigpages %lu\n", (u_long)t->hash_bigpages); + fprintf(fp, "\n"); + + if (t->hdr == NULL) { + pgno = PGNO_METADATA; + if ((ret = memp_fget(dbp->mpf, &pgno, 0, &t->hdr)) != 0) + return (ret); + put_page = 1; + } else + put_page = 0; + + fprintf(fp, "\tmagic %#lx\n", (u_long)t->hdr->magic); + fprintf(fp, "\tversion %lu\n", (u_long)t->hdr->version); + fprintf(fp, "\tpagesize %lu\n", (u_long)t->hdr->pagesize); + fprintf(fp, "\tovfl_point %lu\n", (u_long)t->hdr->ovfl_point); + fprintf(fp, "\tlast_freed %lu\n", (u_long)t->hdr->last_freed); + fprintf(fp, "\tmax_bucket %lu\n", (u_long)t->hdr->max_bucket); + fprintf(fp, "\thigh_mask %#lx\n", (u_long)t->hdr->high_mask); + fprintf(fp, "\tlow_mask %#lx\n", (u_long)t->hdr->low_mask); + fprintf(fp, "\tffactor %lu\n", (u_long)t->hdr->ffactor); + fprintf(fp, "\tnelem %lu\n", (u_long)t->hdr->nelem); + fprintf(fp, "\th_charkey %#lx\n", (u_long)t->hdr->h_charkey); + + for (i = 0; i < NCACHED; i++) + fprintf(fp, "%lu ", 
(u_long)t->hdr->spares[i]); + fprintf(fp, "\n"); + + (void)fflush(fp); + if (put_page) { + (void)memp_fput(dbp->mpf, (PAGE *)t->hdr, 0); + t->hdr = NULL; + } + return (0); +} + +/* + * __db_prtree -- + * Print out the entire tree. + * + * PUBLIC: int __db_prtree __P((DB_MPOOLFILE *, int)); + */ +int +__db_prtree(mpf, all) + DB_MPOOLFILE *mpf; + int all; +{ + PAGE *h; + db_pgno_t i; + int ret, t_ret; + + if (set_psize == PSIZE_BOUNDARY) + __db_psize(mpf); + + ret = 0; + for (i = PGNO_ROOT;; ++i) { + if ((ret = memp_fget(mpf, &i, 0, &h)) != 0) + break; + if (TYPE(h) != P_INVALID) + if ((t_ret = __db_prpage(h, all)) != 0 && ret == 0) + ret = t_ret; + (void)memp_fput(mpf, h, 0); + } + (void)fflush(__db_prinit(NULL)); + return (ret); +} + +/* + * __db_prnpage + * -- Print out a specific page. + * + * PUBLIC: int __db_prnpage __P((DB_MPOOLFILE *, db_pgno_t)); + */ +int +__db_prnpage(mpf, pgno) + DB_MPOOLFILE *mpf; + db_pgno_t pgno; +{ + PAGE *h; + int ret; + + if (set_psize == PSIZE_BOUNDARY) + __db_psize(mpf); + + if ((ret = memp_fget(mpf, &pgno, 0, &h)) != 0) + return (ret); + + ret = __db_prpage(h, 1); + (void)fflush(__db_prinit(NULL)); + + (void)memp_fput(mpf, h, 0); + return (ret); +} + +/* + * __db_prpage + * -- Print out a page. + * + * PUBLIC: int __db_prpage __P((PAGE *, int)); + */ +int +__db_prpage(h, all) + PAGE *h; + int all; +{ + BINTERNAL *bi; + BKEYDATA *bk; + HKEYDATA *hkd; + HOFFPAGE a_hkd; + FILE *fp; + RINTERNAL *ri; + db_indx_t dlen, len, i; + db_pgno_t pgno; + u_int8_t *p; + int deleted, ret; + const char *s; + + bi = NULL; /* XXX: Shut the compiler up. 
*/ + bk = NULL; + hkd = NULL; + ri = NULL; + + fp = __db_prinit(NULL); + + switch (TYPE(h)) { + case P_DUPLICATE: + s = "duplicate"; + break; + case P_HASH: + s = "hash"; + break; + case P_IBTREE: + s = "btree internal"; + break; + case P_INVALID: + s = "invalid"; + break; + case P_IRECNO: + s = "recno internal"; + break; + case P_LBTREE: + s = "btree leaf"; + break; + case P_LRECNO: + s = "recno leaf"; + break; + case P_OVERFLOW: + s = "overflow"; + break; + default: + fprintf(fp, "ILLEGAL PAGE TYPE: page: %lu type: %lu\n", + (u_long)h->pgno, (u_long)TYPE(h)); + return (1); + } + fprintf(fp, "page %4lu: (%s)\n", (u_long)h->pgno, s); + fprintf(fp, " lsn.file: %lu lsn.offset: %lu", + (u_long)LSN(h).file, (u_long)LSN(h).offset); + if (TYPE(h) == P_IBTREE || TYPE(h) == P_IRECNO || + (TYPE(h) == P_LRECNO && h->pgno == PGNO_ROOT)) + fprintf(fp, " total records: %4lu", (u_long)RE_NREC(h)); + fprintf(fp, "\n"); + if (TYPE(h) == P_LBTREE || TYPE(h) == P_LRECNO) + fprintf(fp, " prev: %4lu next: %4lu", + (u_long)PREV_PGNO(h), (u_long)NEXT_PGNO(h)); + if (TYPE(h) == P_IBTREE || TYPE(h) == P_LBTREE) + fprintf(fp, " level: %2lu", (u_long)h->level); + if (TYPE(h) == P_OVERFLOW) { + fprintf(fp, " ref cnt: %4lu ", (u_long)OV_REF(h)); + __db_pr((u_int8_t *)h + P_OVERHEAD, OV_LEN(h)); + return (0); + } + fprintf(fp, " entries: %4lu", (u_long)NUM_ENT(h)); + fprintf(fp, " offset: %4lu\n", (u_long)HOFFSET(h)); + + if (!all || TYPE(h) == P_INVALID) + return (0); + + ret = 0; + for (i = 0; i < NUM_ENT(h); i++) { + if (P_ENTRY(h, i) - (u_int8_t *)h < P_OVERHEAD || + (size_t)(P_ENTRY(h, i) - (u_int8_t *)h) >= set_psize) { + fprintf(fp, + "ILLEGAL PAGE OFFSET: indx: %lu of %lu\n", + (u_long)i, (u_long)h->inp[i]); + ret = EINVAL; + continue; + } + deleted = 0; + switch (TYPE(h)) { + case P_HASH: + hkd = GET_HKEYDATA(h, i); + break; + case P_IBTREE: + bi = GET_BINTERNAL(h, i); + break; + case P_IRECNO: + ri = GET_RINTERNAL(h, i); + break; + case P_LBTREE: + bk = GET_BKEYDATA(h, i); + deleted 
= i % 2 == 0 && + GET_BKEYDATA(h, i + O_INDX)->deleted; + break; + case P_LRECNO: + case P_DUPLICATE: + bk = GET_BKEYDATA(h, i); + deleted = GET_BKEYDATA(h, i)->deleted; + break; + default: + fprintf(fp, + "ILLEGAL PAGE ITEM: %lu\n", (u_long)TYPE(h)); + ret = EINVAL; + continue; + } + fprintf(fp, " %s[%03lu] %4lu ", + deleted ? "D" : " ", (u_long)i, (u_long)h->inp[i]); + switch (TYPE(h)) { + case P_HASH: + switch (hkd->type) { + case H_OFFDUP: + memcpy(&pgno, + (u_int8_t *)hkd + SSZ(HOFFDUP, pgno), + sizeof(db_pgno_t)); + fprintf(fp, + "%4lu [offpage dups]\n", (u_long)pgno); + break; + case H_DUPLICATE: + /* + * If this is the first item on a page, then + * we cannot figure out how long it is, so + * we only print the first one in the duplicate + * set. + */ + if (i != 0) + len = LEN_HKEYDATA(h, 0, i); + else + len = 1; + + fprintf(fp, "Duplicates:\n"); + for (p = hkd->data; p < hkd->data + len;) { + memcpy(&dlen, p, sizeof(db_indx_t)); + p += sizeof(db_indx_t); + fprintf(fp, "\t\t"); + __db_pr(p, dlen); + p += sizeof(db_indx_t) + dlen; + } + break; + case H_KEYDATA: + if (i != 0) + __db_pr(hkd->data, + LEN_HKEYDATA(h, 0, i)); + else + fprintf(fp, "%s\n", hkd->data); + break; + case H_OFFPAGE: + memcpy(&a_hkd, hkd, HOFFPAGE_SIZE); + fprintf(fp, + "overflow: total len: %4lu page: %4lu\n", + (u_long)a_hkd.tlen, (u_long)a_hkd.pgno); + break; + } + break; + case P_IBTREE: + fprintf(fp, "count: %4lu pgno: %4lu ", + (u_long)bi->nrecs, (u_long)bi->pgno); + switch (bi->type) { + case B_KEYDATA: + __db_pr(bi->data, bi->len); + break; + case B_DUPLICATE: + case B_OVERFLOW: + __db_proff(bi->data); + break; + default: + fprintf(fp, "ILLEGAL BINTERNAL TYPE: %lu\n", + (u_long)bi->type); + ret = EINVAL; + break; + } + break; + case P_IRECNO: + fprintf(fp, "entries %4lu pgno %4lu\n", + (u_long)ri->nrecs, (u_long)ri->pgno); + break; + case P_LBTREE: + case P_LRECNO: + case P_DUPLICATE: + switch (bk->type) { + case B_KEYDATA: + __db_pr(bk->data, bk->len); + break; + case 
B_DUPLICATE: + case B_OVERFLOW: + __db_proff(bk); + break; + default: + fprintf(fp, + "ILLEGAL DUPLICATE/LBTREE/LRECNO TYPE: %lu\n", + (u_long)bk->type); + ret = EINVAL; + break; + } + break; + } + } + (void)fflush(fp); + return (ret); +} + +/* + * __db_isbad + * -- Decide if a page is corrupted. + * + * PUBLIC: int __db_isbad __P((PAGE *, int)); + */ +int +__db_isbad(h, die) + PAGE *h; + int die; +{ + BINTERNAL *bi; + BKEYDATA *bk; + HKEYDATA *hkd; + FILE *fp; + db_indx_t i; + + bi = NULL; /* XXX: Shut the compiler up. */ + bk = NULL; + hkd = NULL; + + fp = __db_prinit(NULL); + + switch (TYPE(h)) { + case P_DUPLICATE: + case P_HASH: + case P_IBTREE: + case P_INVALID: + case P_IRECNO: + case P_LBTREE: + case P_LRECNO: + case P_OVERFLOW: + break; + default: + fprintf(fp, "ILLEGAL PAGE TYPE: page: %lu type: %lu\n", + (u_long)h->pgno, (u_long)TYPE(h)); + goto bad; + } + + for (i = 0; i < NUM_ENT(h); i++) { + if (P_ENTRY(h, i) - (u_int8_t *)h < P_OVERHEAD || + (size_t)(P_ENTRY(h, i) - (u_int8_t *)h) >= set_psize) { + fprintf(fp, + "ILLEGAL PAGE OFFSET: indx: %lu of %lu\n", + (u_long)i, (u_long)h->inp[i]); + goto bad; + } + switch (TYPE(h)) { + case P_HASH: + hkd = GET_HKEYDATA(h, i); + if (hkd->type != H_OFFDUP && + hkd->type != H_DUPLICATE && + hkd->type != H_KEYDATA && + hkd->type != H_OFFPAGE) { + fprintf(fp, "ILLEGAL HASH TYPE: %lu\n", + (u_long)hkd->type); + goto bad; + } + break; + case P_IBTREE: + bi = GET_BINTERNAL(h, i); + if (bi->type != B_KEYDATA && + bi->type != B_DUPLICATE && + bi->type != B_OVERFLOW) { + fprintf(fp, "ILLEGAL BINTERNAL TYPE: %lu\n", + (u_long)bi->type); + goto bad; + } + break; + case P_IRECNO: + case P_LBTREE: + case P_LRECNO: + break; + case P_DUPLICATE: + bk = GET_BKEYDATA(h, i); + if (bk->type != B_KEYDATA && + bk->type != B_DUPLICATE && + bk->type != B_OVERFLOW) { + fprintf(fp, + "ILLEGAL DUPLICATE/LBTREE/LRECNO TYPE: %lu\n", + (u_long)bk->type); + goto bad; + } + break; + default: + fprintf(fp, + "ILLEGAL PAGE ITEM: %lu\n", 
(u_long)TYPE(h)); + goto bad; + } + } + return (0); + +bad: if (die) { + abort(); + /* NOTREACHED */ + } + return (1); +} + +/* + * __db_pr -- + * Print out a data element. + * + * PUBLIC: void __db_pr __P((u_int8_t *, u_int32_t)); + */ +void +__db_pr(p, len) + u_int8_t *p; + u_int32_t len; +{ + FILE *fp; + int i, lastch; + + fp = __db_prinit(NULL); + + fprintf(fp, "len: %3lu", (u_long)len); + lastch = '.'; + if (len != 0) { + fprintf(fp, " data: "); + for (i = len <= 20 ? len : 20; i > 0; --i, ++p) { + lastch = *p; + if (isprint(*p) || *p == '\n') + fprintf(fp, "%c", *p); + else + fprintf(fp, "%#x", (u_int)*p); + } + if (len > 20) { + fprintf(fp, "..."); + lastch = '.'; + } + } + if (lastch != '\n') + fprintf(fp, "\n"); +} + +/* + * __db_proff -- + * Print out an off-page element. + */ +static void +__db_proff(vp) + void *vp; +{ + FILE *fp; + BOVERFLOW *p; + + fp = __db_prinit(NULL); + + p = vp; + switch (p->type) { + case B_OVERFLOW: + fprintf(fp, "overflow: total len: %4lu page: %4lu\n", + (u_long)p->tlen, (u_long)p->pgno); + break; + case B_DUPLICATE: + fprintf(fp, "duplicate: page: %4lu\n", (u_long)p->pgno); + break; + } +} + +/* + * __db_prflags -- + * Print out flags values. + * + * PUBLIC: void __db_prflags __P((u_int32_t, const FN *)); + */ +void +__db_prflags(flags, fn) + u_int32_t flags; + FN const *fn; +{ + FILE *fp; + const FN *fnp; + int found; + const char *sep; + + fp = __db_prinit(NULL); + + sep = " ("; + for (found = 0, fnp = fn; fnp->mask != 0; ++fnp) + if (fnp->mask & flags) { + fprintf(fp, "%s%s", sep, fnp->name); + sep = ", "; + found = 1; + } + if (found) + fprintf(fp, ")"); +} + +/* + * __db_psize -- + * Get the page size. 
+ */ +static void +__db_psize(mpf) + DB_MPOOLFILE *mpf; +{ + BTMETA *mp; + db_pgno_t pgno; + + set_psize = PSIZE_BOUNDARY - 1; + + pgno = PGNO_METADATA; + if (memp_fget(mpf, &pgno, 0, &mp) != 0) + return; + + switch (mp->magic) { + case DB_BTREEMAGIC: + case DB_HASHMAGIC: + set_psize = mp->pagesize; + break; + } + (void)memp_fput(mpf, mp, 0); +} diff --git a/db2/db/db_rec.c b/db2/db/db_rec.c new file mode 100644 index 0000000000..900b0ed579 --- /dev/null +++ b/db2/db/db_rec.c @@ -0,0 +1,623 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 1997 + * Sleepycat Software. All rights reserved. + */ + +#include "config.h" + +#ifndef lint +static const char sccsid[] = "@(#)db_rec.c 10.8 (Sleepycat) 8/22/97"; +#endif /* not lint */ + +#ifndef NO_SYSTEM_INCLUDES +#include <sys/types.h> + +#endif +#include <ctype.h> +#include <errno.h> +#include <stddef.h> +#include <stdlib.h> +#include <string.h> + +#include "db_int.h" +#include "shqueue.h" +#include "db_page.h" +#include "db_dispatch.h" +#include "log.h" +#include "hash.h" +#include "btree.h" + +/* + * PUBLIC: int __db_addrem_recover + * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); + * + * This log message is generated whenever we add or remove a duplicate + * to/from a duplicate page. On recover, we just do the opposite. + */ +int +__db_addrem_recover(logp, dbtp, lsnp, redo, info) + DB_LOG *logp; + DBT *dbtp; + DB_LSN *lsnp; + int redo; + void *info; +{ + __db_addrem_args *argp; + DB *file_dbp, *mdbp; + DB_MPOOLFILE *mpf; + PAGE *pagep; + int change, cmp_n, cmp_p, ret; + + REC_PRINT(__db_addrem_print); + REC_INTRO(__db_addrem_read); + + if ((ret = memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) { + if (!redo) { + /* + * We are undoing and the page doesn't exist. That + * is equivalent to having a pagelsn of 0, so we + * would not have to undo anything. In this case, + * don't bother creating a page. 
+ */ + *lsnp = argp->prev_lsn; + ret = 0; + goto out; + } else + if ((ret = memp_fget(mpf, + &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } + + cmp_n = log_compare(lsnp, &LSN(pagep)); + cmp_p = log_compare(&LSN(pagep), &argp->pagelsn); + change = 0; + if ((cmp_p == 0 && redo && argp->opcode == DB_ADD_DUP) || + (cmp_n == 0 && !redo && argp->opcode == DB_REM_DUP)) { + + /* Need to redo an add, or undo a delete. */ + if ((ret = __db_pitem(file_dbp, pagep, argp->indx, argp->nbytes, + argp->hdr.size == 0 ? NULL : &argp->hdr, + argp->dbt.size == 0 ? NULL : &argp->dbt)) != 0) + goto out; + + change = DB_MPOOL_DIRTY; + + } else if ((cmp_n == 0 && !redo && argp->opcode == DB_ADD_DUP) || + (cmp_p == 0 && redo && argp->opcode == DB_REM_DUP)) { + /* Need to undo an add, or redo a delete. */ + if ((ret = __db_ditem(file_dbp, pagep, argp->indx, + argp->nbytes)) != 0) + goto out; + change = DB_MPOOL_DIRTY; + } + + if (change) + if (redo) + LSN(pagep) = *lsnp; + else + LSN(pagep) = argp->pagelsn; + + if ((ret = memp_fput(mpf, pagep, change)) == 0) + *lsnp = argp->prev_lsn; + +out: REC_CLOSE; +} + +/* + * PUBLIC: int __db_split_recover __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); + */ +int +__db_split_recover(logp, dbtp, lsnp, redo, info) + DB_LOG *logp; + DBT *dbtp; + DB_LSN *lsnp; + int redo; + void *info; +{ + __db_split_args *argp; + DB *file_dbp, *mdbp; + DB_MPOOLFILE *mpf; + PAGE *pagep; + int change, cmp_n, cmp_p, ret; + + REC_PRINT(__db_split_print); + REC_INTRO(__db_split_read); + + if ((ret = memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) { + if (!redo) { + /* + * We are undoing and the page doesn't exist. That + * is equivalent to having a pagelsn of 0, so we + * would not have to undo anything. In this case, + * don't bother creating a page. 
+ */ + *lsnp = argp->prev_lsn; + ret = 0; + goto out; + } else + if ((ret = memp_fget(mpf, + &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } + + /* + * There are two types of log messages here, one for the old page + * and one for the new pages created. The original image in the + * SPLITOLD record is used for undo. The image in the SPLITNEW + * is used for redo. We should never have a case where there is + * a redo operation and the SPLITOLD record is on disk, but not + * the SPLITNEW record. Therefore, we only redo NEW messages + * and only undo OLD messages. + */ + + change = 0; + cmp_n = log_compare(lsnp, &LSN(pagep)); + cmp_p = log_compare(&LSN(pagep), &argp->pagelsn); + if (cmp_p == 0 && redo) { + if (argp->opcode == DB_SPLITNEW) { + /* Need to redo the split described. */ + memcpy(pagep, + argp->pageimage.data, argp->pageimage.size); + } + LSN(pagep) = *lsnp; + change = DB_MPOOL_DIRTY; + } else if (cmp_n == 0 && !redo) { + if (argp->opcode == DB_SPLITOLD) { + /* Put back the old image. */ + memcpy(pagep, + argp->pageimage.data, argp->pageimage.size); + } + LSN(pagep) = argp->pagelsn; + change = DB_MPOOL_DIRTY; + } + if ((ret = memp_fput(mpf, pagep, change)) == 0) + *lsnp = argp->prev_lsn; + +out: REC_CLOSE; +} + +/* + * PUBLIC: int __db_big_recover __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); + */ +int +__db_big_recover(logp, dbtp, lsnp, redo, info) + DB_LOG *logp; + DBT *dbtp; + DB_LSN *lsnp; + int redo; + void *info; +{ + __db_big_args *argp; + DB *file_dbp, *mdbp; + DB_MPOOLFILE *mpf; + PAGE *pagep; + int change, cmp_n, cmp_p, ret; + + REC_PRINT(__db_big_print); + REC_INTRO(__db_big_read); + + if ((ret = memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) { + if (!redo) { + /* + * We are undoing and the page doesn't exist. That + * is equivalent to having a pagelsn of 0, so we + * would not have to undo anything. In this case, + * don't bother creating a page. 
+ */ + ret = 0; + goto ppage; + } else + if ((ret = memp_fget(mpf, + &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } + + /* + * There are three pages we need to check. The one on which we are + * adding data, the previous one whose next_pointer may have + * been updated, and the next one whose prev_pointer may have + * been updated. + */ + cmp_n = log_compare(lsnp, &LSN(pagep)); + cmp_p = log_compare(&LSN(pagep), &argp->pagelsn); + change = 0; + if ((cmp_p == 0 && redo && argp->opcode == DB_ADD_BIG) || + (cmp_n == 0 && !redo && argp->opcode == DB_REM_BIG)) { + /* We are either redo-ing an add, or undoing a delete. */ + P_INIT(pagep, file_dbp->pgsize, argp->pgno, argp->prev_pgno, + argp->next_pgno, 0, P_OVERFLOW); + OV_LEN(pagep) = argp->dbt.size; + OV_REF(pagep) = 1; + memcpy((u_int8_t *)pagep + P_OVERHEAD, argp->dbt.data, + argp->dbt.size); + PREV_PGNO(pagep) = argp->prev_pgno; + change = DB_MPOOL_DIRTY; + } else if ((cmp_n == 0 && !redo && argp->opcode == DB_ADD_BIG) || + (cmp_p == 0 && redo && argp->opcode == DB_REM_BIG)) { + /* + * We are either undo-ing an add or redo-ing a delete. + * The page is about to be reclaimed in either case, so + * there really isn't anything to do here. + */ + change = DB_MPOOL_DIRTY; + } + if (change) + LSN(pagep) = redo ? *lsnp : argp->pagelsn; + + if ((ret = memp_fput(mpf, pagep, change)) != 0) + goto out; + + /* Now check the previous page. */ +ppage: if (argp->prev_pgno != PGNO_INVALID) { + change = 0; + if ((ret = memp_fget(mpf, &argp->prev_pgno, 0, &pagep)) != 0) + if (!redo) { + /* + * We are undoing and the page doesn't exist. + * That is equivalent to having a pagelsn of 0, + * so we would not have to undo anything. In + * this case, don't bother creating a page. 
+ */ + *lsnp = argp->prev_lsn; + ret = 0; + goto npage; + } else + if ((ret = memp_fget(mpf, &argp->prev_pgno, + DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + + cmp_n = log_compare(lsnp, &LSN(pagep)); + cmp_p = log_compare(&LSN(pagep), &argp->prevlsn); + + if ((cmp_p == 0 && redo && argp->opcode == DB_ADD_BIG) || + (cmp_n == 0 && !redo && argp->opcode == DB_REM_BIG)) { + /* Redo add, undo delete. */ + NEXT_PGNO(pagep) = argp->pgno; + change = DB_MPOOL_DIRTY; + } else if ((cmp_n == 0 && + !redo && argp->opcode == DB_ADD_BIG) || + (cmp_p == 0 && redo && argp->opcode == DB_REM_BIG)) { + /* Redo delete, undo add. */ + NEXT_PGNO(pagep) = argp->next_pgno; + change = DB_MPOOL_DIRTY; + } + if (change) + LSN(pagep) = redo ? *lsnp : argp->prevlsn; + if ((ret = memp_fput(mpf, pagep, change)) != 0) + goto out; + } + + /* Now check the next page. Can only be set on a delete. */ +npage: if (argp->next_pgno != PGNO_INVALID) { + change = 0; + if ((ret = memp_fget(mpf, &argp->next_pgno, 0, &pagep)) != 0) + if (!redo) { + /* + * We are undoing and the page doesn't exist. + * That is equivalent to having a pagelsn of 0, + * so we would not have to undo anything. In + * this case, don't bother creating a page. + */ + *lsnp = argp->prev_lsn; + ret = 0; + goto out; + } else + if ((ret = memp_fget(mpf, &argp->next_pgno, + DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + + cmp_n = log_compare(lsnp, &LSN(pagep)); + cmp_p = log_compare(&LSN(pagep), &argp->nextlsn); + if (cmp_p == 0 && redo) { + PREV_PGNO(pagep) = PGNO_INVALID; + change = DB_MPOOL_DIRTY; + } else if (cmp_n == 0 && !redo) { + PREV_PGNO(pagep) = argp->pgno; + change = DB_MPOOL_DIRTY; + } + if (change) + LSN(pagep) = redo ? *lsnp : argp->nextlsn; + if ((ret = memp_fput(mpf, pagep, change)) != 0) + goto out; + } + + *lsnp = argp->prev_lsn; + +out: REC_CLOSE; +} + +/* + * __db_ovref_recover -- + * Recovery function for __db_ioff(). 
+ * + * PUBLIC: int __db_ovref_recover __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); + */ +int +__db_ovref_recover(logp, dbtp, lsnp, redo, info) + DB_LOG *logp; + DBT *dbtp; + DB_LSN *lsnp; + int redo; + void *info; +{ + __db_ovref_args *argp; + DB *file_dbp, *mdbp; + DB_MPOOLFILE *mpf; + PAGE *pagep; + int modified, ret; + + REC_PRINT(__db_ovref_print); + REC_INTRO(__db_ovref_read); + + if ((ret = memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) { + (void)__db_pgerr(file_dbp, argp->pgno); + goto out; + } + + modified = 0; + if (log_compare(lsnp, &argp->lsn) == 0 && redo) { + /* Need to redo update described. */ + ++OV_REF(pagep); + + pagep->lsn = *lsnp; + modified = 1; + } else if (log_compare(lsnp, &LSN(pagep)) == 0 && !redo) { + /* Need to undo update described. */ + --OV_REF(pagep); + + pagep->lsn = argp->lsn; + modified = 1; + } + ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0); + + *lsnp = argp->prev_lsn; + +out: REC_CLOSE; +} + +/* + * __db_relink_recover -- + * Recovery function for relink. + * + * PUBLIC: int __db_relink_recover + * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); + */ +int +__db_relink_recover(logp, dbtp, lsnp, redo, info) + DB_LOG *logp; + DBT *dbtp; + DB_LSN *lsnp; + int redo; + void *info; +{ + __db_relink_args *argp; + DB *file_dbp, *mdbp; + DB_MPOOLFILE *mpf; + PAGE *pagep; + int modified, ret; + + REC_PRINT(__db_relink_print); + REC_INTRO(__db_relink_read); + + /* + * There are three pages we need to check -- the page, and the + * previous and next pages, if they existed. + */ + if ((ret = memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) { + if (redo) { + (void)__db_pgerr(file_dbp, argp->pgno); + goto out; + } + goto next; + } + modified = 0; + if (log_compare(lsnp, &argp->lsn) == 0 && redo) { + /* Redo the relink. */ + pagep->lsn = *lsnp; + modified = 1; + } else if (log_compare(lsnp, &LSN(pagep)) == 0 && !redo) { + /* Undo the relink. 
*/ + pagep->next_pgno = argp->next; + pagep->prev_pgno = argp->prev; + + pagep->lsn = argp->lsn; + modified = 1; + } + if ((ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0) { + (void)__db_panic(file_dbp); + goto out; + } + +next: if ((ret = memp_fget(mpf, &argp->next, 0, &pagep)) != 0) { + if (redo) { + (void)__db_pgerr(file_dbp, argp->next); + goto out; + } + goto prev; + } + modified = 0; + if (log_compare(lsnp, &argp->lsn_next) == 0 && redo) { + /* Redo the relink. */ + pagep->prev_pgno = argp->prev; + + pagep->lsn = *lsnp; + modified = 1; + } else if (log_compare(lsnp, &LSN(pagep)) == 0 && !redo) { + /* Undo the relink. */ + pagep->prev_pgno = argp->pgno; + + pagep->lsn = argp->lsn_next; + modified = 1; + } + if ((ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0) { + (void)__db_panic(file_dbp); + goto out; + } + +prev: if ((ret = memp_fget(mpf, &argp->prev, 0, &pagep)) != 0) { + if (redo) { + (void)__db_pgerr(file_dbp, argp->prev); + goto out; + } + goto done; + } + modified = 0; + if (log_compare(lsnp, &argp->lsn_prev) == 0 && redo) { + /* Redo the relink. */ + pagep->next_pgno = argp->next; + + pagep->lsn = *lsnp; + modified = 1; + } else if (log_compare(lsnp, &LSN(pagep)) == 0 && !redo) { + /* Undo the relink. */ + pagep->next_pgno = argp->pgno; + + pagep->lsn = argp->lsn_prev; + modified = 1; + } + if ((ret = memp_fput(mpf, pagep, modified ? 
DB_MPOOL_DIRTY : 0)) != 0) { + (void) __db_panic(file_dbp); + goto out; + } + +done: *lsnp = argp->prev_lsn; + ret = 0; + +out: REC_CLOSE; +} + +/* + * PUBLIC: int __db_addpage_recover + * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); + */ +int +__db_addpage_recover(logp, dbtp, lsnp, redo, info) + DB_LOG *logp; + DBT *dbtp; + DB_LSN *lsnp; + int redo; + void *info; +{ + __db_addpage_args *argp; + DB *file_dbp, *mdbp; + DB_MPOOLFILE *mpf; + PAGE *pagep; + int change, cmp_n, cmp_p, ret; + + REC_PRINT(__db_addpage_print); + REC_INTRO(__db_addpage_read); + + /* + * We need to check two pages: the old one and the new one onto + * which we're going to add duplicates. Do the old one first. + */ + if ((ret = memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) + goto out; + + change = 0; + cmp_n = log_compare(lsnp, &LSN(pagep)); + cmp_p = log_compare(&LSN(pagep), &argp->lsn); + if (cmp_p == 0 && redo) { + NEXT_PGNO(pagep) = argp->nextpgno; + + LSN(pagep) = *lsnp; + change = DB_MPOOL_DIRTY; + } else if (cmp_n == 0 && !redo) { + NEXT_PGNO(pagep) = PGNO_INVALID; + + LSN(pagep) = argp->lsn; + change = DB_MPOOL_DIRTY; + } + if ((ret = memp_fput(mpf, pagep, change)) != 0) + goto out; + + if ((ret = memp_fget(mpf, &argp->nextpgno, 0, &pagep)) != 0) + if (!redo) { + /* + * We are undoing and the page doesn't exist. That + * is equivalent to having a pagelsn of 0, so we + * would not have to undo anything. In this case, + * don't bother creating a page. 
+ */ + ret = 0; + goto out; + } else + if ((ret = memp_fget(mpf, + &argp->nextpgno, DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + + change = 0; + cmp_n = log_compare(lsnp, &LSN(pagep)); + cmp_p = log_compare(&LSN(pagep), &argp->nextlsn); + if (cmp_p == 0 && redo) { + PREV_PGNO(pagep) = argp->pgno; + + LSN(pagep) = *lsnp; + change = DB_MPOOL_DIRTY; + } else if (cmp_n == 0 && !redo) { + PREV_PGNO(pagep) = PGNO_INVALID; + + LSN(pagep) = argp->nextlsn; + change = DB_MPOOL_DIRTY; + } + ret = memp_fput(mpf, pagep, change); + +out: if (ret == 0) + *lsnp = argp->prev_lsn; + REC_CLOSE; +} + +/* + * __db_debug_recover -- + * Recovery function for debug. + * + * PUBLIC: int __db_debug_recover __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); + */ +int +__db_debug_recover(logp, dbtp, lsnp, redo, info) + DB_LOG *logp; + DBT *dbtp; + DB_LSN *lsnp; + int redo; + void *info; +{ + __db_debug_args *argp; + int ret; + + REC_PRINT(__db_debug_print); + REC_NOOP_INTRO(__db_debug_read); + + *lsnp = argp->prev_lsn; + ret = 0; + + REC_NOOP_CLOSE; +} + +/* + * __db_noop_recover -- + * Recovery function for noop. + * + * PUBLIC: int __db_noop_recover + * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); + */ +int +__db_noop_recover(logp, dbtp, lsnp, redo, info) + DB_LOG *logp; + DBT *dbtp; + DB_LSN *lsnp; + int redo; + void *info; +{ + __db_noop_args *argp; + int ret; + + REC_PRINT(__db_noop_print); + REC_NOOP_INTRO(__db_noop_read); + + *lsnp = argp->prev_lsn; + ret = 0; + + REC_NOOP_CLOSE; +} diff --git a/db2/db/db_ret.c b/db2/db/db_ret.c new file mode 100644 index 0000000000..ddeb26eb94 --- /dev/null +++ b/db2/db/db_ret.c @@ -0,0 +1,149 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 1997 + * Sleepycat Software. All rights reserved. 
+ */ + +#include "config.h" + +#ifndef lint +static const char sccsid[] = "@(#)db_ret.c 10.5 (Sleepycat) 7/12/97"; +#endif /* not lint */ + +#ifndef NO_SYSTEM_INCLUDES +#include <sys/types.h> + +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#endif + +#include "db_int.h" +#include "db_page.h" +#include "btree.h" +#include "hash.h" +#include "db_am.h" + +/* + * __db_ret -- + * Build return DBT. + * + * PUBLIC: int __db_ret __P((DB *, + * PUBLIC: PAGE *, u_int32_t, DBT *, void **, u_int32_t *)); + */ +int +__db_ret(dbp, h, indx, dbt, memp, memsize) + DB *dbp; + PAGE *h; + u_int32_t indx; + DBT *dbt; + void **memp; + u_int32_t *memsize; +{ + BKEYDATA *bk; + HOFFPAGE ho; + BOVERFLOW *bo; + u_int32_t len; + void *data, *hk; + + switch (TYPE(h)) { + case P_HASH: + hk = P_ENTRY(h, indx); + if (((HKEYDATA *)hk)->type == H_OFFPAGE) { + memcpy(&ho, hk, sizeof(HOFFPAGE)); + return (__db_goff(dbp, dbt, + ho.tlen, ho.pgno, memp, memsize)); + } + len = LEN_HKEYDATA(h, dbp->pgsize, indx); + data = ((HKEYDATA *)hk)->data; + break; + case P_DUPLICATE: + case P_LBTREE: + case P_LRECNO: + bk = GET_BKEYDATA(h, indx); + if (bk->type == B_OVERFLOW) { + bo = (BOVERFLOW *)bk; + return (__db_goff(dbp, dbt, + bo->tlen, bo->pgno, memp, memsize)); + } + len = bk->len; + data = bk->data; + break; + default: + return (__db_pgfmt(dbp, h->pgno)); + } + + return (__db_retcopy(dbt, data, len, memp, memsize, + F_ISSET(dbt, DB_DBT_INTERNAL) ? NULL : dbp->db_malloc)); +} + +/* + * __db_retcopy -- + * Copy the returned data into the user's DBT, handling special flags. + * + * PUBLIC: int __db_retcopy __P((DBT *, + * PUBLIC: void *, u_int32_t, void **, u_int32_t *, void *(*)(size_t))); + */ +int +__db_retcopy(dbt, data, len, memp, memsize, db_malloc) + DBT *dbt; + void *data; + u_int32_t len; + void **memp; + u_int32_t *memsize; + void *(*db_malloc) __P((size_t)); +{ + /* If returning a partial record, reset the length. 
*/ + if (F_ISSET(dbt, DB_DBT_PARTIAL)) { + data = (u_int8_t *)data + dbt->doff; + if (len > dbt->doff) { + len -= dbt->doff; + if (len > dbt->dlen) + len = dbt->dlen; + } else + len = 0; + } + + /* + * Return the length of the returned record in the DBT size field. + * This satisfies the requirement that if we're using user memory + * and insufficient memory was provided, return the amount necessary + * in the size field. + */ + dbt->size = len; + + /* + * Allocate any necessary memory. + * + * XXX: Never allocate 0 bytes. + */ + if (F_ISSET(dbt, DB_DBT_MALLOC)) { + dbt->data = db_malloc == NULL ? + (void *)malloc(len + 1) : + (void *)db_malloc(len + 1); + if (dbt->data == NULL) + return (ENOMEM); + } else if (F_ISSET(dbt, DB_DBT_USERMEM)) { + if (dbt->ulen < len) + return (ENOMEM); + } else if (memp == NULL || memsize == NULL) { + return (EINVAL); + } else { + if (*memsize == 0 || *memsize < len) { + *memp = *memp == NULL ? + (void *)malloc(len + 1) : + (void *)realloc(*memp, len + 1); + if (*memp == NULL) { + *memsize = 0; + return (ENOMEM); + } + *memsize = len + 1; + } + dbt->data = *memp; + } + + memcpy(dbt->data, data, len); + return (0); +} diff --git a/db2/db/db_thread.c b/db2/db/db_thread.c new file mode 100644 index 0000000000..e956e809d9 --- /dev/null +++ b/db2/db/db_thread.c @@ -0,0 +1,125 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 1997 + * Sleepycat Software. All rights reserved. + */ + +#include "config.h" + +#ifndef lint +static const char sccsid[] = "@(#)db_thread.c 8.11 (Sleepycat) 8/18/97"; +#endif /* not lint */ + +#ifndef NO_SYSTEM_INCLUDES +#include <sys/types.h> + +#include <errno.h> +#include <stdlib.h> +#include <string.h> +#endif + +#include "db_int.h" +#include "db_page.h" +#include "shqueue.h" +#include "db_am.h" + +static int __db_getlockid __P((DB *, DB *)); + +/* + * __db_gethandle -- + * Called by db access method routines when the DB_THREAD flag is set. 
+ * This routine returns a handle, either an existing handle from the + * chain of handles, or creating one if necessary. + * + * PUBLIC: int __db_gethandle __P((DB *, int (*)(DB *, DB *), DB **)); + */ +int +__db_gethandle(dbp, am_func, dbpp) + DB *dbp, **dbpp; + int (*am_func) __P((DB *, DB *)); +{ + DB *ret_dbp; + int ret, t_ret; + + if ((ret = __db_mutex_lock((db_mutex_t *)dbp->mutex, -1, + dbp->dbenv == NULL ? NULL : dbp->dbenv->db_yield)) != 0) + return (ret); + + if ((ret_dbp = LIST_FIRST(&dbp->handleq)) != NULL) + /* Simply take one off the list. */ + LIST_REMOVE(ret_dbp, links); + else { + /* Allocate a new handle. */ + if ((ret_dbp = (DB *)malloc(sizeof(*dbp))) == NULL) { + ret = ENOMEM; + goto err; + } + memcpy(ret_dbp, dbp, sizeof(*dbp)); + ret_dbp->internal = NULL; + TAILQ_INIT(&ret_dbp->curs_queue); + + /* Set the locker, the lock structure and the lock DBT. */ + if ((ret = __db_getlockid(dbp, ret_dbp)) != 0) + goto err; + + /* Finally, call the access method specific dup function. */ + if ((ret = am_func(dbp, ret_dbp)) != 0) + goto err; + } + + *dbpp = ret_dbp; + + if (0) { +err: if (ret_dbp != NULL) + FREE(ret_dbp, sizeof(*ret_dbp)); + } + if ((t_ret = + __db_mutex_unlock((db_mutex_t *)dbp->mutex, -1)) != 0 && ret == 0) + ret = t_ret; + return (ret); +} + +/* + * __db_puthandle -- + * Return a DB handle to the pool for later use. + * + * PUBLIC: int __db_puthandle __P((DB *)); + */ +int +__db_puthandle(dbp) + DB *dbp; +{ + DB *master; + int ret; + + master = dbp->master; + if ((ret = __db_mutex_lock((db_mutex_t *)master->mutex, -1, + dbp->dbenv == NULL ? NULL : dbp->dbenv->db_yield)) != 0) + return (ret); + + LIST_INSERT_HEAD(&master->handleq, dbp, links); + + return (__db_mutex_unlock((db_mutex_t *)master->mutex, -1)); +} + +/* + * __db_getlockid -- + * Create a new locker ID and copy the file lock information from + * the old DB into the new one. 
+ */ +static int +__db_getlockid(dbp, new_dbp) + DB *dbp, *new_dbp; +{ + int ret; + + if (F_ISSET(dbp, DB_AM_LOCKING)) { + if ((ret = lock_id(dbp->dbenv->lk_info, &new_dbp->locker)) != 0) + return (ret); + memcpy(new_dbp->lock.fileid, dbp->lock.fileid, DB_FILE_ID_LEN); + new_dbp->lock_dbt.size = sizeof(new_dbp->lock); + new_dbp->lock_dbt.data = &new_dbp->lock; + } + return (0); +} |