From bf7997b65c7887d2acda95f5201d818a19d81711 Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Tue, 9 Jun 1998 15:16:55 +0000 Subject: Update. 1998-06-09 Ulrich Drepper * sysdeps/unix/sysv/linux/netinet/ip.h (struct ip_options): Define __data member only for gcc. Reported by ak@muc.de. * misc/mntent.h: Undo last patch. * sysdeps/unix/sysv/linux/fstatvfs.c (fstatvfs): Undo last patch. * misc/tst/mntent.c: Adjust code for this change. * io/fts.c: Updated from a slightly more recent BSD version. * io/fts.h: Likewise. * libc.map: Add __libc_stack_end. * db2/Makefile (routines): Add lock_region. * db2/config.h: Update from db-2.4.14. * db2/db.h: Likewise. * db2/db_185.h: Likewise. * db2/db_int.h: Likewise. * db2/bt_close.c: Likewise. * db2/bt_compare.c: Likewise. * db2/bt_conv.c: Likewise. * db2/bt_cursor.c: Likewise. * db2/bt_delete.c: Likewise. * db2/bt_open.c: Likewise. * db2/bt_page.c: Likewise. * db2/bt_put.c: Likewise. * db2/bt_rec.c: Likewise. * db2/bt_recno.c: Likewise. * db2/bt_rsearch.c: Likewise. * db2/bt_search.c: Likewise. * db2/bt_split.c: Likewise. * db2/bt_stat.c: Likewise. * db2/btree.src: Likewise. * db2/btree_auto.c: Likewise. * db2/getlong.c: Likewise. * db2/db_appinit.c: Likewise. * db2/db_apprec.c: Likewise. * db2/db_byteorder.c: Likewise. * db2/db_err.c: Likewise. * db2/db_log2.c: Likewise. * db2/db_region.c: Likewise. * db2/db_salloc.c: Likewise. * db2/db_shash.c: Likewise. * db2/db.c: Likewise. * db2/db.src: Likewise. * db2/db_auto.c: Likewise. * db2/db_conv.c: Likewise. * db2/db_dispatch.c: Likewise. * db2/db_dup.c: Likewise. * db2/db_overflow.c: Likewise. * db2/db_pr.c: Likewise. * db2/db_rec.c: Likewise. * db2/db_ret.c: Likewise. * db2/db_thread.c: Likewise. * db2/db185.c: Likewise. * db2/db185_int.h: Likewise. * db2/dbm.c: Likewise. * db2/hash.c: Likewise. * db2/hash.src: Likewise. * db2/hash_auto.c: Likewise. * db2/hash_conv.c: Likewise. * db2/hash_debug.c: Likewise. * db2/hash_dup.c: Likewise. * db2/hash_func.c: Likewise. 
* db2/hash_page.c: Likewise. * db2/hash_rec.c: Likewise. * db2/hash_stat.c: Likewise. * db2/btree.h: Likewise. * db2/btree_ext.h: Likewise. * db2/clib_ext.h: Likewise. * db2/common_ext.h: Likewise. * db2/cxx_int.h: Likewise. * db2/db.h.src: Likewise. * db2/db_185.h.src: Likewise. * db2/db_am.h: Likewise. * db2/db_auto.h: Likewise. * db2/db_cxx.h: Likewise. * db2/db_dispatch.h: Likewise. * db2/db_ext.h: Likewise. * db2/db_int.h.src: Likewise. * db2/db_page.h: Likewise. * db2/db_shash.h: Likewise. * db2/db_swap.h: Likewise. * db2/hash.h: Likewise. * db2/hash_ext.h: Likewise. * db2/lock.h: Likewise. * db2/lock_ext.h: Likewise. * db2/log.h: Likewise. * db2/log_ext.h: Likewise. * db2/mp.h: Likewise. * db2/mp_ext.h: Likewise. * db2/mutex_ext.h: Likewise. * db2/os_ext.h: Likewise. * db2/os_func.h: Likewise. * db2/queue.h: Likewise. * db2/shqueue.h: Likewise. * db2/txn.h: Likewise. * db2/lock.c: Likewise. * db2/lock_conflict.c: Likewise. * db2/lock_deadlock.c: Likewise. * db2/lock_region.c: Likewise. * db2/lock_util.c: Likewise. * db2/log.c: Likewise. * db2/log.src: Likewise. * db2/log_archive.c: Likewise. * db2/log_auto.c: Likewise. * db2/log_compare.c: Likewise. * db2/log_findckp.c: Likewise. * db2/log_get.c: Likewise. * db2/log_put.c: Likewise. * db2/log_rec.c: Likewise. * db2/log_register.c: Likewise. * db2/mp_bh.c: Likewise. * db2/mp_fget.c: Likewise. * db2/mp_fopen.c: Likewise. * db2/mp_fput.c: Likewise. * db2/mp_fset.c: Likewise. * db2/mp_open.c: Likewise. * db2/mp_pr.c: Likewise. * db2/mp_region.c: Likewise. * db2/mp_sync.c: Likewise. * db2/68020.gcc: Likewise. * db2/mutex.c: Likewise. * db2/parisc.gcc: Likewise. * db2/parisc.hp: Likewise. * db2/sco.cc: Likewise. * db2/os_abs.c: Likewise. * db2/os_alloc.c: Likewise. * db2/os_config.c: Likewise. * db2/os_dir.c: Likewise. * db2/os_fid.c: Likewise. * db2/os_fsync.c: Likewise. * db2/os_map.c: Likewise. * db2/os_oflags.c: Likewise. * db2/os_open.c: Likewise. * db2/os_rpath.c: Likewise. * db2/os_rw.c: Likewise. 
* db2/os_seek.c: Likewise. * db2/os_sleep.c: Likewise. * db2/os_spin.c: Likewise. * db2/os_stat.c: Likewise. * db2/os_unlink.c: Likewise. * db2/db_archive.c: Likewise. * db2/db_checkpoint.c: Likewise. * db2/db_deadlock.c: Likewise. * db2/db_dump.c: Likewise. * db2/db_dump185.c: Likewise. * db2/db_load.c: Likewise. * db2/db_printlog.c: Likewise. * db2/db_recover.c: Likewise. * db2/db_stat.c: Likewise. * db2/txn.c: Likewise. * db2/txn.src: Likewise. * db2/txn_auto.c: Likewise. * db2/txn_rec.c: Likewise. * elf/rtld.c: Move definition of __libc_stack_end to ... * sysdeps/generic/dl-sysdep.h: ...here. * sysdeps/unix/sysv/linux/fstatvfs.c: Handle nodiratime option. * sysdeps/unix/sysv/linux/bits/statvfs.h: Define ST_NODIRATIME. * sysdeps/unix/sysv/linux/sys/mount.h: Define MS_NODIRATIME. 1998-06-08 21:44 Ulrich Drepper * sysdeps/unix/sysv/linux/fstatvfs.c: Handle constant option string from mntent correctly. 1998-06-06 Andreas Jaeger * sunrpc/Makefile (generated): Correct typo. 1998-06-04 Philip Blundell * elf/elf.h (EM_ARM, et al.): New definitions. * sysdeps/arm/dl-machine.h: Update for new draft ARM ELF ABI. --- db2/mp/mp_bh.c | 79 +++++++----- db2/mp/mp_fget.c | 359 +++++++++++++++++++++++++++-------------------------- db2/mp/mp_fopen.c | 128 ++++++++++--------- db2/mp/mp_fput.c | 64 +++++----- db2/mp/mp_fset.c | 8 +- db2/mp/mp_open.c | 41 +++--- db2/mp/mp_pr.c | 294 +++++++++++++++++++------------------------ db2/mp/mp_region.c | 229 +++++++++++++--------------------- db2/mp/mp_sync.c | 74 ++++++++--- 9 files changed, 640 insertions(+), 636 deletions(-) (limited to 'db2/mp') diff --git a/db2/mp/mp_bh.c b/db2/mp/mp_bh.c index c23abdda24..d89f9c2ded 100644 --- a/db2/mp/mp_bh.c +++ b/db2/mp/mp_bh.c @@ -1,13 +1,13 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. 
*/ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)mp_bh.c 10.28 (Sleepycat) 1/8/98"; +static const char sccsid[] = "@(#)mp_bh.c 10.38 (Sleepycat) 5/20/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -59,8 +59,10 @@ __memp_bhwrite(dbmp, mfp, bhp, restartp, wrotep) dbmfp != NULL; dbmfp = TAILQ_NEXT(dbmfp, q)) if (dbmfp->mfp == mfp) { if (F_ISSET(dbmfp, MP_READONLY) && - __memp_upgrade(dbmp, dbmfp, mfp)) + __memp_upgrade(dbmp, dbmfp, mfp)) { + UNLOCKHANDLE(dbmp, dbmp->mutexp); return (0); + } break; } UNLOCKHANDLE(dbmp, dbmp->mutexp); @@ -111,8 +113,8 @@ __memp_bhwrite(dbmp, mfp, bhp, restartp, wrotep) if (F_ISSET(mfp, MP_TEMP)) return (0); - if (__memp_fopen(dbmp, mfp, R_ADDR(dbmp, mfp->path_off), mfp->ftype, - 0, 0, mfp->stat.st_pagesize, 0, NULL, NULL, 0, &dbmfp) != 0) + if (__memp_fopen(dbmp, mfp, R_ADDR(dbmp, mfp->path_off), + 0, 0, mfp->stat.st_pagesize, 0, NULL, &dbmfp) != 0) return (0); found: return (__memp_pgwrite(dbmfp, bhp, restartp, wrotep)); @@ -152,7 +154,7 @@ __memp_pgread(dbmfp, bhp, can_create) ret = 0; LOCKHANDLE(dbmp, dbmfp->mutexp); if (dbmfp->fd == -1 || (ret = - __db_seek(dbmfp->fd, pagesize, bhp->pgno, 0, SEEK_SET)) != 0) { + __db_seek(dbmfp->fd, pagesize, bhp->pgno, 0, 0, SEEK_SET)) != 0) { if (!can_create) { if (dbmfp->fd == -1) ret = EINVAL; @@ -164,8 +166,17 @@ __memp_pgread(dbmfp, bhp, can_create) } UNLOCKHANDLE(dbmp, dbmfp->mutexp); - /* Clear any uninitialized data. */ - memset(bhp->buf, 0, pagesize); + /* Clear the created page. */ + if (mfp->clear_len == 0) + memset(bhp->buf, 0, pagesize); + else { + memset(bhp->buf, 0, mfp->clear_len); +#ifdef DIAGNOSTIC + memset(bhp->buf + mfp->clear_len, + 0xff, pagesize - mfp->clear_len); +#endif + } + goto pgin; } @@ -186,8 +197,16 @@ __memp_pgread(dbmfp, bhp, can_create) goto err; } - /* Clear any uninitialized data. 
*/ - memset(bhp->buf + nr, 0, pagesize - nr); + /* + * If we didn't fail until we tried the read, don't clear the + * whole page, it wouldn't be insane for a filesystem to just + * always behave that way. Else, clear any uninitialized data. + */ + if (nr == 0) + memset(bhp->buf, 0, + mfp->clear_len == 0 ? pagesize : mfp->clear_len); + else + memset(bhp->buf + nr, 0, pagesize - nr); } /* Call any pgin function. */ @@ -308,31 +327,31 @@ __memp_pgwrite(dbmfp, bhp, restartp, wrotep) /* Temporary files may not yet have been created. */ LOCKHANDLE(dbmp, dbmfp->mutexp); - if (dbmfp->fd == -1) - if ((ret = __db_appname(dbenv, DB_APP_TMP, - NULL, NULL, &dbmfp->fd, NULL)) != 0 || dbmfp->fd == -1) { - UNLOCKHANDLE(dbmp, dbmfp->mutexp); - __db_err(dbenv, - "unable to create temporary backing file"); - goto err; - } + if (dbmfp->fd == -1 && + ((ret = __db_appname(dbenv, DB_APP_TMP, NULL, NULL, + DB_CREATE | DB_EXCL | DB_TEMPORARY, &dbmfp->fd, NULL)) != 0 || + dbmfp->fd == -1)) { + UNLOCKHANDLE(dbmp, dbmfp->mutexp); + __db_err(dbenv, "unable to create temporary backing file"); + goto err; + } - /* Write the page out. */ - if ((ret = __db_seek(dbmfp->fd, pagesize, bhp->pgno, 0, SEEK_SET)) != 0) + /* + * Write the page out. + * + * XXX + * Shut the compiler up; it doesn't understand the correlation between + * the failing clauses to __db_lseek and __db_write and this ret != 0. + */ + COMPQUIET(fail, NULL); + if ((ret = + __db_seek(dbmfp->fd, pagesize, bhp->pgno, 0, 0, SEEK_SET)) != 0) fail = "seek"; else if ((ret = __db_write(dbmfp->fd, bhp->buf, pagesize, &nw)) != 0) fail = "write"; UNLOCKHANDLE(dbmp, dbmfp->mutexp); - if (ret != 0) { - /* - * XXX - * Shut the compiler up; it doesn't understand the correlation - * between the failing clauses to __db_lseek and __db_write and - * this ret != 0. 
- */ - COMPQUIET(fail, NULL); + if (ret != 0) goto syserr; - } if (nw != (ssize_t)pagesize) { ret = EIO; @@ -548,7 +567,7 @@ __memp_upgrade(dbmp, dbmfp, mfp) * way we could have gotten a file descriptor of any kind. */ if ((ret = __db_appname(dbmp->dbenv, DB_APP_DATA, - NULL, R_ADDR(dbmp, mfp->path_off), NULL, &rpath)) != 0) + NULL, R_ADDR(dbmp, mfp->path_off), 0, NULL, &rpath)) != 0) return (ret); if (__db_open(rpath, 0, 0, 0, &fd) != 0) { F_SET(dbmfp, MP_UPGRADE_FAIL); diff --git a/db2/mp/mp_fget.c b/db2/mp/mp_fget.c index f5955c4c6f..c8ae2e9d98 100644 --- a/db2/mp/mp_fget.c +++ b/db2/mp/mp_fget.c @@ -1,21 +1,19 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. */ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)mp_fget.c 10.33 (Sleepycat) 12/2/97"; +static const char sccsid[] = "@(#)mp_fget.c 10.48 (Sleepycat) 6/2/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES #include -#include #include -#include #include #endif @@ -25,8 +23,6 @@ static const char sccsid[] = "@(#)mp_fget.c 10.33 (Sleepycat) 12/2/97"; #include "mp.h" #include "common_ext.h" -int __sleep_on_every_page_get; /* XXX: thread debugging option. */ - /* * memp_fget -- * Get a page from the file. @@ -35,7 +31,7 @@ int memp_fget(dbmfp, pgnoaddr, flags, addrp) DB_MPOOLFILE *dbmfp; db_pgno_t *pgnoaddr; - int flags; + u_int32_t flags; void *addrp; { BH *bhp; @@ -43,11 +39,12 @@ memp_fget(dbmfp, pgnoaddr, flags, addrp) MPOOL *mp; MPOOLFILE *mfp; size_t bucket, mf_offset; - u_long cnt; - int b_incr, b_inserted, readonly_alloc, ret; - void *addr; + u_int32_t st_hsearch; + int b_incr, first, ret; dbmp = dbmfp->dbmp; + mp = dbmp->mp; + mfp = dbmfp->mfp; /* * Validate arguments. @@ -79,32 +76,62 @@ memp_fget(dbmfp, pgnoaddr, flags, addrp) } } -#ifdef DEBUG +#ifdef DIAGNOSTIC /* * XXX * We want to switch threads as often as possible. 
Sleep every time * we get a new page to make it more likely. */ - if (__sleep_on_every_page_get && + if (DB_GLOBAL(db_pageyield) && (__db_yield == NULL || __db_yield() != 0)) __db_sleep(0, 1); #endif - mp = dbmp->mp; - mfp = dbmfp->mfp; + /* Initialize remaining local variables. */ mf_offset = R_OFFSET(dbmp, mfp); - addr = NULL; bhp = NULL; - b_incr = b_inserted = ret = 0; + st_hsearch = 0; + b_incr = ret = 0; + + /* Determine the hash bucket where this page will live. */ + bucket = BUCKET(mp, mf_offset, *pgnoaddr); LOCKREGION(dbmp); /* - * If mmap'ing the file, just return a pointer. However, if another - * process has opened the file for writing since we mmap'd it, start - * playing the game by their rules, i.e. everything goes through the - * cache. All pages previously returned should be safe, as long as - * a locking protocol was observed. + * Check for the last or last + 1 page requests. + * + * Examine and update the file's last_pgno value. We don't care if + * the last_pgno value immediately changes due to another thread -- + * at this instant in time, the value is correct. We do increment the + * current last_pgno value if the thread is asking for a new page, + * however, to ensure that two threads creating pages don't get the + * same one. + */ + if (LF_ISSET(DB_MPOOL_LAST | DB_MPOOL_NEW)) { + if (LF_ISSET(DB_MPOOL_NEW)) + ++mfp->last_pgno; + *pgnoaddr = mfp->last_pgno; + bucket = BUCKET(mp, mf_offset, mfp->last_pgno); + + if (LF_ISSET(DB_MPOOL_NEW)) + goto alloc; + } + + /* + * If mmap'ing the file and the page is not past the end of the file, + * just return a pointer. + * + * The page may be past the end of the file, so check the page number + * argument against the original length of the file. If we previously + * returned pages past the original end of the file, last_pgno will + * have been updated to match the "new" end of the file, and checking + * against it would return pointers past the end of the mmap'd region. 
+ * + * If another process has opened the file for writing since we mmap'd + * it, we will start playing the game by their rules, i.e. everything + * goes through the cache. All pages previously returned will be safe, + * as long as the correct locking protocol was observed. * * XXX * We don't discard the map because we don't know when all of the @@ -112,203 +139,180 @@ memp_fget(dbmfp, pgnoaddr, flags, addrp) * It would be possible to do so by reference counting the open * pages from the mmap, but it's unclear to me that it's worth it. */ - if (dbmfp->addr != NULL && F_ISSET(dbmfp->mfp, MP_CAN_MMAP)) { - readonly_alloc = 0; - if (LF_ISSET(DB_MPOOL_LAST)) - *pgnoaddr = mfp->last_pgno; - else { + if (dbmfp->addr != NULL && F_ISSET(mfp, MP_CAN_MMAP)) + if (*pgnoaddr > mfp->orig_last_pgno) { /* * !!! - * Allocate a page that can never really exist. See - * the comment above about non-existent pages and the - * hash access method. + * See the comment above about non-existent pages and + * the hash access method. */ - if (LF_ISSET(DB_MPOOL_CREATE | DB_MPOOL_NEW)) - readonly_alloc = 1; - else if (*pgnoaddr > mfp->last_pgno) { + if (!LF_ISSET(DB_MPOOL_CREATE)) { __db_err(dbmp->dbenv, "%s: page %lu doesn't exist", __memp_fn(dbmfp), (u_long)*pgnoaddr); ret = EINVAL; goto err; } - } - if (!readonly_alloc) { - addr = R_ADDR(dbmfp, *pgnoaddr * mfp->stat.st_pagesize); - + } else { + *(void **)addrp = + R_ADDR(dbmfp, *pgnoaddr * mfp->stat.st_pagesize); ++mp->stat.st_map; ++mfp->stat.st_map; + goto done; + } - goto mapret; + /* Search the hash chain for the page. */ + for (bhp = SH_TAILQ_FIRST(&dbmp->htab[bucket], __bh); + bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, hq, __bh)) { + ++st_hsearch; + if (bhp->pgno != *pgnoaddr || bhp->mf_offset != mf_offset) + continue; + + /* Increment the reference count. 
*/ + if (bhp->ref == UINT16_T_MAX) { + __db_err(dbmp->dbenv, + "%s: page %lu: reference count overflow", + __memp_fn(dbmfp), (u_long)bhp->pgno); + ret = EINVAL; + goto err; } - } - /* Check if requesting the last page or a new page. */ - if (LF_ISSET(DB_MPOOL_LAST)) - *pgnoaddr = mfp->last_pgno; + /* + * Increment the reference count. We may discard the region + * lock as we evaluate and/or read the buffer, so we need to + * ensure that it doesn't move and that its contents remain + * unchanged. + */ + ++bhp->ref; + b_incr = 1; - if (LF_ISSET(DB_MPOOL_NEW)) { - *pgnoaddr = mfp->last_pgno + 1; - goto alloc; - } + /* + * Any buffer we find might be trouble. + * + * BH_LOCKED -- + * I/O is in progress. Because we've incremented the buffer + * reference count, we know the buffer can't move. Unlock + * the region lock, wait for the I/O to complete, and reacquire + * the region. + */ + for (first = 1; F_ISSET(bhp, BH_LOCKED); first = 0) { + UNLOCKREGION(dbmp); - /* Check the BH hash bucket queue. */ - bucket = BUCKET(mp, mf_offset, *pgnoaddr); - for (cnt = 0, - bhp = SH_TAILQ_FIRST(&dbmp->htab[bucket], __bh); - bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, hq, __bh)) { - ++cnt; - if (bhp->pgno == *pgnoaddr && bhp->mf_offset == mf_offset) { - addr = bhp->buf; - ++mp->stat.st_hash_searches; - if (cnt > mp->stat.st_hash_longest) - mp->stat.st_hash_longest = cnt; - mp->stat.st_hash_examined += cnt; - goto found; + /* + * Explicitly yield the processor if it's not the first + * pass through this loop -- if we don't, we might end + * up running to the end of our CPU quantum as we will + * simply be swapping between the two locks. + */ + if (!first && (__db_yield == NULL || __db_yield() != 0)) + __db_sleep(0, 1); + + LOCKBUFFER(dbmp, bhp); + /* Wait for I/O to finish... 
*/ + UNLOCKBUFFER(dbmp, bhp); + LOCKREGION(dbmp); } - } - if (cnt != 0) { - ++mp->stat.st_hash_searches; - if (cnt > mp->stat.st_hash_longest) - mp->stat.st_hash_longest = cnt; - mp->stat.st_hash_examined += cnt; + + /* + * BH_TRASH -- + * The contents of the buffer are garbage. Shouldn't happen, + * and this read is likely to fail, but might as well try. + */ + if (F_ISSET(bhp, BH_TRASH)) + goto reread; + + /* + * BH_CALLPGIN -- + * The buffer was converted so it could be written, and the + * contents need to be converted again. + */ + if (F_ISSET(bhp, BH_CALLPGIN)) { + if ((ret = __memp_pg(dbmfp, bhp, 1)) != 0) + goto err; + F_CLR(bhp, BH_CALLPGIN); + } + + ++mp->stat.st_cache_hit; + ++mfp->stat.st_cache_hit; + *(void **)addrp = bhp->buf; + goto done; } -alloc: /* - * Allocate a new buffer header and data space, and mark the contents - * as useless. - */ +alloc: /* Allocate new buffer header and data space. */ if ((ret = __memp_ralloc(dbmp, sizeof(BH) - sizeof(u_int8_t) + mfp->stat.st_pagesize, NULL, &bhp)) != 0) goto err; - addr = bhp->buf; -#ifdef DEBUG - if ((ALIGNTYPE)addr & (sizeof(size_t) - 1)) { + +#ifdef DIAGNOSTIC + if ((ALIGNTYPE)bhp->buf & (sizeof(size_t) - 1)) { __db_err(dbmp->dbenv, "Internal error: BH data NOT size_t aligned."); - abort(); + ret = EINVAL; + goto err; } #endif + /* Initialize the BH fields. */ memset(bhp, 0, sizeof(BH)); LOCKINIT(dbmp, &bhp->mutex); + bhp->ref = 1; + bhp->pgno = *pgnoaddr; + bhp->mf_offset = mf_offset; /* * Prepend the bucket header to the head of the appropriate MPOOL * bucket hash list. Append the bucket header to the tail of the * MPOOL LRU chain. - * - * We have to do this before we read in the page so we can discard - * our region lock without screwing up the world. */ - bucket = BUCKET(mp, mf_offset, *pgnoaddr); SH_TAILQ_INSERT_HEAD(&dbmp->htab[bucket], bhp, hq, __bh); SH_TAILQ_INSERT_TAIL(&mp->bhq, bhp, q); - ++mp->stat.st_page_clean; - b_inserted = 1; - - /* Set the page number, and associated MPOOLFILE. 
*/ - bhp->mf_offset = mf_offset; - bhp->pgno = *pgnoaddr; /* - * If we know we created the page, zero it out and continue. + * If we created the page, zero it out and continue. * * !!! - * Note: DB_MPOOL_NEW deliberately doesn't call the pgin function. + * Note: DB_MPOOL_NEW specifically doesn't call the pgin function. * If DB_MPOOL_CREATE is used, then the application's pgin function * has to be able to handle pages of 0's -- if it uses DB_MPOOL_NEW, * it can detect all of its page creates, and not bother. * * Otherwise, read the page into memory, optionally creating it if * DB_MPOOL_CREATE is set. - * - * Increment the reference count for created buffers, but importantly, - * increment the reference count for buffers we're about to read so - * that the buffer can't move. */ - ++bhp->ref; - b_incr = 1; + if (LF_ISSET(DB_MPOOL_NEW)) { + if (mfp->clear_len == 0) + memset(bhp->buf, 0, mfp->stat.st_pagesize); + else { + memset(bhp->buf, 0, mfp->clear_len); +#ifdef DIAGNOSTIC + memset(bhp->buf + mfp->clear_len, 0xff, + mfp->stat.st_pagesize - mfp->clear_len); +#endif + } - if (LF_ISSET(DB_MPOOL_NEW)) - memset(addr, 0, mfp->stat.st_pagesize); - else { + ++mp->stat.st_page_create; + ++mfp->stat.st_page_create; + } else { /* * It's possible for the read function to fail, which means - * that we fail as well. + * that we fail as well. Note, the __memp_pgread() function + * discards the region lock, so the buffer must be pinned + * down so that it cannot move and its contents are unchanged. */ reread: if ((ret = __memp_pgread(dbmfp, - bhp, LF_ISSET(DB_MPOOL_CREATE | DB_MPOOL_NEW))) != 0) - goto err; - - /* - * !!! - * The __memp_pgread call discarded and reacquired the region - * lock. Because the buffer reference count was incremented - * before the region lock was discarded the buffer can't move - * and its contents can't change. - */ - ++mp->stat.st_cache_miss; - ++mfp->stat.st_cache_miss; - } - - if (0) { -found: /* Increment the reference count. 
*/ - if (bhp->ref == UINT16_T_MAX) { - __db_err(dbmp->dbenv, - "%s: too many references to page %lu", - __memp_fn(dbmfp), bhp->pgno); - ret = EINVAL; - goto err; - } - ++bhp->ref; - b_incr = 1; - - /* - * Any found buffer might be trouble. - * - * BH_LOCKED -- - * I/O in progress, wait for it to finish. Because the buffer - * reference count was incremented before the region lock was - * discarded we know the buffer can't move and its contents - * can't change. - */ - for (cnt = 0; F_ISSET(bhp, BH_LOCKED); ++cnt) { - UNLOCKREGION(dbmp); - + bhp, LF_ISSET(DB_MPOOL_CREATE))) != 0) { /* - * Sleep so that we don't simply spin, switching locks. - * (See the comment in include/mp.h.) + * !!! + * Discard the buffer unless another thread is waiting + * on our I/O to complete. Regardless, the header has + * the BH_TRASH flag set. */ - if (cnt != 0 && - (__db_yield == NULL || __db_yield() != 0)) - __db_sleep(0, 1); - - LOCKBUFFER(dbmp, bhp); - /* Waiting for I/O to finish... */ - UNLOCKBUFFER(dbmp, bhp); - LOCKREGION(dbmp); - } - - /* - * BH_TRASH -- - * The buffer is garbage. - */ - if (F_ISSET(bhp, BH_TRASH)) - goto reread; - - /* - * BH_CALLPGIN -- - * The buffer was written, and the contents need to be - * converted again. - */ - if (F_ISSET(bhp, BH_CALLPGIN)) { - if ((ret = __memp_pg(dbmfp, bhp, 1)) != 0) - goto err; - F_CLR(bhp, BH_CALLPGIN); + if (bhp->ref == 1) + __memp_bhfree(dbmp, mfp, bhp, 1); + goto err; } - ++mp->stat.st_cache_hit; - ++mfp->stat.st_cache_hit; + ++mp->stat.st_cache_miss; + ++mfp->stat.st_cache_miss; } /* @@ -319,23 +323,30 @@ found: /* Increment the reference count. */ if (bhp->pgno > mfp->last_pgno) mfp->last_pgno = bhp->pgno; -mapret: LOCKHANDLE(dbmp, dbmfp->mutexp); + ++mp->stat.st_page_clean; + *(void **)addrp = bhp->buf; + +done: /* Update the chain search statistics. 
*/ + if (st_hsearch) { + ++mp->stat.st_hash_searches; + if (st_hsearch > mp->stat.st_hash_longest) + mp->stat.st_hash_longest = st_hsearch; + mp->stat.st_hash_examined += st_hsearch; + } + + UNLOCKREGION(dbmp); + + LOCKHANDLE(dbmp, dbmfp->mutexp); ++dbmfp->pinref; UNLOCKHANDLE(dbmp, dbmfp->mutexp); - if (0) { -err: /* - * If no other process is already waiting on a created buffer, - * go ahead and discard it, it's not useful. - */ - if (b_incr) - --bhp->ref; - if (b_inserted && bhp->ref == 0) - __memp_bhfree(dbmp, mfp, bhp, 1); - } + return (0); +err: /* Discard our reference. */ + if (b_incr) + --bhp->ref; UNLOCKREGION(dbmp); - *(void **)addrp = addr; + *(void **)addrp = NULL; return (ret); } diff --git a/db2/mp/mp_fopen.c b/db2/mp/mp_fopen.c index 0f41122373..a4cbac8d4e 100644 --- a/db2/mp/mp_fopen.c +++ b/db2/mp/mp_fopen.c @@ -1,24 +1,20 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. */ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)mp_fopen.c 10.37 (Sleepycat) 1/18/98"; +static const char sccsid[] = "@(#)mp_fopen.c 10.47 (Sleepycat) 5/4/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES #include -#include -#include #include -#include #include -#include #endif #include "db_int.h" @@ -28,22 +24,21 @@ static const char sccsid[] = "@(#)mp_fopen.c 10.37 (Sleepycat) 1/18/98"; #include "common_ext.h" static int __memp_mf_close __P((DB_MPOOL *, DB_MPOOLFILE *)); -static int __memp_mf_open __P((DB_MPOOL *, const char *, - int, size_t, db_pgno_t, int, DBT *, u_int8_t *, MPOOLFILE **)); +static int __memp_mf_open __P((DB_MPOOL *, + const char *, size_t, db_pgno_t, DB_MPOOL_FINFO *, MPOOLFILE **)); /* * memp_fopen -- * Open a backing file for the memory pool. 
*/ int -memp_fopen(dbmp, path, ftype, - flags, mode, pagesize, lsn_offset, pgcookie, fileid, retp) +memp_fopen(dbmp, path, flags, mode, pagesize, finfop, retp) DB_MPOOL *dbmp; const char *path; - int ftype, flags, mode, lsn_offset; + u_int32_t flags; + int mode; size_t pagesize; - DBT *pgcookie; - u_int8_t *fileid; + DB_MPOOL_FINFO *finfop; DB_MPOOLFILE **retp; { int ret; @@ -59,31 +54,31 @@ memp_fopen(dbmp, path, ftype, return (EINVAL); } - return (__memp_fopen(dbmp, NULL, path, ftype, - flags, mode, pagesize, lsn_offset, pgcookie, fileid, 1, retp)); + return (__memp_fopen(dbmp, + NULL, path, flags, mode, pagesize, 1, finfop, retp)); } /* * __memp_fopen -- * Open a backing file for the memory pool; internal version. * - * PUBLIC: int __memp_fopen __P((DB_MPOOL *, MPOOLFILE *, const char *, int, - * PUBLIC: int, int, size_t, int, DBT *, u_int8_t *, int, DB_MPOOLFILE **)); + * PUBLIC: int __memp_fopen __P((DB_MPOOL *, MPOOLFILE *, const char *, + * PUBLIC: u_int32_t, int, size_t, int, DB_MPOOL_FINFO *, DB_MPOOLFILE **)); */ int -__memp_fopen(dbmp, mfp, path, - ftype, flags, mode, pagesize, lsn_offset, pgcookie, fileid, needlock, retp) +__memp_fopen(dbmp, mfp, path, flags, mode, pagesize, needlock, finfop, retp) DB_MPOOL *dbmp; MPOOLFILE *mfp; const char *path; - int ftype, flags, mode, lsn_offset, needlock; + u_int32_t flags; + int mode, needlock; size_t pagesize; - DBT *pgcookie; - u_int8_t *fileid; + DB_MPOOL_FINFO *finfop; DB_MPOOLFILE **retp; { DB_ENV *dbenv; DB_MPOOLFILE *dbmfp; + DB_MPOOL_FINFO finfo; db_pgno_t last_pgno; size_t size; u_int32_t mbytes, bytes; @@ -91,18 +86,34 @@ __memp_fopen(dbmp, mfp, path, u_int8_t idbuf[DB_FILE_ID_LEN]; char *rpath; - /* - * XXX - * If mfp is provided, the following arguments do NOT need to be - * specified: - * lsn_offset - * pgcookie - * fileid - */ dbenv = dbmp->dbenv; ret = 0; rpath = NULL; + /* + * If mfp is provided, we take the DB_MPOOL_FINFO information from + * the mfp. 
We don't bother initializing everything, because some + * of them are expensive to acquire. If no mfp is provided and the + * finfop argument is NULL, we default the values. + */ + if (finfop == NULL) { + memset(&finfo, 0, sizeof(finfo)); + if (mfp != NULL) { + finfo.ftype = mfp->ftype; + finfo.pgcookie = NULL; + finfo.fileid = NULL; + finfo.lsn_offset = mfp->lsn_off; + finfo.clear_len = mfp->clear_len; + } else { + finfo.ftype = 0; + finfo.pgcookie = NULL; + finfo.fileid = NULL; + finfo.lsn_offset = -1; + finfo.clear_len = 0; + } + finfop = &finfo; + } + /* Allocate and initialize the per-process structure. */ if ((dbmfp = (DB_MPOOLFILE *)__db_calloc(1, sizeof(DB_MPOOLFILE))) == NULL) { @@ -126,11 +137,11 @@ __memp_fopen(dbmp, mfp, path, } else { /* Get the real name for this file and open it. */ if ((ret = __db_appname(dbenv, - DB_APP_DATA, NULL, path, NULL, &rpath)) != 0) + DB_APP_DATA, NULL, path, 0, NULL, &rpath)) != 0) goto err; if ((ret = __db_open(rpath, - LF_ISSET(DB_CREATE | DB_RDONLY), DB_CREATE | DB_RDONLY, - mode, &dbmfp->fd)) != 0) { + LF_ISSET(DB_CREATE | DB_RDONLY), + DB_CREATE | DB_RDONLY, mode, &dbmfp->fd)) != 0) { __db_err(dbenv, "%s: %s", rpath, strerror(ret)); goto err; } @@ -156,12 +167,11 @@ __memp_fopen(dbmp, mfp, path, * don't use timestamps, otherwise there'd be no chance of any * other process joining the party. 
*/ - if (mfp == NULL && fileid == NULL) { + if (finfop->fileid == NULL) { if ((ret = __db_fileid(dbenv, rpath, 0, idbuf)) != 0) goto err; - fileid = idbuf; + finfop->fileid = idbuf; } - FREES(rpath); } /* @@ -173,8 +183,8 @@ __memp_fopen(dbmp, mfp, path, LOCKREGION(dbmp); if (mfp == NULL) - ret = __memp_mf_open(dbmp, path, ftype, - pagesize, last_pgno, lsn_offset, pgcookie, fileid, &mfp); + ret = __memp_mf_open(dbmp, + path, pagesize, last_pgno, finfop, &mfp); else { ++mfp->ref; ret = 0; @@ -218,7 +228,7 @@ __memp_fopen(dbmp, mfp, path, F_CLR(mfp, MP_CAN_MMAP); if (path == NULL) F_CLR(mfp, MP_CAN_MMAP); - if (ftype != 0) + if (finfop->ftype != 0) F_CLR(mfp, MP_CAN_MMAP); if (LF_ISSET(DB_NOMMAP)) F_CLR(mfp, MP_CAN_MMAP); @@ -229,11 +239,14 @@ __memp_fopen(dbmp, mfp, path, dbmfp->addr = NULL; if (F_ISSET(mfp, MP_CAN_MMAP)) { dbmfp->len = size; - if (__db_map(dbmfp->fd, dbmfp->len, 1, 1, &dbmfp->addr) != 0) { + if (__db_mapfile(rpath, + dbmfp->fd, dbmfp->len, 1, &dbmfp->addr) != 0) { dbmfp->addr = NULL; F_CLR(mfp, MP_CAN_MMAP); } } + if (rpath != NULL) + FREES(rpath); LOCKHANDLE(dbmp, dbmp->mutexp); TAILQ_INSERT_TAIL(&dbmp->dbmfq, dbmfp, q); @@ -260,15 +273,12 @@ err: /* * Open an MPOOLFILE. 
*/ static int -__memp_mf_open(dbmp, path, - ftype, pagesize, last_pgno, lsn_offset, pgcookie, fileid, retp) +__memp_mf_open(dbmp, path, pagesize, last_pgno, finfop, retp) DB_MPOOL *dbmp; const char *path; - int ftype, lsn_offset; size_t pagesize; db_pgno_t last_pgno; - DBT *pgcookie; - u_int8_t *fileid; + DB_MPOOL_FINFO *finfop; MPOOLFILE **retp; { MPOOLFILE *mfp; @@ -286,12 +296,13 @@ __memp_mf_open(dbmp, path, mfp != NULL; mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile)) { if (F_ISSET(mfp, MP_TEMP)) continue; - if (!memcmp(fileid, + if (!memcmp(finfop->fileid, R_ADDR(dbmp, mfp->fileid_off), DB_FILE_ID_LEN)) { - if (ftype != mfp->ftype || + if (finfop->clear_len != mfp->clear_len || + finfop->ftype != mfp->ftype || pagesize != mfp->stat.st_pagesize) { __db_err(dbmp->dbenv, - "%s: ftype or pagesize changed", + "%s: ftype, clear length or pagesize changed", path); return (EINVAL); } @@ -311,8 +322,9 @@ __memp_mf_open(dbmp, path, /* Initialize the structure. */ memset(mfp, 0, sizeof(MPOOLFILE)); mfp->ref = 1; - mfp->ftype = ftype; - mfp->lsn_off = lsn_offset; + mfp->ftype = finfop->ftype; + mfp->lsn_off = finfop->lsn_offset; + mfp->clear_len = finfop->clear_len; /* * If the user specifies DB_MPOOL_LAST or DB_MPOOL_NEW on a memp_fget, @@ -320,7 +332,7 @@ __memp_mf_open(dbmp, path, * it away. */ mfp->stat.st_pagesize = pagesize; - mfp->last_pgno = last_pgno; + mfp->orig_last_pgno = mfp->last_pgno = last_pgno; F_SET(mfp, MP_CAN_MMAP); if (ISTEMPORARY) @@ -336,19 +348,19 @@ __memp_mf_open(dbmp, path, if ((ret = __memp_ralloc(dbmp, DB_FILE_ID_LEN, &mfp->fileid_off, &p)) != 0) goto err; - memcpy(p, fileid, DB_FILE_ID_LEN); + memcpy(p, finfop->fileid, DB_FILE_ID_LEN); } /* Copy the page cookie into shared memory. 
*/ - if (pgcookie == NULL || pgcookie->size == 0) { + if (finfop->pgcookie == NULL || finfop->pgcookie->size == 0) { mfp->pgcookie_len = 0; mfp->pgcookie_off = 0; } else { if ((ret = __memp_ralloc(dbmp, - pgcookie->size, &mfp->pgcookie_off, &p)) != 0) + finfop->pgcookie->size, &mfp->pgcookie_off, &p)) != 0) goto err; - memcpy(p, pgcookie->data, pgcookie->size); - mfp->pgcookie_len = pgcookie->size; + memcpy(p, finfop->pgcookie->data, finfop->pgcookie->size); + mfp->pgcookie_len = finfop->pgcookie->size; } /* Prepend the MPOOLFILE to the list of MPOOLFILE's. */ @@ -397,7 +409,7 @@ memp_fclose(dbmfp) /* Discard any mmap information. */ if (dbmfp->addr != NULL && - (ret = __db_unmap(dbmfp->addr, dbmfp->len)) != 0) + (ret = __db_unmapfile(dbmfp->addr, dbmfp->len)) != 0) __db_err(dbmp->dbenv, "%s: %s", __memp_fn(dbmfp), strerror(ret)); @@ -480,13 +492,13 @@ __memp_mf_close(dbmp, dbmfp) SH_TAILQ_REMOVE(&mp->mpfq, mfp, q, __mpoolfile); /* Free the space. */ - __db_shalloc_free(dbmp->addr, mfp); if (mfp->path_off != 0) __db_shalloc_free(dbmp->addr, R_ADDR(dbmp, mfp->path_off)); if (mfp->fileid_off != 0) __db_shalloc_free(dbmp->addr, R_ADDR(dbmp, mfp->fileid_off)); if (mfp->pgcookie_off != 0) __db_shalloc_free(dbmp->addr, R_ADDR(dbmp, mfp->pgcookie_off)); + __db_shalloc_free(dbmp->addr, mfp); ret1: UNLOCKREGION(dbmp); return (0); diff --git a/db2/mp/mp_fput.c b/db2/mp/mp_fput.c index 335ee9ff16..5675493137 100644 --- a/db2/mp/mp_fput.c +++ b/db2/mp/mp_fput.c @@ -1,20 +1,19 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. 
*/ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)mp_fput.c 10.17 (Sleepycat) 12/20/97"; +static const char sccsid[] = "@(#)mp_fput.c 10.22 (Sleepycat) 4/26/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES #include #include -#include #endif #include "db_int.h" @@ -31,12 +30,11 @@ int memp_fput(dbmfp, pgaddr, flags) DB_MPOOLFILE *dbmfp; void *pgaddr; - int flags; + u_int32_t flags; { BH *bhp; DB_MPOOL *dbmp; MPOOL *mp; - MPOOLFILE *mfp; int wrote, ret; dbmp = dbmfp->dbmp; @@ -71,8 +69,9 @@ memp_fput(dbmfp, pgaddr, flags) /* * If we're mapping the file, there's nothing to do. Because we can - * quit mapping at any time, we have to check on each buffer to see - * if it's in the map region. + * stop mapping the file at any time, we have to check on each buffer + * to see if the address we gave the application was part of the map + * region. */ if (dbmfp->addr != NULL && pgaddr >= dbmfp->addr && (u_int8_t *)pgaddr <= (u_int8_t *)dbmfp->addr + dbmfp->len) @@ -98,36 +97,33 @@ memp_fput(dbmfp, pgaddr, flags) F_SET(bhp, BH_DISCARD); /* - * If more than one reference to the page, we're done. Ignore discard - * flags (for now) and leave it at its position in the LRU chain. The - * rest gets done at last reference close. + * Check for a reference count going to zero. This can happen if the + * application returns a page twice. */ -#ifdef DEBUG if (bhp->ref == 0) { - __db_err(dbmp->dbenv, - "Unpinned page returned: reference count on page %lu went negative.", - (u_long)bhp->pgno); - abort(); + __db_err(dbmp->dbenv, "%s: page %lu: unpinned page returned", + __memp_fn(dbmfp), (u_long)bhp->pgno); + UNLOCKREGION(dbmp); + return (EINVAL); } -#endif + + /* + * If more than one reference to the page, we're done. Ignore the + * discard flags (for now) and leave it at its position in the LRU + * chain. The rest gets done at last reference close. + */ if (--bhp->ref > 0) { UNLOCKREGION(dbmp); return (0); } - /* Move the buffer to the head/tail of the LRU chain. 
*/ - SH_TAILQ_REMOVE(&mp->bhq, bhp, q, __bh); - if (F_ISSET(bhp, BH_DISCARD)) - SH_TAILQ_INSERT_HEAD(&mp->bhq, bhp, q, __bh); - else - SH_TAILQ_INSERT_TAIL(&mp->bhq, bhp, q); - /* - * If this buffer is scheduled for writing because of a checkpoint, - * write it now. If we can't write it, set a flag so that the next - * time the memp_sync function is called we try writing it there, - * as the checkpoint application better be able to write all of the - * files. + * If this buffer is scheduled for writing because of a checkpoint, we + * need to write it (if we marked it dirty), or update the checkpoint + * counters (if we didn't mark it dirty). If we try to write it and + * can't, that's not necessarily an error, but set a flag so that the + * next time the memp_sync function runs we try writing it there, as + * the checkpoint application better be able to write all of the files. */ if (F_ISSET(bhp, BH_WRITE)) if (F_ISSET(bhp, BH_DIRTY)) { @@ -137,12 +133,18 @@ memp_fput(dbmfp, pgaddr, flags) } else { F_CLR(bhp, BH_WRITE); - mfp = R_ADDR(dbmp, bhp->mf_offset); - --mfp->lsn_cnt; - + --dbmfp->mfp->lsn_cnt; --mp->lsn_cnt; } + /* Move the buffer to the head/tail of the LRU chain. */ + SH_TAILQ_REMOVE(&mp->bhq, bhp, q, __bh); + if (F_ISSET(bhp, BH_DISCARD)) + SH_TAILQ_INSERT_HEAD(&mp->bhq, bhp, q, __bh); + else + SH_TAILQ_INSERT_TAIL(&mp->bhq, bhp, q); + + UNLOCKREGION(dbmp); return (0); } diff --git a/db2/mp/mp_fset.c b/db2/mp/mp_fset.c index 2eff7dd74c..3b352aa553 100644 --- a/db2/mp/mp_fset.c +++ b/db2/mp/mp_fset.c @@ -1,13 +1,13 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. 
*/ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)mp_fset.c 10.12 (Sleepycat) 11/26/97"; +static const char sccsid[] = "@(#)mp_fset.c 10.15 (Sleepycat) 4/26/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -30,16 +30,14 @@ int memp_fset(dbmfp, pgaddr, flags) DB_MPOOLFILE *dbmfp; void *pgaddr; - int flags; + u_int32_t flags; { BH *bhp; DB_MPOOL *dbmp; MPOOL *mp; - MPOOLFILE *mfp; int ret; dbmp = dbmfp->dbmp; - mfp = dbmfp->mfp; mp = dbmp->mp; /* Validate arguments. */ diff --git a/db2/mp/mp_open.c b/db2/mp/mp_open.c index ca81f8d6d6..fc985bc521 100644 --- a/db2/mp/mp_open.c +++ b/db2/mp/mp_open.c @@ -1,23 +1,20 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. */ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)mp_open.c 10.16 (Sleepycat) 11/28/97"; +static const char sccsid[] = "@(#)mp_open.c 10.23 (Sleepycat) 5/3/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES #include #include -#include -#include #include -#include #endif #include "db_int.h" @@ -33,13 +30,14 @@ static const char sccsid[] = "@(#)mp_open.c 10.16 (Sleepycat) 11/28/97"; int memp_open(path, flags, mode, dbenv, retp) const char *path; - int flags, mode; + u_int32_t flags; + int mode; DB_ENV *dbenv; DB_MPOOL **retp; { DB_MPOOL *dbmp; size_t cachesize; - int ret; + int is_private, ret; /* Validate arguments. */ #ifdef HAVE_SPINLOCKS @@ -62,15 +60,16 @@ memp_open(path, flags, mode, dbenv, retp) dbmp->dbenv = dbenv; /* Decide if it's possible for anyone else to access the pool. */ - if ((dbenv == NULL && path == NULL) || LF_ISSET(DB_MPOOL_PRIVATE)) - F_SET(dbmp, MP_ISPRIVATE); + is_private = + (dbenv == NULL && path == NULL) || LF_ISSET(DB_MPOOL_PRIVATE); /* * Map in the region. We do locking regardless, as portions of it are * implemented in common code (if we put the region in a file, that is). 
*/ F_SET(dbmp, MP_LOCKREGION); - if ((ret = __memp_ropen(dbmp, path, cachesize, mode, flags)) != 0) + if ((ret = __memp_ropen(dbmp, + path, cachesize, mode, is_private, LF_ISSET(DB_CREATE))) != 0) goto err; F_CLR(dbmp, MP_LOCKREGION); @@ -79,7 +78,7 @@ memp_open(path, flags, mode, dbenv, retp) * If it's threaded, then we have to lock both the handles and the * region, and we need to allocate a mutex for that purpose. */ - if (!F_ISSET(dbmp, MP_ISPRIVATE)) + if (!is_private) F_SET(dbmp, MP_LOCKREGION); if (LF_ISSET(DB_THREAD)) { F_SET(dbmp, MP_LOCKHANDLE | MP_LOCKREGION); @@ -135,10 +134,11 @@ memp_close(dbmp) } /* Close the region. */ - if ((t_ret = __memp_rclose(dbmp)) && ret == 0) + if ((t_ret = __db_rdetach(&dbmp->reginfo)) != 0 && ret == 0) ret = t_ret; - /* Discard the structure. */ + if (dbmp->reginfo.path != NULL) + FREES(dbmp->reginfo.path); FREE(dbmp, sizeof(DB_MPOOL)); return (ret); @@ -154,8 +154,19 @@ memp_unlink(path, force, dbenv) int force; DB_ENV *dbenv; { - return (__db_runlink(dbenv, - DB_APP_NONE, path, DB_DEFAULT_MPOOL_FILE, force)); + REGINFO reginfo; + int ret; + + memset(&reginfo, 0, sizeof(reginfo)); + reginfo.dbenv = dbenv; + reginfo.appname = DB_APP_NONE; + if (path != NULL && (reginfo.path = __db_strdup(path)) == NULL) + return (ENOMEM); + reginfo.file = DB_DEFAULT_MPOOL_FILE; + ret = __db_runlink(&reginfo, force); + if (reginfo.path != NULL) + FREES(reginfo.path); + return (ret); } /* diff --git a/db2/mp/mp_pr.c b/db2/mp/mp_pr.c index 13a6c62d35..e83e0f44fa 100644 --- a/db2/mp/mp_pr.c +++ b/db2/mp/mp_pr.c @@ -1,13 +1,13 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved.
*/ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)mp_pr.c 10.21 (Sleepycat) 1/6/98"; +static const char sccsid[] = "@(#)mp_pr.c 10.26 (Sleepycat) 5/23/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -15,20 +15,20 @@ static const char sccsid[] = "@(#)mp_pr.c 10.21 (Sleepycat) 1/6/98"; #include #include -#include #include #include #endif #include "db_int.h" +#include "db_page.h" #include "shqueue.h" #include "db_shash.h" #include "mp.h" +#include "db_auto.h" +#include "db_ext.h" +#include "common_ext.h" -static void __memp_pbh __P((FILE *, DB_MPOOL *, BH *, int)); -static void __memp_pdbmf __P((FILE *, DB_MPOOLFILE *, int)); -static void __memp_pmf __P((FILE *, MPOOLFILE *, int)); -static void __memp_pmp __P((FILE *, DB_MPOOL *, MPOOL *, int)); +static void __memp_pbh __P((DB_MPOOL *, BH *, size_t *, FILE *)); /* * memp_stat -- @@ -64,6 +64,8 @@ memp_stat(dbmp, gspp, fspp, db_malloc) dbmp->mp->rlayout.lock.mutex_set_wait; (*gspp)->st_region_nowait = dbmp->mp->rlayout.lock.mutex_set_nowait; + (*gspp)->st_refcnt = dbmp->mp->rlayout.refcnt; + (*gspp)->st_regsize = dbmp->mp->rlayout.size; UNLOCKREGION(dbmp); } @@ -77,7 +79,8 @@ memp_stat(dbmp, gspp, fspp, db_malloc) for (len = 0, mfp = SH_TAILQ_FIRST(&dbmp->mp->mpfq, __mpoolfile); mfp != NULL; - ++len, mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile)); + ++len, mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile)) + ; UNLOCKREGION(dbmp); @@ -148,174 +151,118 @@ __memp_fns(dbmp, mfp) return ((char *)R_ADDR(dbmp, mfp->path_off)); } +#define FMAP_ENTRIES 200 /* Files we map. */ + +#define MPOOL_DUMP_HASH 0x01 /* Debug hash chains. */ +#define MPOOL_DUMP_LRU 0x02 /* Debug LRU chains. */ +#define MPOOL_DUMP_MEM 0x04 /* Debug region memory. */ +#define MPOOL_DUMP_ALL 0x07 /* Debug all. */ + + /* - * __memp_debug -- + * __memp_dump_region -- * Display MPOOL structures. 
* - * PUBLIC: void __memp_debug __P((DB_MPOOL *, FILE *, int)); + * PUBLIC: void __memp_dump_region __P((DB_MPOOL *, char *, FILE *)); */ void -__memp_debug(dbmp, fp, data) +__memp_dump_region(dbmp, area, fp) DB_MPOOL *dbmp; + char *area; FILE *fp; - int data; { + BH *bhp; + DB_HASHTAB *htabp; DB_MPOOLFILE *dbmfp; - u_long cnt; + MPOOL *mp; + MPOOLFILE *mfp; + size_t bucket, fmap[FMAP_ENTRIES + 1]; + u_int32_t flags; + int cnt; /* Make it easy to call from the debugger. */ if (fp == NULL) fp = stderr; - /* Welcome message. */ - (void)fprintf(fp, "%s\nMpool per-process (%lu) statistics\n", - DB_LINE, (u_long)getpid()); - - if (data) - (void)fprintf(fp, " fd: %d; addr %lx; maddr %lx\n", - dbmp->fd, (u_long)dbmp->addr, (u_long)dbmp->maddr); - - /* Display the DB_MPOOLFILE structures. */ - for (cnt = 0, dbmfp = TAILQ_FIRST(&dbmp->dbmfq); - dbmfp != NULL; ++cnt, dbmfp = TAILQ_NEXT(dbmfp, q)); - (void)fprintf(fp, "%lu process-local files\n", cnt); - for (dbmfp = TAILQ_FIRST(&dbmp->dbmfq); - dbmfp != NULL; dbmfp = TAILQ_NEXT(dbmfp, q)) { - (void)fprintf(fp, "%s\n", __memp_fn(dbmfp)); - __memp_pdbmf(fp, dbmfp, data); - } + for (flags = 0; *area != '\0'; ++area) + switch (*area) { + case 'A': + LF_SET(MPOOL_DUMP_ALL); + break; + case 'h': + LF_SET(MPOOL_DUMP_HASH); + break; + case 'l': + LF_SET(MPOOL_DUMP_LRU); + break; + case 'm': + LF_SET(MPOOL_DUMP_MEM); + break; + } - /* Switch to global statistics. */ - (void)fprintf(fp, "\n%s\nMpool statistics\n", DB_LINE); + LOCKREGION(dbmp); - /* Display the MPOOL structure. */ - __memp_pmp(fp, dbmp, dbmp->mp, data); + mp = dbmp->mp; - /* Flush in case we're debugging. */ - (void)fflush(fp); -} - -/* - * __memp_pdbmf -- - * Display a DB_MPOOLFILE structure. - */ -static void -__memp_pdbmf(fp, dbmfp, data) - FILE *fp; - DB_MPOOLFILE *dbmfp; - int data; -{ - if (!data) - return; - - (void)fprintf(fp, " fd: %d; %s\n", - dbmfp->fd, F_ISSET(dbmfp, MP_READONLY) ? 
"readonly" : "read/write"); -} - -/* - * __memp_pmp -- - * Display the MPOOL structure. - */ -static void -__memp_pmp(fp, dbmp, mp, data) - FILE *fp; - DB_MPOOL *dbmp; - MPOOL *mp; - int data; -{ - BH *bhp; - MPOOLFILE *mfp; - DB_HASHTAB *htabp; - size_t bucket; - int cnt; - const char *sep; - - (void)fprintf(fp, "references: %lu; cachesize: %lu\n", - (u_long)mp->rlayout.refcnt, (u_long)mp->stat.st_cachesize); - (void)fprintf(fp, - " %lu pages created\n", (u_long)mp->stat.st_page_create); - (void)fprintf(fp, - " %lu mmap pages returned\n", (u_long)mp->stat.st_map); - (void)fprintf(fp, " %lu I/O's (%lu read, %lu written)\n", - (u_long)mp->stat.st_page_in + mp->stat.st_page_out, - (u_long)mp->stat.st_page_in, (u_long)mp->stat.st_page_out); - if (mp->stat.st_cache_hit + mp->stat.st_cache_miss != 0) - (void)fprintf(fp, - " %.0f%% cache hit rate (%lu hit, %lu miss)\n", - ((double)mp->stat.st_cache_hit / - (mp->stat.st_cache_hit + mp->stat.st_cache_miss)) * 100, - (u_long)mp->stat.st_cache_hit, - (u_long)mp->stat.st_cache_miss); + /* Display MPOOL structures. */ + (void)fprintf(fp, "%s\nPool (region addr 0x%lx, alloc addr 0x%lx)\n", + DB_LINE, (u_long)dbmp->reginfo.addr, (u_long)dbmp->addr); /* Display the MPOOLFILE structures. */ - for (cnt = 0, mfp = SH_TAILQ_FIRST(&dbmp->mp->mpfq, __mpoolfile); - mfp != NULL; ++cnt, mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile)); - (void)fprintf(fp, "%d total files\n", cnt); - for (cnt = 1, mfp = SH_TAILQ_FIRST(&dbmp->mp->mpfq, __mpoolfile); - mfp != NULL; ++cnt, mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile)) { - (void)fprintf(fp, "file %d\n", cnt); - __memp_pmf(fp, mfp, data); + cnt = 0; + for (mfp = SH_TAILQ_FIRST(&dbmp->mp->mpfq, __mpoolfile); + mfp != NULL; mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile), ++cnt) { + (void)fprintf(fp, "file #%d: %s: %lu references: %s\n", + cnt + 1, __memp_fns(dbmp, mfp), (u_long)mfp->ref, + F_ISSET(mfp, MP_CAN_MMAP) ? 
"mmap" : "read/write"); + if (cnt < FMAP_ENTRIES) + fmap[cnt] = R_OFFSET(dbmp, mfp); } - if (!data) - return; + for (dbmfp = TAILQ_FIRST(&dbmp->dbmfq); + dbmfp != NULL; dbmfp = TAILQ_NEXT(dbmfp, q), ++cnt) { + (void)fprintf(fp, "file #%d: %s: fd: %d: per-process, %s\n", + cnt + 1, __memp_fn(dbmfp), dbmfp->fd, + F_ISSET(dbmfp, MP_READONLY) ? "readonly" : "read/write"); + if (cnt < FMAP_ENTRIES) + fmap[cnt] = R_OFFSET(dbmp, mfp); + } + if (cnt < FMAP_ENTRIES) + fmap[cnt] = INVALID; + else + fmap[FMAP_ENTRIES] = INVALID; /* Display the hash table list of BH's. */ - (void)fprintf(fp, "%s\nHASH table of BH's (%lu buckets):\n", - DB_LINE, (u_long)mp->htab_buckets); - (void)fprintf(fp, - "longest chain searched %lu\n", (u_long)mp->stat.st_hash_longest); - (void)fprintf(fp, "average chain searched %lu (total/calls: %lu/%lu)\n", - (u_long)mp->stat.st_hash_examined / - (mp->stat.st_hash_searches ? mp->stat.st_hash_searches : 1), - (u_long)mp->stat.st_hash_examined, - (u_long)mp->stat.st_hash_searches); - for (htabp = dbmp->htab, - bucket = 0; bucket < mp->htab_buckets; ++htabp, ++bucket) { - if (SH_TAILQ_FIRST(&dbmp->htab[bucket], __bh) != NULL) - (void)fprintf(fp, "%lu:\n", (u_long)bucket); - for (bhp = SH_TAILQ_FIRST(&dbmp->htab[bucket], __bh); - bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, hq, __bh)) - __memp_pbh(fp, dbmp, bhp, data); + if (LF_ISSET(MPOOL_DUMP_HASH)) { + (void)fprintf(fp, + "%s\nBH hash table (%lu hash slots)\npageno, file, ref, address\n", + DB_LINE, (u_long)mp->htab_buckets); + for (htabp = dbmp->htab, + bucket = 0; bucket < mp->htab_buckets; ++htabp, ++bucket) { + if (SH_TAILQ_FIRST(&dbmp->htab[bucket], __bh) != NULL) + (void)fprintf(fp, "%lu:\n", (u_long)bucket); + for (bhp = SH_TAILQ_FIRST(&dbmp->htab[bucket], __bh); + bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, hq, __bh)) + __memp_pbh(dbmp, bhp, fmap, fp); + } } /* Display the LRU list of BH's. 
*/ - (void)fprintf(fp, "LRU list of BH's (pgno/offset):"); - for (sep = "\n ", bhp = SH_TAILQ_FIRST(&dbmp->mp->bhq, __bh); - bhp != NULL; sep = ", ", bhp = SH_TAILQ_NEXT(bhp, q, __bh)) - (void)fprintf(fp, "%s%lu/%lu", sep, - (u_long)bhp->pgno, (u_long)R_OFFSET(dbmp, bhp)); - (void)fprintf(fp, "\n"); -} + if (LF_ISSET(MPOOL_DUMP_LRU)) { + (void)fprintf(fp, "%s\nBH LRU list\n", DB_LINE); + (void)fprintf(fp, "pageno, file, ref, address\n"); + for (bhp = SH_TAILQ_FIRST(&dbmp->mp->bhq, __bh); + bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, q, __bh)) + __memp_pbh(dbmp, bhp, fmap, fp); + } -/* - * __memp_pmf -- - * Display an MPOOLFILE structure. - */ -static void -__memp_pmf(fp, mfp, data) - FILE *fp; - MPOOLFILE *mfp; - int data; -{ - (void)fprintf(fp, " %lu pages created\n", - (u_long)mfp->stat.st_page_create); - (void)fprintf(fp, " %lu I/O's (%lu read, %lu written)\n", - (u_long)mfp->stat.st_page_in + mfp->stat.st_page_out, - (u_long)mfp->stat.st_page_in, (u_long)mfp->stat.st_page_out); - if (mfp->stat.st_cache_hit + mfp->stat.st_cache_miss != 0) - (void)fprintf(fp, - " %.0f%% cache hit rate (%lu hit, %lu miss)\n", - ((double)mfp->stat.st_cache_hit / - (mfp->stat.st_cache_hit + mfp->stat.st_cache_miss)) * 100, - (u_long)mfp->stat.st_cache_hit, - (u_long)mfp->stat.st_cache_miss); - if (!data) - return; - - (void)fprintf(fp, " %d references; %s; pagesize: %lu\n", mfp->ref, - F_ISSET(mfp, MP_CAN_MMAP) ? "mmap" : "read/write", - (u_long)mfp->stat.st_pagesize); + if (LF_ISSET(MPOOL_DUMP_MEM)) + __db_shalloc_dump(dbmp->addr, fp); + + UNLOCKREGION(dbmp); + + /* Flush in case we're debugging. */ + (void)fflush(fp); } /* @@ -323,28 +270,37 @@ __memp_pmf(fp, mfp, data) * Display a BH structure. 
*/ static void -__memp_pbh(fp, dbmp, bhp, data) - FILE *fp; +__memp_pbh(dbmp, bhp, fmap, fp) DB_MPOOL *dbmp; BH *bhp; - int data; + size_t *fmap; + FILE *fp; { - const char *sep; - - if (!data) - return; - - (void)fprintf(fp, " BH @ %lu (mf: %lu): page %lu; ref %lu", - (u_long)R_OFFSET(dbmp, bhp), - (u_long)bhp->mf_offset, (u_long)bhp->pgno, (u_long)bhp->ref); - sep = "; "; - if (F_ISSET(bhp, BH_DIRTY)) { - (void)fprintf(fp, "%sdirty", sep); - sep = ", "; - } - if (F_ISSET(bhp, BH_WRITE)) { - (void)fprintf(fp, "%schk_write", sep); - sep = ", "; - } + static const FN fn[] = { + { BH_CALLPGIN, "callpgin" }, + { BH_DIRTY, "dirty" }, + { BH_DISCARD, "discard" }, + { BH_LOCKED, "locked" }, + { BH_TRASH, "trash" }, + { BH_WRITE, "write" }, + { 0 }, + }; + int i; + + for (i = 0; i < FMAP_ENTRIES; ++i) + if (fmap[i] == INVALID || fmap[i] == bhp->mf_offset) + break; + + if (fmap[i] == INVALID) + (void)fprintf(fp, " %4lu, %lu, %2lu, %lu", + (u_long)bhp->pgno, (u_long)bhp->mf_offset, + (u_long)bhp->ref, (u_long)R_OFFSET(dbmp, bhp)); + else + (void)fprintf(fp, " %4lu, #%d, %2lu, %lu", + (u_long)bhp->pgno, i + 1, + (u_long)bhp->ref, (u_long)R_OFFSET(dbmp, bhp)); + + __db_prflags(bhp->flags, fn, fp); + (void)fprintf(fp, "\n"); } diff --git a/db2/mp/mp_region.c b/db2/mp/mp_region.c index c20e669749..6b92fbdad4 100644 --- a/db2/mp/mp_region.c +++ b/db2/mp/mp_region.c @@ -1,24 +1,20 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. 
*/ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)mp_region.c 10.18 (Sleepycat) 11/29/97"; +static const char sccsid[] = "@(#)mp_region.c 10.30 (Sleepycat) 5/31/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES #include -#include #include -#include -#include #include -#include #endif #include "db_int.h" @@ -86,7 +82,7 @@ alloc: if ((ret = __db_shalloc(dbmp->addr, len, MUTEX_ALIGNMENT, &p)) == 0) { /* * Retry as soon as we've freed up sufficient space. If we - * have to coalesce of memory to satisfy the request, don't + * will have to coalesce memory to satisfy the request, don't * try until it's likely (possible?) that we'll succeed. */ total += fsize = __db_shsizeof(bhp); @@ -179,18 +175,19 @@ retry: /* Find a buffer we can flush; pure LRU. */ * Attach to, and optionally create, the mpool region. * * PUBLIC: int __memp_ropen - * PUBLIC: __P((DB_MPOOL *, const char *, size_t, int, int)); + * PUBLIC: __P((DB_MPOOL *, const char *, size_t, int, int, u_int32_t)); */ int -__memp_ropen(dbmp, path, cachesize, mode, flags) +__memp_ropen(dbmp, path, cachesize, mode, is_private, flags) DB_MPOOL *dbmp; const char *path; size_t cachesize; - int mode, flags; + int mode, is_private; + u_int32_t flags; { MPOOL *mp; size_t rlen; - int fd, newregion, ret, retry_cnt; + int defcache, ret; /* * Unlike other DB subsystems, mpool can't simply grow the region @@ -204,155 +201,107 @@ __memp_ropen(dbmp, path, cachesize, mode, flags) * * Up the user's cachesize by 25% to account for our overhead. */ + defcache = 0; if (cachesize < DB_CACHESIZE_MIN) - if (cachesize == 0) + if (cachesize == 0) { + defcache = 1; cachesize = DB_CACHESIZE_DEF; - else + } else cachesize = DB_CACHESIZE_MIN; rlen = cachesize + cachesize / 4; - /* Map in the region. */ - retry_cnt = newregion = 0; -retry: if (LF_ISSET(DB_CREATE)) { - /* - * If it's a private mpool, use malloc, it's a lot faster than - * instantiating a region. 
- * - * XXX - * If we're doing locking and don't have spinlocks for this - * architecture, we'd have to instantiate the file, we need - * the file descriptor for locking. However, it should not - * be possible for DB_THREAD to be set if HAVE_SPINLOCKS aren't - * defined. - * - * XXX - * HP-UX won't permit mutexes to live in anything but shared - * memory. So, instantiate the shared mpool region file on - * that architecture, regardless. If this turns out to be a - * performance problem, we could use anonymous memory instead. - */ -#if !defined(__hppa) - if (F_ISSET(dbmp, MP_ISPRIVATE)) - if ((dbmp->maddr = __db_malloc(rlen)) == NULL) - ret = ENOMEM; - else { - F_SET(dbmp, MP_MALLOC); - ret = __db_rinit(dbmp->dbenv, - dbmp->maddr, 0, rlen, 0); - } - else -#endif - ret = __db_rcreate(dbmp->dbenv, DB_APP_NONE, path, - DB_DEFAULT_MPOOL_FILE, mode, rlen, - F_ISSET(dbmp, MP_ISPRIVATE) ? DB_TEMPORARY : 0, - &fd, &dbmp->maddr); - if (ret == 0) { - /* Put the MPOOL structure first in the region. */ - mp = dbmp->maddr; - - SH_TAILQ_INIT(&mp->bhq); - SH_TAILQ_INIT(&mp->bhfq); - SH_TAILQ_INIT(&mp->mpfq); - - /* Initialize the rest of the region as free space. */ - dbmp->addr = (u_int8_t *)dbmp->maddr + sizeof(MPOOL); - __db_shalloc_init(dbmp->addr, rlen - sizeof(MPOOL)); - - /* - * - * Pretend that the cache will be broken up into 4K - * pages, and that we want to keep it under, say, 10 - * pages on each chain. This means a 256MB cache will - * allocate ~6500 offset pairs. - */ - mp->htab_buckets = - __db_tablesize((cachesize / (4 * 1024)) / 10); + /* + * Map in the region. + * + * If it's a private mpool, use malloc, it's a lot faster than + * instantiating a region. 
+ */ + dbmp->reginfo.dbenv = dbmp->dbenv; + dbmp->reginfo.appname = DB_APP_NONE; + if (path == NULL) + dbmp->reginfo.path = NULL; + else + if ((dbmp->reginfo.path = __db_strdup(path)) == NULL) + return (ENOMEM); + dbmp->reginfo.file = DB_DEFAULT_MPOOL_FILE; + dbmp->reginfo.mode = mode; + dbmp->reginfo.size = rlen; + dbmp->reginfo.dbflags = flags; + dbmp->reginfo.flags = 0; + if (defcache) + F_SET(&dbmp->reginfo, REGION_SIZEDEF); - /* Allocate hash table space and initialize it. */ - if ((ret = __db_shalloc(dbmp->addr, - mp->htab_buckets * sizeof(DB_HASHTAB), - 0, &dbmp->htab)) != 0) - goto err; - __db_hashinit(dbmp->htab, mp->htab_buckets); - mp->htab = R_OFFSET(dbmp, dbmp->htab); + /* + * If we're creating a temporary region, don't use any standard + * naming. + */ + if (is_private) { + dbmp->reginfo.appname = DB_APP_TMP; + dbmp->reginfo.file = NULL; + F_SET(&dbmp->reginfo, REGION_PRIVATE); + } - ZERO_LSN(mp->lsn); - mp->lsn_cnt = 0; + if ((ret = __db_rattach(&dbmp->reginfo)) != 0) { + if (dbmp->reginfo.path != NULL) + FREES(dbmp->reginfo.path); + return (ret); + } - memset(&mp->stat, 0, sizeof(mp->stat)); - mp->stat.st_cachesize = cachesize; + /* + * The MPOOL structure is first in the region, the rest of the region + * is free space. + */ + dbmp->mp = dbmp->reginfo.addr; + dbmp->addr = (u_int8_t *)dbmp->mp + sizeof(MPOOL); - mp->flags = 0; + /* Initialize a created region. */ + if (F_ISSET(&dbmp->reginfo, REGION_CREATED)) { + mp = dbmp->mp; + SH_TAILQ_INIT(&mp->bhq); + SH_TAILQ_INIT(&mp->bhfq); + SH_TAILQ_INIT(&mp->mpfq); - newregion = 1; - } else if (ret != EEXIST) - return (ret); - } + __db_shalloc_init(dbmp->addr, rlen - sizeof(MPOOL)); - /* If we didn't or couldn't create the region, try and join it. */ - if (!newregion && - (ret = __db_ropen(dbmp->dbenv, DB_APP_NONE, - path, DB_DEFAULT_MPOOL_FILE, 0, &fd, &dbmp->maddr)) != 0) { /* - * If we failed because the file wasn't available, wait a - * second and try again. 
+ * Assume we want to keep the hash chains with under 10 pages + * on each chain. We don't know the pagesize in advance, and + * it may differ for different files. Use a pagesize of 1K for + * the calculation -- we walk these chains a lot, they should + * be short. */ - if (ret == EAGAIN && ++retry_cnt < 3) { - (void)__db_sleep(1, 0); - goto retry; - } - return (ret); - } + mp->htab_buckets = + __db_tablesize((cachesize / (1 * 1024)) / 10); - /* Set up the common pointers. */ - dbmp->mp = dbmp->maddr; - dbmp->addr = (u_int8_t *)dbmp->maddr + sizeof(MPOOL); + /* Allocate hash table space and initialize it. */ + if ((ret = __db_shalloc(dbmp->addr, + mp->htab_buckets * sizeof(DB_HASHTAB), + 0, &dbmp->htab)) != 0) + goto err; + __db_hashinit(dbmp->htab, mp->htab_buckets); + mp->htab = R_OFFSET(dbmp, dbmp->htab); - /* - * If not already locked, lock the region -- if it's a new region, - * then either __db_rcreate() locked it for us or we malloc'd it - * instead of creating a region, neither of which requires locking - * here. - */ - if (!newregion) - LOCKREGION(dbmp); + ZERO_LSN(mp->lsn); + mp->lsn_cnt = 0; - /* - * Get the hash table address; it's on the shared page, so we have - * to lock first. - */ - dbmp->htab = R_ADDR(dbmp, dbmp->mp->htab); + memset(&mp->stat, 0, sizeof(mp->stat)); + mp->stat.st_cachesize = cachesize; - dbmp->fd = fd; + mp->flags = 0; + } - /* If we locked the region, release it now. */ - if (!F_ISSET(dbmp, MP_MALLOC)) - UNLOCKREGION(dbmp); - return (0); + /* Get the local hash table address. */ + dbmp->htab = R_ADDR(dbmp, dbmp->mp->htab); -err: if (fd != -1) { - dbmp->fd = fd; - (void)__memp_rclose(dbmp); - } + UNLOCKREGION(dbmp); + return (0); - if (newregion) +err: UNLOCKREGION(dbmp); + (void)__db_rdetach(&dbmp->reginfo); + if (F_ISSET(&dbmp->reginfo, REGION_CREATED)) (void)memp_unlink(path, 1, dbmp->dbenv); - return (ret); -} -/* - * __memp_rclose -- - * Close the mpool region. 
- * - * PUBLIC: int __memp_rclose __P((DB_MPOOL *)); - */ -int -__memp_rclose(dbmp) - DB_MPOOL *dbmp; -{ - if (F_ISSET(dbmp, MP_MALLOC)) { - __db_free(dbmp->maddr); - return (0); - } - return (__db_rclose(dbmp->dbenv, dbmp->fd, dbmp->maddr)); + if (dbmp->reginfo.path != NULL) + FREES(dbmp->reginfo.path); + return (ret); } diff --git a/db2/mp/mp_sync.c b/db2/mp/mp_sync.c index 6d16cf3cd4..33218eef1a 100644 --- a/db2/mp/mp_sync.c +++ b/db2/mp/mp_sync.c @@ -1,13 +1,13 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997 + * Copyright (c) 1996, 1997, 1998 * Sleepycat Software. All rights reserved. */ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)mp_sync.c 10.19 (Sleepycat) 12/3/97"; +static const char sccsid[] = "@(#)mp_sync.c 10.25 (Sleepycat) 4/26/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -15,7 +15,6 @@ static const char sccsid[] = "@(#)mp_sync.c 10.19 (Sleepycat) 12/3/97"; #include #include -#include #endif #include "db_int.h" @@ -25,6 +24,7 @@ static const char sccsid[] = "@(#)mp_sync.c 10.19 (Sleepycat) 12/3/97"; #include "common_ext.h" static int __bhcmp __P((const void *, const void *)); +static int __memp_fsync __P((DB_MPOOLFILE *)); /* * memp_sync -- @@ -145,7 +145,8 @@ memp_sync(dbmp, lsnp) bharray[ar_cnt++] = bhp; } } else - F_CLR(bhp, BH_WRITE); + if (F_ISSET(bhp, BH_WRITE)) + F_CLR(bhp, BH_WRITE); /* If there no buffers we can write immediately, we're done. 
*/ if (ar_cnt == 0) { @@ -235,10 +236,8 @@ int memp_fsync(dbmfp) DB_MPOOLFILE *dbmfp; { - BH *bhp, **bharray; DB_MPOOL *dbmp; - size_t mf_offset; - int ar_cnt, cnt, nalloc, next, pincnt, ret, wrote; + int is_tmp; dbmp = dbmfp->dbmp; @@ -250,14 +249,62 @@ memp_fsync(dbmfp) if (F_ISSET(dbmfp, MP_READONLY)) return (0); - ret = 0; LOCKREGION(dbmp); - if (F_ISSET(dbmfp->mfp, MP_TEMP)) - ret = 1; + is_tmp = F_ISSET(dbmfp->mfp, MP_TEMP); UNLOCKREGION(dbmp); - if (ret) + if (is_tmp) return (0); + return (__memp_fsync(dbmfp)); +} + +/* + * __mp_xxx_fd -- + * Return a file descriptor for DB 1.85 compatibility locking. + * + * PUBLIC: int __mp_xxx_fd __P((DB_MPOOLFILE *, int *)); + */ +int +__mp_xxx_fd(dbmfp, fdp) + DB_MPOOLFILE *dbmfp; + int *fdp; +{ + int ret; + + /* + * This is a truly spectacular layering violation, intended ONLY to + * support compatibility for the DB 1.85 DB->fd call. + * + * Sync the database file to disk, creating the file as necessary. + * + * We skip the MP_READONLY and MP_TEMP tests done by memp_fsync(3). + * The MP_READONLY test isn't interesting because we will either + * already have a file descriptor (we opened the database file for + * reading) or we aren't readonly (we created the database which + * requires write privileges). The MP_TEMP test isn't interesting + * because we want to write to the backing file regardless so that + * we get a file descriptor to return. + */ + ret = dbmfp->fd == -1 ? __memp_fsync(dbmfp) : 0; + + return ((*fdp = dbmfp->fd) == -1 ? ENOENT : ret); +} + +/* + * __memp_fsync -- + * Mpool file internal sync function. + */ +static int +__memp_fsync(dbmfp) + DB_MPOOLFILE *dbmfp; +{ + BH *bhp, **bharray; + DB_MPOOL *dbmp; + size_t mf_offset; + int ar_cnt, cnt, nalloc, next, pincnt, ret, wrote; + + ret = 0; + dbmp = dbmfp->dbmp; mf_offset = R_OFFSET(dbmp, dbmfp->mfp); /* @@ -359,7 +406,6 @@ err: UNLOCKREGION(dbmp); if (ret == 0) return (pincnt == 0 ? 
__db_fsync(dbmfp->fd) : DB_INCOMPLETE); return (ret); - } /* @@ -453,8 +499,8 @@ __bhcmp(p1, p2) { BH *bhp1, *bhp2; - bhp1 = *(BH **)p1; - bhp2 = *(BH **)p2; + bhp1 = *(BH * const *)p1; + bhp2 = *(BH * const *)p2; /* Sort by file (shared memory pool offset). */ if (bhp1->mf_offset < bhp2->mf_offset) -- cgit 1.4.1