diff options
Diffstat (limited to 'db2/common/db_region.c')
-rw-r--r-- | db2/common/db_region.c | 565 |
1 files changed, 565 insertions, 0 deletions
diff --git a/db2/common/db_region.c b/db2/common/db_region.c new file mode 100644 index 0000000000..51f8f4465c --- /dev/null +++ b/db2/common/db_region.c @@ -0,0 +1,565 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 1997 + * Sleepycat Software. All rights reserved. + */ +/* + * Copyright (c) 1995, 1996 + * The President and Fellows of Harvard University. All rights reserved. + * + * This code is derived from software contributed to Harvard by + * Margo Seltzer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "config.h" + +#ifndef lint +static const char sccsid[] = "@(#)db_region.c 10.12 (Sleepycat) 7/26/97"; +#endif /* not lint */ + +#ifndef NO_SYSTEM_INCLUDES +#include <sys/types.h> +#include <sys/stat.h> + +#include <errno.h> +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#endif + +#include "db_int.h" +#include "common_ext.h" + +static int __db_rmap __P((DB_ENV *, int, size_t, void *)); + +/* + * __db_rcreate -- + * + * Common interface for creating a shared region. Handles synchronization + * across multiple processes. + * + * The dbenv contains the environment for this process, including naming + * information. The path argument represents the parameters passed to + * the open routines and may be either a file or a directory. If it is + * a directory, it must exist. If it is a file, then the file parameter + * must be NULL, otherwise, file is the name to be created inside the + * directory path. + * + * The function returns a pointer to the shared region that has been mapped + * into memory, NULL on error. + * + * PUBLIC: int __db_rcreate __P((DB_ENV *, APPNAME, + * PUBLIC: const char *, const char *, int, size_t, int *, void *)); + */ +int +__db_rcreate(dbenv, appname, path, file, mode, size, fdp, retp) + DB_ENV *dbenv; + APPNAME appname; + const char *path, *file; + int mode, *fdp; + size_t size; + void *retp; +{ + RLAYOUT *rp; + int fd, ret; + char *name; + + fd = -1; + rp = NULL; + + /* + * Get the filename -- note, if it's a temporary file, it will + * be created by the underlying temporary file creation code, + * so we have to check the file descriptor to be sure it's an + * error. + */ + if ((ret = __db_appname(dbenv, appname, path, file, &fd, &name)) != 0) + return (ret); + + /* + * Now open the file. We need to make sure that multiple processes + * that attempt to create the region at the same time are properly + * ordered, so we open it O_EXCL and O_CREAT so two simultaneous + * attempts to create the region will return failure in one of the + * attempts. + */ + if (fd == -1 && (ret = __db_fdopen(name, + DB_CREATE | DB_EXCL, DB_CREATE | DB_EXCL, mode, &fd)) != 0) { + if (ret != EEXIST) + __db_err(dbenv, + "region create: %s: %s", name, strerror(ret)); + goto err; + } + *fdp = fd; + + /* Grow the region to the correct size. */ + if ((ret = __db_rgrow(dbenv, fd, size)) != 0) + goto err; + + /* Map the region in. */ + if ((ret = __db_rmap(dbenv, fd, size, &rp)) != 0) + goto err; + + /* + * Initialize the common information. + * + * !!! + * We have to order the region creates so that two processes don't try + * to simultaneously create the region and so that processes that are + * joining the region never see inconsistent data. We'd like to play + * file permissions games, but we can't because WNT filesystems won't + * open a file mode 0. + * + * So, the process that's creating the region always acquires the lock + * before the setting the version number. Any process joining always + * checks the version number before attempting to acquire the lock. + * + * We have to check the version number first, because if the version + * number has not been written, it's possible that the mutex has not + * been initialized in which case an attempt to get it could lead to + * random behavior. If the version number isn't there (the file size + * is too small) or it's 0, we know that the region is being created. + */ + (void)__db_mutex_init(&rp->lock, MUTEX_LOCK_OFFSET(rp, &rp->lock)); + (void)__db_mutex_lock(&rp->lock, + fd, dbenv == NULL ? NULL : dbenv->db_yield); + + rp->refcnt = 1; + rp->size = size; + rp->flags = 0; + db_version(&rp->majver, &rp->minver, &rp->patch); + + if (name != NULL) + FREES(name); + + *(void **)retp = rp; + return (0); + +err: if (fd != -1) { + if (rp != NULL) + (void)__db_munmap(rp, rp->size); + (void)__db_unlink(name); + (void)__db_close(fd); + } + if (name != NULL) + FREES(name); + return (ret); +} + +/* + * __db_ropen -- + * Construct the name of a file, open it and map it in. + * + * PUBLIC: int __db_ropen __P((DB_ENV *, + * PUBLIC: APPNAME, const char *, const char *, int, int *, void *)); + */ +int +__db_ropen(dbenv, appname, path, file, flags, fdp, retp) + DB_ENV *dbenv; + APPNAME appname; + const char *path, *file; + int flags, *fdp; + void *retp; +{ + RLAYOUT *rp; + off_t size1, size2; + int fd, ret; + char *name; + + fd = -1; + rp = NULL; + + /* Get the filename. */ + if ((ret = __db_appname(dbenv, appname, path, file, NULL, &name)) != 0) + return (ret); + + /* Open the file. */ + if ((ret = __db_fdopen(name, flags, DB_MUTEXDEBUG, 0, &fd)) != 0) { + __db_err(dbenv, "region open: %s: %s", name, strerror(ret)); + goto err2; + } + + *fdp = fd; + + /* + * Map the file in. We have to do things in a strange order so that + * we don't get into a situation where the file was just created and + * isn't yet initialized. See the comment in __db_rcreate() above. + * + * XXX + * We'd like to test to see if the file is too big to mmap. Since we + * don't know what size or type off_t's or size_t's are, or the largest + * unsigned integral type is, or what random insanity the local C + * compiler will perpetrate, doing the comparison in a portable way is + * flatly impossible. Hope that mmap fails if the file is too large. + * + */ + if ((ret = __db_stat(dbenv, name, fd, &size1, NULL)) != 0) + goto err2; + + /* Check to make sure the first block has been written. */ + if ((size_t) size1 < sizeof(RLAYOUT)) { + ret = EAGAIN; + goto err2; + } + + /* Map in whatever is there. */ + if ((ret = __db_rmap(dbenv, fd, size1, &rp)) != 0) + goto err2; + + /* + * Check to make sure the region has been initialized. We can't just + * grab the lock because the lock may not have been initialized yet. + */ + if (rp->majver == 0) { + ret = EAGAIN; + goto err2; + } + + /* Get the region lock. */ + if (!LF_ISSET(DB_MUTEXDEBUG)) + (void)__db_mutex_lock(&rp->lock, + fd, dbenv == NULL ? NULL : dbenv->db_yield); + + /* + * The file may have been half-written if we were descheduled between + * getting the size of the file and checking the major version. Check + * to make sure we got the entire file. + */ + if ((ret = __db_stat(dbenv, name, fd, &size2, NULL)) != 0) + goto err1; + if (size1 != size2) { + ret = EAGAIN; + goto err1; + } + + /* The file may have just been deleted. */ + if (F_ISSET(rp, DB_R_DELETED)) { + ret = EAGAIN; + goto err1; + } + + /* Increment the reference count. */ + ++rp->refcnt; + + /* Release the lock. */ + if (!LF_ISSET(DB_MUTEXDEBUG)) + (void)__db_mutex_unlock(&rp->lock, fd); + + FREES(name); + + *(void **)retp = rp; + return (0); + +err1: if (!LF_ISSET(DB_MUTEXDEBUG)) + (void)__db_mutex_unlock(&rp->lock, fd); +err2: if (rp != NULL) + (void)__db_munmap(rp, rp->size); + if (fd != -1) + (void)__db_close(fd); + FREES(name); + return (ret); +} + +/* + * __db_rclose -- + * Close a shared memory region. + * + * PUBLIC: int __db_rclose __P((DB_ENV *, int, void *)); + */ +int +__db_rclose(dbenv, fd, ptr) + DB_ENV *dbenv; + int fd; + void *ptr; +{ + RLAYOUT *rp; + int ret, t_ret; + const char *fail; + + rp = ptr; + fail = NULL; + + /* Get the lock. */ + if ((ret = __db_mutex_lock(&rp->lock, + fd, dbenv == NULL ? NULL : dbenv->db_yield)) != 0) { + fail = "lock get"; + goto err; + } + + /* Decrement the reference count. */ + --rp->refcnt; + + /* Release the lock. */ + if ((t_ret = __db_mutex_unlock(&rp->lock, fd)) != 0 && fail == NULL) { + ret = t_ret; + fail = "lock release"; + } + + /* Discard the region. */ + if ((t_ret = __db_munmap(ptr, rp->size)) != 0 && fail == NULL) { + ret = t_ret; + fail = "munmap"; + } + + if ((t_ret = __db_close(fd)) != 0 && fail == NULL) { + ret = t_ret; + fail = "close"; + } + + if (fail == NULL) + return (0); + +err: __db_err(dbenv, "region detach: %s: %s", fail, strerror(ret)); + return (ret); +} + +/* + * __db_runlink -- + * Remove a shared memory region. + * + * PUBLIC: int __db_runlink __P((DB_ENV *, + * PUBLIC: APPNAME, const char *, const char *, int)); + */ +int +__db_runlink(dbenv, appname, path, file, force) + DB_ENV *dbenv; + APPNAME appname; + const char *path, *file; + int force; +{ + RLAYOUT *rp; + int cnt, fd, ret, t_ret; + char *name; + + rp = NULL; + + /* Get the filename. */ + if ((ret = __db_appname(dbenv, appname, path, file, NULL, &name)) != 0) + return (ret); + + /* If the file doesn't exist, we're done. */ + if (__db_exists(name, NULL)) + return (0); /* XXX: ENOENT? */ + + /* + * If we're called with a force flag, try and unlink the file. This + * may not succeed if the file is currently open, but there's nothing + * we can do about that. There is a race condition between the check + * for existence above and the actual unlink. If someone else snuck + * in and removed it before we do the remove, then we might get an + * ENOENT error. If we get the ENOENT, we treat it as success, just + * as we do above. + */ + if (force) { + if ((ret = __db_unlink(name)) != 0 && ret != ENOENT) + goto err1; + FREES(name); + return (0); + } + + /* Open and lock the region. */ + if ((ret = __db_ropen(dbenv, appname, path, file, 0, &fd, &rp)) != 0) + goto err1; + (void)__db_mutex_lock(&rp->lock, + fd, dbenv == NULL ? NULL : dbenv->db_yield); + + /* If the region is currently being deleted, fail. */ + if (F_ISSET(rp, DB_R_DELETED)) { + ret = ENOENT; /* XXX: ENOENT? */ + goto err2; + } + + /* If the region is currently in use by someone else, fail. */ + if (rp->refcnt > 1) { + ret = EBUSY; + goto err2; + } + + /* Set the delete flag. */ + F_SET(rp, DB_R_DELETED); + + /* Release the lock and close the region. */ + (void)__db_mutex_unlock(&rp->lock, fd); + if ((t_ret = __db_rclose(dbenv, fd, rp)) != 0 && ret == 0) + goto err1; + + /* + * Unlink the region. There's a race here -- other threads or + * processes might be opening the region while we're trying to + * remove it. They'll fail, because we've set the DELETED flag, + * but they could still stop us from succeeding in the unlink. + */ + for (cnt = 5; cnt > 0; --cnt) { + if ((ret = __db_unlink(name)) == 0) + break; + (void)__db_sleep(0, 250000); + } + if (ret == 0) { + FREES(name); + return (0); + } + + /* Not a clue. Try to clear the DB_R_DELETED flag. */ + if ((ret = __db_ropen(dbenv, appname, path, file, 0, &fd, &rp)) != 0) + goto err1; + (void)__db_mutex_lock(&rp->lock, + fd, dbenv == NULL ? NULL : dbenv->db_yield); + F_CLR(rp, DB_R_DELETED); + /* FALLTHROUGH */ + +err2: (void)__db_mutex_unlock(&rp->lock, fd); + (void)__db_rclose(dbenv, fd, rp); +err1: __db_err(dbenv, "region unlink: %s: %s", name, strerror(ret)); + FREES(name); + return (ret); +} + +/* + * DB creates all regions on 4K boundaries so that we don't make the + * underlying VM unhappy. + */ +#define __DB_VMPAGESIZE (4 * 1024) + +/* + * __db_rgrow -- + * Extend a region by a specified amount. + * + * PUBLIC: int __db_rgrow __P((DB_ENV *, int, size_t)); + */ +int +__db_rgrow(dbenv, fd, incr) + DB_ENV *dbenv; + int fd; + size_t incr; +{ +#ifdef MMAP_INIT_NEEDED + size_t i; +#endif + ssize_t nw; + int ret; + char buf[__DB_VMPAGESIZE]; + + /* Seek to the end of the region. */ + if ((ret = __db_lseek(fd, 0, 0, 0, SEEK_END)) != 0) + goto err; + + /* Write nuls to the new bytes. */ + memset(buf, 0, sizeof(buf)); + + /* + * Historically, some systems required that all of the bytes of the + * region be written before you could mmap it and access it randomly. + */ +#ifdef MMAP_INIT_NEEDED + /* Extend the region by writing each new page. */ + for (i = 0; i < incr; i += __DB_VMPAGESIZE) { + if ((ret = __db_write(fd, buf, sizeof(buf), &nw)) != 0) + goto err; + if (nw != sizeof(buf)) + goto eio; + } +#else + /* + * Extend the region by writing the last page. + * + * Round off the increment to the next page boundary. + */ + incr += __DB_VMPAGESIZE - 1; + incr -= incr % __DB_VMPAGESIZE; + + /* Write the last page, not the page after the last. */ + if ((ret = __db_lseek(fd, 0, 0, incr - __DB_VMPAGESIZE, SEEK_CUR)) != 0) + goto err; + if ((ret = __db_write(fd, buf, sizeof(buf), &nw)) != 0) + goto err; + if (nw != sizeof(buf)) + goto eio; +#endif + return (0); + +eio: ret = EIO; +err: __db_err(dbenv, "region grow: %s", strerror(ret)); + return (ret); +} + +/* + * __db_rremap -- + * Unmap the old region and map in a new region of a new size. If + * either call fails, returns NULL, else returns the address of the + * new region. + * + * PUBLIC: int __db_rremap __P((DB_ENV *, void *, size_t, size_t, int, void *)); + */ +int +__db_rremap(dbenv, ptr, oldsize, newsize, fd, retp) + DB_ENV *dbenv; + void *ptr, *retp; + size_t oldsize, newsize; + int fd; +{ + int ret; + + if ((ret = __db_munmap(ptr, oldsize)) != 0) { + __db_err(dbenv, "region remap: munmap: %s", strerror(ret)); + return (ret); + } + + return (__db_rmap(dbenv, fd, newsize, retp)); +} + +/* + * __db_rmap -- + * Attach to a shared memory region. + */ +static int +__db_rmap(dbenv, fd, size, retp) + DB_ENV *dbenv; + int fd; + size_t size; + void *retp; +{ + RLAYOUT *rp; + int ret; + + if ((ret = __db_mmap(fd, size, 0, 0, &rp)) != 0) { + __db_err(dbenv, "region map: mmap %s", strerror(ret)); + return (ret); + } + if (rp->size < size) + rp->size = size; + + *(void **)retp = rp; + return (0); +} |