Diffstat (limited to 'db2/mp/mp_region.c')
-rw-r--r--	db2/mp/mp_region.c	340
1 file changed, 340 insertions(+), 0 deletions(-)
diff --git a/db2/mp/mp_region.c b/db2/mp/mp_region.c
new file mode 100644
index 0000000000..a5c52123b9
--- /dev/null
+++ b/db2/mp/mp_region.c
@@ -0,0 +1,340 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *	Sleepycat Software.  All rights reserved.
+ */
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)mp_region.c	10.11 (Sleepycat) 8/2/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#endif
+
+#include "db_int.h"
+#include "shqueue.h"
+#include "db_shash.h"
+#include "mp.h"
+#include "common_ext.h"
+
+/*
+ * __memp_ralloc --
+ *	Allocate some space in the mpool region.
+ *
+ * PUBLIC: int __memp_ralloc __P((DB_MPOOL *, size_t, size_t *, void *));
+ */
+int
+__memp_ralloc(dbmp, len, offsetp, retp)
+	DB_MPOOL *dbmp;
+	size_t len, *offsetp;
+	void *retp;
+{
+	BH *bhp, *nbhp;
+	MPOOL *mp;
+	MPOOLFILE *mfp;
+	size_t fsize, total;
+	int nomore, restart, ret, wrote;
+	void *p;
+
+	mp = dbmp->mp;
+
+	nomore = 0;
+alloc:	if ((ret = __db_shalloc(dbmp->addr, len, MUTEX_ALIGNMENT, &p)) == 0) {
+		if (offsetp != NULL)
+			*offsetp = OFFSET(dbmp, p);
+		*(void **)retp = p;
+		return (0);
+	}
+	if (nomore) {
+		__db_err(dbmp->dbenv, "%s", strerror(ret));
+		return (ret);
+	}
+
+	/* Look for a buffer on the free list that's the right size. */
+	for (bhp =
+	    SH_TAILQ_FIRST(&mp->bhfq, __bh); bhp != NULL; bhp = nbhp) {
+		nbhp = SH_TAILQ_NEXT(bhp, q, __bh);
+
+		if (__db_shsizeof(bhp) == len) {
+			SH_TAILQ_REMOVE(&mp->bhfq, bhp, q, __bh);
+			if (offsetp != NULL)
+				*offsetp = OFFSET(dbmp, bhp);
+			*(void **)retp = bhp;
+			return (0);
+		}
+	}
+
+	/* Discard from the free list until we've freed enough memory. */
+	total = 0;
+	for (bhp =
+	    SH_TAILQ_FIRST(&mp->bhfq, __bh); bhp != NULL; bhp = nbhp) {
+		nbhp = SH_TAILQ_NEXT(bhp, q, __bh);
+
+		SH_TAILQ_REMOVE(&mp->bhfq, bhp, q, __bh);
+		__db_shalloc_free(dbmp->addr, bhp);
+
+		/*
+		 * Retry as soon as we've freed up sufficient space.  If we
+		 * have to coalesce chunks of memory to satisfy the request,
+		 * don't try again until it's likely that we'll succeed.
+		 */
+		total += fsize = __db_shsizeof(bhp);
+		if (fsize >= len || total >= 3 * len)
+			goto alloc;
+	}
+
+retry:	/* Find a buffer we can flush; pure LRU. */
+	total = 0;
+	for (bhp =
+	    SH_TAILQ_FIRST(&mp->bhq, __bh); bhp != NULL; bhp = nbhp) {
+		nbhp = SH_TAILQ_NEXT(bhp, q, __bh);
+
+		/* Ignore pinned or locked (I/O in progress) buffers. */
+		if (bhp->ref != 0 || F_ISSET(bhp, BH_LOCKED))
+			continue;
+
+		/* Find the associated MPOOLFILE. */
+		mfp = ADDR(dbmp, bhp->mf_offset);
+
+		/*
+		 * Write the page if it's dirty.
+		 *
+		 * If we wrote the page, fall through and free the buffer.  We
+		 * don't have to rewalk the list to acquire the buffer because
+		 * it was never available for any other process to modify.
+		 * If we didn't write the page, but we discarded and reacquired
+		 * the region lock, restart the buffer list walk.  If we neither
+		 * wrote the buffer nor discarded the region lock, continue down
+		 * the buffer list.
+		 */
+		if (F_ISSET(bhp, BH_DIRTY)) {
+			if ((ret = __memp_bhwrite(dbmp,
+			    mfp, bhp, &restart, &wrote)) != 0)
+				return (ret);
+
+			/*
+			 * It's possible that another process wants this buffer
+			 * and incremented the ref count while we were writing
+			 * it.
+			 */
+			if (bhp->ref != 0)
+				goto retry;
+
+			if (wrote)
+				++mp->stat.st_rw_evict;
+			else {
+				if (restart)
+					goto retry;
+				else
+					continue;
+			}
+		} else
+			++mp->stat.st_ro_evict;
+
+		/*
+		 * Check to see if the buffer is the size we're looking for.
+		 * If it is, simply reuse it.
+		 */
+		total += fsize = __db_shsizeof(bhp);
+		if (fsize == len) {
+			__memp_bhfree(dbmp, mfp, bhp, 0);
+
+			if (offsetp != NULL)
+				*offsetp = OFFSET(dbmp, bhp);
+			*(void **)retp = bhp;
+			return (0);
+		}
+
+		/* Free the buffer. */
+		__memp_bhfree(dbmp, mfp, bhp, 1);
+
+		/*
+		 * Retry as soon as we've freed up sufficient space.  If we
+		 * have to coalesce chunks of memory to satisfy the request,
+		 * don't try again until it's likely that we'll succeed.
+		 */
+		if (fsize >= len || total >= 3 * len)
+			goto alloc;
+
+		/* Restart the walk if we discarded the region lock. */
+		if (restart)
+			goto retry;
+	}
+	nomore = 1;
+	goto alloc;
+}
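
A minimal usage sketch (not part of the original file): __memp_ralloc() hands back both a pointer into the region and, optionally, the region-relative offset, so a caller can record the offset in shared structures and convert it back to an address in any attached process. The helper name, the pagesize parameter, and the assumption that the region lock is already held are illustrative, not taken from this file.

static int
alloc_buffer_header(dbmp, pagesize, bhpp)
	DB_MPOOL *dbmp;
	size_t pagesize;
	BH **bhpp;
{
	size_t offset;
	int ret;

	/* Carve a buffer header plus one page of data out of the region. */
	if ((ret = __memp_ralloc(dbmp,
	    sizeof(BH) + pagesize, &offset, bhpp)) != 0)
		return (ret);

	/*
	 * The offset is region-relative (equivalent to OFFSET(dbmp, *bhpp))
	 * and can be stored in shared memory, then mapped back to an
	 * address with ADDR() by any process attached to the region.
	 */
	return (0);
}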
+
+/*
+ * __memp_ropen --
+ *	Attach to, and optionally create, the mpool region.
+ *
+ * PUBLIC: int __memp_ropen
+ * PUBLIC:    __P((DB_MPOOL *, const char *, size_t, int, int));
+ */
+int
+__memp_ropen(dbmp, path, cachesize, mode, flags)
+	DB_MPOOL *dbmp;
+	const char *path;
+	size_t cachesize;
+	int mode, flags;
+{
+	MPOOL *mp;
+	size_t rlen;
+	int fd, newregion, ret, retry_cnt;
+
+	/*
+	 * Unlike other DB subsystems, mpool can't simply grow the region
+	 * because it returns pointers into the region to its clients.  To
+	 * "grow" the region, we'd have to allocate a new region and then
+	 * store a region number in the structures that reference regional
+	 * objects.  It's reasonable simply to fail instead, as clients
+	 * shouldn't have every page in the region pinned, so the only
+	 * "failure" mode should be a performance penalty because we don't
+	 * find a page in the cache that we'd like to have found.
+	 *
+	 * Up the user's cachesize by 25% to account for our overhead.
+	 */
+	if (cachesize < DB_CACHESIZE_MIN)
+		if (cachesize == 0)
+			cachesize = DB_CACHESIZE_DEF;
+		else
+			cachesize = DB_CACHESIZE_MIN;
+	rlen = cachesize + cachesize / 4;
+
+	/* Map in the region. */
+	retry_cnt = newregion = 0;
+retry:	if (LF_ISSET(DB_CREATE)) {
+		/*
+		 * If it's a private mpool, use malloc; it's a lot faster than
+		 * instantiating a region.
+		 *
+		 * XXX
+		 * If we're doing locking and don't have spinlocks for this
+		 * architecture, we'd have to instantiate the file, since we
+		 * need the file descriptor for locking.  However, it should
+		 * not be possible for DB_THREAD to be set if HAVE_SPINLOCKS
+		 * isn't defined.
+		 */
+		if (F_ISSET(dbmp, MP_ISPRIVATE))
+			ret = (dbmp->maddr = malloc(rlen)) == NULL ? ENOMEM : 0;
+		else
+			ret = __db_rcreate(dbmp->dbenv, DB_APP_NONE, path,
+			    DB_DEFAULT_MPOOL_FILE, mode, rlen, &fd,
+			    &dbmp->maddr);
+		if (ret == 0) {
+			/* Put the MPOOL structure first in the region. */
+			mp = dbmp->maddr;
+
+			SH_TAILQ_INIT(&mp->bhq);
+			SH_TAILQ_INIT(&mp->bhfq);
+			SH_TAILQ_INIT(&mp->mpfq);
+
+			/* Initialize the rest of the region as free space. */
+			dbmp->addr = (u_int8_t *)dbmp->maddr + sizeof(MPOOL);
+			__db_shalloc_init(dbmp->addr, rlen - sizeof(MPOOL));
+
+			/*
+			 * Pretend that the cache will be broken up into 4K
+			 * pages, and that we want to keep it under, say, 10
+			 * pages on each chain.  This means a 256MB cache will
+			 * allocate ~6500 offset pairs.
+			 */
+			mp->htab_buckets =
+			    __db_tablesize((cachesize / (4 * 1024)) / 10);
+
+			/* Allocate hash table space and initialize it. */
+			if ((ret = __db_shalloc(dbmp->addr,
+			    mp->htab_buckets * sizeof(DB_HASHTAB),
+			    0, &dbmp->htab)) != 0)
+				goto err;
+			__db_hashinit(dbmp->htab, mp->htab_buckets);
+			mp->htab = OFFSET(dbmp, dbmp->htab);
+
+			memset(&mp->stat, 0, sizeof(mp->stat));
+			mp->stat.st_cachesize = cachesize;
+
+			mp->flags = 0;
+
+			newregion = 1;
+		} else if (ret != EEXIST)
+			return (ret);
+	}
+
+	/* If we didn't or couldn't create the region, try to join it. */
+	if (!newregion &&
+	    (ret = __db_ropen(dbmp->dbenv, DB_APP_NONE,
+	    path, DB_DEFAULT_MPOOL_FILE, 0, &fd, &dbmp->maddr)) != 0) {
+		/*
+		 * If we failed because the file wasn't available, wait a
+		 * second and try again.
+		 */
+		if (ret == EAGAIN && ++retry_cnt < 3) {
+			(void)__db_sleep(1, 0);
+			goto retry;
+		}
+		return (ret);
+	}
+
+	/* Set up the common pointers. */
+	dbmp->mp = dbmp->maddr;
+	dbmp->addr = (u_int8_t *)dbmp->maddr + sizeof(MPOOL);
+
+	/*
+	 * If not already locked, lock the region -- if it's a new region,
+	 * then either __db_rcreate() locked it for us or we malloc'd it
+	 * instead of creating a region, neither of which requires locking
+	 * here.
+	 */
+	if (!newregion)
+		LOCKREGION(dbmp);
+
+	/*
+	 * Get the hash table address; it's on the shared page, so we have
+	 * to lock first.
+	 */
+	dbmp->htab = ADDR(dbmp, dbmp->mp->htab);
+
+	dbmp->fd = fd;
+
+	/* If we locked the region, release it now. */
+	if (!F_ISSET(dbmp, MP_ISPRIVATE))
+		UNLOCKREGION(dbmp);
+	return (0);
+
+err:	if (fd != -1) {
+		dbmp->fd = fd;
+		(void)__memp_rclose(dbmp);
+	}
+
+	if (newregion)
+		(void)memp_unlink(path, 1, dbmp->dbenv);
+	return (ret);
+}
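
The hash-table sizing comment in __memp_ropen() works out as follows. This is a standalone sketch of the arithmetic only, not part of the original file; the real code hands the estimate to __db_tablesize(), which rounds it to a suitable table size.

#include <stdio.h>

int
main()
{
	size_t cachesize, pages, buckets;

	cachesize = 256 * 1024 * 1024;	/* 256MB cache. */
	pages = cachesize / (4 * 1024);	/* 65536 assumed 4K pages. */
	buckets = pages / 10;		/* roughly 10 pages per hash chain. */

	/* Prints 6553, matching the "~6500 offset pairs" in the comment. */
	printf("%lu\n", (unsigned long)buckets);
	return (0);
}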
+
+/*
+ * __memp_rclose --
+ *	Close the mpool region.
+ *
+ * PUBLIC: int __memp_rclose __P((DB_MPOOL *));
+ */
+int
+__memp_rclose(dbmp)
+	DB_MPOOL *dbmp;
+{
+	if (F_ISSET(dbmp, MP_ISPRIVATE)) {
+		free(dbmp->maddr);
+		return (0);
+	}
+	return (__db_rclose(dbmp->dbenv, dbmp->fd, dbmp->maddr));
+}