about summary refs log tree commit diff
path: root/db2/mp
diff options
context:
space:
mode:
Diffstat (limited to 'db2/mp')
-rw-r--r-- db2/mp/mp_bh.c 131
-rw-r--r-- db2/mp/mp_fget.c 13
-rw-r--r-- db2/mp/mp_fopen.c 34
-rw-r--r-- db2/mp/mp_fput.c 4
-rw-r--r-- db2/mp/mp_pr.c 4
-rw-r--r-- db2/mp/mp_sync.c 29
6 files changed, 135 insertions, 80 deletions
diff --git a/db2/mp/mp_bh.c b/db2/mp/mp_bh.c
index 578abedcb6..c23abdda24 100644
--- a/db2/mp/mp_bh.c
+++ b/db2/mp/mp_bh.c
@@ -7,7 +7,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)mp_bh.c	10.23 (Sleepycat) 11/26/97";
+static const char sccsid[] = "@(#)mp_bh.c	10.28 (Sleepycat) 1/8/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -193,30 +193,28 @@ __memp_pgread(dbmfp, bhp, can_create)
 	/* Call any pgin function. */
 pgin:	ret = mfp->ftype == 0 ? 0 : __memp_pg(dbmfp, bhp, 1);
 
-	/* Reacquire the region lock. */
+	/* Unlock the buffer and reacquire the region lock. */
+err:	UNLOCKBUFFER(dbmp, bhp);
 	LOCKREGION(dbmp);
 
-	/* If the pgin function succeeded, the data is now valid. */
-	if (ret == 0)
+	/*
+	 * If no errors occurred, the data is now valid, so clear the BH_TRASH
+	 * flag; regardless, clear the lock bit and let other threads proceed.
+	 */
+	F_CLR(bhp, BH_LOCKED);
+	if (ret == 0) {
 		F_CLR(bhp, BH_TRASH);
 
-	/* Update the statistics. */
-	if (can_create) {
-		++dbmp->mp->stat.st_page_create;
-		++mfp->stat.st_page_create;
-	} else {
-		++dbmp->mp->stat.st_page_in;
-		++mfp->stat.st_page_in;
-	}
-
-	if (0) {
-err:		LOCKREGION(dbmp);
+		/* Update the statistics. */
+		if (can_create) {
+			++dbmp->mp->stat.st_page_create;
+			++mfp->stat.st_page_create;
+		} else {
+			++dbmp->mp->stat.st_page_in;
+			++mfp->stat.st_page_in;
+		}
 	}
 
-	/* Release the buffer. */
-	F_CLR(bhp, BH_LOCKED);
-	UNLOCKBUFFER(dbmp, bhp);
-
 	return (ret);
 }
 
@@ -240,7 +238,7 @@ __memp_pgwrite(dbmfp, bhp, restartp, wrotep)
 	MPOOLFILE *mfp;
 	size_t pagesize;
 	ssize_t nw;
-	int callpgin, ret;
+	int callpgin, ret, syncfail;
 	const char *fail;
 
 	dbmp = dbmfp->dbmp;
@@ -255,8 +253,32 @@ __memp_pgwrite(dbmfp, bhp, restartp, wrotep)
 	callpgin = 0;
 	pagesize = mfp->stat.st_pagesize;
 
-	F_SET(bhp, BH_LOCKED);
+	/*
+	 * Check the dirty bit -- this buffer may have been written since we
+	 * decided to write it.
+	 */
+	if (!F_ISSET(bhp, BH_DIRTY)) {
+		if (wrotep != NULL)
+			*wrotep = 1;
+		return (0);
+	}
+
 	LOCKBUFFER(dbmp, bhp);
+
+	/*
+	 * If there were two writers, we may have just been waiting while the
+	 * other writer completed I/O on this buffer.  Check the dirty bit one
+	 * more time.
+	 */
+	if (!F_ISSET(bhp, BH_DIRTY)) {
+		UNLOCKBUFFER(dbmp, bhp);
+
+		if (wrotep != NULL)
+			*wrotep = 1;
+		return (0);
+	}
+
+	F_SET(bhp, BH_LOCKED);
 	UNLOCKREGION(dbmp);
 
 	if (restartp != NULL)
@@ -272,8 +294,9 @@ __memp_pgwrite(dbmfp, bhp, restartp, wrotep)
 		goto err;
 
 	/*
-	 * Call any pgout function.  We set the callpgin flag so that on
-	 * error we flag that the contents of the buffer may be trash.
+	 * Call any pgout function.  We set the callpgin flag to record that
+	 * the contents of the buffer will need to be passed through pgin
+	 * before they are reused.
 	 */
 	if (mfp->ftype == 0)
 		ret = 0;
@@ -307,7 +330,7 @@ __memp_pgwrite(dbmfp, bhp, restartp, wrotep)
 		 * between the failing clauses to __db_lseek and __db_write and
 		 * this ret != 0.
 		 */
-		fail = NULL;
+		COMPQUIET(fail, NULL);
 		goto syserr;
 	}
 
@@ -320,18 +343,20 @@ __memp_pgwrite(dbmfp, bhp, restartp, wrotep)
 	if (wrotep != NULL)
 		*wrotep = 1;
 
-	/* Reacquire the region lock. */
+	/* Unlock the buffer and reacquire the region lock. */
+	UNLOCKBUFFER(dbmp, bhp);
 	LOCKREGION(dbmp);
 
-	/* Clean up the flags based on a successful write. */
-	F_SET(bhp, BH_CALLPGIN);
+	/*
+	 * Clean up the flags based on a successful write.
+	 *
+	 * If we rewrote the page, it will need processing by the pgin
+	 * routine before reuse.
+	 */
+	if (callpgin)
+		F_SET(bhp, BH_CALLPGIN);
 	F_CLR(bhp, BH_DIRTY | BH_LOCKED);
 
-	++mp->stat.st_page_clean;
-	--mp->stat.st_page_dirty;
-
-	UNLOCKBUFFER(dbmp, bhp);
-
 	/*
 	 * If we write a buffer for which a checkpoint is waiting, update
 	 * the count of pending buffers (both in the mpool as a whole and
@@ -344,23 +369,36 @@ __memp_pgwrite(dbmfp, bhp, restartp, wrotep)
 	 *
 	 * XXX:
 	 * We ignore errors from the sync -- it makes no sense to return an
-	 * error to the calling process, so set a flag causing the sync to
-	 * be retried later.
-	 *
-	 * If the buffer we wrote has a LSN larger than the current largest
-	 * we've written for this checkpoint, update the saved value.
+	 * error to the calling process, so set a flag causing the checkpoint
+	 * to be retried later.
 	 */
 	if (F_ISSET(bhp, BH_WRITE)) {
+		if (mfp->lsn_cnt == 1) {
+			UNLOCKREGION(dbmp);
+			syncfail = __db_fsync(dbmfp->fd) != 0;
+			LOCKREGION(dbmp);
+			if (syncfail)
+				F_SET(mp, MP_LSN_RETRY);
+
+		}
+
+		F_CLR(bhp, BH_WRITE);
+
+		/*
+		 * If the buffer just written has a larger LSN than the current
+		 * max LSN written for this checkpoint, update the saved value.
+		 */
 		if (log_compare(&lsn, &mp->lsn) > 0)
 			mp->lsn = lsn;
-		F_CLR(bhp, BH_WRITE);
 
 		--mp->lsn_cnt;
-
-		if (--mfp->lsn_cnt == 0 && __db_fsync(dbmfp->fd) != 0)
-			F_SET(mp, MP_LSN_RETRY);
+		--mfp->lsn_cnt;
 	}
 
+	/* Update the page clean/dirty statistics. */
+	++mp->stat.st_page_clean;
+	--mp->stat.st_page_dirty;
+
 	/* Update I/O statistics. */
 	++mp->stat.st_page_out;
 	++mfp->stat.st_page_out;
@@ -370,11 +408,20 @@ __memp_pgwrite(dbmfp, bhp, restartp, wrotep)
 syserr:	__db_err(dbenv, "%s: %s failed for page %lu",
 	    __memp_fn(dbmfp), fail, (u_long)bhp->pgno);
 
-err:	UNLOCKBUFFER(dbmp, bhp);
+err:	/* Unlock the buffer and reacquire the region lock. */
+	UNLOCKBUFFER(dbmp, bhp);
 	LOCKREGION(dbmp);
+
+	/*
+	 * Clean up the flags based on a failure.
+	 *
+	 * The page remains dirty but we remove our lock.  If we rewrote the
+	 * page, it will need processing by the pgin routine before reuse.
+	 */
 	if (callpgin)
 		F_SET(bhp, BH_CALLPGIN);
 	F_CLR(bhp, BH_LOCKED);
+
 	return (ret);
 }
 
diff --git a/db2/mp/mp_fget.c b/db2/mp/mp_fget.c
index 1010751c92..f5955c4c6f 100644
--- a/db2/mp/mp_fget.c
+++ b/db2/mp/mp_fget.c
@@ -7,7 +7,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)mp_fget.c	10.32 (Sleepycat) 11/26/97";
+static const char sccsid[] = "@(#)mp_fget.c	10.33 (Sleepycat) 12/2/97";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -272,8 +272,17 @@ found:		/* Increment the reference count. */
 		 * discarded we know the buffer can't move and its contents
 		 * can't change.
 		 */
-		if (F_ISSET(bhp, BH_LOCKED)) {
+		for (cnt = 0; F_ISSET(bhp, BH_LOCKED); ++cnt) {
 			UNLOCKREGION(dbmp);
+
+			/*
+			 * Sleep so that we don't simply spin, switching locks.
+			 * (See the comment in include/mp.h.)
+			 */
+			if (cnt != 0 &&
+			    (__db_yield == NULL || __db_yield() != 0))
+				__db_sleep(0, 1);
+
 			LOCKBUFFER(dbmp, bhp);
 			/* Waiting for I/O to finish... */
 			UNLOCKBUFFER(dbmp, bhp);
diff --git a/db2/mp/mp_fopen.c b/db2/mp/mp_fopen.c
index bdc4713863..0f41122373 100644
--- a/db2/mp/mp_fopen.c
+++ b/db2/mp/mp_fopen.c
@@ -7,7 +7,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)mp_fopen.c	10.32 (Sleepycat) 11/26/97";
+static const char sccsid[] = "@(#)mp_fopen.c	10.37 (Sleepycat) 1/18/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -28,8 +28,8 @@ static const char sccsid[] = "@(#)mp_fopen.c	10.32 (Sleepycat) 11/26/97";
 #include "common_ext.h"
 
 static int __memp_mf_close __P((DB_MPOOL *, DB_MPOOLFILE *));
-static int __memp_mf_open __P((DB_MPOOL *, DB_MPOOLFILE *,
-    const char *, int, size_t, off_t, int, DBT *, u_int8_t *, MPOOLFILE **));
+static int __memp_mf_open __P((DB_MPOOL *, const char *,
+    int, size_t, db_pgno_t, int, DBT *, u_int8_t *, MPOOLFILE **));
 
 /*
  * memp_fopen --
@@ -84,7 +84,9 @@ __memp_fopen(dbmp, mfp, path,
 {
 	DB_ENV *dbenv;
 	DB_MPOOLFILE *dbmfp;
-	off_t size;
+	db_pgno_t last_pgno;
+	size_t size;
+	u_int32_t mbytes, bytes;
 	int ret;
 	u_int8_t idbuf[DB_FILE_ID_LEN];
 	char *rpath;
@@ -120,6 +122,7 @@ __memp_fopen(dbmp, mfp, path,
 			goto err;
 		}
 		size = 0;
+		last_pgno = 0;
 	} else {
 		/* Get the real name for this file and open it. */
 		if ((ret = __db_appname(dbenv,
@@ -133,17 +136,20 @@ __memp_fopen(dbmp, mfp, path,
 		}
 
 		/* Don't permit files that aren't a multiple of the pagesize. */
-		if ((ret = __db_ioinfo(rpath, dbmfp->fd, &size, NULL)) != 0) {
+		if ((ret = __db_ioinfo(rpath,
+		    dbmfp->fd, &mbytes, &bytes, NULL)) != 0) {
 			__db_err(dbenv, "%s: %s", rpath, strerror(ret));
 			goto err;
 		}
-		if (size % pagesize) {
+		if (bytes % pagesize) {
 			__db_err(dbenv,
 			    "%s: file size not a multiple of the pagesize",
 			    rpath);
 			ret = EINVAL;
 			goto err;
 		}
+		size = mbytes * MEGABYTE + bytes;
+		last_pgno = size == 0 ? 0 : (size - 1) / pagesize;
 
 		/*
 		 * Get the file id if we weren't given one.  Generated file id's
@@ -155,6 +161,7 @@ __memp_fopen(dbmp, mfp, path,
 				goto err;
 			fileid = idbuf;
 		}
+		FREES(rpath);
 	}
 
 	/*
@@ -166,8 +173,8 @@ __memp_fopen(dbmp, mfp, path,
 		LOCKREGION(dbmp);
 
 	if (mfp == NULL)
-		ret = __memp_mf_open(dbmp, dbmfp, path,
-		    ftype, pagesize, size, lsn_offset, pgcookie, fileid, &mfp);
+		ret = __memp_mf_open(dbmp, path, ftype,
+		    pagesize, last_pgno, lsn_offset, pgcookie, fileid, &mfp);
 	else {
 		++mfp->ref;
 		ret = 0;
@@ -216,7 +223,7 @@ __memp_fopen(dbmp, mfp, path,
 		if (LF_ISSET(DB_NOMMAP))
 			F_CLR(mfp, MP_CAN_MMAP);
 		if (size > (dbenv == NULL || dbenv->mp_mmapsize == 0 ?
-		    DB_MAXMMAPSIZE : (off_t)dbenv->mp_mmapsize))
+		    DB_MAXMMAPSIZE : dbenv->mp_mmapsize))
 			F_CLR(mfp, MP_CAN_MMAP);
 	}
 	dbmfp->addr = NULL;
@@ -253,14 +260,13 @@ err:	/*
  *	Open an MPOOLFILE.
  */
 static int
-__memp_mf_open(dbmp, dbmfp, path,
-    ftype, pagesize, size, lsn_offset, pgcookie, fileid, retp)
+__memp_mf_open(dbmp, path,
+    ftype, pagesize, last_pgno, lsn_offset, pgcookie, fileid, retp)
 	DB_MPOOL *dbmp;
-	DB_MPOOLFILE *dbmfp;
 	const char *path;
 	int ftype, lsn_offset;
 	size_t pagesize;
-	off_t size;
+	db_pgno_t last_pgno;
 	DBT *pgcookie;
 	u_int8_t *fileid;
 	MPOOLFILE **retp;
@@ -314,7 +320,7 @@ __memp_mf_open(dbmp, dbmfp, path,
 	 * it away.
 	 */
 	mfp->stat.st_pagesize = pagesize;
-	mfp->last_pgno = size == 0 ? 0 : (size - 1) / mfp->stat.st_pagesize;
+	mfp->last_pgno = last_pgno;
 
 	F_SET(mfp, MP_CAN_MMAP);
 	if (ISTEMPORARY)
diff --git a/db2/mp/mp_fput.c b/db2/mp/mp_fput.c
index 38e86b8ac5..335ee9ff16 100644
--- a/db2/mp/mp_fput.c
+++ b/db2/mp/mp_fput.c
@@ -7,7 +7,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)mp_fput.c	10.16 (Sleepycat) 11/26/97";
+static const char sccsid[] = "@(#)mp_fput.c	10.17 (Sleepycat) 12/20/97";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -105,7 +105,7 @@ memp_fput(dbmfp, pgaddr, flags)
 #ifdef DEBUG
 	if (bhp->ref == 0) {
 		__db_err(dbmp->dbenv,
-		    "Internal error: bhp->ref on page %lu went negative.",
+    "Unpinned page returned: reference count on page %lu went negative.",
 		    (u_long)bhp->pgno);
 		abort();
 	}
diff --git a/db2/mp/mp_pr.c b/db2/mp/mp_pr.c
index 6ff1131b6e..13a6c62d35 100644
--- a/db2/mp/mp_pr.c
+++ b/db2/mp/mp_pr.c
@@ -7,7 +7,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)mp_pr.c	10.20 (Sleepycat) 11/26/97";
+static const char sccsid[] = "@(#)mp_pr.c	10.21 (Sleepycat) 1/6/98";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -25,8 +25,6 @@ static const char sccsid[] = "@(#)mp_pr.c	10.20 (Sleepycat) 11/26/97";
 #include "db_shash.h"
 #include "mp.h"
 
-void __memp_debug __P((DB_MPOOL *, FILE *, int));
-
 static void __memp_pbh __P((FILE *, DB_MPOOL *, BH *, int));
 static void __memp_pdbmf __P((FILE *, DB_MPOOLFILE *, int));
 static void __memp_pmf __P((FILE *, MPOOLFILE *, int));
diff --git a/db2/mp/mp_sync.c b/db2/mp/mp_sync.c
index 47a7f2ebca..6d16cf3cd4 100644
--- a/db2/mp/mp_sync.c
+++ b/db2/mp/mp_sync.c
@@ -7,7 +7,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)mp_sync.c	10.17 (Sleepycat) 11/26/97";
+static const char sccsid[] = "@(#)mp_sync.c	10.19 (Sleepycat) 12/3/97";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -39,7 +39,7 @@ memp_sync(dbmp, lsnp)
 	DB_ENV *dbenv;
 	MPOOL *mp;
 	MPOOLFILE *mfp;
-	int ar_cnt, cnt, nalloc, next, notused, ret, wrote;
+	int ar_cnt, cnt, nalloc, next, ret, wrote;
 
 	dbenv = dbmp->dbenv;
 
@@ -180,32 +180,28 @@ memp_sync(dbmp, lsnp)
 
 		/* Write the buffer. */
 		mfp = R_ADDR(dbmp, bharray[next]->mf_offset);
-		ret =
-		    __memp_bhwrite(dbmp, mfp, bharray[next], &notused, &wrote);
+		ret = __memp_bhwrite(dbmp, mfp, bharray[next], NULL, &wrote);
 
 		/* Release the buffer. */
 		--bharray[next]->ref;
 
 		/* If there's an error, release the rest of the buffers. */
 		if (ret != 0 || !wrote) {
-			while (++next < ar_cnt)
-				--bharray[next]->ref;
-
-			if (ret != 0)
-				goto err;
-
 			/*
 			 * Any process syncing the shared memory buffer pool
 			 * had better be able to write to any underlying file.
 			 * Be understanding, but firm, on this point.
 			 */
-			if (!wrote) {
+			if (ret == 0) {
 				__db_err(dbenv, "%s: unable to flush page: %lu",
 				    __memp_fns(dbmp, mfp),
 				    (u_long)bharray[next]->pgno);
 				ret = EPERM;
-				goto err;
 			}
+
+			while (++next < ar_cnt)
+				--bharray[next]->ref;
+			goto err;
 		}
 	}
 	ret = mp->lsn_cnt ? DB_INCOMPLETE : 0;
@@ -242,7 +238,7 @@ memp_fsync(dbmfp)
 	BH *bhp, **bharray;
 	DB_MPOOL *dbmp;
 	size_t mf_offset;
-	int ar_cnt, cnt, nalloc, next, pincnt, notused, ret, wrote;
+	int ar_cnt, cnt, nalloc, next, pincnt, ret, wrote;
 
 	dbmp = dbmfp->dbmp;
 
@@ -333,7 +329,7 @@ memp_fsync(dbmfp)
 		}
 
 		/* Write the buffer. */
-		ret = __memp_pgwrite(dbmfp, bharray[next], &notused, &wrote);
+		ret = __memp_pgwrite(dbmfp, bharray[next], NULL, &wrote);
 
 		/* Release the buffer. */
 		--bharray[next]->ref;
@@ -379,7 +375,7 @@ memp_trickle(dbmp, pct, nwrotep)
 	MPOOL *mp;
 	MPOOLFILE *mfp;
 	u_long total;
-	int notused, ret, wrote;
+	int ret, wrote;
 
 	mp = dbmp->mp;
 	if (nwrotep != NULL)
@@ -423,8 +419,7 @@ loop:	total = mp->stat.st_page_clean + mp->stat.st_page_dirty;
 		if (F_ISSET(mfp, MP_TEMP))
 			continue;
 
-		if ((ret =
-		    __memp_bhwrite(dbmp, mfp, bhp, &notused, &wrote)) != 0)
+		if ((ret = __memp_bhwrite(dbmp, mfp, bhp, NULL, &wrote)) != 0)
 			goto err;
 
 		/*