about summary refs log tree commit diff
path: root/db2/log/log_put.c
diff options
context:
space:
mode:
Diffstat (limited to 'db2/log/log_put.c')
-rw-r--r--db2/log/log_put.c210
1 files changed, 121 insertions, 89 deletions
diff --git a/db2/log/log_put.c b/db2/log/log_put.c
index 225595f33e..92d9563301 100644
--- a/db2/log/log_put.c
+++ b/db2/log/log_put.c
@@ -7,7 +7,7 @@
 #include "config.h"
 
 #ifndef lint
-static const char sccsid[] = "@(#)log_put.c	10.14 (Sleepycat) 9/23/97";
+static const char sccsid[] = "@(#)log_put.c	10.20 (Sleepycat) 11/2/97";
 #endif /* not lint */
 
 #ifndef NO_SYSTEM_INCLUDES
@@ -29,9 +29,10 @@ static const char sccsid[] = "@(#)log_put.c	10.14 (Sleepycat) 9/23/97";
 #include "common_ext.h"
 
 static int __log_fill __P((DB_LOG *, void *, u_int32_t));
+static int __log_flush __P((DB_LOG *, const DB_LSN *));
 static int __log_newfd __P((DB_LOG *));
-static int __log_write __P((DB_LOG *, void *, u_int32_t));
 static int __log_putr __P((DB_LOG *, const DBT *, u_int32_t));
+static int __log_write __P((DB_LOG *, void *, u_int32_t));
 
 /*
  * log_put --
@@ -63,11 +64,8 @@ log_put(dblp, lsn, dbt, flags)
 	}
 
 	LOCK_LOGREGION(dblp);
-
 	ret = __log_put(dblp, lsn, dbt, flags);
-
 	UNLOCK_LOGREGION(dblp);
-
 	return (ret);
 }
 
@@ -102,14 +100,10 @@ __log_put(dblp, lsn, dbt, flags)
 			    "log_put: record larger than maximum file size");
 			return (EINVAL);
 		}
-		if (lp->b_off != 0) {
-			if ((ret = __log_write(dblp, lp->buf, lp->b_off)) != 0)
-				return (ret);
-			if ((ret = __db_fsync(dblp->lfd)) != 0)
-				return (ret);
-			lp->s_lsn.file = lp->lsn.file;
-			lp->s_lsn.offset = lp->lsn.offset - 1;
-		}
+
+		/* Flush the log. */
+		if ((ret = __log_flush(dblp, NULL)) != 0)
+			return (ret);
 
 		/*
 		 * Save the last known offset from the previous file, we'll
@@ -117,9 +111,15 @@ __log_put(dblp, lsn, dbt, flags)
 		 */
 		lastoff = lp->lsn.offset;
 
+		/* Point the current LSN to the new file. */
 		++lp->lsn.file;
 		lp->lsn.offset = 0;
+
+		/* Reset the file write offset. */
 		lp->w_off = 0;
+
+		/* Reset the first-unwritten LSN for the buffer. */
+		lp->uw_lsn = lp->lsn;
 	} else
 		lastoff = 0;
 
@@ -149,56 +149,54 @@ __log_put(dblp, lsn, dbt, flags)
 	 *	Put out the checkpoint record (above).
 	 *	Save the LSN of the checkpoint in the shared region.
 	 *	Append the set of file name information into the log.
-	 *	Flush the current buffer contents to disk.
-	 *	Sync the log to disk.
-	 *	Save the time the checkpoint was written.
-	 *	Reset the bytes written since the last checkpoint.
 	 */
 	if (flags == DB_CHECKPOINT) {
 		lp->c_lsn = *lsn;
 
 		for (fnp = SH_TAILQ_FIRST(&dblp->lp->fq, __fname);
 		    fnp != NULL; fnp = SH_TAILQ_NEXT(fnp, q, __fname)) {
-			t.data = ADDR(dblp, fnp->name_off);
+			t.data = R_ADDR(dblp, fnp->name_off);
 			t.size = strlen(t.data) + 1;
 			memset(&fid_dbt, 0, sizeof(fid_dbt));
-			fid_dbt.data = ADDR(dblp, fnp->fileid_off);
+			fid_dbt.data = R_ADDR(dblp, fnp->fileid_off);
 			fid_dbt.size = DB_FILE_ID_LEN;
 			if ((ret = __log_register_log(dblp, NULL, &r_unused,
 			    0, &t, &fid_dbt, fnp->id, fnp->s_type)) != 0)
 				return (ret);
 		}
-		if (lp->b_off != 0 &&
-		    (ret = __log_write(dblp, lp->buf, lp->b_off)) != 0)
-			return (ret);
-		(void)time(&lp->chkpt);
-		lp->written = 0;
-
-		if ((ret = __db_fsync(dblp->lfd)) != 0)
-			return (ret);
-		lp->s_lsn.file = lp->lsn.file;
-		lp->s_lsn.offset = lp->lsn.offset - 1;
 	}
 
-	/* We always flush on a checkpoint. */
-	if (flags == DB_FLUSH || flags == DB_CHECKPOINT) {
-		if (lp->b_off != 0 &&
-		    (ret = __log_write(dblp, lp->buf, lp->b_off)) != 0)
+	/*
+	 * On a checkpoint or when flush is requested, we:
+	 *	Flush the current buffer contents to disk.
+	 *	Sync the log to disk.
+	 */
+	if (flags == DB_FLUSH || flags == DB_CHECKPOINT)
+		if ((ret = __log_flush(dblp, NULL)) != 0)
 			return (ret);
 
-		if ((ret = __db_fsync(dblp->lfd)) != 0)
-			return (ret);
-		lp->s_lsn.file = lp->lsn.file;
-		lp->s_lsn.offset = lp->lsn.offset - 1;
+	/*
+	 * On a checkpoint, we:
+	 *	Save the time the checkpoint was written.
+	 *	Reset the bytes written since the last checkpoint.
+	 */
+	if (flags == DB_CHECKPOINT) {
+		(void)time(&lp->chkpt);
+		lp->stat.st_wc_bytes = lp->stat.st_wc_mbytes = 0;
 	}
 
 	/*
-	 * If we just did I/O, i.e., this LSN could have spanned the start of
-	 * the in-core buffer, we remember it so that we can flush correctly
-	 * during a sync.
+	 * When an application calls the log_flush routine, we need to figure
+	 * out if the current buffer needs to be flushed.  The problem is that
+	 * if a record spans buffers, it's possible for the record continued
+	 * in the current buffer to have begun in a previous buffer.  Each time
+	 * we write a buffer, we update the first-unwritten LSN to point to the
+	 * first LSN after that written buffer.  If we have a spanning record,
+	 * correct that value to be the LSN that started it all, here.
 	 */
 	if (lsn->offset < lp->w_off && lsn->offset + lp->len > lp->w_off)
-		lp->span_lsn = *lsn;
+		lp->uw_lsn = *lsn;
+
 	return (0);
 }
 
@@ -248,6 +246,24 @@ log_flush(dblp, lsn)
 	DB_LOG *dblp;
 	const DB_LSN *lsn;
 {
+	int ret;
+
+	LOCK_LOGREGION(dblp);
+	ret = __log_flush(dblp, lsn);
+	UNLOCK_LOGREGION(dblp);
+	return (ret);
+}
+
+/*
+ * __log_flush --
+ *	Write all records less than or equal to the specified LSN; internal
+ *	version.
+ */
+static int
+__log_flush(dblp, lsn)
+	DB_LOG *dblp;
+	const DB_LSN *lsn;
+{
 	DB_LSN t_lsn;
 	LOG *lp;
 	int ret;
@@ -255,60 +271,64 @@ log_flush(dblp, lsn)
 	ret = 0;
 	lp = dblp->lp;
 
-	LOCK_LOGREGION(dblp);
-
-	/* If no LSN specified, flush the entire log. */
+	/*
+	 * If no LSN specified, flush the entire log by setting the flush LSN
+	 * to the last LSN written in the log.  Otherwise, check that the LSN
+	 * isn't a non-existent record for the log.
+	 */
 	if (lsn == NULL) {
 		t_lsn.file = lp->lsn.file;
 		t_lsn.offset = lp->lsn.offset - lp->len;
 		lsn = &t_lsn;
-	}
-
-	/* If it's a non-existent record, it's an error. */
-	if (lsn->file > lp->lsn.file ||
-	    (lsn->file == lp->lsn.file && lsn->offset > lp->lsn.offset)) {
-		__db_err(dblp->dbenv, "log_flush: LSN past current end-of-log");
-		ret = EINVAL;
-		goto ret1;
-	}
-
-	/*
-	 * If it's from a previous file, we're done because we sync each
-	 * file when we move to a new one.
-	 */
-	if (lsn->file < lp->lsn.file)
-		goto ret1;
+	} else
+		if (lsn->file > lp->lsn.file ||
+		    (lsn->file == lp->lsn.file &&
+		    lsn->offset > lp->lsn.offset - lp->len)) {
+			__db_err(dblp->dbenv,
+			    "log_flush: LSN past current end-of-log");
+			return (EINVAL);
+		}
 
 	/*
-	 * If it's less than the last-sync'd offset, we've already sync'd
-	 * this LSN.
+	 * If the LSN is less than the last-sync'd LSN, we're done.  Note,
+	 * the last-sync LSN saved in s_lsn is the LSN of the first byte 
+	 * that has not yet been written to disk, so the test is <, not <=.
 	 */
-	if (lsn->offset <= lp->s_lsn.offset)
-		goto ret1;
+	if (lsn->file < lp->s_lsn.file ||
+	    (lsn->file == lp->s_lsn.file && lsn->offset < lp->s_lsn.offset))
+		return (0);
 
 	/*
 	 * We may need to write the current buffer.  We have to write the
-	 * current buffer if the sync LSN is greater than or equal to the
-	 * saved spanning-LSN.
+	 * current buffer if the flush LSN is greater than or equal to the
+	 * first-unwritten LSN (uw_lsn).  If we write the buffer, then we
+	 * update the first-unwritten LSN.
 	 */
-	if (lsn->file >= lp->span_lsn.file &&
-	    lsn->offset >= lp->span_lsn.offset)
+	if (lp->b_off != 0 &&
+	    lsn->file >= lp->uw_lsn.file && lsn->offset >= lp->uw_lsn.offset)
 		if ((ret = __log_write(dblp, lp->buf, lp->b_off)) != 0)
-			goto ret1;
+			return (ret);
 
-	/* Acquire a file descriptor if we don't have one. */
-	if (dblp->lfname != dblp->lp->lsn.file &&
-	    (ret = __log_newfd(dblp)) != 0)
-		goto ret1;
+	/*
+	 * It's possible that this thread may never have written to this log
+	 * file.  Acquire a file descriptor if we don't already have one.
+	 */
+	if (dblp->lfname != dblp->lp->lsn.file)
+		if ((ret = __log_newfd(dblp)) != 0)
+			return (ret);
 
+	/* Sync all writes to disk. */
 	if ((ret = __db_fsync(dblp->lfd)) != 0)
-		goto ret1;
+		return (ret);
+	++lp->stat.st_scount;
 
-	lp->s_lsn.file = lp->lsn.file;
-	lp->s_lsn.offset = lsn->offset;
+	/*
+	 * Set the last-synced LSN, the first LSN after the last record
+	 * that we know is on disk.
+	 */
+	lp->s_lsn = lp->uw_lsn;
 
-ret1:	UNLOCK_LOGREGION(dblp);
-	return (ret);
+	return (0);
 }
 
 /*
@@ -385,17 +405,32 @@ __log_write(dblp, addr, len)
 	 * Seek to the offset in the file (someone may have written it
 	 * since we last did).
 	 */
-	if ((ret = __db_lseek(dblp->lfd, 0, 0, lp->w_off, SEEK_SET)) != 0)
+	if ((ret = __db_seek(dblp->lfd, 0, 0, lp->w_off, SEEK_SET)) != 0)
 		return (ret);
 	if ((ret = __db_write(dblp->lfd, addr, len, &nw)) != 0)
 		return (ret);
 	if (nw != (int32_t)len)
 		return (EIO);
 
-	/* Update the seek offset and reset the buffer offset. */
+	/*
+	 * Reset the buffer offset, update the seek offset, and update the
+	 * first-unwritten LSN.
+	 */
 	lp->b_off = 0;
 	lp->w_off += len;
-	lp->written += len;
+	lp->uw_lsn.file = lp->lsn.file;
+	lp->uw_lsn.offset = lp->w_off;
+
+	/* Update written statistics. */
+	if ((lp->stat.st_w_bytes += len) >= MEGABYTE) {
+		lp->stat.st_w_bytes -= MEGABYTE;
+		++lp->stat.st_w_mbytes;
+	}
+	if ((lp->stat.st_wc_bytes += len) >= MEGABYTE) {
+		lp->stat.st_wc_bytes -= MEGABYTE;
+		++lp->stat.st_wc_mbytes;
+	}
+	++lp->stat.st_wcount;
 
 	return (0);
 }
@@ -415,11 +450,8 @@ log_file(dblp, lsn, namep, len)
 	char *p;
 
 	LOCK_LOGREGION(dblp);
-
 	ret = __log_name(dblp, lsn->file, &p);
-
 	UNLOCK_LOGREGION(dblp);
-
 	if (ret != 0)
 		return (ret);
 
@@ -429,7 +461,7 @@ log_file(dblp, lsn, namep, len)
 		return (ENOMEM);
 	}
 	(void)strcpy(namep, p);
-	free(p);
+	__db_free(p);
 
 	return (0);
 }
@@ -455,7 +487,7 @@ __log_newfd(dblp)
 	dblp->lfname = dblp->lp->lsn.file;
 	if ((ret = __log_name(dblp, dblp->lfname, &p)) != 0)
 		return (ret);
-	if ((ret = __db_fdopen(p,
+	if ((ret = __db_open(p,
 	    DB_CREATE | DB_SEQUENTIAL,
 	    DB_CREATE | DB_SEQUENTIAL,
 	    dblp->lp->persist.mode, &dblp->lfd)) != 0)
@@ -472,14 +504,14 @@ __log_newfd(dblp)
  * PUBLIC: int __log_name __P((DB_LOG *, int, char **));
  */
 int
-__log_name(dblp, fileno, namep)
+__log_name(dblp, filenumber, namep)
 	DB_LOG *dblp;
 	char **namep;
-	int fileno;
+	int filenumber;
 {
 	char name[sizeof(LFNAME) + 10];
 
-	(void)snprintf(name, sizeof(name), LFNAME, fileno);
+	(void)snprintf(name, sizeof(name), LFNAME, filenumber);
 	return (__db_appname(dblp->dbenv,
 	    DB_APP_LOG, dblp->dir, name, NULL, namep));
 }