diff options
Diffstat (limited to 'db2')
105 files changed, 3593 insertions, 1850 deletions
diff --git a/db2/Makefile b/db2/Makefile index e6b35aa51b..8e5cea7b17 100644 --- a/db2/Makefile +++ b/db2/Makefile @@ -58,8 +58,8 @@ libdb-routines := bt_close bt_compare bt_conv bt_cursor bt_delete \ bt_split bt_stat btree_auto db db_appinit db_apprec \ db_auto \ db_byteorder db_conv db_dispatch db_dup db_err db_log2 \ - db_os_abs db_os_dir db_os_fid db_os_lseek db_os_mmap \ - db_os_open db_os_rw db_os_sleep db_os_stat db_os_unlink \ + os_abs os_dir os_fid os_fsync os_func os_map os_oflags \ + os_open os_rpath os_rw os_seek os_sleep os_stat os_unlink \ db_overflow db_pr db_rec db_region db_ret db_salloc \ db_shash db_thread hash hash_auto hash_conv hash_debug \ hash_dup hash_func hash_page hash_rec hash_stat lock \ diff --git a/db2/btree/bt_cursor.c b/db2/btree/bt_cursor.c index a1266bcd3c..e5f3faeb70 100644 --- a/db2/btree/bt_cursor.c +++ b/db2/btree/bt_cursor.c @@ -8,7 +8,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)bt_cursor.c 10.33 (Sleepycat) 9/24/97"; +static const char sccsid[] = "@(#)bt_cursor.c 10.35 (Sleepycat) 10/25/97"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -62,10 +62,10 @@ __bam_cursor(dbp, txn, dbcp) DEBUG_LWRITE(dbp, txn, "bam_cursor", NULL, NULL, 0); - if ((dbc = (DBC *)calloc(1, sizeof(DBC))) == NULL) + if ((dbc = (DBC *)__db_calloc(1, sizeof(DBC))) == NULL) return (ENOMEM); - if ((cp = (CURSOR *)calloc(1, sizeof(CURSOR))) == NULL) { - free(dbc); + if ((cp = (CURSOR *)__db_calloc(1, sizeof(CURSOR))) == NULL) { + __db_free(dbc); return (ENOMEM); } @@ -474,7 +474,7 @@ __bam_c_rget(dbp, cp, key, data, flags) __bam_stkrel(dbp); err: (void)memp_fput(dbp->mpf, cp->page, 0); - free(dbt.data); + __db_free(dbt.data); return (ret); } @@ -1422,7 +1422,7 @@ __bam_c_physdel(dbp, cp, h) DB_LOCK lock; db_indx_t indx; db_pgno_t pgno, next_pgno, prev_pgno; - int local, ret; + int local, normal, ret; t = dbp->internal; ret = 0; @@ -1457,51 +1457,65 @@ __bam_c_physdel(dbp, cp, h) local = 0; /* - * If we're deleting a 
duplicate entry, call the common code to do - * the work. + * If we're deleting a duplicate entry and there are other duplicate + * entries remaining, call the common code to do the work and fix up + * the parent page as necessary. Otherwise, do a normal btree delete. + * + * There are 5 possible cases: + * + * 1. It's not a duplicate item: do a normal btree delete. + * 2. It's a duplicate item: + * 2a: We delete an item from a page of duplicates, but there are + * more items on the page. + * 2b: We delete the last item from a page of duplicates, deleting + * the last duplicate. + * 2c: We delete the last item from a page of duplicates, but there + * is a previous page of duplicates. + * 2d: We delete the last item from a page of duplicates, but there + * is a following page of duplicates. + * + * In the case of: + * + * 1: There's nothing further to do. + * 2a: There's nothing further to do. + * 2b: Do the normal btree delete instead of a duplicate delete, as + * that deletes both the duplicate chain and the parent page's + * entry. + * 2c: There's nothing further to do. + * 2d: Delete the duplicate, and update the parent page's entry. */ if (TYPE(h) == P_DUPLICATE) { pgno = PGNO(h); prev_pgno = PREV_PGNO(h); next_pgno = NEXT_PGNO(h); - if ((ret = __db_drem(dbp, &h, indx, __bam_free)) != 0) - goto err; - /* - * There are 4 cases: - * - * 1. We removed an item on a page, but there are more items - * on the page. - * 2. We removed the last item on a page, removing the last - * duplicate. - * 3. We removed the last item on a page, but there is a - * following page of duplicates. - * 4. We removed the last item on a page, but there is a - * previous page of duplicates. - * - * In case 1, h != NULL, h->pgno == pgno - * In case 2, h == NULL, - * prev_pgno == PGNO_INVALID, next_pgno == PGNO_INVALID - * In case 3, h != NULL, next_pgno != PGNO_INVALID - * In case 4, h == NULL, prev_pgno != PGNO_INVALID - * - * In case 1, there's nothing else to do. 
- * In case 2, remove the entry from the parent page. - * In case 3 or 4, if the deleted page was the first in a chain - * of duplicate pages, update the parent page's entry. - * - * Test: - * If there were previous pages of duplicates or we didn't - * empty the current page of duplicates, we don't need to - * touch the parent page. - */ - if (prev_pgno != PGNO_INVALID || (h != NULL && pgno == h->pgno)) - goto done; + if (NUM_ENT(h) == 1 && + prev_pgno == PGNO_INVALID && next_pgno == PGNO_INVALID) + normal = 1; + else { + normal = 0; - /* - * Release any page we're holding and the lock on the deleted - * page. - */ + /* Delete the duplicate. */ + if ((ret = __db_drem(dbp, &h, indx, __bam_free)) != 0) + goto err; + + /* + * 2a: h != NULL, h->pgno == pgno + * 2b: We don't reach this clause, as the above test + * was true. + * 2c: h == NULL, prev_pgno != PGNO_INVALID + * 2d: h != NULL, next_pgno != PGNO_INVALID + * + * Test for 2a and 2c: if we didn't empty the current + * page or there was a previous page of duplicates, we + * don't need to touch the parent page. + */ + if ((h != NULL && pgno == h->pgno) || + prev_pgno != PGNO_INVALID) + goto done; + } + + /* Release any page we're holding and its lock. */ if (local) { if (h != NULL) (void)memp_fput(dbp->mpf, h, 0); @@ -1519,37 +1533,33 @@ __bam_c_physdel(dbp, cp, h) } local = 1; - /* - * If we deleted the last duplicate, we can fall out and do a - * normal btree delete in the context of the parent page. If - * not, we have to update the parent's page. - */ + /* Switch to the parent page's entry. */ indx = cp->indx; - if (next_pgno != PGNO_INVALID) { - /* - * Copy, delete, update and re-insert the parent page's - * entry. - */ - bo = *GET_BOVERFLOW(h, indx); - (void)__db_ditem(dbp, h, indx, BOVERFLOW_SIZE); - bo.pgno = next_pgno; - memset(&dbt, 0, sizeof(dbt)); - dbt.data = &bo; - dbt.size = BOVERFLOW_SIZE; - (void)__db_pitem(dbp, - h, indx, BOVERFLOW_SIZE, &dbt, NULL); - - /* Discard the parent page. 
*/ - (void)memp_fput(dbp->mpf, h, 0); - (void)__BT_TLPUT(dbp, lock); - local = 0; + if (normal) + goto btd; - goto done; - } + /* + * Copy, delete, update, add-back the parent page's data entry. + * + * XXX + * This may be a performance/logging problem. We should add a + * log message which simply logs/updates a random set of bytes + * on a page, and use it instead of doing a delete/add pair. + */ + indx += O_INDX; + bo = *GET_BOVERFLOW(h, indx); + (void)__db_ditem(dbp, h, indx, BOVERFLOW_SIZE); + bo.pgno = next_pgno; + memset(&dbt, 0, sizeof(dbt)); + dbt.data = &bo; + dbt.size = BOVERFLOW_SIZE; + (void)__db_pitem(dbp, h, indx, BOVERFLOW_SIZE, &dbt, NULL); + (void)memp_fset(dbp->mpf, h, DB_MPOOL_DIRTY); + goto done; } /* Otherwise, do a normal btree delete. */ - if ((ret = __bam_ditem(dbp, h, indx)) != 0) +btd: if ((ret = __bam_ditem(dbp, h, indx)) != 0) goto err; if ((ret = __bam_ditem(dbp, h, indx)) != 0) goto err; @@ -1584,7 +1594,7 @@ __bam_c_physdel(dbp, cp, h) } ret = __bam_dpage(dbp, &dbt); - free(dbt.data); + __db_free(dbt.data); } err: diff --git a/db2/btree/bt_delete.c b/db2/btree/bt_delete.c index 98929540e4..9593d0109c 100644 --- a/db2/btree/bt_delete.c +++ b/db2/btree/bt_delete.c @@ -47,7 +47,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)bt_delete.c 10.21 (Sleepycat) 9/3/97"; +static const char sccsid[] = "@(#)bt_delete.c 10.22 (Sleepycat) 11/2/97"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -266,9 +266,10 @@ __bam_ditem(dbp, h, indx) case B_DUPLICATE: case B_OVERFLOW: nbytes = BINTERNAL_SIZE(bi->len); + bo = (BOVERFLOW *)bi->data; goto offpage; case B_KEYDATA: - nbytes = BKEYDATA_SIZE(bi->len); + nbytes = BINTERNAL_SIZE(bi->len); break; default: return (__db_pgfmt(dbp, h->pgno)); @@ -289,7 +290,7 @@ __bam_ditem(dbp, h, indx) if (indx > 0 && h->inp[indx] == h->inp[indx - P_INDX]) return (__bam_adjindx(dbp, h, indx, indx - P_INDX, 0)); - if (indx < (u_int32_t)(NUM_ENT(h) - P_INDX) && + if (indx + P_INDX < 
(u_int32_t)NUM_ENT(h) && h->inp[indx] == h->inp[indx + P_INDX]) return (__bam_adjindx(dbp, h, indx, indx + O_INDX, 0)); @@ -301,9 +302,9 @@ __bam_ditem(dbp, h, indx) case B_DUPLICATE: case B_OVERFLOW: nbytes = BOVERFLOW_SIZE; + bo = GET_BOVERFLOW(h, indx); offpage: /* Delete duplicate/offpage chains. */ - bo = GET_BOVERFLOW(h, indx); if (B_TYPE(bo->type) == B_DUPLICATE) { if ((ret = __db_ddup(dbp, bo->pgno, __bam_free)) != 0) @@ -523,7 +524,7 @@ __bam_dpages(dbp, t) /* * If we deleted the next-to-last item from the root page, the tree - * has collapsed a level. Try and write lock the remaining root + 1 + * can collapse a level. Try and write lock the remaining root + 1 * page and copy it onto the root page. If we can't get the lock, * that's okay, the tree just stays a level deeper than we'd like. */ @@ -546,8 +547,8 @@ __bam_dpages(dbp, t) b.data = P_ENTRY(epg->page, 0); b.size = BINTERNAL_SIZE(((BINTERNAL *)b.data)->len); __bam_rsplit_log(dbp->dbenv->lg_info, dbp->txn, - &h->lsn, 0, dbp->log_fileid, h->pgno, &a, &b, - &epg->page->lsn); + &h->lsn, 0, dbp->log_fileid, h->pgno, &a, + RE_NREC(epg->page), &b, &epg->page->lsn); } /* @@ -565,15 +566,19 @@ __bam_dpages(dbp, t) if (TYPE(h) == P_IRECNO || (TYPE(h) == P_IBTREE && F_ISSET(dbp, DB_BT_RECNUM))) RE_NREC_SET(epg->page, rcnt); + (void)memp_fset(dbp->mpf, epg->page, DB_MPOOL_DIRTY); - /* Free the last page in that level of the btree. */ - ++t->lstat.bt_freed; + /* + * Free the last page in that level of the btree and discard + * the lock. (The call to __bam_free discards our reference + * to the page.) + */ (void)__bam_free(dbp, h); + (void)__BT_TLPUT(dbp, lock); + ++t->lstat.bt_freed; /* Adjust the cursors. */ __bam_ca_move(dbp, t, h->pgno, PGNO_ROOT); - - (void)__BT_TLPUT(dbp, lock); } /* Release the top page in the subtree. 
*/ diff --git a/db2/btree/bt_open.c b/db2/btree/bt_open.c index 354888c6c2..2361f69a3e 100644 --- a/db2/btree/bt_open.c +++ b/db2/btree/bt_open.c @@ -47,7 +47,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)bt_open.c 10.20 (Sleepycat) 8/19/97"; +static const char sccsid[] = "@(#)bt_open.c 10.21 (Sleepycat) 10/25/97"; #endif /* not lint */ /* @@ -95,7 +95,7 @@ __bam_open(dbp, type, dbinfo) int ret; /* Allocate the btree internal structure. */ - if ((t = (BTREE *)calloc(1, sizeof(BTREE))) == NULL) + if ((t = (BTREE *)__db_calloc(1, sizeof(BTREE))) == NULL) return (ENOMEM); t->bt_sp = t->bt_csp = t->bt_stack; @@ -179,7 +179,7 @@ einval: ret = EINVAL; err: if (t != NULL) { /* If we allocated room for key/data return, discard it. */ if (t->bt_rkey.data != NULL) - free(t->bt_rkey.data); + __db_free(t->bt_rkey.data); FREE(t, sizeof(BTREE)); } @@ -201,7 +201,7 @@ __bam_bdup(orig, new) ot = orig->internal; - if ((t = (BTREE *)calloc(1, sizeof(*t))) == NULL) + if ((t = (BTREE *)__db_calloc(1, sizeof(*t))) == NULL) return (ENOMEM); /* @@ -248,7 +248,7 @@ __bam_keyalloc(t) * Recno keys are always the same size, and we don't want to have * to check for space on each return. Allocate it now. 
*/ - if ((t->bt_rkey.data = (void *)malloc(sizeof(db_recno_t))) == NULL) + if ((t->bt_rkey.data = (void *)__db_malloc(sizeof(db_recno_t))) == NULL) return (ENOMEM); t->bt_rkey.ulen = sizeof(db_recno_t); return (0); diff --git a/db2/btree/bt_put.c b/db2/btree/bt_put.c index af09f76d41..b3d775bb0f 100644 --- a/db2/btree/bt_put.c +++ b/db2/btree/bt_put.c @@ -47,7 +47,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)bt_put.c 10.25 (Sleepycat) 9/17/97"; +static const char sccsid[] = "@(#)bt_put.c 10.31 (Sleepycat) 10/26/97"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -66,7 +66,10 @@ static const char sccsid[] = "@(#)bt_put.c 10.25 (Sleepycat) 9/17/97"; static int __bam_fixed __P((BTREE *, DBT *)); static int __bam_lookup __P((DB *, DBT *, int *)); static int __bam_ndup __P((DB *, PAGE *, u_int32_t)); -static int __bam_partial __P((DB *, DBT *, PAGE *, u_int32_t)); +static int __bam_ovput __P((DB *, PAGE *, u_int32_t, DBT *)); +static int __bam_partial __P((DB *, DBT *, PAGE *, u_int32_t, u_int32_t)); +static u_int32_t + __bam_partsize __P((DB *, DBT *, PAGE *, u_int32_t)); /* * __bam_put -- @@ -334,21 +337,6 @@ slow: return (__bam_search(dbp, key, S_INSERT, 1, NULL, exactp)); } /* - * OVPUT -- - * Copy an overflow item onto a page. - */ -#undef OVPUT -#define OVPUT(h, indx, bo) do { \ - DBT __hdr; \ - memset(&__hdr, 0, sizeof(__hdr)); \ - __hdr.data = &bo; \ - __hdr.size = BOVERFLOW_SIZE; \ - if ((ret = __db_pitem(dbp, \ - h, indx, BOVERFLOW_SIZE, &__hdr, NULL)) != 0) \ - return (ret); \ -} while (0) - -/* * __bam_iitem -- * Insert an item into the tree. 
* @@ -365,19 +353,18 @@ __bam_iitem(dbp, hp, indxp, key, data, op, flags) { BTREE *t; BKEYDATA *bk; - BOVERFLOW kbo, dbo; DBT tdbt; PAGE *h; db_indx_t indx; - u_int32_t have_bytes, need_bytes, needed; - int bigkey, bigdata, dcopy, dupadjust, ret; + u_int32_t data_size, have_bytes, need_bytes, needed; + int bigkey, bigdata, dupadjust, replace, ret; t = dbp->internal; h = *hp; indx = *indxp; - dupadjust = 0; bk = NULL; /* XXX: Shut the compiler up. */ + dupadjust = replace = 0; /* * If it's a page of duplicates, call the common code to do the work. @@ -385,7 +372,7 @@ __bam_iitem(dbp, hp, indxp, key, data, op, flags) * !!! * Here's where the hp and indxp are important. The duplicate code * may decide to rework/rearrange the pages and indices we're using, - * so the caller must understand that the stack has to change. + * so the caller must understand that the page stack may change. */ if (TYPE(h) == P_DUPLICATE) { /* Adjust the index for the new item if it's a DB_AFTER op. */ @@ -401,24 +388,7 @@ __bam_iitem(dbp, hp, indxp, key, data, op, flags) return (__db_dput(dbp, data, hp, indxp, __bam_new)); } - /* - * XXX - * Handle partial puts. - * - * This is truly awful from a performance standput. We don't optimize - * for partial puts at all, we delete the record and add it back in, - * regardless of size or if we're simply overwriting current data. - * The hash access method does this a lot better than we do, and we're - * eventually going to have to fix it. - */ - if (F_ISSET(data, DB_DBT_PARTIAL)) { - tdbt = *data; - if ((ret = __bam_partial(dbp, &tdbt, h, indx)) != 0) - return (ret); - data = &tdbt; - } - - /* If it's a short fixed-length record, fix it up. */ + /* Handle fixed-length records: build the real record. 
*/ if (F_ISSET(dbp, DB_RE_FIXEDLEN) && data->size != t->bt_recno->re_len) { tdbt = *data; if ((ret = __bam_fixed(t, &tdbt)) != 0) @@ -427,30 +397,15 @@ __bam_iitem(dbp, hp, indxp, key, data, op, flags) } /* - * If the key or data item won't fit on a page, store it in the - * overflow pages. - * - * !!! - * From this point on, we have to recover the allocated overflow - * pages on error. + * Figure out how much space the data will take, including if it's a + * partial record. If either of the key or data items won't fit on + * a page, we'll have to store them on overflow pages. */ - bigkey = bigdata = 0; - if (LF_ISSET(BI_NEWKEY) && key->size > t->bt_ovflsize) { - B_TSET(kbo.type, B_OVERFLOW, 0); - kbo.tlen = key->size; - if ((ret = __db_poff(dbp, key, &kbo.pgno, __bam_new)) != 0) - goto err; - bigkey = 1; - } - if (data->size > t->bt_ovflsize) { - B_TSET(dbo.type, B_OVERFLOW, 0); - dbo.tlen = data->size; - if ((ret = __db_poff(dbp, data, &dbo.pgno, __bam_new)) != 0) - goto err; - bigdata = 1; - } + bigkey = LF_ISSET(BI_NEWKEY) && key->size > t->bt_ovflsize; + data_size = F_ISSET(data, DB_DBT_PARTIAL) ? + __bam_partsize(dbp, data, h, indx) : data->size; + bigdata = data_size > t->bt_ovflsize; - dcopy = 0; needed = 0; if (LF_ISSET(BI_NEWKEY)) { /* If BI_NEWKEY is set we're adding a new key and data pair. */ @@ -461,7 +416,7 @@ __bam_iitem(dbp, hp, indxp, key, data, op, flags) if (bigdata) needed += BOVERFLOW_PSIZE; else - needed += BKEYDATA_PSIZE(data->size); + needed += BKEYDATA_PSIZE(data_size); } else { /* * We're either overwriting the data item of a key/data pair @@ -482,16 +437,8 @@ __bam_iitem(dbp, hp, indxp, key, data, op, flags) if (bigdata) need_bytes += BOVERFLOW_PSIZE; else - need_bytes += BKEYDATA_PSIZE(data->size); + need_bytes += BKEYDATA_PSIZE(data_size); - /* - * If we're overwriting a data item, we copy it if it's not a - * special record type and it's the same size (including any - * alignment) and do a delete/insert otherwise. 
- */ - if (op == DB_CURRENT && !bigdata && - B_TYPE(bk->type) == B_KEYDATA && have_bytes == need_bytes) - dcopy = 1; if (have_bytes < need_bytes) needed += need_bytes - have_bytes; } @@ -505,9 +452,15 @@ __bam_iitem(dbp, hp, indxp, key, data, op, flags) * check in the btree split code, so we don't undo it there!?!? */ if (P_FREESPACE(h) < needed || - (t->bt_maxkey != 0 && NUM_ENT(h) > t->bt_maxkey)) { - ret = DB_NEEDSPLIT; - goto err; + (t->bt_maxkey != 0 && NUM_ENT(h) > t->bt_maxkey)) + return (DB_NEEDSPLIT); + + /* Handle partial puts: build the real record. */ + if (F_ISSET(data, DB_DBT_PARTIAL)) { + tdbt = *data; + if ((ret = __bam_partial(dbp, &tdbt, h, indx, data_size)) != 0) + return (ret); + data = &tdbt; } /* @@ -515,10 +468,10 @@ __bam_iitem(dbp, hp, indxp, key, data, op, flags) * * 1. Append a new key/data pair. * 2. Insert a new key/data pair. - * 3. Copy the data item. - * 4. Delete/insert the data item. - * 5. Append a new data item. - * 6. Insert a new data item. + * 3. Append a new data item (a new duplicate). + * 4. Insert a new data item (a new duplicate). + * 5. Overflow item: delete and re-add the data item. + * 6. Replace the data item. */ if (LF_ISSET(BI_NEWKEY)) { switch (op) { @@ -533,42 +486,17 @@ __bam_iitem(dbp, hp, indxp, key, data, op, flags) } /* Add the key. */ - if (bigkey) - OVPUT(h, indx, kbo); - else { - DBT __data; - memset(&__data, 0, sizeof(__data)); - __data.data = key->data; - __data.size = key->size; + if (bigkey) { + if ((ret = __bam_ovput(dbp, h, indx, key)) != 0) + return (ret); + } else if ((ret = __db_pitem(dbp, h, indx, - BKEYDATA_SIZE(key->size), NULL, &__data)) != 0) - goto err; - } + BKEYDATA_SIZE(key->size), NULL, key)) != 0) + return (ret); ++indx; } else { switch (op) { - case DB_CURRENT: /* 3. Copy the data item. */ - /* - * If we're not logging and it's possible, overwrite - * the current item. 
- * - * XXX - * We should add a separate logging message so that - * we can do this anytime it's possible, including - * for partial record puts. - */ - if (dcopy && !DB_LOGGING(dbp)) { - bk->len = data->size; - memcpy(bk->data, data->data, data->size); - goto done; - } - /* 4. Delete/insert the data item. */ - if (TYPE(h) == P_LBTREE) - ++indx; - if ((ret = __bam_ditem(dbp, h, indx)) != 0) - goto err; - break; - case DB_AFTER: /* 5. Append a new data item. */ + case DB_AFTER: /* 3. Append a new data item. */ if (TYPE(h) == P_LBTREE) { /* * Adjust the cursor and copy in the key for @@ -576,7 +504,7 @@ __bam_iitem(dbp, hp, indxp, key, data, op, flags) */ if ((ret = __bam_adjindx(dbp, h, indx + P_INDX, indx, 1)) != 0) - goto err; + return (ret); indx += 3; dupadjust = 1; @@ -589,7 +517,7 @@ __bam_iitem(dbp, hp, indxp, key, data, op, flags) *indxp += 1; } break; - case DB_BEFORE: /* 6. Insert a new data item. */ + case DB_BEFORE: /* 4. Insert a new data item. */ if (TYPE(h) == P_LBTREE) { /* * Adjust the cursor and copy in the key for @@ -597,43 +525,62 @@ __bam_iitem(dbp, hp, indxp, key, data, op, flags) */ if ((ret = __bam_adjindx(dbp, h, indx, indx, 1)) != 0) - goto err; + return (ret); ++indx; dupadjust = 1; } else __bam_ca_di(dbp, h->pgno, indx, 1); break; + case DB_CURRENT: + if (TYPE(h) == P_LBTREE) + ++indx; + + /* + * 5. Delete/re-add the data item. + * + * If we're dealing with offpage items, we have to + * delete and then re-add the item. + */ + if (bigdata || B_TYPE(bk->type) == B_OVERFLOW) { + if ((ret = __bam_ditem(dbp, h, indx)) != 0) + return (ret); + break; + } + + /* 6. Replace the data item. */ + replace = 1; + break; default: abort(); } } /* Add the data. 
*/ - if (bigdata) - OVPUT(h, indx, dbo); - else { + if (bigdata) { + if ((ret = __bam_ovput(dbp, h, indx, data)) != 0) + return (ret); + } else { BKEYDATA __bk; - DBT __hdr, __data; - memset(&__data, 0, sizeof(__data)); - __data.data = data->data; - __data.size = data->size; + DBT __hdr; if (LF_ISSET(BI_DELETED)) { B_TSET(__bk.type, B_KEYDATA, 1); - __bk.len = __data.size; + __bk.len = data->size; __hdr.data = &__bk; __hdr.size = SSZA(BKEYDATA, data); ret = __db_pitem(dbp, h, indx, - BKEYDATA_SIZE(__data.size), &__hdr, &__data); - } else + BKEYDATA_SIZE(data->size), &__hdr, data); + } else if (replace) + ret = __bam_ritem(dbp, h, indx, data); + else ret = __db_pitem(dbp, h, indx, - BKEYDATA_SIZE(data->size), NULL, &__data); + BKEYDATA_SIZE(data->size), NULL, data); if (ret != 0) - goto err; + return (ret); } -done: ++t->lstat.bt_added; + ++t->lstat.bt_added; ret = memp_fset(dbp->mpf, h, DB_MPOOL_DIRTY); @@ -645,22 +592,206 @@ done: ++t->lstat.bt_added; if (dupadjust && P_FREESPACE(h) <= dbp->pgsize / 2) { --indx; if ((ret = __bam_ndup(dbp, h, indx)) != 0) - goto err; + return (ret); } if (t->bt_recno != NULL) F_SET(t->bt_recno, RECNO_MODIFIED); - if (0) { -err: if (bigkey) - (void)__db_doff(dbp, kbo.pgno, __bam_free); - if (bigdata) - (void)__db_doff(dbp, dbo.pgno, __bam_free); - } return (ret); } /* + * __bam_partsize -- + * Figure out how much space a partial data item is in total. + */ +static u_int32_t +__bam_partsize(dbp, data, h, indx) + DB *dbp; + DBT *data; + PAGE *h; + u_int32_t indx; +{ + BKEYDATA *bk; + u_int32_t nbytes; + + /* + * Figure out how much total space we'll need. If the record doesn't + * already exist, it's simply the data we're provided. + */ + if (indx >= NUM_ENT(h)) + return (data->doff + data->size); + + /* + * Otherwise, it's the data provided plus any already existing data + * that we're not replacing. + */ + bk = GET_BKEYDATA(h, indx + (TYPE(h) == P_LBTREE ? O_INDX : 0)); + nbytes = + B_TYPE(bk->type) == B_OVERFLOW ? 
((BOVERFLOW *)bk)->tlen : bk->len; + + /* + * There are really two cases here: + * + * Case 1: We are replacing some bytes that do not exist (i.e., they + * are past the end of the record). In this case the number of bytes + * we are replacing is irrelevant and all we care about is how many + * bytes we are going to add from offset. So, the new record length + * is going to be the size of the new bytes (size) plus wherever those + * new bytes begin (doff). + * + * Case 2: All the bytes we are replacing exist. Therefore, the new + * size is the oldsize (nbytes) minus the bytes we are replacing (dlen) + * plus the bytes we are adding (size). + */ + if (nbytes < data->doff + data->dlen) /* Case 1 */ + return (data->doff + data->size); + + return (nbytes + data->size - data->dlen); /* Case 2 */ +} + +/* + * OVPUT -- + * Copy an overflow item onto a page. + */ +#undef OVPUT +#define OVPUT(h, indx, bo) do { \ + DBT __hdr; \ + memset(&__hdr, 0, sizeof(__hdr)); \ + __hdr.data = &bo; \ + __hdr.size = BOVERFLOW_SIZE; \ + if ((ret = __db_pitem(dbp, \ + h, indx, BOVERFLOW_SIZE, &__hdr, NULL)) != 0) \ + return (ret); \ +} while (0) + +/* + * __bam_ovput -- + * Build an overflow item and put it on the page. + */ +static int +__bam_ovput(dbp, h, indx, item) + DB *dbp; + PAGE *h; + u_int32_t indx; + DBT *item; +{ + BOVERFLOW bo; + int ret; + + B_TSET(bo.type, B_OVERFLOW, 0); + bo.tlen = item->size; + if ((ret = __db_poff(dbp, item, &bo.pgno, __bam_new)) != 0) + return (ret); + + OVPUT(h, indx, bo); + + return (0); +} + +/* + * __bam_ritem -- + * Replace an item on a page. + * + * PUBLIC: int __bam_ritem __P((DB *, PAGE *, u_int32_t, DBT *)); + */ +int +__bam_ritem(dbp, h, indx, data) + DB *dbp; + PAGE *h; + u_int32_t indx; + DBT *data; +{ + BKEYDATA *bk; + DBT orig, repl; + db_indx_t lo, ln, min, off, prefix, suffix; + int32_t nbytes; + int cnt, ret; + u_int8_t *p, *t; + + /* + * Replace a single item onto a page. 
The logic figuring out where + * to insert and whether it fits is handled in the caller. All we do + * here is manage the page shuffling. + */ + bk = GET_BKEYDATA(h, indx); + + /* Log the change. */ + if (DB_LOGGING(dbp)) { + /* + * We might as well check to see if the two data items share + * a common prefix and suffix -- it can save us a lot of log + * message if they're large. + */ + min = data->size < bk->len ? data->size : bk->len; + for (prefix = 0, + p = bk->data, t = data->data; + prefix < min && *p == *t; ++prefix, ++p, ++t) + ; + + min -= prefix; + for (suffix = 0, + p = (u_int8_t *)bk->data + bk->len - 1, + t = (u_int8_t *)data->data + data->size - 1; + suffix < min && *p == *t; ++suffix, --p, --t) + ; + + /* We only log the parts of the keys that have changed. */ + orig.data = (u_int8_t *)bk->data + prefix; + orig.size = bk->len - (prefix + suffix); + repl.data = (u_int8_t *)data->data + prefix; + repl.size = data->size - (prefix + suffix); + if ((ret = __bam_repl_log(dbp->dbenv->lg_info, dbp->txn, + &LSN(h), 0, dbp->log_fileid, PGNO(h), &LSN(h), + (u_int32_t)indx, (u_int32_t)B_DISSET(bk->type), + &orig, &repl, (u_int32_t)prefix, (u_int32_t)suffix)) != 0) + return (ret); + } + + /* + * Set references to the first in-use byte on the page and the + * first byte of the item being replaced. + */ + p = (u_int8_t *)h + HOFFSET(h); + t = (u_int8_t *)bk; + + /* + * If the entry is growing in size, shift the beginning of the data + * part of the page down. If the entry is shrinking in size, shift + * the beginning of the data part of the page up. Use memmove(3), + * the regions overlap. + */ + lo = BKEYDATA_SIZE(bk->len); + ln = BKEYDATA_SIZE(data->size); + if (lo != ln) { + nbytes = lo - ln; /* Signed difference. */ + if (p == t) /* First index is fast. */ + h->inp[indx] += nbytes; + else { /* Else, shift the page. */ + memmove(p + nbytes, p, t - p); + + /* Adjust the indices' offsets. 
*/ + off = h->inp[indx]; + for (cnt = 0; cnt < NUM_ENT(h); ++cnt) + if (h->inp[cnt] <= off) + h->inp[cnt] += nbytes; + } + + /* Clean up the page and adjust the item's reference. */ + HOFFSET(h) += nbytes; + t += nbytes; + } + + /* Copy the new item onto the page. */ + bk = (BKEYDATA *)t; + B_TSET(bk->type, B_KEYDATA, 0); + bk->len = data->size; + memcpy(bk->data, data->data, data->size); + + return (0); +} + +/* * __bam_ndup -- * Check to see if the duplicate set at indx should have its own page. * If it should, create it. @@ -766,16 +897,21 @@ __bam_fixed(t, dbt) rp = t->bt_recno; /* - * If using fixed-length records, and the record is long, return - * EINVAL. If it's short, pad it out. Use the record data return - * memory, it's only short-term. + * If database contains fixed-length records, and the record is long, + * return EINVAL. */ if (dbt->size > rp->re_len) return (EINVAL); + + /* + * The caller checked to see if it was just right, so we know it's + * short. Pad it out. We use the record data return memory, it's + * only a short-term use. + */ if (t->bt_rdata.ulen < rp->re_len) { t->bt_rdata.data = t->bt_rdata.data == NULL ? - (void *)malloc(rp->re_len) : - (void *)realloc(t->bt_rdata.data, rp->re_len); + (void *)__db_malloc(rp->re_len) : + (void *)__db_realloc(t->bt_rdata.data, rp->re_len); if (t->bt_rdata.data == NULL) { t->bt_rdata.ulen = 0; return (ENOMEM); @@ -786,12 +922,16 @@ __bam_fixed(t, dbt) memset((u_int8_t *)t->bt_rdata.data + dbt->size, rp->re_pad, rp->re_len - dbt->size); - /* Set the DBT to reference our new record. */ + /* + * Clean up our flags and other information just in case, and + * change the caller's DBT to reference our created record. + */ t->bt_rdata.size = rp->re_len; t->bt_rdata.dlen = 0; t->bt_rdata.doff = 0; t->bt_rdata.flags = 0; *dbt = t->bt_rdata; + return (0); } @@ -800,47 +940,28 @@ __bam_fixed(t, dbt) * Build the real record for a partial put. 
*/ static int -__bam_partial(dbp, dbt, h, indx) +__bam_partial(dbp, dbt, h, indx, nbytes) DB *dbp; DBT *dbt; PAGE *h; - u_int32_t indx; + u_int32_t indx, nbytes; { BTREE *t; BKEYDATA *bk, tbk; BOVERFLOW *bo; DBT copy; - u_int32_t len, nbytes, tlen; + u_int32_t len, tlen; int ret; u_int8_t *p; bo = NULL; /* XXX: Shut the compiler up. */ t = dbp->internal; - /* - * Figure out how much total space we'll need. Worst case is where - * the record is 0 bytes long, in which case doff causes the record - * to extend, and the put data is appended to it. - */ - if (indx < NUM_ENT(h)) { - bk = GET_BKEYDATA(h, indx + (TYPE(h) == P_LBTREE ? O_INDX : 0)); - if (B_TYPE(bk->type) == B_OVERFLOW) { - bo = (BOVERFLOW *)bk; - nbytes = bo->tlen; - } else - nbytes = bk->len; - } else { - bk = &tbk; - B_TSET(bk->type, B_KEYDATA, 0); - nbytes = bk->len = 0; - } - nbytes += dbt->doff + dbt->size + dbt->dlen; - - /* Allocate the space. */ + /* We use the record data return memory, it's only a short-term use. */ if (t->bt_rdata.ulen < nbytes) { t->bt_rdata.data = t->bt_rdata.data == NULL ? - (void *)malloc(nbytes) : - (void *)realloc(t->bt_rdata.data, nbytes); + (void *)__db_malloc(nbytes) : + (void *)__db_realloc(t->bt_rdata.data, nbytes); if (t->bt_rdata.data == NULL) { t->bt_rdata.ulen = 0; return (ENOMEM); @@ -848,6 +969,16 @@ __bam_partial(dbp, dbt, h, indx) t->bt_rdata.ulen = nbytes; } + /* Find the current record. */ + if (indx < NUM_ENT(h)) { + bk = GET_BKEYDATA(h, indx + (TYPE(h) == P_LBTREE ? O_INDX : 0)); + bo = (BOVERFLOW *)bk; + } else { + bk = &tbk; + B_TSET(bk->type, B_KEYDATA, 0); + bk->len = 0; + } + /* We use nul bytes for extending the record, get it over with. 
*/ memset(t->bt_rdata.data, 0, nbytes); diff --git a/db2/btree/bt_rec.c b/db2/btree/bt_rec.c index 9aeb395f27..c0b7c8ae4c 100644 --- a/db2/btree/bt_rec.c +++ b/db2/btree/bt_rec.c @@ -8,7 +8,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)bt_rec.c 10.14 (Sleepycat) 9/6/97"; +static const char sccsid[] = "@(#)bt_rec.c 10.17 (Sleepycat) 11/2/97"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -298,8 +298,8 @@ __bam_split_recover(logp, dbtp, lsnp, redo, info) goto done; /* Allocate and initialize new left/right child pages. */ - if ((_lp = (PAGE *)malloc(file_dbp->pgsize)) == NULL || - (_rp = (PAGE *)malloc(file_dbp->pgsize)) == NULL) { + if ((_lp = (PAGE *)__db_malloc(file_dbp->pgsize)) == NULL || + (_rp = (PAGE *)__db_malloc(file_dbp->pgsize)) == NULL) { ret = ENOMEM; __db_err(file_dbp->dbenv, "%s", strerror(ret)); goto out; @@ -490,9 +490,9 @@ out: /* Free any pages that weren't dirtied. */ /* Free any allocated space. */ if (_lp != NULL) - free(_lp); + __db_free(_lp); if (_rp != NULL) - free(_rp); + __db_free(_rp); REC_CLOSE; } @@ -541,7 +541,8 @@ __bam_rsplit_recover(logp, dbtp, lsnp, redo, info) } else if (cmp_n == 0 && !redo) { /* Need to undo update described. */ P_INIT(pagep, file_dbp->pgsize, PGNO_ROOT, - PGNO_INVALID, PGNO_INVALID, pagep->level + 1, TYPE(pagep)); + argp->nrec, PGNO_INVALID, pagep->level + 1, + file_dbp->type == DB_BTREE ? P_IBTREE : P_IRECNO); if ((ret = __db_pitem(file_dbp, pagep, 0, argp->rootent.size, &argp->rootent, NULL)) != 0) goto out; @@ -764,3 +765,106 @@ __bam_cdel_recover(logp, dbtp, lsnp, redo, info) out: REC_CLOSE; } + +/* + * __bam_repl_recover -- + * Recovery function for page item replacement. 
+ * + * PUBLIC: int __bam_repl_recover + * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); + */ +int +__bam_repl_recover(logp, dbtp, lsnp, redo, info) + DB_LOG *logp; + DBT *dbtp; + DB_LSN *lsnp; + int redo; + void *info; +{ + __bam_repl_args *argp; + BKEYDATA *bk; + DB *file_dbp, *mdbp; + DBT dbt; + DB_MPOOLFILE *mpf; + PAGE *pagep; + int cmp_n, cmp_p, modified, ret; + u_int8_t *p; + + REC_PRINT(__bam_repl_print); + REC_INTRO(__bam_repl_read); + + if ((ret = memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) { + (void)__db_pgerr(file_dbp, argp->pgno); + pagep = NULL; + goto out; + } + bk = GET_BKEYDATA(pagep, argp->indx); + + modified = 0; + cmp_n = log_compare(lsnp, &LSN(pagep)); + cmp_p = log_compare(&LSN(pagep), &argp->lsn); + if (cmp_p == 0 && redo) { + /* + * Need to redo update described. + * + * Re-build the replacement item. + */ + memset(&dbt, 0, sizeof(dbt)); + dbt.size = argp->prefix + argp->suffix + argp->repl.size; + if ((dbt.data = __db_malloc(dbt.size)) == NULL) { + ret = ENOMEM; + goto err; + } + p = dbt.data; + memcpy(p, bk->data, argp->prefix); + p += argp->prefix; + memcpy(p, argp->repl.data, argp->repl.size); + p += argp->repl.size; + memcpy(p, bk->data + (bk->len - argp->suffix), argp->suffix); + + ret = __bam_ritem(file_dbp, pagep, argp->indx, &dbt); + __db_free(dbt.data); + if (ret != 0) + goto err; + + LSN(pagep) = *lsnp; + modified = 1; + } else if (cmp_n == 0 && !redo) { + /* + * Need to undo update described. + * + * Re-build the original item. 
+ */ + memset(&dbt, 0, sizeof(dbt)); + dbt.size = argp->prefix + argp->suffix + argp->orig.size; + if ((dbt.data = __db_malloc(dbt.size)) == NULL) { + ret = ENOMEM; + goto err; + } + p = dbt.data; + memcpy(p, bk->data, argp->prefix); + p += argp->prefix; + memcpy(p, argp->orig.data, argp->orig.size); + p += argp->orig.size; + memcpy(p, bk->data + (bk->len - argp->suffix), argp->suffix); + + ret = __bam_ritem(file_dbp, pagep, argp->indx, &dbt); + __db_free(dbt.data); + if (ret != 0) + goto err; + + /* Reset the deleted flag, if necessary. */ + if (argp->isdeleted) + B_DSET(GET_BKEYDATA(pagep, argp->indx)->type); + + LSN(pagep) = argp->lsn; + modified = 1; + } + if ((ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) == 0) + *lsnp = argp->prev_lsn; + + if (0) { +err: (void)memp_fput(mpf, pagep, 0); + } +out: REC_CLOSE; +} diff --git a/db2/btree/bt_recno.c b/db2/btree/bt_recno.c index f7c5cffdc6..5e1cbc426c 100644 --- a/db2/btree/bt_recno.c +++ b/db2/btree/bt_recno.c @@ -8,7 +8,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)bt_recno.c 10.19 (Sleepycat) 9/20/97"; +static const char sccsid[] = "@(#)bt_recno.c 10.22 (Sleepycat) 10/25/97"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -75,7 +75,7 @@ __ram_open(dbp, type, dbinfo) ret = 0; /* Allocate and initialize the private RECNO structure. */ - if ((rp = (RECNO *)calloc(1, sizeof(*rp))) == NULL) + if ((rp = (RECNO *)__db_calloc(1, sizeof(*rp))) == NULL) return (ENOMEM); if (dbinfo != NULL) { @@ -140,7 +140,7 @@ __ram_open(dbp, type, dbinfo) err: /* If we mmap'd a source file, discard it. */ if (rp->re_smap != NULL) - (void)__db_munmap(rp->re_smap, rp->re_msize); + (void)__db_unmap(rp->re_smap, rp->re_msize); /* If we opened a source file, discard it. */ if (rp->re_fd != -1) @@ -151,7 +151,7 @@ err: /* If we mmap'd a source file, discard it. */ /* If we allocated room for key/data return, discard it. 
*/ t = dbp->internal; if (t != NULL && t->bt_rkey.data != NULL) - free(t->bt_rkey.data); + __db_free(t->bt_rkey.data); FREE(rp, sizeof(*rp)); @@ -175,10 +175,10 @@ __ram_cursor(dbp, txn, dbcp) DEBUG_LWRITE(dbp, txn, "ram_cursor", NULL, NULL, 0); - if ((dbc = (DBC *)calloc(1, sizeof(DBC))) == NULL) + if ((dbc = (DBC *)__db_calloc(1, sizeof(DBC))) == NULL) return (ENOMEM); - if ((cp = (RCURSOR *)calloc(1, sizeof(RCURSOR))) == NULL) { - free(dbc); + if ((cp = (RCURSOR *)__db_calloc(1, sizeof(RCURSOR))) == NULL) { + __db_free(dbc); return (ENOMEM); } @@ -359,7 +359,7 @@ __ram_close(argdbp) /* Close any underlying mmap region. */ if (rp->re_smap != NULL) - (void)__db_munmap(rp->re_smap, rp->re_msize); + (void)__db_unmap(rp->re_smap, rp->re_msize); /* Close any backing source file descriptor. */ if (rp->re_fd != -1) @@ -814,8 +814,8 @@ __ram_update(dbp, recno, can_create) if (F_ISSET(dbp, DB_RE_FIXEDLEN)) { if (t->bt_rdata.ulen < rp->re_len) { t->bt_rdata.data = t->bt_rdata.data == NULL ? - (void *)malloc(rp->re_len) : - (void *)realloc(t->bt_rdata.data, rp->re_len); + (void *)__db_malloc(rp->re_len) : + (void *)__db_realloc(t->bt_rdata.data, rp->re_len); if (t->bt_rdata.data == NULL) { t->bt_rdata.ulen = 0; return (ENOMEM); @@ -853,7 +853,7 @@ __ram_source(dbp, rp, fname) oflags = F_ISSET(dbp, DB_AM_RDONLY) ? DB_RDONLY : 0; if ((ret = - __db_fdopen(rp->re_source, oflags, oflags, 0, &rp->re_fd)) != 0) { + __db_open(rp->re_source, oflags, oflags, 0, &rp->re_fd)) != 0) { __db_err(dbp->dbenv, "%s: %s", rp->re_source, strerror(ret)); goto err; } @@ -866,15 +866,16 @@ __ram_source(dbp, rp, fname) * compiler will perpetrate, doing the comparison in a portable way is * flatly impossible. Hope that mmap fails if the file is too large. 
*/ - if ((ret = - __db_stat(dbp->dbenv, rp->re_source, rp->re_fd, &size, NULL)) != 0) + if ((ret = __db_ioinfo(rp->re_source, rp->re_fd, &size, NULL)) != 0) { + __db_err(dbp->dbenv, "%s: %s", rp->re_source, strerror(ret)); goto err; + } if (size == 0) { F_SET(rp, RECNO_EOF); return (0); } - if ((ret = __db_mmap(rp->re_fd, (size_t)size, 1, 1, &rp->re_smap)) != 0) + if ((ret = __db_map(rp->re_fd, (size_t)size, 1, 1, &rp->re_smap)) != 0) goto err; rp->re_cmap = rp->re_smap; rp->re_emap = (u_int8_t *)rp->re_smap + (rp->re_msize = size); @@ -940,7 +941,7 @@ __ram_writeback(dbp) * open will fail. */ if (rp->re_smap != NULL) { - (void)__db_munmap(rp->re_smap, rp->re_msize); + (void)__db_unmap(rp->re_smap, rp->re_msize); rp->re_smap = NULL; } @@ -951,7 +952,7 @@ __ram_writeback(dbp) } /* Open the file, truncating it. */ - if ((ret = __db_fdopen(rp->re_source, + if ((ret = __db_open(rp->re_source, DB_SEQUENTIAL | DB_TRUNCATE, DB_SEQUENTIAL | DB_TRUNCATE, 0, &fd)) != 0) { __db_err(dbp->dbenv, "%s: %s", rp->re_source, strerror(ret)); @@ -974,7 +975,7 @@ __ram_writeback(dbp) */ delim = rp->re_delim; if (F_ISSET(dbp, DB_RE_FIXEDLEN)) { - if ((pad = malloc(rp->re_len)) == NULL) { + if ((pad = (u_int8_t *)__db_malloc(rp->re_len)) == NULL) { ret = ENOMEM; goto err; } @@ -1051,8 +1052,8 @@ __ram_fmap(dbp, top) rp = t->bt_recno; if (t->bt_rdata.ulen < rp->re_len) { t->bt_rdata.data = t->bt_rdata.data == NULL ? 
- (void *)malloc(rp->re_len) : - (void *)realloc(t->bt_rdata.data, rp->re_len); + (void *)__db_malloc(rp->re_len) : + (void *)__db_realloc(t->bt_rdata.data, rp->re_len); if (t->bt_rdata.data == NULL) { t->bt_rdata.ulen = 0; return (ENOMEM); diff --git a/db2/btree/bt_search.c b/db2/btree/bt_search.c index fa3e018313..a21a8208bc 100644 --- a/db2/btree/bt_search.c +++ b/db2/btree/bt_search.c @@ -47,7 +47,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)bt_search.c 10.7 (Sleepycat) 9/3/97"; +static const char sccsid[] = "@(#)bt_search.c 10.8 (Sleepycat) 10/25/97"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -323,7 +323,7 @@ __bam_stkgrow(t) entries = t->bt_esp - t->bt_sp; - if ((p = (EPG *)calloc(entries * 2, sizeof(EPG))) == NULL) + if ((p = (EPG *)__db_calloc(entries * 2, sizeof(EPG))) == NULL) return (ENOMEM); memcpy(p, t->bt_sp, entries * sizeof(EPG)); if (t->bt_sp != t->bt_stack) diff --git a/db2/btree/bt_split.c b/db2/btree/bt_split.c index 25cfacc4d0..bc09131b00 100644 --- a/db2/btree/bt_split.c +++ b/db2/btree/bt_split.c @@ -44,7 +44,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)bt_split.c 10.14 (Sleepycat) 9/3/97"; +static const char sccsid[] = "@(#)bt_split.c 10.17 (Sleepycat) 11/2/97"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -255,7 +255,7 @@ __bam_page(dbp, pp, cp) cp->page->level, TYPE(cp->page)); /* Create new left page for the split. */ - if ((lp = (PAGE *)malloc(dbp->pgsize)) == NULL) { + if ((lp = (PAGE *)__db_malloc(dbp->pgsize)) == NULL) { ret = ENOMEM; goto err; } @@ -389,6 +389,9 @@ __bam_broot(dbp, rootp, lp, rp) P_INIT(rootp, dbp->pgsize, PGNO_ROOT, PGNO_INVALID, PGNO_INVALID, lp->level + 1, P_IBTREE); + memset(&data, 0, sizeof(data)); + memset(&hdr, 0, sizeof(hdr)); + /* * The btree comparison code guarantees that the left-most key on any * level of the tree is never used, so it doesn't need to be filled in. 
@@ -399,15 +402,12 @@ __bam_broot(dbp, rootp, lp, rp) if (F_ISSET(dbp, DB_BT_RECNUM)) { bi.nrecs = __bam_total(lp); RE_NREC_SET(rootp, bi.nrecs); - } - memset(&hdr, 0, sizeof(hdr)); + } else + bi.nrecs = 0; hdr.data = &bi; hdr.size = SSZA(BINTERNAL, data); - memset(&data, 0, sizeof(data)); - data.data = (char *)""; - data.size = 0; if ((ret = - __db_pitem(dbp, rootp, 0, BINTERNAL_SIZE(0), &hdr, &data)) != 0) + __db_pitem(dbp, rootp, 0, BINTERNAL_SIZE(0), &hdr, NULL)) != 0) return (ret); switch (TYPE(rp)) { @@ -431,9 +431,10 @@ __bam_broot(dbp, rootp, lp, rp) return (ret); /* Increment the overflow ref count. */ - if (B_TYPE(child_bi->type) == B_OVERFLOW && (ret = - __db_ioff(dbp, ((BOVERFLOW *)(child_bi->data))->pgno)) != 0) - return (ret); + if (B_TYPE(child_bi->type) == B_OVERFLOW) + if ((ret = __db_ovref(dbp, + ((BOVERFLOW *)(child_bi->data))->pgno, 1)) != 0) + return (ret); break; case P_LBTREE: /* Copy the first key of the child page onto the root page. */ @@ -473,9 +474,10 @@ __bam_broot(dbp, rootp, lp, rp) return (ret); /* Increment the overflow ref count. */ - if (B_TYPE(child_bk->type) == B_OVERFLOW && (ret = - __db_ioff(dbp, ((BOVERFLOW *)child_bk)->pgno)) != 0) - return (ret); + if (B_TYPE(child_bk->type) == B_OVERFLOW) + if ((ret = __db_ovref(dbp, + ((BOVERFLOW *)child_bk)->pgno, 1)) != 0) + return (ret); break; default: return (__db_pgfmt(dbp, rp->pgno)); @@ -604,9 +606,10 @@ __bam_pinsert(dbp, parent, lchild, rchild) return (ret); /* Increment the overflow ref count. */ - if (B_TYPE(child_bi->type) == B_OVERFLOW && (ret = - __db_ioff(dbp, ((BOVERFLOW *)(child_bi->data))->pgno)) != 0) - return (ret); + if (B_TYPE(child_bi->type) == B_OVERFLOW) + if ((ret = __db_ovref(dbp, + ((BOVERFLOW *)(child_bi->data))->pgno, 1)) != 0) + return (ret); break; case P_LBTREE: child_bk = GET_BKEYDATA(rchild, 0); @@ -673,9 +676,10 @@ noprefix: nksize = child_bk->len; return (ret); /* Increment the overflow ref count. 
*/ - if (B_TYPE(child_bk->type) == B_OVERFLOW && (ret = - __db_ioff(dbp, ((BOVERFLOW *)child_bk)->pgno)) != 0) - return (ret); + if (B_TYPE(child_bk->type) == B_OVERFLOW) + if ((ret = __db_ovref(dbp, + ((BOVERFLOW *)child_bk)->pgno, 1)) != 0) + return (ret); break; default: return (__db_pgfmt(dbp, rchild->pgno)); diff --git a/db2/btree/bt_stat.c b/db2/btree/bt_stat.c index ab3bc4c431..e88b5dac2d 100644 --- a/db2/btree/bt_stat.c +++ b/db2/btree/bt_stat.c @@ -8,7 +8,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)bt_stat.c 10.12 (Sleepycat) 9/3/97"; +static const char sccsid[] = "@(#)bt_stat.c 10.14 (Sleepycat) 10/25/97"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -61,7 +61,7 @@ __bam_stat(argdbp, spp, db_malloc, flags) /* Allocate and clear the structure. */ if ((sp = db_malloc == NULL ? - (DB_BTREE_STAT *)malloc(sizeof(*sp)) : + (DB_BTREE_STAT *)__db_malloc(sizeof(*sp)) : (DB_BTREE_STAT *)db_malloc(sizeof(*sp))) == NULL) { ret = ENOMEM; goto err; @@ -100,14 +100,13 @@ __bam_stat(argdbp, spp, db_malloc, flags) if (F_ISSET(meta, BTM_RENUMBER)) sp->bt_flags |= DB_RENUMBER; - /* - * Get the maxkey, minkey, re_len and re_pad fields from the - * metadata. - */ + /* Get the remaining metadata fields. */ sp->bt_minkey = meta->minkey; sp->bt_maxkey = meta->maxkey; sp->bt_re_len = meta->re_len; sp->bt_re_pad = meta->re_pad; + sp->bt_magic = meta->magic; + sp->bt_version = meta->version; /* Get the page size from the DB. */ sp->bt_pagesize = dbp->pgsize; diff --git a/db2/btree/btree.src b/db2/btree/btree.src index 7c8c4b125f..6145696d28 100644 --- a/db2/btree/btree.src +++ b/db2/btree/btree.src @@ -8,7 +8,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)btree.src 10.4 (Sleepycat) 8/27/97"; +static const char sccsid[] = "@(#)btree.src 10.6 (Sleepycat) 11/2/97"; #endif /* not lint */ PREFIX bam @@ -75,6 +75,7 @@ END * * pgno: the page number of the page copied over the root. 
* pgdbt: the page being copied on the root page. + * nrec: the tree's record count. * rootent: last entry on the root page. * rootlsn: the root page's original lsn. */ @@ -82,6 +83,7 @@ BEGIN rsplit ARG fileid u_int32_t lu ARG pgno db_pgno_t lu DBT pgdbt DBT s +ARG nrec db_pgno_t lu DBT rootent DBT s POINTER rootlsn DB_LSN * lu END @@ -135,3 +137,24 @@ ARG pgno db_pgno_t lu POINTER lsn DB_LSN * lu ARG indx u_int32_t lu END + +/* + * BTREE-repl: used to log the replacement of an item. + * + * pgno: the page modified. + * lsn: the page's original lsn. + * orig: the original data. + * new: the replacement data. + * duplicate: the prefix of the replacement that matches the original. + */ +BEGIN repl +ARG fileid u_int32_t lu +ARG pgno db_pgno_t lu +POINTER lsn DB_LSN * lu +ARG indx u_int32_t lu +ARG isdeleted u_int32_t lu +DBT orig DBT s +DBT repl DBT s +ARG prefix u_int32_t lu +ARG suffix u_int32_t lu +END diff --git a/db2/btree/btree_auto.c b/db2/btree/btree_auto.c index 353ee7bc27..45232bbc41 100644 --- a/db2/btree/btree_auto.c +++ b/db2/btree/btree_auto.c @@ -57,7 +57,7 @@ int __bam_pg_alloc_log(logp, txnid, ret_lsnp, flags, + sizeof(pgno) + sizeof(ptype) + sizeof(next); - if ((logrec.data = (void *)malloc(logrec.size)) == NULL) + if ((logrec.data = (void *)__db_malloc(logrec.size)) == NULL) return (ENOMEM); bp = logrec.data; @@ -92,7 +92,7 @@ int __bam_pg_alloc_log(logp, txnid, ret_lsnp, flags, ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags); if (txnid != NULL) txnid->last_lsn = *ret_lsnp; - free(logrec.data); + __db_free(logrec.data); return (ret); } @@ -137,7 +137,7 @@ __bam_pg_alloc_print(notused1, dbtp, lsnp, notused3, notused4) printf("\tptype: %lu\n", (u_long)argp->ptype); printf("\tnext: %lu\n", (u_long)argp->next); printf("\n"); - free(argp); + __db_free(argp); return (0); } @@ -152,7 +152,7 @@ __bam_pg_alloc_read(recbuf, argpp) __bam_pg_alloc_args *argp; u_int8_t *bp; - argp = (__bam_pg_alloc_args *)malloc(sizeof(__bam_pg_alloc_args) + + argp = 
(__bam_pg_alloc_args *)__db_malloc(sizeof(__bam_pg_alloc_args) + sizeof(DB_TXN)); if (argp == NULL) return (ENOMEM); @@ -219,7 +219,7 @@ int __bam_pg_free_log(logp, txnid, ret_lsnp, flags, + sizeof(*meta_lsn) + sizeof(u_int32_t) + (header == NULL ? 0 : header->size) + sizeof(next); - if ((logrec.data = (void *)malloc(logrec.size)) == NULL) + if ((logrec.data = (void *)__db_malloc(logrec.size)) == NULL) return (ENOMEM); bp = logrec.data; @@ -257,7 +257,7 @@ int __bam_pg_free_log(logp, txnid, ret_lsnp, flags, ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags); if (txnid != NULL) txnid->last_lsn = *ret_lsnp; - free(logrec.data); + __db_free(logrec.data); return (ret); } @@ -308,7 +308,7 @@ __bam_pg_free_print(notused1, dbtp, lsnp, notused3, notused4) printf("\n"); printf("\tnext: %lu\n", (u_long)argp->next); printf("\n"); - free(argp); + __db_free(argp); return (0); } @@ -323,7 +323,7 @@ __bam_pg_free_read(recbuf, argpp) __bam_pg_free_args *argp; u_int8_t *bp; - argp = (__bam_pg_free_args *)malloc(sizeof(__bam_pg_free_args) + + argp = (__bam_pg_free_args *)__db_malloc(sizeof(__bam_pg_free_args) + sizeof(DB_TXN)); if (argp == NULL) return (ENOMEM); @@ -400,7 +400,7 @@ int __bam_split_log(logp, txnid, ret_lsnp, flags, + sizeof(npgno) + sizeof(*nlsn) + sizeof(u_int32_t) + (pg == NULL ? 
0 : pg->size); - if ((logrec.data = (void *)malloc(logrec.size)) == NULL) + if ((logrec.data = (void *)__db_malloc(logrec.size)) == NULL) return (ENOMEM); bp = logrec.data; @@ -452,7 +452,7 @@ int __bam_split_log(logp, txnid, ret_lsnp, flags, ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags); if (txnid != NULL) txnid->last_lsn = *ret_lsnp; - free(logrec.data); + __db_free(logrec.data); return (ret); } @@ -509,7 +509,7 @@ __bam_split_print(notused1, dbtp, lsnp, notused3, notused4) } printf("\n"); printf("\n"); - free(argp); + __db_free(argp); return (0); } @@ -524,7 +524,7 @@ __bam_split_read(recbuf, argpp) __bam_split_args *argp; u_int8_t *bp; - argp = (__bam_split_args *)malloc(sizeof(__bam_split_args) + + argp = (__bam_split_args *)__db_malloc(sizeof(__bam_split_args) + sizeof(DB_TXN)); if (argp == NULL) return (ENOMEM); @@ -563,11 +563,11 @@ __bam_split_read(recbuf, argpp) /* * PUBLIC: int __bam_rsplit_log * PUBLIC: __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t, - * PUBLIC: u_int32_t, db_pgno_t, DBT *, DBT *, - * PUBLIC: DB_LSN *)); + * PUBLIC: u_int32_t, db_pgno_t, DBT *, db_pgno_t, + * PUBLIC: DBT *, DB_LSN *)); */ int __bam_rsplit_log(logp, txnid, ret_lsnp, flags, - fileid, pgno, pgdbt, rootent, rootlsn) + fileid, pgno, pgdbt, nrec, rootent, rootlsn) DB_LOG *logp; DB_TXN *txnid; DB_LSN *ret_lsnp; @@ -575,6 +575,7 @@ int __bam_rsplit_log(logp, txnid, ret_lsnp, flags, u_int32_t fileid; db_pgno_t pgno; DBT *pgdbt; + db_pgno_t nrec; DBT *rootent; DB_LSN * rootlsn; { @@ -597,9 +598,10 @@ int __bam_rsplit_log(logp, txnid, ret_lsnp, flags, + sizeof(fileid) + sizeof(pgno) + sizeof(u_int32_t) + (pgdbt == NULL ? 0 : pgdbt->size) + + sizeof(nrec) + sizeof(u_int32_t) + (rootent == NULL ? 
0 : rootent->size) + sizeof(*rootlsn); - if ((logrec.data = (void *)malloc(logrec.size)) == NULL) + if ((logrec.data = (void *)__db_malloc(logrec.size)) == NULL) return (ENOMEM); bp = logrec.data; @@ -623,6 +625,8 @@ int __bam_rsplit_log(logp, txnid, ret_lsnp, flags, memcpy(bp, pgdbt->data, pgdbt->size); bp += pgdbt->size; } + memcpy(bp, &nrec, sizeof(nrec)); + bp += sizeof(nrec); if (rootent == NULL) { zero = 0; memcpy(bp, &zero, sizeof(u_int32_t)); @@ -645,7 +649,7 @@ int __bam_rsplit_log(logp, txnid, ret_lsnp, flags, ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags); if (txnid != NULL) txnid->last_lsn = *ret_lsnp; - free(logrec.data); + __db_free(logrec.data); return (ret); } @@ -692,6 +696,7 @@ __bam_rsplit_print(notused1, dbtp, lsnp, notused3, notused4) printf("%#x ", c); } printf("\n"); + printf("\tnrec: %lu\n", (u_long)argp->nrec); printf("\trootent: "); for (i = 0; i < argp->rootent.size; i++) { c = ((char *)argp->rootent.data)[i]; @@ -704,7 +709,7 @@ __bam_rsplit_print(notused1, dbtp, lsnp, notused3, notused4) printf("\trootlsn: [%lu][%lu]\n", (u_long)argp->rootlsn.file, (u_long)argp->rootlsn.offset); printf("\n"); - free(argp); + __db_free(argp); return (0); } @@ -719,7 +724,7 @@ __bam_rsplit_read(recbuf, argpp) __bam_rsplit_args *argp; u_int8_t *bp; - argp = (__bam_rsplit_args *)malloc(sizeof(__bam_rsplit_args) + + argp = (__bam_rsplit_args *)__db_malloc(sizeof(__bam_rsplit_args) + sizeof(DB_TXN)); if (argp == NULL) return (ENOMEM); @@ -739,6 +744,8 @@ __bam_rsplit_read(recbuf, argpp) bp += sizeof(u_int32_t); argp->pgdbt.data = bp; bp += argp->pgdbt.size; + memcpy(&argp->nrec, bp, sizeof(argp->nrec)); + bp += sizeof(argp->nrec); memcpy(&argp->rootent.size, bp, sizeof(u_int32_t)); bp += sizeof(u_int32_t); argp->rootent.data = bp; @@ -789,7 +796,7 @@ int __bam_adj_log(logp, txnid, ret_lsnp, flags, + sizeof(indx) + sizeof(indx_copy) + sizeof(is_insert); - if ((logrec.data = (void *)malloc(logrec.size)) == NULL) + if ((logrec.data = (void 
*)__db_malloc(logrec.size)) == NULL) return (ENOMEM); bp = logrec.data; @@ -821,7 +828,7 @@ int __bam_adj_log(logp, txnid, ret_lsnp, flags, ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags); if (txnid != NULL) txnid->last_lsn = *ret_lsnp; - free(logrec.data); + __db_free(logrec.data); return (ret); } @@ -865,7 +872,7 @@ __bam_adj_print(notused1, dbtp, lsnp, notused3, notused4) printf("\tindx_copy: %lu\n", (u_long)argp->indx_copy); printf("\tis_insert: %lu\n", (u_long)argp->is_insert); printf("\n"); - free(argp); + __db_free(argp); return (0); } @@ -880,7 +887,7 @@ __bam_adj_read(recbuf, argpp) __bam_adj_args *argp; u_int8_t *bp; - argp = (__bam_adj_args *)malloc(sizeof(__bam_adj_args) + + argp = (__bam_adj_args *)__db_malloc(sizeof(__bam_adj_args) + sizeof(DB_TXN)); if (argp == NULL) return (ENOMEM); @@ -948,7 +955,7 @@ int __bam_cadjust_log(logp, txnid, ret_lsnp, flags, + sizeof(indx) + sizeof(adjust) + sizeof(total); - if ((logrec.data = (void *)malloc(logrec.size)) == NULL) + if ((logrec.data = (void *)__db_malloc(logrec.size)) == NULL) return (ENOMEM); bp = logrec.data; @@ -980,7 +987,7 @@ int __bam_cadjust_log(logp, txnid, ret_lsnp, flags, ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags); if (txnid != NULL) txnid->last_lsn = *ret_lsnp; - free(logrec.data); + __db_free(logrec.data); return (ret); } @@ -1024,7 +1031,7 @@ __bam_cadjust_print(notused1, dbtp, lsnp, notused3, notused4) printf("\tadjust: %ld\n", (long)argp->adjust); printf("\ttotal: %ld\n", (long)argp->total); printf("\n"); - free(argp); + __db_free(argp); return (0); } @@ -1039,7 +1046,7 @@ __bam_cadjust_read(recbuf, argpp) __bam_cadjust_args *argp; u_int8_t *bp; - argp = (__bam_cadjust_args *)malloc(sizeof(__bam_cadjust_args) + + argp = (__bam_cadjust_args *)__db_malloc(sizeof(__bam_cadjust_args) + sizeof(DB_TXN)); if (argp == NULL) return (ENOMEM); @@ -1102,7 +1109,7 @@ int __bam_cdel_log(logp, txnid, ret_lsnp, flags, + sizeof(pgno) + sizeof(*lsn) + sizeof(indx); - if ((logrec.data = (void 
*)malloc(logrec.size)) == NULL) + if ((logrec.data = (void *)__db_malloc(logrec.size)) == NULL) return (ENOMEM); bp = logrec.data; @@ -1130,7 +1137,7 @@ int __bam_cdel_log(logp, txnid, ret_lsnp, flags, ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags); if (txnid != NULL) txnid->last_lsn = *ret_lsnp; - free(logrec.data); + __db_free(logrec.data); return (ret); } @@ -1172,7 +1179,7 @@ __bam_cdel_print(notused1, dbtp, lsnp, notused3, notused4) (u_long)argp->lsn.file, (u_long)argp->lsn.offset); printf("\tindx: %lu\n", (u_long)argp->indx); printf("\n"); - free(argp); + __db_free(argp); return (0); } @@ -1187,7 +1194,7 @@ __bam_cdel_read(recbuf, argpp) __bam_cdel_args *argp; u_int8_t *bp; - argp = (__bam_cdel_args *)malloc(sizeof(__bam_cdel_args) + + argp = (__bam_cdel_args *)__db_malloc(sizeof(__bam_cdel_args) + sizeof(DB_TXN)); if (argp == NULL) return (ENOMEM); @@ -1212,6 +1219,225 @@ __bam_cdel_read(recbuf, argpp) } /* + * PUBLIC: int __bam_repl_log + * PUBLIC: __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t, + * PUBLIC: u_int32_t, db_pgno_t, DB_LSN *, u_int32_t, + * PUBLIC: u_int32_t, DBT *, DBT *, u_int32_t, + * PUBLIC: u_int32_t)); + */ +int __bam_repl_log(logp, txnid, ret_lsnp, flags, + fileid, pgno, lsn, indx, isdeleted, orig, + repl, prefix, suffix) + DB_LOG *logp; + DB_TXN *txnid; + DB_LSN *ret_lsnp; + u_int32_t flags; + u_int32_t fileid; + db_pgno_t pgno; + DB_LSN * lsn; + u_int32_t indx; + u_int32_t isdeleted; + DBT *orig; + DBT *repl; + u_int32_t prefix; + u_int32_t suffix; +{ + DBT logrec; + DB_LSN *lsnp, null_lsn; + u_int32_t zero; + u_int32_t rectype, txn_num; + int ret; + u_int8_t *bp; + + rectype = DB_bam_repl; + txn_num = txnid == NULL ? 
0 : txnid->txnid; + if (txnid == NULL) { + null_lsn.file = 0; + null_lsn.offset = 0; + lsnp = &null_lsn; + } else + lsnp = &txnid->last_lsn; + logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN) + + sizeof(fileid) + + sizeof(pgno) + + sizeof(*lsn) + + sizeof(indx) + + sizeof(isdeleted) + + sizeof(u_int32_t) + (orig == NULL ? 0 : orig->size) + + sizeof(u_int32_t) + (repl == NULL ? 0 : repl->size) + + sizeof(prefix) + + sizeof(suffix); + if ((logrec.data = (void *)__db_malloc(logrec.size)) == NULL) + return (ENOMEM); + + bp = logrec.data; + memcpy(bp, &rectype, sizeof(rectype)); + bp += sizeof(rectype); + memcpy(bp, &txn_num, sizeof(txn_num)); + bp += sizeof(txn_num); + memcpy(bp, lsnp, sizeof(DB_LSN)); + bp += sizeof(DB_LSN); + memcpy(bp, &fileid, sizeof(fileid)); + bp += sizeof(fileid); + memcpy(bp, &pgno, sizeof(pgno)); + bp += sizeof(pgno); + if (lsn != NULL) + memcpy(bp, lsn, sizeof(*lsn)); + else + memset(bp, 0, sizeof(*lsn)); + bp += sizeof(*lsn); + memcpy(bp, &indx, sizeof(indx)); + bp += sizeof(indx); + memcpy(bp, &isdeleted, sizeof(isdeleted)); + bp += sizeof(isdeleted); + if (orig == NULL) { + zero = 0; + memcpy(bp, &zero, sizeof(u_int32_t)); + bp += sizeof(u_int32_t); + } else { + memcpy(bp, &orig->size, sizeof(orig->size)); + bp += sizeof(orig->size); + memcpy(bp, orig->data, orig->size); + bp += orig->size; + } + if (repl == NULL) { + zero = 0; + memcpy(bp, &zero, sizeof(u_int32_t)); + bp += sizeof(u_int32_t); + } else { + memcpy(bp, &repl->size, sizeof(repl->size)); + bp += sizeof(repl->size); + memcpy(bp, repl->data, repl->size); + bp += repl->size; + } + memcpy(bp, &prefix, sizeof(prefix)); + bp += sizeof(prefix); + memcpy(bp, &suffix, sizeof(suffix)); + bp += sizeof(suffix); +#ifdef DEBUG + if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size) + fprintf(stderr, "Error in log record length"); +#endif + ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags); + if (txnid != NULL) + txnid->last_lsn = *ret_lsnp; + 
__db_free(logrec.data); + return (ret); +} + +/* + * PUBLIC: int __bam_repl_print + * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); + */ + +int +__bam_repl_print(notused1, dbtp, lsnp, notused3, notused4) + DB_LOG *notused1; + DBT *dbtp; + DB_LSN *lsnp; + int notused3; + void *notused4; +{ + __bam_repl_args *argp; + u_int32_t i; + int c, ret; + + i = 0; + c = 0; + notused1 = NULL; + notused3 = 0; + notused4 = NULL; + + if ((ret = __bam_repl_read(dbtp->data, &argp)) != 0) + return (ret); + printf("[%lu][%lu]bam_repl: rec: %lu txnid %lx prevlsn [%lu][%lu]\n", + (u_long)lsnp->file, + (u_long)lsnp->offset, + (u_long)argp->type, + (u_long)argp->txnid->txnid, + (u_long)argp->prev_lsn.file, + (u_long)argp->prev_lsn.offset); + printf("\tfileid: %lu\n", (u_long)argp->fileid); + printf("\tpgno: %lu\n", (u_long)argp->pgno); + printf("\tlsn: [%lu][%lu]\n", + (u_long)argp->lsn.file, (u_long)argp->lsn.offset); + printf("\tindx: %lu\n", (u_long)argp->indx); + printf("\tisdeleted: %lu\n", (u_long)argp->isdeleted); + printf("\torig: "); + for (i = 0; i < argp->orig.size; i++) { + c = ((char *)argp->orig.data)[i]; + if (isprint(c) || c == 0xa) + putchar(c); + else + printf("%#x ", c); + } + printf("\n"); + printf("\trepl: "); + for (i = 0; i < argp->repl.size; i++) { + c = ((char *)argp->repl.data)[i]; + if (isprint(c) || c == 0xa) + putchar(c); + else + printf("%#x ", c); + } + printf("\n"); + printf("\tprefix: %lu\n", (u_long)argp->prefix); + printf("\tsuffix: %lu\n", (u_long)argp->suffix); + printf("\n"); + __db_free(argp); + return (0); +} + +/* + * PUBLIC: int __bam_repl_read __P((void *, __bam_repl_args **)); + */ +int +__bam_repl_read(recbuf, argpp) + void *recbuf; + __bam_repl_args **argpp; +{ + __bam_repl_args *argp; + u_int8_t *bp; + + argp = (__bam_repl_args *)__db_malloc(sizeof(__bam_repl_args) + + sizeof(DB_TXN)); + if (argp == NULL) + return (ENOMEM); + argp->txnid = (DB_TXN *)&argp[1]; + bp = recbuf; + memcpy(&argp->type, bp, sizeof(argp->type)); + bp += 
sizeof(argp->type); + memcpy(&argp->txnid->txnid, bp, sizeof(argp->txnid->txnid)); + bp += sizeof(argp->txnid->txnid); + memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN)); + bp += sizeof(DB_LSN); + memcpy(&argp->fileid, bp, sizeof(argp->fileid)); + bp += sizeof(argp->fileid); + memcpy(&argp->pgno, bp, sizeof(argp->pgno)); + bp += sizeof(argp->pgno); + memcpy(&argp->lsn, bp, sizeof(argp->lsn)); + bp += sizeof(argp->lsn); + memcpy(&argp->indx, bp, sizeof(argp->indx)); + bp += sizeof(argp->indx); + memcpy(&argp->isdeleted, bp, sizeof(argp->isdeleted)); + bp += sizeof(argp->isdeleted); + memcpy(&argp->orig.size, bp, sizeof(u_int32_t)); + bp += sizeof(u_int32_t); + argp->orig.data = bp; + bp += argp->orig.size; + memcpy(&argp->repl.size, bp, sizeof(u_int32_t)); + bp += sizeof(u_int32_t); + argp->repl.data = bp; + bp += argp->repl.size; + memcpy(&argp->prefix, bp, sizeof(argp->prefix)); + bp += sizeof(argp->prefix); + memcpy(&argp->suffix, bp, sizeof(argp->suffix)); + bp += sizeof(argp->suffix); + *argpp = argp; + return (0); +} + +/* * PUBLIC: int __bam_init_print __P((DB_ENV *)); */ int @@ -1241,6 +1467,9 @@ __bam_init_print(dbenv) if ((ret = __db_add_recovery(dbenv, __bam_cdel_print, DB_bam_cdel)) != 0) return (ret); + if ((ret = __db_add_recovery(dbenv, + __bam_repl_print, DB_bam_repl)) != 0) + return (ret); return (0); } @@ -1274,6 +1503,9 @@ __bam_init_recover(dbenv) if ((ret = __db_add_recovery(dbenv, __bam_cdel_recover, DB_bam_cdel)) != 0) return (ret); + if ((ret = __db_add_recovery(dbenv, + __bam_repl_recover, DB_bam_repl)) != 0) + return (ret); return (0); } diff --git a/db2/common/db_appinit.c b/db2/common/db_appinit.c index 51d9262859..74ba9ff426 100644 --- a/db2/common/db_appinit.c +++ b/db2/common/db_appinit.c @@ -8,7 +8,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)db_appinit.c 10.33 (Sleepycat) 8/28/97"; +static const char sccsid[] = "@(#)db_appinit.c 10.36 (Sleepycat) 10/28/97"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ 
-93,6 +93,10 @@ db_appinit(db_home, db_config, dbenv, flags) LF_ISSET(RECOVERY_FLAGS) != RECOVERY_FLAGS) return (__db_ferr(dbenv, "db_appinit", 1)); + /* Convert the db_appinit(3) flags. */ + if (LF_ISSET(DB_THREAD)) + F_SET(dbenv, DB_ENV_THREAD); + fp = NULL; /* Set the database home. */ @@ -126,7 +130,7 @@ db_appinit(db_home, db_config, dbenv, flags) goto err; /* Indicate that the path names have been set. */ - F_SET(dbenv, DB_APP_INIT); + F_SET(dbenv, DB_ENV_APPINIT); /* * If we are doing recovery, remove all the regions. @@ -300,7 +304,8 @@ __db_appname(dbenv, appname, dir, file, fdp, namep) * return. */ if (file != NULL && __db_abspath(file)) - return ((*namep = (char *)strdup(file)) == NULL ? ENOMEM : 0); + return ((*namep = + (char *)__db_strdup(file)) == NULL ? ENOMEM : 0); if (dir != NULL && __db_abspath(dir)) { a = dir; goto done; @@ -335,7 +340,7 @@ __db_appname(dbenv, appname, dir, file, fdp, namep) */ retry: switch (appname) { case DB_APP_NONE: - if (dbenv == NULL || !F_ISSET(dbenv, DB_APP_INIT)) { + if (dbenv == NULL || !F_ISSET(dbenv, DB_ENV_APPINIT)) { if (dir == NULL) goto tmp; a = dir; @@ -355,7 +360,7 @@ retry: switch (appname) { tmp_create = 1; goto tmp; } - if (dbenv == NULL || !F_ISSET(dbenv, DB_APP_INIT)) + if (dbenv == NULL || !F_ISSET(dbenv, DB_ENV_APPINIT)) a = PATH_DOT; else { a = dbenv->db_home; @@ -367,7 +372,7 @@ retry: switch (appname) { } break; case DB_APP_LOG: - if (dbenv == NULL || !F_ISSET(dbenv, DB_APP_INIT)) { + if (dbenv == NULL || !F_ISSET(dbenv, DB_ENV_APPINIT)) { if (dir == NULL) goto tmp; a = dir; @@ -385,7 +390,7 @@ retry: switch (appname) { } tmp_create = 1; - if (dbenv == NULL || !F_ISSET(dbenv, DB_APP_INIT)) + if (dbenv == NULL || !F_ISSET(dbenv, DB_ENV_APPINIT)) goto tmp; else { a = dbenv->db_home; @@ -396,7 +401,7 @@ retry: switch (appname) { /* Reference a file from the appropriate temporary directory. 
*/ if (0) { -tmp: if (dbenv == NULL || !F_ISSET(dbenv, DB_APP_INIT)) { +tmp: if (dbenv == NULL || !F_ISSET(dbenv, DB_ENV_APPINIT)) { memset(&etmp, 0, sizeof(etmp)); if ((ret = __db_tmp_dir(&etmp, DB_USE_ENVIRON)) != 0) return (ret); @@ -412,7 +417,7 @@ done: len = (c == NULL ? 0 : strlen(c) + 1) + (file == NULL ? 0 : strlen(file) + 1); - if ((start = (char *)malloc(len)) == NULL) { + if ((start = (char *)__db_malloc(len)) == NULL) { __db_err(dbenv, "%s", strerror(ENOMEM)); if (tmp_free) FREES(etmp.db_tmp_dir); @@ -484,7 +489,7 @@ __db_home(dbenv, db_home, flags) if (p == NULL) return (0); - if ((dbenv->db_home = (char *)strdup(p)) == NULL) { + if ((dbenv->db_home = (char *)__db_strdup(p)) == NULL) { __db_err(dbenv, "%s", strerror(ENOMEM)); return (ENOMEM); } @@ -509,7 +514,7 @@ __db_parse(dbenv, s) * We need to strdup the argument in case the caller passed us * static data. */ - if ((local_s = (char *)strdup(s)) == NULL) + if ((local_s = (char *)__db_strdup(s)) == NULL) return (ENOMEM); tp = local_s; @@ -526,14 +531,15 @@ illegal: ret = EINVAL; #define DATA_INIT_CNT 20 /* Start with 20 data slots. 
*/ if (!strcmp(name, "DB_DATA_DIR")) { if (dbenv->db_data_dir == NULL) { - if ((dbenv->db_data_dir = (char **)calloc(DATA_INIT_CNT, + if ((dbenv->db_data_dir = + (char **)__db_calloc(DATA_INIT_CNT, sizeof(char **))) == NULL) goto nomem; dbenv->data_cnt = DATA_INIT_CNT; } else if (dbenv->data_next == dbenv->data_cnt - 1) { dbenv->data_cnt *= 2; if ((dbenv->db_data_dir = - (char **)realloc(dbenv->db_data_dir, + (char **)__db_realloc(dbenv->db_data_dir, dbenv->data_cnt * sizeof(char **))) == NULL) goto nomem; } @@ -549,7 +555,7 @@ illegal: ret = EINVAL; } else goto err; - if ((*p = (char *)strdup(value)) == NULL) { + if ((*p = (char *)__db_strdup(value)) == NULL) { nomem: ret = ENOMEM; __db_err(dbenv, "%s", strerror(ENOMEM)); } @@ -623,7 +629,7 @@ __db_tmp_dir(dbenv, flags) if (!Special2FSSpec(kTemporaryFolderType, kOnSystemDisk, 0, &spec)) { p = FSp2FullPath(&spec); - sTempFolder = malloc(strlen(p) + 1); + sTempFolder = __db_malloc(strlen(p) + 1); strcpy(sTempFolder, p); p = sTempFolder; } @@ -639,7 +645,7 @@ __db_tmp_dir(dbenv, flags) if (p == NULL) return (0); - if ((dbenv->db_tmp_dir = (char *)strdup(p)) == NULL) { + if ((dbenv->db_tmp_dir = (char *)__db_strdup(p)) == NULL) { __db_err(dbenv, "%s", strerror(ENOMEM)); return (ENOMEM); } @@ -722,7 +728,7 @@ __db_tmp_open(dbenv, dir, fdp) (void)sigprocmask(SIG_BLOCK, &set, &oset); #endif #define DB_TEMPOPEN DB_CREATE | DB_EXCL | DB_TEMPORARY - if ((ret = __db_fdopen(buf, + if ((ret = __db_open(buf, DB_TEMPOPEN, DB_TEMPOPEN, S_IRUSR | S_IWUSR, fdp)) == 0) { #ifdef HAVE_SIGFILLSET (void)sigprocmask(SIG_SETMASK, &oset, NULL); diff --git a/db2/common/db_apprec.c b/db2/common/db_apprec.c index 2e94673731..ac0176d70f 100644 --- a/db2/common/db_apprec.c +++ b/db2/common/db_apprec.c @@ -11,7 +11,7 @@ static const char copyright[] = "@(#) Copyright (c) 1997\n\ Sleepycat Software Inc. 
All rights reserved.\n"; -static const char sccsid[] = "@(#)db_apprec.c 10.16 (Sleepycat) 8/27/97"; +static const char sccsid[] = "@(#)db_apprec.c 10.18 (Sleepycat) 9/30/97"; #endif #ifndef NO_SYSTEM_INCLUDES @@ -31,12 +31,6 @@ static const char sccsid[] = "@(#)db_apprec.c 10.16 (Sleepycat) 8/27/97"; #include "txn.h" #include "common_ext.h" -#define FREE_DBT(L, D) { \ - if (F_ISSET((L), DB_AM_THREAD) && (D).data != NULL) \ - free((D).data); \ - (D).data = NULL; \ - } \ - /* * __db_apprec -- * Perform recovery. @@ -52,34 +46,41 @@ __db_apprec(dbenv, flags) DB_LOG *lp; DB_LSN ckp_lsn, first_lsn, lsn, tmp_lsn; time_t now; - int first_flag, ret; + int first_flag, is_thread, ret; void *txninfo; + lp = dbenv->lg_info; + /* Initialize the transaction list. */ if ((ret = __db_txnlist_init(&txninfo)) != 0) return (ret); /* + * Save the state of the thread flag -- we don't need it on at the + * moment because we're single-threaded until recovery is complete. + */ + is_thread = F_ISSET(lp, DB_AM_THREAD); + F_CLR(lp, DB_AM_THREAD); + + /* * Read forward through the log, opening the appropriate files so that * we can call recovery routines. In general, we start at the last * checkpoint prior to the last checkpointed LSN. For catastrophic * recovery, we begin at the first LSN that appears in any log file * (log_get figures this out for us when we pass it the DB_FIRST flag). */ - lp = dbenv->lg_info; if (LF_ISSET(DB_RECOVER_FATAL)) first_flag = DB_FIRST; else { - if ((ret = __log_findckp(lp, &lsn)) == DB_NOTFOUND) + if ((ret = __log_findckp(lp, &lsn)) == DB_NOTFOUND) { + F_SET(lp, is_thread); return (0); + } first_flag = DB_SET; } /* If we're a threaded application, we have to allocate space. 
*/ memset(&data, 0, sizeof(data)); - if (F_ISSET(lp, DB_AM_THREAD)) - F_SET(&data, DB_DBT_MALLOC); - if ((ret = log_get(lp, &lsn, &data, first_flag)) != 0) { __db_err(dbenv, "Failure: unable to get log record"); if (first_flag == DB_SET) @@ -93,7 +94,6 @@ __db_apprec(dbenv, flags) first_lsn = lsn; for (;;) { ret = __db_dispatch(lp, &data, &lsn, TXN_OPENFILES, txninfo); - FREE_DBT(lp, data); if (ret != 0 && ret != DB_TXN_CKP) goto msgerr; if ((ret = @@ -103,7 +103,6 @@ __db_apprec(dbenv, flags) break; } } - FREE_DBT(lp, data); /* * Initialize the ckp_lsn to 0,0. If we never find a valid @@ -116,7 +115,6 @@ __db_apprec(dbenv, flags) tmp_lsn = lsn; ret = __db_dispatch(lp, &data, &lsn, TXN_BACKWARD_ROLL, txninfo); - FREE_DBT(lp, data); if (ret == DB_TXN_CKP) { if (IS_ZERO_LSN(ckp_lsn)) ckp_lsn = tmp_lsn; @@ -124,20 +122,17 @@ __db_apprec(dbenv, flags) } else if (ret != 0) goto msgerr; } - FREE_DBT(lp, data); if (ret != 0 && ret != DB_NOTFOUND) goto err; for (ret = log_get(lp, &lsn, &data, DB_NEXT); ret == 0; ret = log_get(lp, &lsn, &data, DB_NEXT)) { ret = __db_dispatch(lp, &data, &lsn, TXN_FORWARD_ROLL, txninfo); - FREE_DBT(lp, data); if (ret == DB_TXN_CKP) ret = 0; else if (ret != 0) goto msgerr; } - FREE_DBT(lp, data); if (ret != DB_NOTFOUND) goto err; @@ -165,11 +160,12 @@ __db_apprec(dbenv, flags) (u_long)dbenv->tx_info->region->last_ckp.offset); } + F_SET(lp, is_thread); return (0); msgerr: __db_err(dbenv, "Recovery function for LSN %lu %lu failed", (u_long)lsn.file, (u_long)lsn.offset); -err: FREE_DBT(lp, data); +err: F_SET(lp, is_thread); return (ret); } diff --git a/db2/common/db_byteorder.c b/db2/common/db_byteorder.c index a8d7715455..e486132073 100644 --- a/db2/common/db_byteorder.c +++ b/db2/common/db_byteorder.c @@ -8,20 +8,20 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)db_byteorder.c 10.3 (Sleepycat) 6/21/97"; +static const char sccsid[] = "@(#)db_byteorder.c 10.4 (Sleepycat) 9/4/97"; #endif /* not lint */ #ifndef 
NO_SYSTEM_INCLUDES #include <sys/types.h> -#include <errno.h> +#ifdef HAVE_ENDIAN_H +#include <endian.h> +#if BYTE_ORDER == BIG_ENDIAN +#define WORDS_BIGENDIAN 1 +#endif #endif -#ifdef HAVE_ENDIAN_H -# include <endian.h> -# if BYTE_ORDER == BIG_ENDIAN -# define WORDS_BIGENDIAN 1 -# endif +#include <errno.h> #endif #include "db_int.h" diff --git a/db2/common/db_region.c b/db2/common/db_region.c index 86d79a8148..3e8cd2dc66 100644 --- a/db2/common/db_region.c +++ b/db2/common/db_region.c @@ -43,7 +43,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)db_region.c 10.13 (Sleepycat) 8/27/97"; +static const char sccsid[] = "@(#)db_region.c 10.15 (Sleepycat) 10/25/97"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -114,7 +114,7 @@ __db_rcreate(dbenv, appname, path, file, mode, size, fdp, retp) * attempts to create the region will return failure in one of the * attempts. */ - if (fd == -1 && (ret = __db_fdopen(name, + if (fd == -1 && (ret = __db_open(name, DB_CREATE | DB_EXCL, DB_CREATE | DB_EXCL, mode, &fd)) != 0) { if (ret != EEXIST) __db_err(dbenv, @@ -131,6 +131,42 @@ __db_rcreate(dbenv, appname, path, file, mode, size, fdp, retp) if ((ret = __db_rmap(dbenv, fd, size, &rp)) != 0) goto err; + /* Initialize the region. */ + if ((ret = __db_rinit(dbenv, rp, fd, size, 1)) != 0) + goto err; + + if (name != NULL) + FREES(name); + + *(void **)retp = rp; + return (0); + +err: if (fd != -1) { + if (rp != NULL) + (void)__db_unmap(rp, rp->size); + (void)__db_unlink(name); + (void)__db_close(fd); + } + if (name != NULL) + FREES(name); + return (ret); +} + +/* + * __db_rinit -- + * Initialize the region. + * + * PUBLIC: int __db_rinit __P((DB_ENV *, RLAYOUT *, int, size_t, int)); + */ +int +__db_rinit(dbenv, rp, fd, size, lock_region) + DB_ENV *dbenv; + RLAYOUT *rp; + size_t size; + int fd, lock_region; +{ + int ret; + /* * Initialize the common information. 
* @@ -141,9 +177,12 @@ __db_rcreate(dbenv, appname, path, file, mode, size, fdp, retp) * file permissions games, but we can't because WNT filesystems won't * open a file mode 0. * - * So, the process that's creating the region always acquires the lock - * before the setting the version number. Any process joining always - * checks the version number before attempting to acquire the lock. + * If the lock_region flag is set, the process creating the region + * acquires the lock before the setting the version number. Any + * process joining the region checks the version number before + * attempting to acquire the lock. (The lock_region flag may not be + * set -- the mpool code sometimes malloc's private regions but still + * needs to initialize them, specifically, the mutex for threads.) * * We have to check the version number first, because if the version * number has not been written, it's possible that the mutex has not @@ -151,30 +190,16 @@ __db_rcreate(dbenv, appname, path, file, mode, size, fdp, retp) * random behavior. If the version number isn't there (the file size * is too small) or it's 0, we know that the region is being created. */ - (void)__db_mutex_init(&rp->lock, MUTEX_LOCK_OFFSET(rp, &rp->lock)); - (void)__db_mutex_lock(&rp->lock, - fd, dbenv == NULL ? NULL : dbenv->db_yield); + __db_mutex_init(&rp->lock, MUTEX_LOCK_OFFSET(rp, &rp->lock)); + if (lock_region && (ret = __db_mutex_lock(&rp->lock, fd)) != 0) + return (ret); rp->refcnt = 1; rp->size = size; rp->flags = 0; db_version(&rp->majver, &rp->minver, &rp->patch); - if (name != NULL) - FREES(name); - - *(void **)retp = rp; return (0); - -err: if (fd != -1) { - if (rp != NULL) - (void)__db_munmap(rp, rp->size); - (void)__db_unlink(name); - (void)__db_close(fd); - } - if (name != NULL) - FREES(name); - return (ret); } /* @@ -205,7 +230,7 @@ __db_ropen(dbenv, appname, path, file, flags, fdp, retp) return (ret); /* Open the file. 
*/ - if ((ret = __db_fdopen(name, flags, DB_MUTEXDEBUG, 0, &fd)) != 0) { + if ((ret = __db_open(name, flags, DB_MUTEXDEBUG, 0, &fd)) != 0) { __db_err(dbenv, "region open: %s: %s", name, strerror(ret)); goto err2; } @@ -225,8 +250,10 @@ __db_ropen(dbenv, appname, path, file, flags, fdp, retp) * flatly impossible. Hope that mmap fails if the file is too large. * */ - if ((ret = __db_stat(dbenv, name, fd, &size1, NULL)) != 0) + if ((ret = __db_ioinfo(name, fd, &size1, NULL)) != 0) { + __db_err(dbenv, "%s: %s", name, strerror(ret)); goto err2; + } /* Check to make sure the first block has been written. */ if ((size_t)size1 < sizeof(RLAYOUT)) { @@ -249,16 +276,17 @@ __db_ropen(dbenv, appname, path, file, flags, fdp, retp) /* Get the region lock. */ if (!LF_ISSET(DB_MUTEXDEBUG)) - (void)__db_mutex_lock(&rp->lock, - fd, dbenv == NULL ? NULL : dbenv->db_yield); + (void)__db_mutex_lock(&rp->lock, fd); /* * The file may have been half-written if we were descheduled between * getting the size of the file and checking the major version. Check * to make sure we got the entire file. */ - if ((ret = __db_stat(dbenv, name, fd, &size2, NULL)) != 0) + if ((ret = __db_ioinfo(name, fd, &size2, NULL)) != 0) { + __db_err(dbenv, "%s: %s", name, strerror(ret)); goto err1; + } if (size1 != size2) { ret = EAGAIN; goto err1; @@ -285,7 +313,7 @@ __db_ropen(dbenv, appname, path, file, flags, fdp, retp) err1: if (!LF_ISSET(DB_MUTEXDEBUG)) (void)__db_mutex_unlock(&rp->lock, fd); err2: if (rp != NULL) - (void)__db_munmap(rp, rp->size); + (void)__db_unmap(rp, rp->size); if (fd != -1) (void)__db_close(fd); FREES(name); @@ -312,8 +340,7 @@ __db_rclose(dbenv, fd, ptr) fail = NULL; /* Get the lock. */ - if ((ret = __db_mutex_lock(&rp->lock, - fd, dbenv == NULL ? NULL : dbenv->db_yield)) != 0) { + if ((ret = __db_mutex_lock(&rp->lock, fd)) != 0) { fail = "lock get"; goto err; } @@ -328,7 +355,7 @@ __db_rclose(dbenv, fd, ptr) } /* Discard the region. 
*/ - if ((t_ret = __db_munmap(ptr, rp->size)) != 0 && fail == NULL) { + if ((t_ret = __db_unmap(ptr, rp->size)) != 0 && fail == NULL) { ret = t_ret; fail = "munmap"; } @@ -392,8 +419,7 @@ __db_runlink(dbenv, appname, path, file, force) /* Open and lock the region. */ if ((ret = __db_ropen(dbenv, appname, path, file, 0, &fd, &rp)) != 0) goto err1; - (void)__db_mutex_lock(&rp->lock, - fd, dbenv == NULL ? NULL : dbenv->db_yield); + (void)__db_mutex_lock(&rp->lock, fd); /* If the region is currently being deleted, fail. */ if (F_ISSET(rp, DB_R_DELETED)) { @@ -434,8 +460,7 @@ __db_runlink(dbenv, appname, path, file, force) /* Not a clue. Try to clear the DB_R_DELETED flag. */ if ((ret = __db_ropen(dbenv, appname, path, file, 0, &fd, &rp)) != 0) goto err1; - (void)__db_mutex_lock(&rp->lock, - fd, dbenv == NULL ? NULL : dbenv->db_yield); + (void)__db_mutex_lock(&rp->lock, fd); F_CLR(rp, DB_R_DELETED); /* FALLTHROUGH */ @@ -472,7 +497,7 @@ __db_rgrow(dbenv, fd, incr) char buf[__DB_VMPAGESIZE]; /* Seek to the end of the region. */ - if ((ret = __db_lseek(fd, 0, 0, 0, SEEK_END)) != 0) + if ((ret = __db_seek(fd, 0, 0, 0, SEEK_END)) != 0) goto err; /* Write nuls to the new bytes. */ @@ -500,7 +525,7 @@ __db_rgrow(dbenv, fd, incr) incr -= incr % __DB_VMPAGESIZE; /* Write the last page, not the page after the last. 
*/ - if ((ret = __db_lseek(fd, 0, 0, incr - __DB_VMPAGESIZE, SEEK_CUR)) != 0) + if ((ret = __db_seek(fd, 0, 0, incr - __DB_VMPAGESIZE, SEEK_CUR)) != 0) goto err; if ((ret = __db_write(fd, buf, sizeof(buf), &nw)) != 0) goto err; @@ -531,7 +556,7 @@ __db_rremap(dbenv, ptr, oldsize, newsize, fd, retp) { int ret; - if ((ret = __db_munmap(ptr, oldsize)) != 0) { + if ((ret = __db_unmap(ptr, oldsize)) != 0) { __db_err(dbenv, "region remap: munmap: %s", strerror(ret)); return (ret); } @@ -553,7 +578,7 @@ __db_rmap(dbenv, fd, size, retp) RLAYOUT *rp; int ret; - if ((ret = __db_mmap(fd, size, 0, 0, &rp)) != 0) { + if ((ret = __db_map(fd, size, 0, 0, (void **)&rp)) != 0) { __db_err(dbenv, "region map: mmap %s", strerror(ret)); return (ret); } diff --git a/db2/db.h b/db2/db.h index 6911002ed5..fb2d6bb3da 100644 --- a/db2/db.h +++ b/db2/db.h @@ -4,7 +4,7 @@ * Copyright (c) 1996, 1997 * Sleepycat Software. All rights reserved. * - * @(#)db.h.src 10.77 (Sleepycat) 9/24/97 + * @(#)db.h.src 10.91 (Sleepycat) 11/3/97 */ #ifndef _DB_H_ @@ -28,9 +28,15 @@ * XXX * Handle function prototypes and the keyword "const". This steps on name * space that DB doesn't control, but all of the other solutions are worse. + * + * XXX + * While Microsoft's compiler is ANSI C compliant, it doesn't have _STDC_ + * defined by default, you specify a command line flag or #pragma to turn + * it on. Don't do that, however, because some of Microsoft's own header + * files won't compile. */ #undef __P -#if defined(__STDC__) || defined(__cplusplus) +#if defined(__STDC__) || defined(__cplusplus) || defined(_MSC_VER) #define __P(protos) protos /* ANSI C prototypes */ #else #define const @@ -67,8 +73,8 @@ #define DB_VERSION_MAJOR 2 #define DB_VERSION_MINOR 3 -#define DB_VERSION_PATCH 10 -#define DB_VERSION_STRING "Sleepycat Software: DB 2.3.10: (9/24/97)" +#define DB_VERSION_PATCH 12 +#define DB_VERSION_STRING "Sleepycat Software: DB 2.3.12: (11/3/97)" typedef u_int32_t db_pgno_t; /* Page number type. 
*/ typedef u_int16_t db_indx_t; /* Page offset type. */ @@ -93,6 +99,7 @@ struct __db_lockregion; typedef struct __db_lockregion DB_LOCKREGION; struct __db_lockreq; typedef struct __db_lockreq DB_LOCKREQ; struct __db_locktab; typedef struct __db_locktab DB_LOCKTAB; struct __db_log; typedef struct __db_log DB_LOG; +struct __db_log_stat; typedef struct __db_log_stat DB_LOG_STAT; struct __db_lsn; typedef struct __db_lsn DB_LSN; struct __db_mpool; typedef struct __db_mpool DB_MPOOL; struct __db_mpool_fstat;typedef struct __db_mpool_fstat DB_MPOOL_FSTAT; @@ -122,6 +129,31 @@ struct __db_dbt { }; /* + * DB configuration. There are a set of functions which the application + * can replace with its own versions. + */ +#define DB_FUNC_CALLOC 1 /* ANSI C calloc. */ +#define DB_FUNC_CLOSE 2 /* POSIX 1003.1 close. */ +#define DB_FUNC_DIRFREE 3 /* DB: free directory list. */ +#define DB_FUNC_DIRLIST 4 /* DB: create directory list. */ +#define DB_FUNC_EXISTS 5 /* DB: return if file exists. */ +#define DB_FUNC_FREE 6 /* ANSI C free. */ +#define DB_FUNC_FSYNC 7 /* POSIX 1003.1 fsync. */ +#define DB_FUNC_IOINFO 8 /* DB: return file I/O information. */ +#define DB_FUNC_MALLOC 9 /* ANSI C malloc. */ +#define DB_FUNC_MAP 10 /* DB: map file into shared memory. */ +#define DB_FUNC_OPEN 11 /* POSIX 1003.1 open. */ +#define DB_FUNC_READ 12 /* POSIX 1003.1 read. */ +#define DB_FUNC_REALLOC 13 /* ANSI C realloc. */ +#define DB_FUNC_SEEK 14 /* POSIX 1003.1 lseek. */ +#define DB_FUNC_SLEEP 15 /* DB: sleep secs/usecs. */ +#define DB_FUNC_STRDUP 16 /* ANSI C strdup. */ +#define DB_FUNC_UNLINK 17 /* POSIX 1003.1 unlink. */ +#define DB_FUNC_UNMAP 18 /* DB: unmap shared memory file. */ +#define DB_FUNC_WRITE 19 /* POSIX 1003.1 write. */ +#define DB_FUNC_YIELD 20 /* DB: yield thread to scheduler. */ + +/* * Database configuration and initialization. */ /* @@ -134,21 +166,20 @@ struct __db_dbt { /* * Flags understood by db_appinit(3). 
* - * DB_APP_INIT and DB_MUTEXDEBUG are internal only, and not documented. + * DB_MUTEXDEBUG is internal only, and not documented. */ /* 0x00007 COMMON MASK. */ -#define DB_APP_INIT 0x00008 /* Appinit called, paths initialized. */ -#define DB_INIT_LOCK 0x00010 /* Initialize locking. */ -#define DB_INIT_LOG 0x00020 /* Initialize logging. */ -#define DB_INIT_MPOOL 0x00040 /* Initialize mpool. */ -#define DB_INIT_TXN 0x00080 /* Initialize transactions. */ -#define DB_MPOOL_PRIVATE 0x00100 /* Mpool: private memory pool. */ -#define DB_MUTEXDEBUG 0x00200 /* Do not get/set mutexes in regions. */ -#define DB_RECOVER 0x00400 /* Run normal recovery. */ -#define DB_RECOVER_FATAL 0x00800 /* Run catastrophic recovery. */ -#define DB_TXN_NOSYNC 0x01000 /* Do not sync log on commit. */ -#define DB_USE_ENVIRON 0x02000 /* Use the environment. */ -#define DB_USE_ENVIRON_ROOT 0x04000 /* Use the environment if root. */ +#define DB_INIT_LOCK 0x00008 /* Initialize locking. */ +#define DB_INIT_LOG 0x00010 /* Initialize logging. */ +#define DB_INIT_MPOOL 0x00020 /* Initialize mpool. */ +#define DB_INIT_TXN 0x00040 /* Initialize transactions. */ +#define DB_MPOOL_PRIVATE 0x00080 /* Mpool: private memory pool. */ +#define DB_MUTEXDEBUG 0x00100 /* Do not get/set mutexes in regions. */ +#define DB_RECOVER 0x00200 /* Run normal recovery. */ +#define DB_RECOVER_FATAL 0x00400 /* Run catastrophic recovery. */ +#define DB_TXN_NOSYNC 0x00800 /* Do not sync log on commit. */ +#define DB_USE_ENVIRON 0x01000 /* Use the environment. */ +#define DB_USE_ENVIRON_ROOT 0x02000 /* Use the environment if root. */ /* CURRENTLY UNUSED LOCK FLAGS. */ #define DB_TXN_LOCK_2PL 0x00000 /* Two-phase locking. */ @@ -209,7 +240,6 @@ struct __db_env { int lk_modes; /* Number of lock modes in table. */ unsigned int lk_max; /* Maximum number of locks. */ u_int32_t lk_detect; /* Deadlock detect on every conflict. */ - int (*db_yield) __P((void)); /* Yield function for threads. */ /* Logging. 
*/ DB_LOG *lg_info; /* Return from log_open(). */ @@ -226,6 +256,9 @@ struct __db_env { int (*tx_recover) /* Dispatch function for recovery. */ __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); +#define DB_ENV_APPINIT 0x01 /* Paths initialized by db_appinit(). */ +#define DB_ENV_STANDALONE 0x02 /* Test: freestanding environment. */ +#define DB_ENV_THREAD 0x04 /* DB_ENV is multi-threaded. */ u_int32_t flags; /* Flags. */ }; @@ -301,7 +334,7 @@ struct __db_info { #define DB_CURRENT 0x000010 /* c_get(), c_put(), log_get() */ #define DB_FIRST 0x000020 /* c_get(), log_get() */ #define DB_FLUSH 0x000040 /* log_put() */ -#define DB_GET_RECNO 0x000080 /* c_get() */ +#define DB_GET_RECNO 0x000080 /* get(), c_get() */ #define DB_KEYFIRST 0x000100 /* c_put() */ #define DB_KEYLAST 0x000200 /* c_put() */ #define DB_LAST 0x000400 /* c_get(), log_get() */ @@ -312,7 +345,7 @@ struct __db_info { #define DB_RECORDCOUNT 0x008000 /* stat() */ #define DB_SET 0x010000 /* c_get(), log_get() */ #define DB_SET_RANGE 0x020000 /* c_get() */ -#define DB_SET_RECNO 0x040000 /* get(), c_get() */ +#define DB_SET_RECNO 0x040000 /* c_get() */ /* DB (user visible) error return codes. */ #define DB_INCOMPLETE ( -1) /* Sync didn't finish. */ @@ -472,6 +505,8 @@ struct __db_bt_stat { u_int32_t bt_get; /* Items retrieved. */ u_int32_t bt_cache_hit; /* Hits in fast-insert code. */ u_int32_t bt_cache_miss; /* Misses in fast-insert code. */ + u_int32_t bt_magic; /* Magic number. */ + u_int32_t bt_version; /* Version number. */ }; #if defined(__cplusplus) @@ -479,6 +514,7 @@ extern "C" { #endif int db_appinit __P((const char *, char * const *, DB_ENV *, int)); int db_appexit __P((DB_ENV *)); +int db_jump_set __P((void *, int)); int db_open __P((const char *, DBTYPE, int, int, DB_ENV *, DB_INFO *, DB **)); char *db_version __P((int *, int *, int *)); #if defined(__cplusplus) @@ -576,6 +612,22 @@ struct __db_lsn { u_int32_t offset; /* File offset. */ }; +/* Log statistics structure. 
*/ +struct __db_log_stat { + u_int32_t st_magic; /* Log file magic number. */ + u_int32_t st_version; /* Log file version number. */ + int st_mode; /* Log file mode. */ + u_int32_t st_lg_max; /* Maximum log file size. */ + u_int32_t st_w_bytes; /* Bytes to log. */ + u_int32_t st_w_mbytes; /* Megabytes to log. */ + u_int32_t st_wc_bytes; /* Bytes to log since checkpoint. */ + u_int32_t st_wc_mbytes; /* Megabytes to log since checkpoint. */ + u_int32_t st_wcount; /* Total syncs to the log. */ + u_int32_t st_scount; /* Total writes to the log. */ + u_int32_t st_region_wait; /* Region lock granted after wait. */ + u_int32_t st_region_nowait; /* Region lock granted without wait. */ +}; + #if defined(__cplusplus) extern "C" { #endif @@ -588,6 +640,7 @@ int log_get __P((DB_LOG *, DB_LSN *, DBT *, int)); int log_open __P((const char *, int, int, DB_ENV *, DB_LOG **)); int log_put __P((DB_LOG *, DB_LSN *, const DBT *, int)); int log_register __P((DB_LOG *, DB *, const char *, DBTYPE, u_int32_t *)); +int log_stat __P((DB_LOG *, DB_LOG_STAT **, void *(*)(size_t))); int log_unlink __P((const char *, int, DB_ENV *)); int log_unregister __P((DB_LOG *, u_int32_t)); #if defined(__cplusplus) @@ -610,30 +663,35 @@ int log_unregister __P((DB_LOG *, u_int32_t)); /* Mpool statistics structure. */ struct __db_mpool_stat { size_t st_cachesize; /* Cache size. */ - unsigned long st_cache_hit; /* Pages found in the cache. */ - unsigned long st_cache_miss; /* Pages not found in the cache. */ - unsigned long st_map; /* Pages from mapped files. */ - unsigned long st_page_create; /* Pages created in the cache. */ - unsigned long st_page_in; /* Pages read in. */ - unsigned long st_page_out; /* Pages written out. */ - unsigned long st_ro_evict; /* Read-only pages evicted. */ - unsigned long st_rw_evict; /* Read-write pages evicted. */ - unsigned long st_hash_buckets; /* Number of hash buckets. */ - unsigned long st_hash_searches; /* Total hash chain searches. 
*/ - unsigned long st_hash_longest; /* Longest hash chain searched. */ - unsigned long st_hash_examined; /* Total hash entries searched. */ + u_int32_t st_cache_hit; /* Pages found in the cache. */ + u_int32_t st_cache_miss; /* Pages not found in the cache. */ + u_int32_t st_map; /* Pages from mapped files. */ + u_int32_t st_page_create; /* Pages created in the cache. */ + u_int32_t st_page_in; /* Pages read in. */ + u_int32_t st_page_out; /* Pages written out. */ + u_int32_t st_ro_evict; /* Clean pages forced from the cache. */ + u_int32_t st_rw_evict; /* Dirty pages forced from the cache. */ + u_int32_t st_hash_buckets; /* Number of hash buckets. */ + u_int32_t st_hash_searches; /* Total hash chain searches. */ + u_int32_t st_hash_longest; /* Longest hash chain searched. */ + u_int32_t st_hash_examined; /* Total hash entries searched. */ + u_int32_t st_page_clean; /* Clean pages. */ + u_int32_t st_page_dirty; /* Dirty pages. */ + u_int32_t st_page_trickle; /* Pages written by memp_trickle. */ + u_int32_t st_region_wait; /* Region lock granted after wait. */ + u_int32_t st_region_nowait; /* Region lock granted without wait. */ }; /* Mpool file statistics structure. */ struct __db_mpool_fstat { char *file_name; /* File name. */ size_t st_pagesize; /* Page size. */ - unsigned long st_cache_hit; /* Pages found in the cache. */ - unsigned long st_cache_miss; /* Pages not found in the cache. */ - unsigned long st_map; /* Pages from mapped files. */ - unsigned long st_page_create; /* Pages created in the cache. */ - unsigned long st_page_in; /* Pages read in. */ - unsigned long st_page_out; /* Pages written out. */ + u_int32_t st_cache_hit; /* Pages found in the cache. */ + u_int32_t st_cache_miss; /* Pages not found in the cache. */ + u_int32_t st_map; /* Pages from mapped files. */ + u_int32_t st_page_create; /* Pages created in the cache. */ + u_int32_t st_page_in; /* Pages read in. */ + u_int32_t st_page_out; /* Pages written out. 
*/ }; #if defined(__cplusplus) @@ -654,6 +712,7 @@ int memp_register __P((DB_MPOOL *, int, int memp_stat __P((DB_MPOOL *, DB_MPOOL_STAT **, DB_MPOOL_FSTAT ***, void *(*)(size_t))); int memp_sync __P((DB_MPOOL *, DB_LSN *)); +int memp_trickle __P((DB_MPOOL *, int, int *)); int memp_unlink __P((const char *, int, DB_ENV *)); #if defined(__cplusplus) }; diff --git a/db2/db/db.c b/db2/db/db.c index 9ebe73cf6e..50b14eba7c 100644 --- a/db2/db/db.c +++ b/db2/db/db.c @@ -44,7 +44,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)db.c 10.41 (Sleepycat) 9/23/97"; +static const char sccsid[] = "@(#)db.c 10.44 (Sleepycat) 10/25/97"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -125,13 +125,19 @@ db_open(fname, type, flags, mode, dbenv, dbinfo, dbpp) if ((ret = __db_fchk(dbenv, "db_open", flags, OKFLAGS)) != 0) return (ret); + if (dbenv != NULL && + LF_ISSET(DB_THREAD) && !F_ISSET(dbenv, DB_ENV_THREAD)) { + __db_err(dbenv, "environment not created using DB_THREAD"); + return (EINVAL); + } + /* Initialize for error return. */ fd = -1; need_fileid = 1; real_name = NULL; /* Allocate the DB structure, reference the DB_ENV structure. 
*/ - if ((dbp = (DB *)calloc(1, sizeof(DB))) == NULL) { + if ((dbp = (DB *)__db_calloc(1, sizeof(DB))) == NULL) { __db_err(dbenv, "%s", strerror(ENOMEM)); return (ENOMEM); } @@ -239,7 +245,7 @@ db_open(fname, type, flags, mode, dbenv, dbinfo, dbpp) */ retry_cnt = 0; open_retry: if (LF_ISSET(DB_CREATE)) { - if ((ret = __db_fdopen(real_name, flags | DB_EXCL, + if ((ret = __db_open(real_name, flags | DB_EXCL, OKFLAGS | DB_EXCL, mode, &fd)) != 0) if (ret == EEXIST) { LF_CLR(DB_CREATE); @@ -250,7 +256,7 @@ open_retry: if (LF_ISSET(DB_CREATE)) { goto err; } } else - if ((ret = __db_fdopen(real_name, + if ((ret = __db_open(real_name, flags, OKFLAGS, mode, &fd)) != 0) { __db_err(dbenv, "%s: %s", fname, strerror(ret)); goto err; @@ -264,8 +270,11 @@ open_retry: if (LF_ISSET(DB_CREATE)) { */ if (dbp->pgsize == 0) { if ((ret = - __db_stat(dbenv, real_name, fd, NULL, &io)) != 0) + __db_ioinfo(real_name, fd, NULL, &io)) != 0) { + __db_err(dbenv, + "%s: %s", real_name, strerror(ret)); goto err; + } if (io < 512) io = 512; if (io > 16 * 1024) @@ -477,7 +486,7 @@ empty: /* if (dbenv == NULL) { if ((dbp->mp_dbenv = - (DB_ENV *)calloc(sizeof(DB_ENV), 1)) == NULL) { + (DB_ENV *)__db_calloc(sizeof(DB_ENV), 1)) == NULL) { ret = ENOMEM; goto err; } @@ -491,9 +500,9 @@ empty: /* restore = 1; } envp->mp_size = cachesize; - F_SET(envp, DB_MPOOL_PRIVATE); - if ((ret = memp_open(NULL, - DB_CREATE, S_IRUSR | S_IWUSR, envp, &dbp->mp)) != 0) + if ((ret = memp_open(NULL, DB_CREATE | DB_MPOOL_PRIVATE | + (F_ISSET(dbp, DB_AM_THREAD) ? DB_THREAD : 0), + S_IRUSR | S_IWUSR, envp, &dbp->mp)) != 0) goto err; if (restore) *dbenv = t_dbenv; @@ -725,7 +734,8 @@ db_close(dbp, flags) } /* Sync the memory pool. */ - if ((t_ret = memp_fsync(dbp->mpf)) != 0 && ret == 0) + if ((t_ret = memp_fsync(dbp->mpf)) != 0 && + t_ret != DB_INCOMPLETE && ret == 0) ret = t_ret; /* Close the memory pool file. 
*/ diff --git a/db2/db/db.src b/db2/db/db.src index a3e2f7b75c..07d98123ac 100644 --- a/db2/db/db.src +++ b/db2/db/db.src @@ -3,7 +3,7 @@ * * Copyright (c) 1996, 1997 * Sleepycat Software. All rights reserved. - * @(#)db.src 10.3 (Sleepycat) 8/18/97 + * @(#)db.src 10.4 (Sleepycat) 11/2/97 */ #include "config.h" @@ -81,15 +81,17 @@ POINTER nextlsn DB_LSN * lu END /* - * ovref -- Handles increment of overflow page reference count. + * ovref -- Handles increment/decrement of overflow page reference count. * * fileid: identifies the file being modified. - * pgno: page number being incremented. - * lsn the page's original lsn. + * pgno: page number whose ref count is being incremented/decremented. + * adjust: the adjustment being made. + * lsn: the page's original lsn. */ BEGIN ovref ARG fileid u_int32_t lu ARG pgno db_pgno_t lu +ARG adjust int32_t ld POINTER lsn DB_LSN * lu END diff --git a/db2/db/db_auto.c b/db2/db/db_auto.c index 6922504383..d40d964542 100644 --- a/db2/db/db_auto.c +++ b/db2/db/db_auto.c @@ -62,7 +62,7 @@ int __db_addrem_log(logp, txnid, ret_lsnp, flags, + sizeof(u_int32_t) + (hdr == NULL ? 0 : hdr->size) + sizeof(u_int32_t) + (dbt == NULL ? 
0 : dbt->size) + sizeof(*pagelsn); - if ((logrec.data = (void *)malloc(logrec.size)) == NULL) + if ((logrec.data = (void *)__db_malloc(logrec.size)) == NULL) return (ENOMEM); bp = logrec.data; @@ -114,7 +114,7 @@ int __db_addrem_log(logp, txnid, ret_lsnp, flags, ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags); if (txnid != NULL) txnid->last_lsn = *ret_lsnp; - free(logrec.data); + __db_free(logrec.data); return (ret); } @@ -176,7 +176,7 @@ __db_addrem_print(notused1, dbtp, lsnp, notused3, notused4) printf("\tpagelsn: [%lu][%lu]\n", (u_long)argp->pagelsn.file, (u_long)argp->pagelsn.offset); printf("\n"); - free(argp); + __db_free(argp); return (0); } @@ -191,7 +191,7 @@ __db_addrem_read(recbuf, argpp) __db_addrem_args *argp; u_int8_t *bp; - argp = (__db_addrem_args *)malloc(sizeof(__db_addrem_args) + + argp = (__db_addrem_args *)__db_malloc(sizeof(__db_addrem_args) + sizeof(DB_TXN)); if (argp == NULL) return (ENOMEM); @@ -266,7 +266,7 @@ int __db_split_log(logp, txnid, ret_lsnp, flags, + sizeof(pgno) + sizeof(u_int32_t) + (pageimage == NULL ? 
0 : pageimage->size) + sizeof(*pagelsn); - if ((logrec.data = (void *)malloc(logrec.size)) == NULL) + if ((logrec.data = (void *)__db_malloc(logrec.size)) == NULL) return (ENOMEM); bp = logrec.data; @@ -304,7 +304,7 @@ int __db_split_log(logp, txnid, ret_lsnp, flags, ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags); if (txnid != NULL) txnid->last_lsn = *ret_lsnp; - free(logrec.data); + __db_free(logrec.data); return (ret); } @@ -355,7 +355,7 @@ __db_split_print(notused1, dbtp, lsnp, notused3, notused4) printf("\tpagelsn: [%lu][%lu]\n", (u_long)argp->pagelsn.file, (u_long)argp->pagelsn.offset); printf("\n"); - free(argp); + __db_free(argp); return (0); } @@ -370,7 +370,7 @@ __db_split_read(recbuf, argpp) __db_split_args *argp; u_int8_t *bp; - argp = (__db_split_args *)malloc(sizeof(__db_split_args) + + argp = (__db_split_args *)__db_malloc(sizeof(__db_split_args) + sizeof(DB_TXN)); if (argp == NULL) return (ENOMEM); @@ -447,7 +447,7 @@ int __db_big_log(logp, txnid, ret_lsnp, flags, + sizeof(*pagelsn) + sizeof(*prevlsn) + sizeof(*nextlsn); - if ((logrec.data = (void *)malloc(logrec.size)) == NULL) + if ((logrec.data = (void *)__db_malloc(logrec.size)) == NULL) return (ENOMEM); bp = logrec.data; @@ -499,7 +499,7 @@ int __db_big_log(logp, txnid, ret_lsnp, flags, ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags); if (txnid != NULL) txnid->last_lsn = *ret_lsnp; - free(logrec.data); + __db_free(logrec.data); return (ret); } @@ -556,7 +556,7 @@ __db_big_print(notused1, dbtp, lsnp, notused3, notused4) printf("\tnextlsn: [%lu][%lu]\n", (u_long)argp->nextlsn.file, (u_long)argp->nextlsn.offset); printf("\n"); - free(argp); + __db_free(argp); return (0); } @@ -571,7 +571,7 @@ __db_big_read(recbuf, argpp) __db_big_args *argp; u_int8_t *bp; - argp = (__db_big_args *)malloc(sizeof(__db_big_args) + + argp = (__db_big_args *)__db_malloc(sizeof(__db_big_args) + sizeof(DB_TXN)); if (argp == NULL) return (ENOMEM); @@ -610,16 +610,17 @@ __db_big_read(recbuf, argpp) /* * PUBLIC: 
int __db_ovref_log * PUBLIC: __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t, - * PUBLIC: u_int32_t, db_pgno_t, DB_LSN *)); + * PUBLIC: u_int32_t, db_pgno_t, int32_t, DB_LSN *)); */ int __db_ovref_log(logp, txnid, ret_lsnp, flags, - fileid, pgno, lsn) + fileid, pgno, adjust, lsn) DB_LOG *logp; DB_TXN *txnid; DB_LSN *ret_lsnp; u_int32_t flags; u_int32_t fileid; db_pgno_t pgno; + int32_t adjust; DB_LSN * lsn; { DBT logrec; @@ -639,8 +640,9 @@ int __db_ovref_log(logp, txnid, ret_lsnp, flags, logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN) + sizeof(fileid) + sizeof(pgno) + + sizeof(adjust) + sizeof(*lsn); - if ((logrec.data = (void *)malloc(logrec.size)) == NULL) + if ((logrec.data = (void *)__db_malloc(logrec.size)) == NULL) return (ENOMEM); bp = logrec.data; @@ -654,6 +656,8 @@ int __db_ovref_log(logp, txnid, ret_lsnp, flags, bp += sizeof(fileid); memcpy(bp, &pgno, sizeof(pgno)); bp += sizeof(pgno); + memcpy(bp, &adjust, sizeof(adjust)); + bp += sizeof(adjust); if (lsn != NULL) memcpy(bp, lsn, sizeof(*lsn)); else @@ -666,7 +670,7 @@ int __db_ovref_log(logp, txnid, ret_lsnp, flags, ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags); if (txnid != NULL) txnid->last_lsn = *ret_lsnp; - free(logrec.data); + __db_free(logrec.data); return (ret); } @@ -704,10 +708,11 @@ __db_ovref_print(notused1, dbtp, lsnp, notused3, notused4) (u_long)argp->prev_lsn.offset); printf("\tfileid: %lu\n", (u_long)argp->fileid); printf("\tpgno: %lu\n", (u_long)argp->pgno); + printf("\tadjust: %ld\n", (long)argp->adjust); printf("\tlsn: [%lu][%lu]\n", (u_long)argp->lsn.file, (u_long)argp->lsn.offset); printf("\n"); - free(argp); + __db_free(argp); return (0); } @@ -722,7 +727,7 @@ __db_ovref_read(recbuf, argpp) __db_ovref_args *argp; u_int8_t *bp; - argp = (__db_ovref_args *)malloc(sizeof(__db_ovref_args) + + argp = (__db_ovref_args *)__db_malloc(sizeof(__db_ovref_args) + sizeof(DB_TXN)); if (argp == NULL) return (ENOMEM); @@ -738,6 +743,8 @@ __db_ovref_read(recbuf, argpp) bp += 
sizeof(argp->fileid); memcpy(&argp->pgno, bp, sizeof(argp->pgno)); bp += sizeof(argp->pgno); + memcpy(&argp->adjust, bp, sizeof(argp->adjust)); + bp += sizeof(argp->adjust); memcpy(&argp->lsn, bp, sizeof(argp->lsn)); bp += sizeof(argp->lsn); *argpp = argp; @@ -787,7 +794,7 @@ int __db_relink_log(logp, txnid, ret_lsnp, flags, + sizeof(*lsn_prev) + sizeof(next) + sizeof(*lsn_next); - if ((logrec.data = (void *)malloc(logrec.size)) == NULL) + if ((logrec.data = (void *)__db_malloc(logrec.size)) == NULL) return (ENOMEM); bp = logrec.data; @@ -827,7 +834,7 @@ int __db_relink_log(logp, txnid, ret_lsnp, flags, ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags); if (txnid != NULL) txnid->last_lsn = *ret_lsnp; - free(logrec.data); + __db_free(logrec.data); return (ret); } @@ -874,7 +881,7 @@ __db_relink_print(notused1, dbtp, lsnp, notused3, notused4) printf("\tlsn_next: [%lu][%lu]\n", (u_long)argp->lsn_next.file, (u_long)argp->lsn_next.offset); printf("\n"); - free(argp); + __db_free(argp); return (0); } @@ -889,7 +896,7 @@ __db_relink_read(recbuf, argpp) __db_relink_args *argp; u_int8_t *bp; - argp = (__db_relink_args *)malloc(sizeof(__db_relink_args) + + argp = (__db_relink_args *)__db_malloc(sizeof(__db_relink_args) + sizeof(DB_TXN)); if (argp == NULL) return (ENOMEM); @@ -957,7 +964,7 @@ int __db_addpage_log(logp, txnid, ret_lsnp, flags, + sizeof(*lsn) + sizeof(nextpgno) + sizeof(*nextlsn); - if ((logrec.data = (void *)malloc(logrec.size)) == NULL) + if ((logrec.data = (void *)__db_malloc(logrec.size)) == NULL) return (ENOMEM); bp = logrec.data; @@ -990,7 +997,7 @@ int __db_addpage_log(logp, txnid, ret_lsnp, flags, ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags); if (txnid != NULL) txnid->last_lsn = *ret_lsnp; - free(logrec.data); + __db_free(logrec.data); return (ret); } @@ -1034,7 +1041,7 @@ __db_addpage_print(notused1, dbtp, lsnp, notused3, notused4) printf("\tnextlsn: [%lu][%lu]\n", (u_long)argp->nextlsn.file, (u_long)argp->nextlsn.offset); printf("\n"); - 
free(argp); + __db_free(argp); return (0); } @@ -1049,7 +1056,7 @@ __db_addpage_read(recbuf, argpp) __db_addpage_args *argp; u_int8_t *bp; - argp = (__db_addpage_args *)malloc(sizeof(__db_addpage_args) + + argp = (__db_addpage_args *)__db_malloc(sizeof(__db_addpage_args) + sizeof(DB_TXN)); if (argp == NULL) return (ENOMEM); @@ -1114,7 +1121,7 @@ int __db_debug_log(logp, txnid, ret_lsnp, flags, + sizeof(u_int32_t) + (key == NULL ? 0 : key->size) + sizeof(u_int32_t) + (data == NULL ? 0 : data->size) + sizeof(arg_flags); - if ((logrec.data = (void *)malloc(logrec.size)) == NULL) + if ((logrec.data = (void *)__db_malloc(logrec.size)) == NULL) return (ENOMEM); bp = logrec.data; @@ -1165,7 +1172,7 @@ int __db_debug_log(logp, txnid, ret_lsnp, flags, ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags); if (txnid != NULL) txnid->last_lsn = *ret_lsnp; - free(logrec.data); + __db_free(logrec.data); return (ret); } @@ -1231,7 +1238,7 @@ __db_debug_print(notused1, dbtp, lsnp, notused3, notused4) printf("\n"); printf("\targ_flags: %lu\n", (u_long)argp->arg_flags); printf("\n"); - free(argp); + __db_free(argp); return (0); } @@ -1246,7 +1253,7 @@ __db_debug_read(recbuf, argpp) __db_debug_args *argp; u_int8_t *bp; - argp = (__db_debug_args *)malloc(sizeof(__db_debug_args) + + argp = (__db_debug_args *)__db_malloc(sizeof(__db_debug_args) + sizeof(DB_TXN)); if (argp == NULL) return (ENOMEM); @@ -1303,7 +1310,7 @@ int __db_noop_log(logp, txnid, ret_lsnp, flags) } else lsnp = &txnid->last_lsn; logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN); - if ((logrec.data = (void *)malloc(logrec.size)) == NULL) + if ((logrec.data = (void *)__db_malloc(logrec.size)) == NULL) return (ENOMEM); bp = logrec.data; @@ -1320,7 +1327,7 @@ int __db_noop_log(logp, txnid, ret_lsnp, flags) ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags); if (txnid != NULL) txnid->last_lsn = *ret_lsnp; - free(logrec.data); + __db_free(logrec.data); return (ret); } @@ -1357,7 +1364,7 @@ 
__db_noop_print(notused1, dbtp, lsnp, notused3, notused4) (u_long)argp->prev_lsn.file, (u_long)argp->prev_lsn.offset); printf("\n"); - free(argp); + __db_free(argp); return (0); } @@ -1372,7 +1379,7 @@ __db_noop_read(recbuf, argpp) __db_noop_args *argp; u_int8_t *bp; - argp = (__db_noop_args *)malloc(sizeof(__db_noop_args) + + argp = (__db_noop_args *)__db_malloc(sizeof(__db_noop_args) + sizeof(DB_TXN)); if (argp == NULL) return (ENOMEM); diff --git a/db2/db/db_dispatch.c b/db2/db/db_dispatch.c index 3d7b162d75..a4bcdb7628 100644 --- a/db2/db/db_dispatch.c +++ b/db2/db/db_dispatch.c @@ -43,7 +43,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)db_dispatch.c 10.5 (Sleepycat) 7/2/97"; +static const char sccsid[] = "@(#)db_dispatch.c 10.6 (Sleepycat) 10/25/97"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -155,12 +155,12 @@ __db_add_recovery(dbenv, func, ndx) if (ndx >= dispatch_size) { if (dispatch_table == NULL) dispatch_table = (int (**) - __P((DB_LOG *, DBT *, DB_LSN *, int, void *))) - malloc(DB_user_BEGIN * sizeof(dispatch_table[0])); + __P((DB_LOG *, DBT *, DB_LSN *, int, void *))) + __db_malloc(DB_user_BEGIN * sizeof(dispatch_table[0])); else dispatch_table = (int (**) __P((DB_LOG *, DBT *, DB_LSN *, int, void *))) - realloc(dispatch_table, (DB_user_BEGIN + + __db_realloc(dispatch_table, (DB_user_BEGIN + dispatch_size) * sizeof(dispatch_table[0])); if (dispatch_table == NULL) { __db_err(dbenv, "%s", strerror(ENOMEM)); @@ -187,8 +187,8 @@ __db_txnlist_init(retp) { __db_txnhead *headp; - if ((headp = - (struct __db_txnhead *)malloc(sizeof(struct __db_txnhead))) == NULL) + if ((headp = (struct __db_txnhead *) + __db_malloc(sizeof(struct __db_txnhead))) == NULL) return (ENOMEM); LIST_INIT(&headp->head); @@ -212,7 +212,7 @@ __db_txnlist_add(listp, txnid) __db_txnhead *hp; __db_txnlist *elp; - if ((elp = (__db_txnlist *)malloc(sizeof(__db_txnlist))) == NULL) + if ((elp = (__db_txnlist *)__db_malloc(sizeof(__db_txnlist))) == NULL) 
return (ENOMEM); elp->txnid = txnid; diff --git a/db2/db/db_dup.c b/db2/db/db_dup.c index 66c6c2616a..faeefa0744 100644 --- a/db2/db/db_dup.c +++ b/db2/db/db_dup.c @@ -8,7 +8,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)db_dup.c 10.9 (Sleepycat) 9/3/97"; +static const char sccsid[] = "@(#)db_dup.c 10.10 (Sleepycat) 10/25/97"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -217,7 +217,7 @@ __db_dsplit(dbp, hp, indxp, size, newfunc) indx = *indxp; /* Create a temporary page to do compaction onto. */ - if ((tp = (PAGE *)malloc(dbp->pgsize)) == NULL) + if ((tp = (PAGE *)__db_malloc(dbp->pgsize)) == NULL) return (ENOMEM); #ifdef DEBUG memset(tp, 0xff, dbp->pgsize); diff --git a/db2/db/db_overflow.c b/db2/db/db_overflow.c index 2340e9e358..8c6619f228 100644 --- a/db2/db/db_overflow.c +++ b/db2/db/db_overflow.c @@ -47,7 +47,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)db_overflow.c 10.4 (Sleepycat) 7/2/97"; +static const char sccsid[] = "@(#)db_overflow.c 10.7 (Sleepycat) 11/2/97"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -121,14 +121,14 @@ __db_goff(dbp, dbt, tlen, pgno, bpp, bpsz) } } else if (F_ISSET(dbt, DB_DBT_MALLOC)) { dbt->data = dbp->db_malloc == NULL ? - (void *)malloc(needed + 1) : + (void *)__db_malloc(needed + 1) : (void *)dbp->db_malloc(needed + 1); if (dbt->data == NULL) return (ENOMEM); } else if (*bpsz == 0 || *bpsz < needed) { *bpp = (*bpp == NULL ? - (void *)malloc(needed + 1) : - (void *)realloc(*bpp, needed + 1)); + (void *)__db_malloc(needed + 1) : + (void *)__db_realloc(*bpp, needed + 1)); if (*bpp == NULL) return (ENOMEM); *bpsz = needed + 1; @@ -256,15 +256,16 @@ __db_poff(dbp, dbt, pgnop, newfunc) } /* - * __db_ioff -- - * Increment the reference count on an overflow page. + * __db_ovref -- + * Increment/decrement the reference count on an overflow page. 
* - * PUBLIC: int __db_ioff __P((DB *, db_pgno_t)); + * PUBLIC: int __db_ovref __P((DB *, db_pgno_t, int)); */ int -__db_ioff(dbp, pgno) +__db_ovref(dbp, pgno, adjust) DB *dbp; db_pgno_t pgno; + int adjust; { PAGE *h; int ret; @@ -274,10 +275,12 @@ __db_ioff(dbp, pgno) return (ret); } - ++OV_REF(h); - if (DB_LOGGING(dbp) && (ret = __db_ovref_log(dbp->dbenv->lg_info, - dbp->txn, &LSN(h), 0, dbp->log_fileid, h->pgno, &LSN(h))) != 0) - return (ret); + if (DB_LOGGING(dbp)) + if ((ret = __db_ovref_log(dbp->dbenv->lg_info, dbp->txn, + &LSN(h), 0, dbp->log_fileid, h->pgno, (int32_t)adjust, + &LSN(h))) != 0) + return (ret); + OV_REF(h) += adjust; (void)memp_fput(dbp->mpf, h, DB_MPOOL_DIRTY); return (0); @@ -311,9 +314,8 @@ __db_doff(dbp, pgno, freefunc) * one key/data item, decrement the reference count and return. */ if (TYPE(pagep) == P_OVERFLOW && OV_REF(pagep) > 1) { - --OV_REF(pagep); - (void)memp_fput(dbp->mpf, pagep, DB_MPOOL_DIRTY); - return (0); + (void)memp_fput(dbp->mpf, pagep, 0); + return (__db_ovref(dbp, pgno, -1)); } if (DB_LOGGING(dbp)) { diff --git a/db2/db/db_pr.c b/db2/db/db_pr.c index 09d8057da4..6b6171a13c 100644 --- a/db2/db/db_pr.c +++ b/db2/db/db_pr.c @@ -8,7 +8,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)db_pr.c 10.17 (Sleepycat) 9/15/97"; +static const char sccsid[] = "@(#)db_pr.c 10.19 (Sleepycat) 11/2/97"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -183,7 +183,6 @@ __db_prbtree(dbp) }; BTMETA *mp; BTREE *t; - DB_LOCK lock; EPG *epg; FILE *fp; RECNO *rp; @@ -195,8 +194,6 @@ __db_prbtree(dbp) (void)fprintf(fp, "%s\nOn-page metadata:\n", DB_LINE); i = PGNO_METADATA; - if ((ret = __bam_lget(dbp, 0, PGNO_METADATA, DB_LOCK_READ, &lock)) != 0) - return (ret); if ((ret = __bam_pget(dbp, (PAGE **)&mp, &i, 0)) != 0) return (ret); @@ -211,7 +208,6 @@ __db_prbtree(dbp) __db_prflags(mp->flags, mfn); (void)fprintf(fp, "\n"); (void)memp_fput(dbp->mpf, mp, 0); - (void)__bam_lput(dbp, lock); (void)fprintf(fp, 
"%s\nDB_INFO:\n", DB_LINE); (void)fprintf(fp, "bt_maxkey: %lu bt_minkey: %lu\n", @@ -416,7 +412,8 @@ __db_prpage(h, all) (TYPE(h) == P_LRECNO && h->pgno == PGNO_ROOT)) fprintf(fp, " total records: %4lu", (u_long)RE_NREC(h)); fprintf(fp, "\n"); - if (TYPE(h) == P_LBTREE || TYPE(h) == P_LRECNO) + if (TYPE(h) == P_LBTREE || TYPE(h) == P_LRECNO || + TYPE(h) == P_DUPLICATE || TYPE(h) == P_OVERFLOW) fprintf(fp, " prev: %4lu next: %4lu", (u_long)PREV_PGNO(h), (u_long)NEXT_PGNO(h)); if (TYPE(h) == P_IBTREE || TYPE(h) == P_LBTREE) diff --git a/db2/db/db_rec.c b/db2/db/db_rec.c index 900b0ed579..2c9ca9abe0 100644 --- a/db2/db/db_rec.c +++ b/db2/db/db_rec.c @@ -8,7 +8,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)db_rec.c 10.8 (Sleepycat) 8/22/97"; +static const char sccsid[] = "@(#)db_rec.c 10.10 (Sleepycat) 11/2/97"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -330,7 +330,7 @@ out: REC_CLOSE; /* * __db_ovref_recover -- - * Recovery function for __db_ioff(). + * Recovery function for __db_ovref(). * * PUBLIC: int __db_ovref_recover __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); */ @@ -357,22 +357,21 @@ __db_ovref_recover(logp, dbtp, lsnp, redo, info) } modified = 0; - if (log_compare(lsnp, &argp->lsn) == 0 && redo) { + if (log_compare(&LSN(pagep), &argp->lsn) == 0 && redo) { /* Need to redo update described. */ - ++OV_REF(pagep); + OV_REF(pagep) += argp->adjust; pagep->lsn = *lsnp; modified = 1; } else if (log_compare(lsnp, &LSN(pagep)) == 0 && !redo) { /* Need to undo update described. */ - --OV_REF(pagep); + OV_REF(pagep) -= argp->adjust; pagep->lsn = argp->lsn; modified = 1; } - ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0); - - *lsnp = argp->prev_lsn; + if ((ret = memp_fput(mpf, pagep, modified ? 
DB_MPOOL_DIRTY : 0)) == 0) + *lsnp = argp->prev_lsn; out: REC_CLOSE; } @@ -413,7 +412,7 @@ __db_relink_recover(logp, dbtp, lsnp, redo, info) goto next; } modified = 0; - if (log_compare(lsnp, &argp->lsn) == 0 && redo) { + if (log_compare(&LSN(pagep), &argp->lsn) == 0 && redo) { /* Redo the relink. */ pagep->lsn = *lsnp; modified = 1; @@ -438,7 +437,7 @@ next: if ((ret = memp_fget(mpf, &argp->next, 0, &pagep)) != 0) { goto prev; } modified = 0; - if (log_compare(lsnp, &argp->lsn_next) == 0 && redo) { + if (log_compare(&LSN(pagep), &argp->lsn_next) == 0 && redo) { /* Redo the relink. */ pagep->prev_pgno = argp->prev; @@ -464,7 +463,7 @@ prev: if ((ret = memp_fget(mpf, &argp->prev, 0, &pagep)) != 0) { goto done; } modified = 0; - if (log_compare(lsnp, &argp->lsn_prev) == 0 && redo) { + if (log_compare(&LSN(pagep), &argp->lsn_prev) == 0 && redo) { /* Redo the relink. */ pagep->next_pgno = argp->next; diff --git a/db2/db/db_ret.c b/db2/db/db_ret.c index ee2bc82f87..bcec308b95 100644 --- a/db2/db/db_ret.c +++ b/db2/db/db_ret.c @@ -8,7 +8,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)db_ret.c 10.7 (Sleepycat) 9/15/97"; +static const char sccsid[] = "@(#)db_ret.c 10.8 (Sleepycat) 10/25/97"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -122,7 +122,7 @@ __db_retcopy(dbt, data, len, memp, memsize, db_malloc) */ if (F_ISSET(dbt, DB_DBT_MALLOC)) { dbt->data = db_malloc == NULL ? - (void *)malloc(len + 1) : + (void *)__db_malloc(len + 1) : (void *)db_malloc(len + 1); if (dbt->data == NULL) return (ENOMEM); @@ -134,8 +134,8 @@ __db_retcopy(dbt, data, len, memp, memsize, db_malloc) } else { if (*memsize == 0 || *memsize < len) { *memp = *memp == NULL ? 
- (void *)malloc(len + 1) : - (void *)realloc(*memp, len + 1); + (void *)__db_malloc(len + 1) : + (void *)__db_realloc(*memp, len + 1); if (*memp == NULL) { *memsize = 0; return (ENOMEM); diff --git a/db2/db/db_thread.c b/db2/db/db_thread.c index 170baf5345..d9086918dd 100644 --- a/db2/db/db_thread.c +++ b/db2/db/db_thread.c @@ -8,7 +8,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)db_thread.c 8.12 (Sleepycat) 9/23/97"; +static const char sccsid[] = "@(#)db_thread.c 8.13 (Sleepycat) 10/25/97"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -42,8 +42,7 @@ __db_gethandle(dbp, am_func, dbpp) DB *ret_dbp; int ret, t_ret; - if ((ret = __db_mutex_lock((db_mutex_t *)dbp->mutexp, -1, - dbp->dbenv == NULL ? NULL : dbp->dbenv->db_yield)) != 0) + if ((ret = __db_mutex_lock((db_mutex_t *)dbp->mutexp, -1)) != 0) return (ret); if ((ret_dbp = LIST_FIRST(&dbp->handleq)) != NULL) @@ -51,7 +50,7 @@ __db_gethandle(dbp, am_func, dbpp) LIST_REMOVE(ret_dbp, links); else { /* Allocate a new handle. */ - if ((ret_dbp = (DB *)malloc(sizeof(*dbp))) == NULL) { + if ((ret_dbp = (DB *)__db_malloc(sizeof(*dbp))) == NULL) { ret = ENOMEM; goto err; } @@ -94,8 +93,7 @@ __db_puthandle(dbp) int ret; master = dbp->master; - if ((ret = __db_mutex_lock((db_mutex_t *)master->mutexp, -1, - dbp->dbenv == NULL ? NULL : dbp->dbenv->db_yield)) != 0) + if ((ret = __db_mutex_lock((db_mutex_t *)master->mutexp, -1)) != 0) return (ret); LIST_INSERT_HEAD(&master->handleq, dbp, links); diff --git a/db2/db185/db185.c b/db2/db185/db185.c index bf5e37edcb..1affdcdf0d 100644 --- a/db2/db185/db185.c +++ b/db2/db185/db185.c @@ -11,7 +11,7 @@ static const char copyright[] = "@(#) Copyright (c) 1997\n\ Sleepycat Software Inc. 
All rights reserved.\n"; -static const char sccsid[] = "@(#)db185.c 8.13 (Sleepycat) 8/24/97"; +static const char sccsid[] = "@(#)db185.c 8.14 (Sleepycat) 10/25/97"; #endif #ifndef NO_SYSTEM_INCLUDES @@ -52,7 +52,7 @@ __dbopen(file, oflags, mode, type, openinfo) DB_INFO dbinfo, *dbinfop; int s_errno; - if ((db185p = (DB185 *)calloc(1, sizeof(DB185))) == NULL) + if ((db185p = (DB185 *)__db_calloc(1, sizeof(DB185))) == NULL) return (NULL); dbinfop = NULL; memset(&dbinfo, 0, sizeof(dbinfo)); @@ -119,7 +119,7 @@ __dbopen(file, oflags, mode, type, openinfo) */ if (file != NULL) { if (oflags & O_CREAT && __db_exists(file, NULL) != 0) - (void)close(open(file, oflags, mode)); + (void)__os_close(open(file, oflags, mode)); dbinfop->re_source = (char *)file; file = NULL; } @@ -131,7 +131,7 @@ __dbopen(file, oflags, mode, type, openinfo) */ #define BFMSG "DB: DB 1.85's recno bfname field is not supported.\n" if (ri->bfname != NULL) { - (void)write(2, BFMSG, sizeof(BFMSG) - 1); + (void)__os_write(2, BFMSG, sizeof(BFMSG) - 1); goto einval; } @@ -183,7 +183,7 @@ __dbopen(file, oflags, mode, type, openinfo) */ if ((__set_errno(db_open(file, type, __db_oflags(oflags), mode, NULL, dbinfop, &dbp))) != 0) { - free(db185p); + __db_free(db185p); return (NULL); } @@ -192,7 +192,7 @@ __dbopen(file, oflags, mode, type, openinfo) != 0) { s_errno = errno; (void)dbp->close(dbp, 0); - free(db185p); + __db_free(db185p); __set_errno(s_errno); return (NULL); } @@ -200,7 +200,7 @@ __dbopen(file, oflags, mode, type, openinfo) db185p->internal = dbp; return (db185p); -einval: free(db185p); +einval: __db_free(db185p); __set_errno(EINVAL); return (NULL); } @@ -216,7 +216,7 @@ db185_close(db185p) __set_errno(dbp->close(dbp, 0)); - free(db185p); + __db_free(db185p); return (errno == 0 ? 0 : -1); } @@ -461,7 +461,7 @@ db185_sync(db185p, flags) * We can't support the R_RECNOSYNC flag. 
*/ #define RSMSG "DB: DB 1.85's R_RECNOSYNC sync flag is not supported.\n" - (void)write(2, RSMSG, sizeof(RSMSG) - 1); + (void)__os_write(2, RSMSG, sizeof(RSMSG) - 1); goto einval; default: goto einval; diff --git a/db2/db_int.h b/db2/db_int.h index 56dfddb73f..1f6c790345 100644 --- a/db2/db_int.h +++ b/db2/db_int.h @@ -4,7 +4,7 @@ * Copyright (c) 1996, 1997 * Sleepycat Software. All rights reserved. * - * @(#)db_int.h.src 10.30 (Sleepycat) 9/23/97 + * @(#)db_int.h.src 10.36 (Sleepycat) 10/31/97 */ #ifndef _DB_INTERNAL_H_ @@ -12,6 +12,7 @@ #include "db.h" /* Standard DB include file. */ #include "queue.h" +#include "os_func.h" #include "os_ext.h" /******************************************************* @@ -64,12 +65,16 @@ #undef SSZA #define SSZA(name, field) ((int)&(((name *)0)->field[0])) +/* Macros to return per-process address, offsets based on shared regions. */ +#define R_ADDR(base, offset) ((void *)((u_int8_t *)((base)->addr) + offset)) +#define R_OFFSET(base, p) ((u_int8_t *)(p) - (u_int8_t *)(base)->addr) + /* Free and free-string macros that overwrite memory during debugging. */ #ifdef DEBUG #undef FREE #define FREE(p, len) { \ memset(p, 0xff, len); \ - free(p); \ + __db_free(p); \ } #undef FREES #define FREES(p) { \ @@ -78,17 +83,17 @@ #else #undef FREE #define FREE(p, len) { \ - free(p); \ + __db_free(p); \ } #undef FREES #define FREES(p) { \ - free(p); \ + __db_free(p); \ } #endif /* Structure used to print flag values. */ typedef struct __fn { - u_int32_t mask; /* Flag value. */ + u_int32_t mask; /* Flag value. */ const char *name; /* Flag name. */ } FN; @@ -163,10 +168,8 @@ typedef struct _db_mutex_t { off_t off; /* Backing file offset. */ u_long pid; /* Lock holder: 0 or process pid. */ #endif -#ifdef MUTEX_STATISTICS - u_long mutex_set_wait; /* Blocking mutex: required waiting. */ - u_long mutex_set_nowait; /* Blocking mutex: without waiting. */ -#endif + u_int32_t mutex_set_wait; /* Granted after wait. 
*/ + u_int32_t mutex_set_nowait; /* Granted without waiting. */ } db_mutex_t; #include "mutex_ext.h" @@ -177,11 +180,10 @@ typedef struct _db_mutex_t { /* Lock/unlock a DB thread. */ #define DB_THREAD_LOCK(dbp) \ (F_ISSET(dbp, DB_AM_THREAD) ? \ - __db_mutex_lock((db_mutex_t *)(dbp)->mutexp, -1, \ - (dbp)->dbenv == NULL ? NULL : (dbp)->dbenv->db_yield) : 0) + __db_mutex_lock((db_mutex_t *)(dbp)->mutexp, -1) : 0) #define DB_THREAD_UNLOCK(dbp) \ (F_ISSET(dbp, DB_AM_THREAD) ? \ - __db_mutex_unlock((db_mutex_t *)(dbp)->mutexp, -1) : 0) + __db_mutex_unlock((db_mutex_t *)(dbp)->mutexp, -1) : 0) /* Btree/recno local statistics structure. */ struct __db_bt_lstat; typedef struct __db_bt_lstat DB_BTREE_LSTAT; @@ -260,7 +262,7 @@ typedef struct __dbpginfo { #define IS_ZERO_LSN(LSN) ((LSN).file == 0) /* Test if we need to log a change. */ -#define DB_LOGGING(dbp) \ +#define DB_LOGGING(dbp) \ (F_ISSET(dbp, DB_AM_LOGGING) && !F_ISSET(dbp, DB_AM_RECOVER)) #ifdef DEBUG diff --git a/db2/hash/hash.c b/db2/hash/hash.c index d986e08087..c08495378e 100644 --- a/db2/hash/hash.c +++ b/db2/hash/hash.c @@ -47,7 +47,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)hash.c 10.27 (Sleepycat) 9/15/97"; +static const char sccsid[] = "@(#)hash.c 10.33 (Sleepycat) 11/2/97"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -79,7 +79,7 @@ static int __ham_cursor __P((DB *, DB_TXN *, DBC **)); static int __ham_delete __P((DB *, DB_TXN *, DBT *, int)); static int __ham_dup_return __P((HTAB *, HASH_CURSOR *, DBT *, int)); static int __ham_get __P((DB *, DB_TXN *, DBT *, DBT *, int)); -static void __ham_init_htab __P((HTAB *)); +static void __ham_init_htab __P((HTAB *, u_int)); static int __ham_lookup __P((HTAB *, HASH_CURSOR *, const DBT *, u_int32_t, db_lockmode_t)); static int __ham_overwrite __P((HTAB *, HASH_CURSOR *, DBT *)); @@ -106,7 +106,7 @@ __ham_open(dbp, dbinfo) dbenv = dbp->dbenv; - if ((hashp = (HTAB *)calloc(1, sizeof(HTAB))) == NULL) + if ((hashp = (HTAB 
*)__db_calloc(1, sizeof(HTAB))) == NULL) return (ENOMEM); hashp->dbp = dbp; @@ -175,10 +175,9 @@ __ham_open(dbp, dbinfo) goto out; } - hashp->hdr->nelem = dbinfo != NULL ? dbinfo->h_nelem : 0; hashp->hdr->ffactor = dbinfo != NULL && dbinfo->h_ffactor ? dbinfo->h_ffactor : 0; - __ham_init_htab(hashp); + __ham_init_htab(hashp, dbinfo != NULL ? dbinfo->h_nelem : 0); if (F_ISSET(dbp, DB_AM_DUP)) F_SET(hashp->hdr, DB_HASH_DUP); if ((ret = __ham_dirty_page(hashp, (PAGE *)hashp->hdr)) != 0) @@ -190,7 +189,7 @@ __ham_open(dbp, dbinfo) TAILQ_INSERT_TAIL(&dbp->curs_queue, curs, links); /* Allocate memory for our split buffer. */ - if ((hashp->split_buf = (PAGE *)malloc(dbp->pgsize)) == NULL) { + if ((hashp->split_buf = (PAGE *)__db_malloc(dbp->pgsize)) == NULL) { ret = ENOMEM; goto out; } @@ -265,13 +264,13 @@ __ham_close(dbp) * Returns 0 on No Error */ static void -__ham_init_htab(hashp) +__ham_init_htab(hashp, nelem) HTAB *hashp; + u_int nelem; { - u_int32_t nelem; int32_t l2, nbuckets; - nelem = hashp->hdr->nelem; + hashp->hdr->nelem = 0; hashp->hdr->pagesize = hashp->dbp->pgsize; ZERO_LSN(hashp->hdr->lsn); hashp->hdr->magic = DB_HASHMAGIC; @@ -502,11 +501,11 @@ __ham_c_init(dbp, txnid, dbcp) DBC *db_curs; HASH_CURSOR *new_curs; - if ((db_curs = (DBC *)calloc(sizeof(DBC), 1)) == NULL) + if ((db_curs = (DBC *)__db_calloc(sizeof(DBC), 1)) == NULL) return (ENOMEM); if ((new_curs = - (HASH_CURSOR *)calloc(sizeof(struct cursor_t), 1)) == NULL) { + (HASH_CURSOR *)__db_calloc(sizeof(struct cursor_t), 1)) == NULL) { FREE(db_curs, sizeof(DBC)); return (ENOMEM); } @@ -555,7 +554,7 @@ __ham_delete(dbp, txn, key, flags) hashp->hash_accesses++; if ((ret = __ham_lookup(hashp, hcp, key, 0, DB_LOCK_WRITE)) == 0) if (F_ISSET(hcp, H_OK)) - ret = __ham_del_pair(hashp, hcp); + ret = __ham_del_pair(hashp, hcp, 1); else ret = DB_NOTFOUND; @@ -669,30 +668,41 @@ __ham_c_del(cursor, flags) if ((ret = __ham_get_cpage(hashp, hcp, DB_LOCK_WRITE)) != 0) goto out; if (F_ISSET(hcp, H_ISDUP) && 
hcp->dpgno != PGNO_INVALID) { - ppgno = PREV_PGNO(hcp->dpagep); - - /* Remove item from duplicate page. */ - chg_pgno = hcp->dpgno; - if ((ret = __db_drem(hashp->dbp, - &hcp->dpagep, hcp->dndx, __ham_del_page)) != 0) - goto out; - /* + * We are about to remove a duplicate from offpage. + * * There are 4 cases. - * 1. We removed an item on a page, but nothing else changed. - * 2. We removed the last item on a page, but there is a + * 1. We will remove an item on a page, but there are more + * items on that page. + * 2. We will remove the last item on a page, but there is a * following page of duplicates. - * 3. We removed the last item on a page, this page was the + * 3. We will remove the last item on a page, this page was the * last page in a duplicate set, but there were dups before * it. - * 4. We removed the last item on a page, removing the last + * 4. We will remove the last item on a page, removing the last * duplicate. * In case 1 hcp->dpagep is unchanged. * In case 2 hcp->dpagep comes back pointing to the next dup * page. * In case 3 hcp->dpagep comes back NULL. * In case 4 hcp->dpagep comes back NULL. + * + * Case 4 results in deleting the pair off the master page. + * The normal code for doing this knows how to delete the + * duplicates, so we will handle this case in the normal code. */ + ppgno = PREV_PGNO(hcp->dpagep); + if (ppgno == PGNO_INVALID && + NEXT_PGNO(hcp->dpagep) == PGNO_INVALID && + NUM_ENT(hcp->dpagep) == 1) + goto normal; + + /* Remove item from duplicate page. 
*/ + chg_pgno = hcp->dpgno; + if ((ret = __db_drem(hashp->dbp, + &hcp->dpagep, hcp->dndx, __ham_del_page)) != 0) + goto out; + if (hcp->dpagep == NULL) { if (ppgno != PGNO_INVALID) { /* Case 3 */ hcp->dpgno = ppgno; @@ -702,7 +712,7 @@ __ham_c_del(cursor, flags) hcp->dndx = NUM_ENT(hcp->dpagep); F_SET(hcp, H_DELETED); } else { /* Case 4 */ - ret = __ham_del_pair(hashp, hcp); + ret = __ham_del_pair(hashp, hcp, 1); hcp->dpgno = PGNO_INVALID; /* * Delpair updated the cursor queue, so we @@ -718,14 +728,14 @@ __ham_c_del(cursor, flags) H_DATAINDEX(hcp->bndx))), &hcp->dpgno, sizeof(db_pgno_t)); F_SET(hcp, H_DELETED); - } else /* Case 1 */ + } else /* Case 1 */ F_SET(hcp, H_DELETED); if (chg_pgno != PGNO_INVALID) __ham_c_update(hashp, hcp, chg_pgno, 0, 0, 1); } else if (F_ISSET(hcp, H_ISDUP)) { /* on page */ if (hcp->dup_off == 0 && DUP_SIZE(hcp->dup_len) == LEN_HDATA(hcp->pagep, hashp->hdr->pagesize, hcp->bndx)) - ret = __ham_del_pair(hashp, hcp); + ret = __ham_del_pair(hashp, hcp, 1); else { DBT repldbt; @@ -736,14 +746,14 @@ __ham_c_del(cursor, flags) repldbt.size = 0; ret = __ham_replpair(hashp, hcp, &repldbt, 0); hcp->dup_tlen -= DUP_SIZE(hcp->dup_len); + F_SET(hcp, H_DELETED); __ham_c_update(hashp, hcp, hcp->pgno, DUP_SIZE(hcp->dup_len), 0, 1); - F_SET(hcp, H_DELETED); } } else /* Not a duplicate */ - ret = __ham_del_pair(hashp, hcp); +normal: ret = __ham_del_pair(hashp, hcp, 1); out: if ((t_ret = __ham_item_done(hashp, hcp, ret == 0)) != 0 && ret == 0) t_ret = ret; @@ -975,8 +985,8 @@ int __ham_expand_table(hashp) HTAB *hashp; { - u_int32_t old_bucket, new_bucket; - u_int32_t spare_ndx; + DB_LSN new_lsn; + u_int32_t old_bucket, new_bucket, spare_ndx; int ret; ret = 0; @@ -984,9 +994,30 @@ __ham_expand_table(hashp) if (ret) return (ret); - if (DB_LOGGING(hashp->dbp)) { - DB_LSN new_lsn; + /* + * If the split point is about to increase, make sure that we + * have enough extra pages. The calculation here is weird. 
+ * We'd like to do this after we've upped max_bucket, but it's + * too late then because we've logged the meta-data split. What + * we'll do between then and now is increment max bucket and then + * see what the log of one greater than that is; here we have to + * look at the log of max + 2. VERY NASTY STUFF. + */ + if (__db_log2(hashp->hdr->max_bucket + 2) > hashp->hdr->ovfl_point) { + /* + * We are about to shift the split point. Make sure that + * if the next doubling is going to be big (more than 8 + * pages), we have some extra pages around. + */ + if (hashp->hdr->max_bucket + 1 >= 8 && + hashp->hdr->spares[hashp->hdr->ovfl_point] < + hashp->hdr->spares[hashp->hdr->ovfl_point - 1] + + hashp->hdr->ovfl_point + 1) + __ham_init_ovflpages(hashp); + } + /* Now we can log the meta-data split. */ + if (DB_LOGGING(hashp->dbp)) { if ((ret = __ham_splitmeta_log(hashp->dbp->dbenv->lg_info, (DB_TXN *)hashp->dbp->txn, &new_lsn, 0, hashp->dbp->log_fileid, @@ -1003,22 +1034,11 @@ __ham_expand_table(hashp) old_bucket = (hashp->hdr->max_bucket & hashp->hdr->low_mask); /* - * If the split point is increasing (hdr.max_bucket's log base 2 - * increases), max sure that we have enough extra pages, then - * copy the current contents of the spare split bucket to the - * next bucket. + * If the split point is increasing, copy the current contents + * of the spare split bucket to the next bucket. */ spare_ndx = __db_log2(hashp->hdr->max_bucket + 1); if (spare_ndx > hashp->hdr->ovfl_point) { - /* - * We are about to shift the split point. Make sure that - * if the next doubling is going to be big (more than 8 - * pages), we have some extra pages around. 
- */ - if (hashp->hdr->spares[hashp->hdr->ovfl_point] == 0 && - new_bucket >= 8) - __ham_init_ovflpages(hashp); - hashp->hdr->spares[spare_ndx] = hashp->hdr->spares[hashp->hdr->ovfl_point]; hashp->hdr->ovfl_point = spare_ndx; @@ -1306,7 +1326,7 @@ __ham_init_dbt(dbt, size, bufp, sizep) memset(dbt, 0, sizeof(*dbt)); if (*sizep < size) { if ((*bufp = (void *)(*bufp == NULL ? - malloc(size) : realloc(*bufp, size))) == NULL) { + __db_malloc(size) : __db_realloc(*bufp, size))) == NULL) { *sizep = 0; return (ENOMEM); } @@ -1352,9 +1372,20 @@ __ham_c_update(hashp, hcp, chg_pgno, len, add, dup) if (!dup && add) return; - page_deleted = chg_pgno != PGNO_INVALID && - ((!dup && chg_pgno != hcp->pgno) || - (dup && chg_pgno != hcp->dpgno)); + /* + * Determine if a page was deleted. If this is a regular update + * (i.e., not dup) then the deleted page's number will be that in + * chg_pgno, and the pgno in the cursor will be different. If this + * was an onpage-duplicate, then the same conditions apply. If this + * was an off-page duplicate, then we need to verify if hcp->dpgno + * is the same (no delete) or different (delete) than chg_pgno. 
+ */ + if (!dup || hcp->dpgno == PGNO_INVALID) + page_deleted = + chg_pgno != PGNO_INVALID && chg_pgno != hcp->pgno; + else + page_deleted = + chg_pgno != PGNO_INVALID && chg_pgno != hcp->dpgno; hp = hcp->db_cursor->dbp->master->internal; DB_THREAD_LOCK(hp->dbp); @@ -1432,7 +1463,7 @@ __ham_hdup(orig, new) DBC *curs; int ret; - if ((hashp = (HTAB *)malloc(sizeof(HTAB))) == NULL) + if ((hashp = (HTAB *)__db_malloc(sizeof(HTAB))) == NULL) return (ENOMEM); new->internal = hashp; @@ -1441,7 +1472,7 @@ __ham_hdup(orig, new) hashp->hlock = 0; hashp->hdr = NULL; hashp->hash = ((HTAB *)orig->internal)->hash; - if ((hashp->split_buf = (PAGE *)malloc(orig->pgsize)) == NULL) + if ((hashp->split_buf = (PAGE *)__db_malloc(orig->pgsize)) == NULL) return (ENOMEM); hashp->local_errno = 0; hashp->hash_accesses = 0; diff --git a/db2/hash/hash.src b/db2/hash/hash.src index 04a98d3cb3..8cbcee73f7 100644 --- a/db2/hash/hash.src +++ b/db2/hash/hash.src @@ -43,7 +43,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * @(#)hash.src 10.1 (Sleepycat) 4/12/97 + * @(#)hash.src 10.2 (Sleepycat) 11/2/97 */ #include "config.h" @@ -207,5 +207,27 @@ ARG fileid u_int32_t lu ARG start_pgno db_pgno_t lu ARG npages u_int32_t lu ARG free_pgno db_pgno_t lu +ARG ovflpoint u_int32_t lu POINTER metalsn DB_LSN * lu END + +/* + * Used when we empty the first page in a bucket and there are pages + * after it. The page after it gets copied into the bucket page (since + * bucket pages have to be in fixed locations). + * pgno: the bucket page + * pagelsn: the old LSN on the bucket page + * next_pgno: the page number of the next page + * nnext_pgno: page after next_pgno (may need to change its prev) + * nnextlsn: the LSN of nnext_pgno. 
+ */ +BEGIN copypage +ARG fileid u_int32_t lu +ARG pgno db_pgno_t lu +POINTER pagelsn DB_LSN * lu +ARG next_pgno db_pgno_t lu +POINTER nextlsn DB_LSN * lu +ARG nnext_pgno db_pgno_t lu +POINTER nnextlsn DB_LSN * lu +DBT page DBT s +END diff --git a/db2/hash/hash_auto.c b/db2/hash/hash_auto.c index 2279de9668..4820eb8611 100644 --- a/db2/hash/hash_auto.c +++ b/db2/hash/hash_auto.c @@ -61,7 +61,7 @@ int __ham_insdel_log(logp, txnid, ret_lsnp, flags, + sizeof(*pagelsn) + sizeof(u_int32_t) + (key == NULL ? 0 : key->size) + sizeof(u_int32_t) + (data == NULL ? 0 : data->size); - if ((logrec.data = (void *)malloc(logrec.size)) == NULL) + if ((logrec.data = (void *)__db_malloc(logrec.size)) == NULL) return (ENOMEM); bp = logrec.data; @@ -111,7 +111,7 @@ int __ham_insdel_log(logp, txnid, ret_lsnp, flags, ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags); if (txnid != NULL) txnid->last_lsn = *ret_lsnp; - free(logrec.data); + __db_free(logrec.data); return (ret); } @@ -172,7 +172,7 @@ __ham_insdel_print(notused1, dbtp, lsnp, notused3, notused4) } printf("\n"); printf("\n"); - free(argp); + __db_free(argp); return (0); } @@ -187,7 +187,7 @@ __ham_insdel_read(recbuf, argpp) __ham_insdel_args *argp; u_int8_t *bp; - argp = (__ham_insdel_args *)malloc(sizeof(__ham_insdel_args) + + argp = (__ham_insdel_args *)__db_malloc(sizeof(__ham_insdel_args) + sizeof(DB_TXN)); if (argp == NULL) return (ENOMEM); @@ -266,7 +266,7 @@ int __ham_newpage_log(logp, txnid, ret_lsnp, flags, + sizeof(*pagelsn) + sizeof(next_pgno) + sizeof(*nextlsn); - if ((logrec.data = (void *)malloc(logrec.size)) == NULL) + if ((logrec.data = (void *)__db_malloc(logrec.size)) == NULL) return (ENOMEM); bp = logrec.data; @@ -308,7 +308,7 @@ int __ham_newpage_log(logp, txnid, ret_lsnp, flags, ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags); if (txnid != NULL) txnid->last_lsn = *ret_lsnp; - free(logrec.data); + __db_free(logrec.data); return (ret); } @@ -356,7 +356,7 @@ __ham_newpage_print(notused1, dbtp, lsnp, 
notused3, notused4) printf("\tnextlsn: [%lu][%lu]\n", (u_long)argp->nextlsn.file, (u_long)argp->nextlsn.offset); printf("\n"); - free(argp); + __db_free(argp); return (0); } @@ -371,7 +371,7 @@ __ham_newpage_read(recbuf, argpp) __ham_newpage_args *argp; u_int8_t *bp; - argp = (__ham_newpage_args *)malloc(sizeof(__ham_newpage_args) + + argp = (__ham_newpage_args *)__db_malloc(sizeof(__ham_newpage_args) + sizeof(DB_TXN)); if (argp == NULL) return (ENOMEM); @@ -441,7 +441,7 @@ int __ham_splitmeta_log(logp, txnid, ret_lsnp, flags, + sizeof(ovflpoint) + sizeof(spares) + sizeof(*metalsn); - if ((logrec.data = (void *)malloc(logrec.size)) == NULL) + if ((logrec.data = (void *)__db_malloc(logrec.size)) == NULL) return (ENOMEM); bp = logrec.data; @@ -471,7 +471,7 @@ int __ham_splitmeta_log(logp, txnid, ret_lsnp, flags, ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags); if (txnid != NULL) txnid->last_lsn = *ret_lsnp; - free(logrec.data); + __db_free(logrec.data); return (ret); } @@ -514,7 +514,7 @@ __ham_splitmeta_print(notused1, dbtp, lsnp, notused3, notused4) printf("\tmetalsn: [%lu][%lu]\n", (u_long)argp->metalsn.file, (u_long)argp->metalsn.offset); printf("\n"); - free(argp); + __db_free(argp); return (0); } @@ -529,7 +529,7 @@ __ham_splitmeta_read(recbuf, argpp) __ham_splitmeta_args *argp; u_int8_t *bp; - argp = (__ham_splitmeta_args *)malloc(sizeof(__ham_splitmeta_args) + + argp = (__ham_splitmeta_args *)__db_malloc(sizeof(__ham_splitmeta_args) + sizeof(DB_TXN)); if (argp == NULL) return (ENOMEM); @@ -594,7 +594,7 @@ int __ham_splitdata_log(logp, txnid, ret_lsnp, flags, + sizeof(pgno) + sizeof(u_int32_t) + (pageimage == NULL ? 
0 : pageimage->size) + sizeof(*pagelsn); - if ((logrec.data = (void *)malloc(logrec.size)) == NULL) + if ((logrec.data = (void *)__db_malloc(logrec.size)) == NULL) return (ENOMEM); bp = logrec.data; @@ -632,7 +632,7 @@ int __ham_splitdata_log(logp, txnid, ret_lsnp, flags, ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags); if (txnid != NULL) txnid->last_lsn = *ret_lsnp; - free(logrec.data); + __db_free(logrec.data); return (ret); } @@ -683,7 +683,7 @@ __ham_splitdata_print(notused1, dbtp, lsnp, notused3, notused4) printf("\tpagelsn: [%lu][%lu]\n", (u_long)argp->pagelsn.file, (u_long)argp->pagelsn.offset); printf("\n"); - free(argp); + __db_free(argp); return (0); } @@ -698,7 +698,7 @@ __ham_splitdata_read(recbuf, argpp) __ham_splitdata_args *argp; u_int8_t *bp; - argp = (__ham_splitdata_args *)malloc(sizeof(__ham_splitdata_args) + + argp = (__ham_splitdata_args *)__db_malloc(sizeof(__ham_splitdata_args) + sizeof(DB_TXN)); if (argp == NULL) return (ENOMEM); @@ -772,7 +772,7 @@ int __ham_replace_log(logp, txnid, ret_lsnp, flags, + sizeof(u_int32_t) + (olditem == NULL ? 0 : olditem->size) + sizeof(u_int32_t) + (newitem == NULL ? 
0 : newitem->size) + sizeof(makedup); - if ((logrec.data = (void *)malloc(logrec.size)) == NULL) + if ((logrec.data = (void *)__db_malloc(logrec.size)) == NULL) return (ENOMEM); bp = logrec.data; @@ -824,7 +824,7 @@ int __ham_replace_log(logp, txnid, ret_lsnp, flags, ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags); if (txnid != NULL) txnid->last_lsn = *ret_lsnp; - free(logrec.data); + __db_free(logrec.data); return (ret); } @@ -886,7 +886,7 @@ __ham_replace_print(notused1, dbtp, lsnp, notused3, notused4) printf("\n"); printf("\tmakedup: %lu\n", (u_long)argp->makedup); printf("\n"); - free(argp); + __db_free(argp); return (0); } @@ -901,7 +901,7 @@ __ham_replace_read(recbuf, argpp) __ham_replace_args *argp; u_int8_t *bp; - argp = (__ham_replace_args *)malloc(sizeof(__ham_replace_args) + + argp = (__ham_replace_args *)__db_malloc(sizeof(__ham_replace_args) + sizeof(DB_TXN)); if (argp == NULL) return (ENOMEM); @@ -985,7 +985,7 @@ int __ham_newpgno_log(logp, txnid, ret_lsnp, flags, + sizeof(new_type) + sizeof(*pagelsn) + sizeof(*metalsn); - if ((logrec.data = (void *)malloc(logrec.size)) == NULL) + if ((logrec.data = (void *)__db_malloc(logrec.size)) == NULL) return (ENOMEM); bp = logrec.data; @@ -1026,7 +1026,7 @@ int __ham_newpgno_log(logp, txnid, ret_lsnp, flags, ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags); if (txnid != NULL) txnid->last_lsn = *ret_lsnp; - free(logrec.data); + __db_free(logrec.data); return (ret); } @@ -1074,7 +1074,7 @@ __ham_newpgno_print(notused1, dbtp, lsnp, notused3, notused4) printf("\tmetalsn: [%lu][%lu]\n", (u_long)argp->metalsn.file, (u_long)argp->metalsn.offset); printf("\n"); - free(argp); + __db_free(argp); return (0); } @@ -1089,7 +1089,7 @@ __ham_newpgno_read(recbuf, argpp) __ham_newpgno_args *argp; u_int8_t *bp; - argp = (__ham_newpgno_args *)malloc(sizeof(__ham_newpgno_args) + + argp = (__ham_newpgno_args *)__db_malloc(sizeof(__ham_newpgno_args) + sizeof(DB_TXN)); if (argp == NULL) return (ENOMEM); @@ -1127,10 +1127,10 
@@ __ham_newpgno_read(recbuf, argpp) * PUBLIC: int __ham_ovfl_log * PUBLIC: __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t, * PUBLIC: u_int32_t, db_pgno_t, u_int32_t, db_pgno_t, - * PUBLIC: DB_LSN *)); + * PUBLIC: u_int32_t, DB_LSN *)); */ int __ham_ovfl_log(logp, txnid, ret_lsnp, flags, - fileid, start_pgno, npages, free_pgno, metalsn) + fileid, start_pgno, npages, free_pgno, ovflpoint, metalsn) DB_LOG *logp; DB_TXN *txnid; DB_LSN *ret_lsnp; @@ -1139,6 +1139,7 @@ int __ham_ovfl_log(logp, txnid, ret_lsnp, flags, db_pgno_t start_pgno; u_int32_t npages; db_pgno_t free_pgno; + u_int32_t ovflpoint; DB_LSN * metalsn; { DBT logrec; @@ -1160,8 +1161,9 @@ int __ham_ovfl_log(logp, txnid, ret_lsnp, flags, + sizeof(start_pgno) + sizeof(npages) + sizeof(free_pgno) + + sizeof(ovflpoint) + sizeof(*metalsn); - if ((logrec.data = (void *)malloc(logrec.size)) == NULL) + if ((logrec.data = (void *)__db_malloc(logrec.size)) == NULL) return (ENOMEM); bp = logrec.data; @@ -1179,6 +1181,8 @@ int __ham_ovfl_log(logp, txnid, ret_lsnp, flags, bp += sizeof(npages); memcpy(bp, &free_pgno, sizeof(free_pgno)); bp += sizeof(free_pgno); + memcpy(bp, &ovflpoint, sizeof(ovflpoint)); + bp += sizeof(ovflpoint); if (metalsn != NULL) memcpy(bp, metalsn, sizeof(*metalsn)); else @@ -1191,7 +1195,7 @@ int __ham_ovfl_log(logp, txnid, ret_lsnp, flags, ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags); if (txnid != NULL) txnid->last_lsn = *ret_lsnp; - free(logrec.data); + __db_free(logrec.data); return (ret); } @@ -1231,10 +1235,11 @@ __ham_ovfl_print(notused1, dbtp, lsnp, notused3, notused4) printf("\tstart_pgno: %lu\n", (u_long)argp->start_pgno); printf("\tnpages: %lu\n", (u_long)argp->npages); printf("\tfree_pgno: %lu\n", (u_long)argp->free_pgno); + printf("\tovflpoint: %lu\n", (u_long)argp->ovflpoint); printf("\tmetalsn: [%lu][%lu]\n", (u_long)argp->metalsn.file, (u_long)argp->metalsn.offset); printf("\n"); - free(argp); + __db_free(argp); return (0); } @@ -1249,7 +1254,7 @@ __ham_ovfl_read(recbuf, 
argpp) __ham_ovfl_args *argp; u_int8_t *bp; - argp = (__ham_ovfl_args *)malloc(sizeof(__ham_ovfl_args) + + argp = (__ham_ovfl_args *)__db_malloc(sizeof(__ham_ovfl_args) + sizeof(DB_TXN)); if (argp == NULL) return (ENOMEM); @@ -1269,6 +1274,8 @@ __ham_ovfl_read(recbuf, argpp) bp += sizeof(argp->npages); memcpy(&argp->free_pgno, bp, sizeof(argp->free_pgno)); bp += sizeof(argp->free_pgno); + memcpy(&argp->ovflpoint, bp, sizeof(argp->ovflpoint)); + bp += sizeof(argp->ovflpoint); memcpy(&argp->metalsn, bp, sizeof(argp->metalsn)); bp += sizeof(argp->metalsn); *argpp = argp; @@ -1276,6 +1283,207 @@ __ham_ovfl_read(recbuf, argpp) } /* + * PUBLIC: int __ham_copypage_log + * PUBLIC: __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t, + * PUBLIC: u_int32_t, db_pgno_t, DB_LSN *, db_pgno_t, + * PUBLIC: DB_LSN *, db_pgno_t, DB_LSN *, DBT *)); + */ +int __ham_copypage_log(logp, txnid, ret_lsnp, flags, + fileid, pgno, pagelsn, next_pgno, nextlsn, nnext_pgno, + nnextlsn, page) + DB_LOG *logp; + DB_TXN *txnid; + DB_LSN *ret_lsnp; + u_int32_t flags; + u_int32_t fileid; + db_pgno_t pgno; + DB_LSN * pagelsn; + db_pgno_t next_pgno; + DB_LSN * nextlsn; + db_pgno_t nnext_pgno; + DB_LSN * nnextlsn; + DBT *page; +{ + DBT logrec; + DB_LSN *lsnp, null_lsn; + u_int32_t zero; + u_int32_t rectype, txn_num; + int ret; + u_int8_t *bp; + + rectype = DB_ham_copypage; + txn_num = txnid == NULL ? 0 : txnid->txnid; + if (txnid == NULL) { + null_lsn.file = 0; + null_lsn.offset = 0; + lsnp = &null_lsn; + } else + lsnp = &txnid->last_lsn; + logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN) + + sizeof(fileid) + + sizeof(pgno) + + sizeof(*pagelsn) + + sizeof(next_pgno) + + sizeof(*nextlsn) + + sizeof(nnext_pgno) + + sizeof(*nnextlsn) + + sizeof(u_int32_t) + (page == NULL ? 
0 : page->size); + if ((logrec.data = (void *)__db_malloc(logrec.size)) == NULL) + return (ENOMEM); + + bp = logrec.data; + memcpy(bp, &rectype, sizeof(rectype)); + bp += sizeof(rectype); + memcpy(bp, &txn_num, sizeof(txn_num)); + bp += sizeof(txn_num); + memcpy(bp, lsnp, sizeof(DB_LSN)); + bp += sizeof(DB_LSN); + memcpy(bp, &fileid, sizeof(fileid)); + bp += sizeof(fileid); + memcpy(bp, &pgno, sizeof(pgno)); + bp += sizeof(pgno); + if (pagelsn != NULL) + memcpy(bp, pagelsn, sizeof(*pagelsn)); + else + memset(bp, 0, sizeof(*pagelsn)); + bp += sizeof(*pagelsn); + memcpy(bp, &next_pgno, sizeof(next_pgno)); + bp += sizeof(next_pgno); + if (nextlsn != NULL) + memcpy(bp, nextlsn, sizeof(*nextlsn)); + else + memset(bp, 0, sizeof(*nextlsn)); + bp += sizeof(*nextlsn); + memcpy(bp, &nnext_pgno, sizeof(nnext_pgno)); + bp += sizeof(nnext_pgno); + if (nnextlsn != NULL) + memcpy(bp, nnextlsn, sizeof(*nnextlsn)); + else + memset(bp, 0, sizeof(*nnextlsn)); + bp += sizeof(*nnextlsn); + if (page == NULL) { + zero = 0; + memcpy(bp, &zero, sizeof(u_int32_t)); + bp += sizeof(u_int32_t); + } else { + memcpy(bp, &page->size, sizeof(page->size)); + bp += sizeof(page->size); + memcpy(bp, page->data, page->size); + bp += page->size; + } +#ifdef DEBUG + if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size) + fprintf(stderr, "Error in log record length"); +#endif + ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags); + if (txnid != NULL) + txnid->last_lsn = *ret_lsnp; + __db_free(logrec.data); + return (ret); +} + +/* + * PUBLIC: int __ham_copypage_print + * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); + */ + +int +__ham_copypage_print(notused1, dbtp, lsnp, notused3, notused4) + DB_LOG *notused1; + DBT *dbtp; + DB_LSN *lsnp; + int notused3; + void *notused4; +{ + __ham_copypage_args *argp; + u_int32_t i; + int c, ret; + + i = 0; + c = 0; + notused1 = NULL; + notused3 = 0; + notused4 = NULL; + + if ((ret = __ham_copypage_read(dbtp->data, &argp)) != 0) + return (ret); + 
printf("[%lu][%lu]ham_copypage: rec: %lu txnid %lx prevlsn [%lu][%lu]\n", + (u_long)lsnp->file, + (u_long)lsnp->offset, + (u_long)argp->type, + (u_long)argp->txnid->txnid, + (u_long)argp->prev_lsn.file, + (u_long)argp->prev_lsn.offset); + printf("\tfileid: %lu\n", (u_long)argp->fileid); + printf("\tpgno: %lu\n", (u_long)argp->pgno); + printf("\tpagelsn: [%lu][%lu]\n", + (u_long)argp->pagelsn.file, (u_long)argp->pagelsn.offset); + printf("\tnext_pgno: %lu\n", (u_long)argp->next_pgno); + printf("\tnextlsn: [%lu][%lu]\n", + (u_long)argp->nextlsn.file, (u_long)argp->nextlsn.offset); + printf("\tnnext_pgno: %lu\n", (u_long)argp->nnext_pgno); + printf("\tnnextlsn: [%lu][%lu]\n", + (u_long)argp->nnextlsn.file, (u_long)argp->nnextlsn.offset); + printf("\tpage: "); + for (i = 0; i < argp->page.size; i++) { + c = ((char *)argp->page.data)[i]; + if (isprint(c) || c == 0xa) + putchar(c); + else + printf("%#x ", c); + } + printf("\n"); + printf("\n"); + __db_free(argp); + return (0); +} + +/* + * PUBLIC: int __ham_copypage_read __P((void *, __ham_copypage_args **)); + */ +int +__ham_copypage_read(recbuf, argpp) + void *recbuf; + __ham_copypage_args **argpp; +{ + __ham_copypage_args *argp; + u_int8_t *bp; + + argp = (__ham_copypage_args *)__db_malloc(sizeof(__ham_copypage_args) + + sizeof(DB_TXN)); + if (argp == NULL) + return (ENOMEM); + argp->txnid = (DB_TXN *)&argp[1]; + bp = recbuf; + memcpy(&argp->type, bp, sizeof(argp->type)); + bp += sizeof(argp->type); + memcpy(&argp->txnid->txnid, bp, sizeof(argp->txnid->txnid)); + bp += sizeof(argp->txnid->txnid); + memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN)); + bp += sizeof(DB_LSN); + memcpy(&argp->fileid, bp, sizeof(argp->fileid)); + bp += sizeof(argp->fileid); + memcpy(&argp->pgno, bp, sizeof(argp->pgno)); + bp += sizeof(argp->pgno); + memcpy(&argp->pagelsn, bp, sizeof(argp->pagelsn)); + bp += sizeof(argp->pagelsn); + memcpy(&argp->next_pgno, bp, sizeof(argp->next_pgno)); + bp += sizeof(argp->next_pgno); + memcpy(&argp->nextlsn, bp, 
sizeof(argp->nextlsn)); + bp += sizeof(argp->nextlsn); + memcpy(&argp->nnext_pgno, bp, sizeof(argp->nnext_pgno)); + bp += sizeof(argp->nnext_pgno); + memcpy(&argp->nnextlsn, bp, sizeof(argp->nnextlsn)); + bp += sizeof(argp->nnextlsn); + memcpy(&argp->page.size, bp, sizeof(u_int32_t)); + bp += sizeof(u_int32_t); + argp->page.data = bp; + bp += argp->page.size; + *argpp = argp; + return (0); +} + +/* * PUBLIC: int __ham_init_print __P((DB_ENV *)); */ int @@ -1305,6 +1513,9 @@ __ham_init_print(dbenv) if ((ret = __db_add_recovery(dbenv, __ham_ovfl_print, DB_ham_ovfl)) != 0) return (ret); + if ((ret = __db_add_recovery(dbenv, + __ham_copypage_print, DB_ham_copypage)) != 0) + return (ret); return (0); } @@ -1338,6 +1549,9 @@ __ham_init_recover(dbenv) if ((ret = __db_add_recovery(dbenv, __ham_ovfl_recover, DB_ham_ovfl)) != 0) return (ret); + if ((ret = __db_add_recovery(dbenv, + __ham_copypage_recover, DB_ham_copypage)) != 0) + return (ret); return (0); } diff --git a/db2/hash/hash_dup.c b/db2/hash/hash_dup.c index 71bd1c5eb0..22444e4966 100644 --- a/db2/hash/hash_dup.c +++ b/db2/hash/hash_dup.c @@ -42,7 +42,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)hash_dup.c 10.7 (Sleepycat) 9/15/97"; +static const char sccsid[] = "@(#)hash_dup.c 10.8 (Sleepycat) 10/14/97"; #endif /* not lint */ /* @@ -480,7 +480,7 @@ __ham_check_move(hashp, hcp, add_len) __ham_copy_item(hashp, hcp->pagep, H_DATAINDEX(hcp->bndx), next_pagep); /* Now delete the pair from the current page. 
*/ - ret = __ham_del_pair(hashp, hcp); + ret = __ham_del_pair(hashp, hcp, 0); (void)__ham_put_page(hashp->dbp, hcp->pagep, 1); hcp->pagep = next_pagep; diff --git a/db2/hash/hash_page.c b/db2/hash/hash_page.c index 8ba42da1a4..0a12c14546 100644 --- a/db2/hash/hash_page.c +++ b/db2/hash/hash_page.c @@ -47,7 +47,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)hash_page.c 10.24 (Sleepycat) 9/17/97"; +static const char sccsid[] = "@(#)hash_page.c 10.29 (Sleepycat) 11/2/97"; #endif /* not lint */ /* @@ -489,19 +489,20 @@ __ham_putitem(p, dbt, type) /* - * PUBLIC: int __ham_del_pair __P((HTAB *, HASH_CURSOR *)); + * PUBLIC: int __ham_del_pair __P((HTAB *, HASH_CURSOR *, int)); * XXX TODO: if the item is an offdup, delete the other pages and * then remove the pair. If the offpage page is 0, then you can * just remove the pair. */ int -__ham_del_pair(hashp, cursorp) +__ham_del_pair(hashp, cursorp, reclaim_page) HTAB *hashp; HASH_CURSOR *cursorp; + int reclaim_page; { DBT data_dbt, key_dbt; DB_ENV *dbenv; - DB_LSN new_lsn, *n_lsn; + DB_LSN new_lsn, *n_lsn, tmp_lsn; PAGE *p; db_indx_t ndx; db_pgno_t chg_pgno, pgno; @@ -542,6 +543,15 @@ __ham_del_pair(hashp, cursorp) HOFFDUP_PGNO(P_ENTRY(p, H_DATAINDEX(ndx))), sizeof(db_pgno_t)); ret = __db_ddup(hashp->dbp, pgno, __ham_del_page); + F_CLR(cursorp, H_ISDUP); + break; + case H_DUPLICATE: + /* + * If we delete a pair that is/was a duplicate, then + * we had better clear the flag so that we update the + * cursor appropriately. + */ + F_CLR(cursorp, H_ISDUP); break; } @@ -578,13 +588,13 @@ __ham_del_pair(hashp, cursorp) --hashp->hdr->nelem; /* - * Check if the page is empty. There are two cases. If it's - * empty and it's not the first chain in the bucket (i.e., the - * bucket page) then we can simply remove it. If it is the first - * chain in the bucket, then we need to copy the second page into - * it and remove the second page. + * If we need to reclaim the page, then check if the page is empty. 
+ * There are two cases. If it's empty and it's not the first page + * in the bucket (i.e., the bucket page) then we can simply remove + * it. If it is the first chain in the bucket, then we need to copy + * the second page into it and remove the second page. */ - if (NUM_ENT(p) == 0 && PREV_PGNO(p) == PGNO_INVALID && + if (reclaim_page && NUM_ENT(p) == 0 && PREV_PGNO(p) == PGNO_INVALID && NEXT_PGNO(p) != PGNO_INVALID) { PAGE *n_pagep, *nn_pagep; db_pgno_t tmp_pgno; @@ -592,7 +602,6 @@ __ham_del_pair(hashp, cursorp) /* * First page in chain is empty and we know that there * are more pages in the chain. - * XXX Need to log this. */ if ((ret = __ham_get_page(hashp->dbp, NEXT_PGNO(p), &n_pagep)) != 0) @@ -605,13 +614,35 @@ __ham_del_pair(hashp, cursorp) (void) __ham_put_page(hashp->dbp, n_pagep, 0); return (ret); } + } + + if (DB_LOGGING(hashp->dbp)) { + key_dbt.data = n_pagep; + key_dbt.size = hashp->hdr->pagesize; + if ((ret = __ham_copypage_log(dbenv->lg_info, + (DB_TXN *)hashp->dbp->txn, &new_lsn, 0, + hashp->dbp->log_fileid, PGNO(p), &LSN(p), + PGNO(n_pagep), &LSN(n_pagep), NEXT_PGNO(n_pagep), + NEXT_PGNO(n_pagep) == PGNO_INVALID ? NULL : + &LSN(nn_pagep), &key_dbt)) != 0) + return (ret); + + /* Move lsn onto page. */ + LSN(p) = new_lsn; /* Structure assignment. 
*/ + LSN(n_pagep) = new_lsn; + if (NEXT_PGNO(n_pagep) != PGNO_INVALID) + LSN(nn_pagep) = new_lsn; + } + if (NEXT_PGNO(n_pagep) != PGNO_INVALID) { PREV_PGNO(nn_pagep) = PGNO(p); (void)__ham_put_page(hashp->dbp, nn_pagep, 1); } tmp_pgno = PGNO(p); + tmp_lsn = LSN(p); memcpy(p, n_pagep, hashp->hdr->pagesize); PGNO(p) = tmp_pgno; + LSN(p) = tmp_lsn; PREV_PGNO(p) = PGNO_INVALID; /* @@ -623,7 +654,8 @@ __ham_del_pair(hashp, cursorp) if ((ret = __ham_dirty_page(hashp, p)) != 0 || (ret = __ham_del_page(hashp->dbp, n_pagep)) != 0) return (ret); - } else if (NUM_ENT(p) == 0 && PREV_PGNO(p) != PGNO_INVALID) { + } else if (reclaim_page && + NUM_ENT(p) == 0 && PREV_PGNO(p) != PGNO_INVALID) { PAGE *n_pagep, *p_pagep; if ((ret = @@ -690,13 +722,22 @@ __ham_del_pair(hashp, cursorp) } __ham_c_update(hashp, cursorp, chg_pgno, 0, 0, 0); + /* + * Since we just deleted a pair from the master page, anything + * in cursorp->dpgno should be cleared. + */ + cursorp->dpgno = PGNO_INVALID; + F_CLR(cursorp, H_OK); return (ret); } + /* + * __ham_replpair -- + * Given the key data indicated by the cursor, replace part/all of it + * according to the fields in the dbt. + * * PUBLIC: int __ham_replpair __P((HTAB *, HASH_CURSOR *, DBT *, u_int32_t)); - * Given the key data indicated by the cursor, replace part/all of it - * according to the fields in the dbt. */ int __ham_replpair(hashp, hcp, dbt, make_dup) @@ -768,7 +809,7 @@ __ham_replpair(hashp, hcp, dbt, make_dup) return (ret); if (dbt->doff == 0 && dbt->dlen == len) { - ret = __ham_del_pair(hashp, hcp); + ret = __ham_del_pair(hashp, hcp, 0); if (ret == 0) ret = __ham_add_el(hashp, hcp, &tmp, dbt, H_KEYDATA); @@ -784,15 +825,15 @@ __ham_replpair(hashp, hcp, dbt, make_dup) goto err; /* Now we can delete the item. */ - if ((ret = __ham_del_pair(hashp, hcp)) != 0) { - free(tdata.data); + if ((ret = __ham_del_pair(hashp, hcp, 0)) != 0) { + __db_free(tdata.data); goto err; } /* Now shift old data around to make room for new. 
*/ if (change > 0) { - tdata.data = (void *) - realloc(tdata.data, tdata.size + change); + tdata.data = (void *)__db_realloc(tdata.data, + tdata.size + change); memset((u_int8_t *)tdata.data + tdata.size, 0, change); } @@ -812,9 +853,9 @@ __ham_replpair(hashp, hcp, dbt, make_dup) /* Now add the pair. */ ret = __ham_add_el(hashp, hcp, &tmp, &tdata, type); - free(tdata.data); + __db_free(tdata.data); } -err: free(tmp.data); +err: __db_free(tmp.data); return (ret); } @@ -1025,7 +1066,7 @@ __ham_split_page(hashp, obucket, nbucket) } } if (big_buf != NULL) - free(big_buf); + __db_free(big_buf); /* * If the original bucket spanned multiple pages, then we've got @@ -1549,17 +1590,20 @@ __ham_init_ovflpages(hp) { DB_LSN new_lsn; PAGE *p; - db_pgno_t last_pgno; - u_int32_t i, numpages; + db_pgno_t last_pgno, new_pgno; + u_int32_t i, curpages, numpages; - numpages = hp->hdr->ovfl_point + 1; + curpages = hp->hdr->spares[hp->hdr->ovfl_point] - + hp->hdr->spares[hp->hdr->ovfl_point - 1]; + numpages = hp->hdr->ovfl_point + 1 - curpages; last_pgno = hp->hdr->last_freed; + new_pgno = PGNO_OF(hp, hp->hdr->ovfl_point, curpages + 1); if (DB_LOGGING(hp->dbp)) { (void)__ham_ovfl_log(hp->dbp->dbenv->lg_info, (DB_TXN *)hp->dbp->txn, &new_lsn, 0, - hp->dbp->log_fileid, PGNO_OF(hp, hp->hdr->ovfl_point, 1), - numpages, last_pgno, &hp->hdr->lsn); + hp->dbp->log_fileid, new_pgno, + numpages, last_pgno, hp->hdr->ovfl_point, &hp->hdr->lsn); hp->hdr->lsn = new_lsn; } else ZERO_LSN(new_lsn); @@ -1567,7 +1611,8 @@ __ham_init_ovflpages(hp) hp->hdr->spares[hp->hdr->ovfl_point] += numpages; for (i = numpages; i > 0; i--) { if (__ham_new_page(hp, - PGNO_OF(hp, hp->hdr->ovfl_point, i), P_INVALID, &p) != 0) + PGNO_OF(hp, hp->hdr->ovfl_point, curpages + i), + P_INVALID, &p) != 0) break; LSN(p) = new_lsn; NEXT_PGNO(p) = last_pgno; diff --git a/db2/hash/hash_rec.c b/db2/hash/hash_rec.c index 1b30be337d..d239e3d0df 100644 --- a/db2/hash/hash_rec.c +++ b/db2/hash/hash_rec.c @@ -47,7 +47,7 @@ #include 
"config.h" #ifndef lint -static const char sccsid[] = "@(#)hash_rec.c 10.13 (Sleepycat) 9/15/97"; +static const char sccsid[] = "@(#)hash_rec.c 10.14 (Sleepycat) 11/2/97"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -756,7 +756,6 @@ __ham_ovfl_recover(logp, dbtp, lsnp, redo, info) hashp = (HTAB *)file_dbp->internal; GET_META(file_dbp, hashp); getmeta = 1; - file_dbp = NULL; cmp_n = log_compare(lsnp, &hashp->hdr->lsn); cmp_p = log_compare(&hashp->hdr->lsn, &argp->metalsn); @@ -764,12 +763,12 @@ __ham_ovfl_recover(logp, dbtp, lsnp, redo, info) if (cmp_p == 0 && redo) { /* Redo the allocation. */ hashp->hdr->last_freed = argp->start_pgno; - hashp->hdr->spares[argp->npages - 1] += argp->npages; + hashp->hdr->spares[argp->ovflpoint] += argp->npages; hashp->hdr->lsn = *lsnp; F_SET(file_dbp, DB_HS_DIRTYMETA); } else if (cmp_n == 0 && !redo) { hashp->hdr->last_freed = argp->free_pgno; - hashp->hdr->spares[argp->npages - 1] -= argp->npages; + hashp->hdr->spares[argp->ovflpoint] -= argp->npages; hashp->hdr->lsn = argp->metalsn; F_SET(file_dbp, DB_HS_DIRTYMETA); } @@ -808,3 +807,142 @@ out: if (getmeta) RELEASE_META(file_dbp, hashp); REC_CLOSE; } + +/* + * __ham_copypage_recover -- + * Recovery function for copypage. + * + * PUBLIC: int __ham_copypage_recover + * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); + */ +int +__ham_copypage_recover(logp, dbtp, lsnp, redo, info) + DB_LOG *logp; + DBT *dbtp; + DB_LSN *lsnp; + int redo; + void *info; +{ + __ham_copypage_args *argp; + DB *file_dbp, *mdbp; + DB_MPOOLFILE *mpf; + HTAB *hashp; + PAGE *pagep; + int cmp_n, cmp_p, getmeta, modified, ret; + + getmeta = 0; + hashp = NULL; /* XXX: shut the compiler up. */ + REC_PRINT(__ham_copypage_print); + REC_INTRO(__ham_copypage_read); + + hashp = (HTAB *)file_dbp->internal; + GET_META(file_dbp, hashp); + getmeta = 1; + modified = 0; + + /* This is the bucket page. 
*/ + ret = memp_fget(mpf, &argp->pgno, 0, &pagep); + if (ret != 0) + if (!redo) { + /* + * We are undoing and the page doesn't exist. That + * is equivalent to having a pagelsn of 0, so we + * would not have to undo anything. In this case, + * don't bother creating a page. + */ + ret = 0; + goto donext; + } else if ((ret = memp_fget(mpf, &argp->pgno, + DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + + cmp_n = log_compare(lsnp, &LSN(pagep)); + cmp_p = log_compare(&LSN(pagep), &argp->pagelsn); + + if (cmp_p == 0 && redo) { + /* Need to redo update described. */ + memcpy(pagep, argp->page.data, argp->page.size); + LSN(pagep) = *lsnp; + modified = 1; + } else if (cmp_n == 0 && !redo) { + /* Need to undo update described. */ + P_INIT(pagep, hashp->hdr->pagesize, argp->pgno, PGNO_INVALID, + argp->next_pgno, 0, P_HASH); + LSN(pagep) = argp->pagelsn; + modified = 1; + } + if ((ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0) + goto out; + + /* Now fix up the "next" page. */ +donext: ret = memp_fget(mpf, &argp->next_pgno, 0, &pagep); + if (ret != 0) + if (!redo) { + /* + * We are undoing and the page doesn't exist. That + * is equivalent to having a pagelsn of 0, so we + * would not have to undo anything. In this case, + * don't bother creating a page. + */ + ret = 0; + goto do_nn; + } else if ((ret = memp_fget(mpf, &argp->next_pgno, + DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + + /* There is nothing to do in the REDO case; only UNDO. */ + + cmp_n = log_compare(lsnp, &LSN(pagep)); + if (cmp_n == 0 && !redo) { + /* Need to undo update described. */ + memcpy(pagep, argp->page.data, argp->page.size); + modified = 1; + } + if ((ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0) + goto out; + + /* Now fix up the next's next page. 
*/ +do_nn: if (argp->nnext_pgno == PGNO_INVALID) { + *lsnp = argp->prev_lsn; + goto out; + } + + ret = memp_fget(mpf, &argp->nnext_pgno, 0, &pagep); + if (ret != 0) + if (!redo) { + /* + * We are undoing and the page doesn't exist. That + * is equivalent to having a pagelsn of 0, so we + * would not have to undo anything. In this case, + * don't bother creating a page. + */ + ret = 0; + *lsnp = argp->prev_lsn; + goto out; + } else if ((ret = memp_fget(mpf, &argp->nnext_pgno, + DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + + cmp_n = log_compare(lsnp, &LSN(pagep)); + cmp_p = log_compare(&LSN(pagep), &argp->nnextlsn); + + if (cmp_p == 0 && redo) { + /* Need to redo update described. */ + PREV_PGNO(pagep) = argp->pgno; + LSN(pagep) = *lsnp; + modified = 1; + } else if (cmp_n == 0 && !redo) { + /* Need to undo update described. */ + PREV_PGNO(pagep) = argp->next_pgno; + LSN(pagep) = argp->nnextlsn; + modified = 1; + } + if ((ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0) + goto out; + + *lsnp = argp->prev_lsn; + +out: if (getmeta) + RELEASE_META(file_dbp, hashp); + REC_CLOSE; +} diff --git a/db2/include/btree_auto.h b/db2/include/btree_auto.h index b422e1db1b..041b80f196 100644 --- a/db2/include/btree_auto.h +++ b/db2/include/btree_auto.h @@ -58,6 +58,7 @@ typedef struct _bam_rsplit_args { u_int32_t fileid; db_pgno_t pgno; DBT pgdbt; + db_pgno_t nrec; DBT rootent; DB_LSN rootlsn; } __bam_rsplit_args; @@ -105,4 +106,22 @@ typedef struct _bam_cdel_args { u_int32_t indx; } __bam_cdel_args; + +#define DB_bam_repl (DB_bam_BEGIN + 8) + +typedef struct _bam_repl_args { + u_int32_t type; + DB_TXN *txnid; + DB_LSN prev_lsn; + u_int32_t fileid; + db_pgno_t pgno; + DB_LSN lsn; + u_int32_t indx; + u_int32_t isdeleted; + DBT orig; + DBT repl; + u_int32_t prefix; + u_int32_t suffix; +} __bam_repl_args; + #endif diff --git a/db2/include/btree_ext.h b/db2/include/btree_ext.h index 9133c58c6b..bbe0d971b2 100644 --- a/db2/include/btree_ext.h +++ 
b/db2/include/btree_ext.h @@ -1,4 +1,4 @@ -/* Do not edit: automatically built by dist/distrib. */ +/* DO NOT EDIT: automatically built by dist/distrib. */ int __bam_close __P((DB *)); int __bam_sync __P((DB *, int)); int __bam_cmp __P((DB *, const DBT *, EPG *)); @@ -35,6 +35,7 @@ int __bam_pget __P((DB *, PAGE **, db_pgno_t *, int)); int __bam_put __P((DB *, DB_TXN *, DBT *, DBT *, int)); int __bam_iitem __P((DB *, PAGE **, db_indx_t *, DBT *, DBT *, int, int)); +int __bam_ritem __P((DB *, PAGE *, u_int32_t, DBT *)); int __bam_pg_alloc_recover __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); int __bam_pg_free_recover @@ -49,6 +50,8 @@ int __bam_cadjust_recover __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); int __bam_cdel_recover __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); +int __bam_repl_recover + __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); int __ram_open __P((DB *, DBTYPE, DB_INFO *)); int __ram_cursor __P((DB *, DB_TXN *, DBC **)); int __ram_close __P((DB *)); @@ -94,8 +97,8 @@ int __bam_split_print int __bam_split_read __P((void *, __bam_split_args **)); int __bam_rsplit_log __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t, - u_int32_t, db_pgno_t, DBT *, DBT *, - DB_LSN *)); + u_int32_t, db_pgno_t, DBT *, db_pgno_t, + DBT *, DB_LSN *)); int __bam_rsplit_print __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); int __bam_rsplit_read __P((void *, __bam_rsplit_args **)); @@ -119,5 +122,13 @@ int __bam_cdel_log int __bam_cdel_print __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); int __bam_cdel_read __P((void *, __bam_cdel_args **)); +int __bam_repl_log + __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t, + u_int32_t, db_pgno_t, DB_LSN *, u_int32_t, + u_int32_t, DBT *, DBT *, u_int32_t, + u_int32_t)); +int __bam_repl_print + __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); +int __bam_repl_read __P((void *, __bam_repl_args **)); int __bam_init_print __P((DB_ENV *)); int __bam_init_recover __P((DB_ENV *)); diff --git a/db2/include/clib_ext.h b/db2/include/clib_ext.h index 
8ccd2b559f..91e4a13fa5 100644 --- a/db2/include/clib_ext.h +++ b/db2/include/clib_ext.h @@ -1,4 +1,4 @@ -/* Do not edit: automatically built by dist/distrib. */ +/* DO NOT EDIT: automatically built by dist/distrib. */ #ifdef __STDC__ void err __P((int eval, const char *, ...)); #else diff --git a/db2/include/common_ext.h b/db2/include/common_ext.h index 9840162a12..b814582abd 100644 --- a/db2/include/common_ext.h +++ b/db2/include/common_ext.h @@ -1,4 +1,4 @@ -/* Do not edit: automatically built by dist/distrib. */ +/* DO NOT EDIT: automatically built by dist/distrib. */ int __db_appname __P((DB_ENV *, APPNAME, const char *, const char *, int *, char **)); int __db_apprec __P((DB_ENV *, int)); @@ -24,6 +24,7 @@ int __db_ferr __P((const DB_ENV *, const char *, int)); u_int32_t __db_log2 __P((u_int32_t)); int __db_rcreate __P((DB_ENV *, APPNAME, const char *, const char *, int, size_t, int *, void *)); +int __db_rinit __P((DB_ENV *, RLAYOUT *, int, size_t, int)); int __db_ropen __P((DB_ENV *, APPNAME, const char *, const char *, int, int *, void *)); int __db_rclose __P((DB_ENV *, int, void *)); diff --git a/db2/include/db.h.src b/db2/include/db.h.src index 63d9603dba..3cc2bfd4fc 100644 --- a/db2/include/db.h.src +++ b/db2/include/db.h.src @@ -4,7 +4,7 @@ * Copyright (c) 1996, 1997 * Sleepycat Software. All rights reserved. * - * @(#)db.h.src 10.77 (Sleepycat) 9/24/97 + * @(#)db.h.src 10.91 (Sleepycat) 11/3/97 */ #ifndef _DB_H_ @@ -28,9 +28,15 @@ * XXX * Handle function prototypes and the keyword "const". This steps on name * space that DB doesn't control, but all of the other solutions are worse. + * + * XXX + * While Microsoft's compiler is ANSI C compliant, it doesn't have _STDC_ + * defined by default, you specify a command line flag or #pragma to turn + * it on. Don't do that, however, because some of Microsoft's own header + * files won't compile. 
*/ #undef __P -#if defined(__STDC__) || defined(__cplusplus) +#if defined(__STDC__) || defined(__cplusplus) || defined(_MSC_VER) #define __P(protos) protos /* ANSI C prototypes */ #else #define const @@ -67,8 +73,8 @@ #define DB_VERSION_MAJOR 2 #define DB_VERSION_MINOR 3 -#define DB_VERSION_PATCH 10 -#define DB_VERSION_STRING "Sleepycat Software: DB 2.3.10: (9/24/97)" +#define DB_VERSION_PATCH 12 +#define DB_VERSION_STRING "Sleepycat Software: DB 2.3.12: (11/3/97)" typedef u_int32_t db_pgno_t; /* Page number type. */ typedef u_int16_t db_indx_t; /* Page offset type. */ @@ -93,6 +99,7 @@ struct __db_lockregion; typedef struct __db_lockregion DB_LOCKREGION; struct __db_lockreq; typedef struct __db_lockreq DB_LOCKREQ; struct __db_locktab; typedef struct __db_locktab DB_LOCKTAB; struct __db_log; typedef struct __db_log DB_LOG; +struct __db_log_stat; typedef struct __db_log_stat DB_LOG_STAT; struct __db_lsn; typedef struct __db_lsn DB_LSN; struct __db_mpool; typedef struct __db_mpool DB_MPOOL; struct __db_mpool_fstat;typedef struct __db_mpool_fstat DB_MPOOL_FSTAT; @@ -122,6 +129,31 @@ struct __db_dbt { }; /* + * DB configuration. There are a set of functions which the application + * can replace with its own versions. + */ +#define DB_FUNC_CALLOC 1 /* ANSI C calloc. */ +#define DB_FUNC_CLOSE 2 /* POSIX 1003.1 close. */ +#define DB_FUNC_DIRFREE 3 /* DB: free directory list. */ +#define DB_FUNC_DIRLIST 4 /* DB: create directory list. */ +#define DB_FUNC_EXISTS 5 /* DB: return if file exists. */ +#define DB_FUNC_FREE 6 /* ANSI C free. */ +#define DB_FUNC_FSYNC 7 /* POSIX 1003.1 fsync. */ +#define DB_FUNC_IOINFO 8 /* DB: return file I/O information. */ +#define DB_FUNC_MALLOC 9 /* ANSI C malloc. */ +#define DB_FUNC_MAP 10 /* DB: map file into shared memory. */ +#define DB_FUNC_OPEN 11 /* POSIX 1003.1 open. */ +#define DB_FUNC_READ 12 /* POSIX 1003.1 read. */ +#define DB_FUNC_REALLOC 13 /* ANSI C realloc. */ +#define DB_FUNC_SEEK 14 /* POSIX 1003.1 lseek. 
*/ +#define DB_FUNC_SLEEP 15 /* DB: sleep secs/usecs. */ +#define DB_FUNC_STRDUP 16 /* ANSI C strdup. */ +#define DB_FUNC_UNLINK 17 /* POSIX 1003.1 unlink. */ +#define DB_FUNC_UNMAP 18 /* DB: unmap shared memory file. */ +#define DB_FUNC_WRITE 19 /* POSIX 1003.1 write. */ +#define DB_FUNC_YIELD 20 /* DB: yield thread to scheduler. */ + +/* * Database configuration and initialization. */ /* @@ -134,21 +166,20 @@ struct __db_dbt { /* * Flags understood by db_appinit(3). * - * DB_APP_INIT and DB_MUTEXDEBUG are internal only, and not documented. + * DB_MUTEXDEBUG is internal only, and not documented. */ /* 0x00007 COMMON MASK. */ -#define DB_APP_INIT 0x00008 /* Appinit called, paths initialized. */ -#define DB_INIT_LOCK 0x00010 /* Initialize locking. */ -#define DB_INIT_LOG 0x00020 /* Initialize logging. */ -#define DB_INIT_MPOOL 0x00040 /* Initialize mpool. */ -#define DB_INIT_TXN 0x00080 /* Initialize transactions. */ -#define DB_MPOOL_PRIVATE 0x00100 /* Mpool: private memory pool. */ -#define DB_MUTEXDEBUG 0x00200 /* Do not get/set mutexes in regions. */ -#define DB_RECOVER 0x00400 /* Run normal recovery. */ -#define DB_RECOVER_FATAL 0x00800 /* Run catastrophic recovery. */ -#define DB_TXN_NOSYNC 0x01000 /* Do not sync log on commit. */ -#define DB_USE_ENVIRON 0x02000 /* Use the environment. */ -#define DB_USE_ENVIRON_ROOT 0x04000 /* Use the environment if root. */ +#define DB_INIT_LOCK 0x00008 /* Initialize locking. */ +#define DB_INIT_LOG 0x00010 /* Initialize logging. */ +#define DB_INIT_MPOOL 0x00020 /* Initialize mpool. */ +#define DB_INIT_TXN 0x00040 /* Initialize transactions. */ +#define DB_MPOOL_PRIVATE 0x00080 /* Mpool: private memory pool. */ +#define DB_MUTEXDEBUG 0x00100 /* Do not get/set mutexes in regions. */ +#define DB_RECOVER 0x00200 /* Run normal recovery. */ +#define DB_RECOVER_FATAL 0x00400 /* Run catastrophic recovery. */ +#define DB_TXN_NOSYNC 0x00800 /* Do not sync log on commit. */ +#define DB_USE_ENVIRON 0x01000 /* Use the environment. 
*/ +#define DB_USE_ENVIRON_ROOT 0x02000 /* Use the environment if root. */ /* CURRENTLY UNUSED LOCK FLAGS. */ #define DB_TXN_LOCK_2PL 0x00000 /* Two-phase locking. */ @@ -209,7 +240,6 @@ struct __db_env { int lk_modes; /* Number of lock modes in table. */ unsigned int lk_max; /* Maximum number of locks. */ u_int32_t lk_detect; /* Deadlock detect on every conflict. */ - int (*db_yield) __P((void)); /* Yield function for threads. */ /* Logging. */ DB_LOG *lg_info; /* Return from log_open(). */ @@ -226,6 +256,9 @@ struct __db_env { int (*tx_recover) /* Dispatch function for recovery. */ __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); +#define DB_ENV_APPINIT 0x01 /* Paths initialized by db_appinit(). */ +#define DB_ENV_STANDALONE 0x02 /* Test: freestanding environment. */ +#define DB_ENV_THREAD 0x04 /* DB_ENV is multi-threaded. */ u_int32_t flags; /* Flags. */ }; @@ -301,7 +334,7 @@ struct __db_info { #define DB_CURRENT 0x000010 /* c_get(), c_put(), log_get() */ #define DB_FIRST 0x000020 /* c_get(), log_get() */ #define DB_FLUSH 0x000040 /* log_put() */ -#define DB_GET_RECNO 0x000080 /* c_get() */ +#define DB_GET_RECNO 0x000080 /* get(), c_get() */ #define DB_KEYFIRST 0x000100 /* c_put() */ #define DB_KEYLAST 0x000200 /* c_put() */ #define DB_LAST 0x000400 /* c_get(), log_get() */ @@ -312,7 +345,7 @@ struct __db_info { #define DB_RECORDCOUNT 0x008000 /* stat() */ #define DB_SET 0x010000 /* c_get(), log_get() */ #define DB_SET_RANGE 0x020000 /* c_get() */ -#define DB_SET_RECNO 0x040000 /* get(), c_get() */ +#define DB_SET_RECNO 0x040000 /* c_get() */ /* DB (user visible) error return codes. */ #define DB_INCOMPLETE ( -1) /* Sync didn't finish. */ @@ -472,6 +505,8 @@ struct __db_bt_stat { u_int32_t bt_get; /* Items retrieved. */ u_int32_t bt_cache_hit; /* Hits in fast-insert code. */ u_int32_t bt_cache_miss; /* Misses in fast-insert code. */ + u_int32_t bt_magic; /* Magic number. */ + u_int32_t bt_version; /* Version number. 
*/ }; #if defined(__cplusplus) @@ -479,6 +514,7 @@ extern "C" { #endif int db_appinit __P((const char *, char * const *, DB_ENV *, int)); int db_appexit __P((DB_ENV *)); +int db_jump_set __P((void *, int)); int db_open __P((const char *, DBTYPE, int, int, DB_ENV *, DB_INFO *, DB **)); char *db_version __P((int *, int *, int *)); #if defined(__cplusplus) @@ -576,6 +612,22 @@ struct __db_lsn { u_int32_t offset; /* File offset. */ }; +/* Log statistics structure. */ +struct __db_log_stat { + u_int32_t st_magic; /* Log file magic number. */ + u_int32_t st_version; /* Log file version number. */ + int st_mode; /* Log file mode. */ + u_int32_t st_lg_max; /* Maximum log file size. */ + u_int32_t st_w_bytes; /* Bytes to log. */ + u_int32_t st_w_mbytes; /* Megabytes to log. */ + u_int32_t st_wc_bytes; /* Bytes to log since checkpoint. */ + u_int32_t st_wc_mbytes; /* Megabytes to log since checkpoint. */ + u_int32_t st_wcount; /* Total writes to the log. */ + u_int32_t st_scount; /* Total syncs to the log. */ + u_int32_t st_region_wait; /* Region lock granted after wait. */ + u_int32_t st_region_nowait; /* Region lock granted without wait. */ +}; + #if defined(__cplusplus) extern "C" { #endif @@ -588,6 +640,7 @@ int log_get __P((DB_LOG *, DB_LSN *, DBT *, int)); int log_open __P((const char *, int, int, DB_ENV *, DB_LOG **)); int log_put __P((DB_LOG *, DB_LSN *, const DBT *, int)); int log_register __P((DB_LOG *, DB *, const char *, DBTYPE, u_int32_t *)); +int log_stat __P((DB_LOG *, DB_LOG_STAT **, void *(*)(size_t))); int log_unlink __P((const char *, int, DB_ENV *)); int log_unregister __P((DB_LOG *, u_int32_t)); #if defined(__cplusplus) @@ -610,30 +663,35 @@ int log_unregister __P((DB_LOG *, u_int32_t)); /* Mpool statistics structure. */ struct __db_mpool_stat { size_t st_cachesize; /* Cache size. */ - unsigned long st_cache_hit; /* Pages found in the cache. */ - unsigned long st_cache_miss; /* Pages not found in the cache.
*/ - unsigned long st_map; /* Pages from mapped files. */ - unsigned long st_page_create; /* Pages created in the cache. */ - unsigned long st_page_in; /* Pages read in. */ - unsigned long st_page_out; /* Pages written out. */ - unsigned long st_ro_evict; /* Read-only pages evicted. */ - unsigned long st_rw_evict; /* Read-write pages evicted. */ - unsigned long st_hash_buckets; /* Number of hash buckets. */ - unsigned long st_hash_searches; /* Total hash chain searches. */ - unsigned long st_hash_longest; /* Longest hash chain searched. */ - unsigned long st_hash_examined; /* Total hash entries searched. */ + u_int32_t st_cache_hit; /* Pages found in the cache. */ + u_int32_t st_cache_miss; /* Pages not found in the cache. */ + u_int32_t st_map; /* Pages from mapped files. */ + u_int32_t st_page_create; /* Pages created in the cache. */ + u_int32_t st_page_in; /* Pages read in. */ + u_int32_t st_page_out; /* Pages written out. */ + u_int32_t st_ro_evict; /* Clean pages forced from the cache. */ + u_int32_t st_rw_evict; /* Dirty pages forced from the cache. */ + u_int32_t st_hash_buckets; /* Number of hash buckets. */ + u_int32_t st_hash_searches; /* Total hash chain searches. */ + u_int32_t st_hash_longest; /* Longest hash chain searched. */ + u_int32_t st_hash_examined; /* Total hash entries searched. */ + u_int32_t st_page_clean; /* Clean pages. */ + u_int32_t st_page_dirty; /* Dirty pages. */ + u_int32_t st_page_trickle; /* Pages written by memp_trickle. */ + u_int32_t st_region_wait; /* Region lock granted after wait. */ + u_int32_t st_region_nowait; /* Region lock granted without wait. */ }; /* Mpool file statistics structure. */ struct __db_mpool_fstat { char *file_name; /* File name. */ size_t st_pagesize; /* Page size. */ - unsigned long st_cache_hit; /* Pages found in the cache. */ - unsigned long st_cache_miss; /* Pages not found in the cache. */ - unsigned long st_map; /* Pages from mapped files. 
*/ - unsigned long st_page_create; /* Pages created in the cache. */ - unsigned long st_page_in; /* Pages read in. */ - unsigned long st_page_out; /* Pages written out. */ + u_int32_t st_cache_hit; /* Pages found in the cache. */ + u_int32_t st_cache_miss; /* Pages not found in the cache. */ + u_int32_t st_map; /* Pages from mapped files. */ + u_int32_t st_page_create; /* Pages created in the cache. */ + u_int32_t st_page_in; /* Pages read in. */ + u_int32_t st_page_out; /* Pages written out. */ }; #if defined(__cplusplus) @@ -654,6 +712,7 @@ int memp_register __P((DB_MPOOL *, int, int memp_stat __P((DB_MPOOL *, DB_MPOOL_STAT **, DB_MPOOL_FSTAT ***, void *(*)(size_t))); int memp_sync __P((DB_MPOOL *, DB_LSN *)); +int memp_trickle __P((DB_MPOOL *, int, int *)); int memp_unlink __P((const char *, int, DB_ENV *)); #if defined(__cplusplus) }; diff --git a/db2/include/db_am.h b/db2/include/db_am.h index 5814ff88c3..0ea24be667 100644 --- a/db2/include/db_am.h +++ b/db2/include/db_am.h @@ -4,7 +4,7 @@ * Copyright (c) 1996, 1997 * Sleepycat Software. All rights reserved. 
* - * @(#)db_am.h 10.6 (Sleepycat) 8/27/97 + * @(#)db_am.h 10.7 (Sleepycat) 10/25/97 */ #ifndef _DB_AM_H #define _DB_AM_H @@ -49,7 +49,7 @@ } #define REC_CLOSE { \ if (argp != NULL) \ - free (argp); \ + __db_free(argp); \ if (file_dbp != NULL) { \ F_CLR(file_dbp, DB_AM_RECOVER); \ if (F_ISSET(file_dbp, DB_AM_THREAD)) \ @@ -67,7 +67,7 @@ } #define REC_NOOP_CLOSE { \ if (argp != NULL) \ - free (argp); \ + __db_free(argp); \ return (ret); \ } diff --git a/db2/include/db_auto.h b/db2/include/db_auto.h index 7478173740..4c7b4da970 100644 --- a/db2/include/db_auto.h +++ b/db2/include/db_auto.h @@ -59,6 +59,7 @@ typedef struct _db_ovref_args { DB_LSN prev_lsn; u_int32_t fileid; db_pgno_t pgno; + int32_t adjust; DB_LSN lsn; } __db_ovref_args; diff --git a/db2/include/db_cxx.h b/db2/include/db_cxx.h index 611d967ef9..01d1231092 100644 --- a/db2/include/db_cxx.h +++ b/db2/include/db_cxx.h @@ -4,12 +4,11 @@ * Copyright (c) 1997 * Sleepycat Software. All rights reserved. * - * @(#)db_cxx.h 10.8 (Sleepycat) 9/20/97 + * @(#)db_cxx.h 10.12 (Sleepycat) 10/25/97 */ #ifndef _DB_CXX_H_ #define _DB_CXX_H_ - // // C++ assumptions: // @@ -264,7 +263,7 @@ public: // Normally these would be called register and unregister to // parallel the C interface, but "register" is a reserved word. // - int db_register(Db *dbp, const char *name, u_int32_t *fidp); + int db_register(Db *dbp, const char *name, DBTYPE type, u_int32_t *fidp); int db_unregister(u_int32_t fid); // Create or remove new log files @@ -353,6 +352,7 @@ public: int stat(DB_MPOOL_STAT **gsp, DB_MPOOL_FSTAT ***fsp, void *(*db_malloc)(size_t)); int sync(DbLsn *lsn); + int trickle(int pct, int *nwrotep); // Create or remove new mpool files // @@ -598,6 +598,11 @@ public: // int appinit(const char *homeDir, char *const *db_config, int flags); + // Called automatically when DbEnv is destroyed, or can be + // called at any time to shut down Db. 
+ // + int appexit(); + //////////////////////////////////////////////////////////////// // simple get/set access methods // @@ -675,11 +680,6 @@ public: u_int32_t get_lk_detect() const; void set_lk_detect(u_int32_t); - // Yield function for threads. - typedef int (*db_yield_fcn) (void); - db_yield_fcn get_yield() const; - void set_yield(db_yield_fcn); - //////////////////////////////////////////////////////////////// // Logging. @@ -783,7 +783,7 @@ class _exported Db public: int close(int flags); int cursor(DbTxn *txnid, Dbc **cursorp); - int del(Dbt *key, DbTxn *txnid); + int del(DbTxn *txnid, Dbt *key, int flags); int fd(int *fdp); int get(DbTxn *txnid, Dbt *key, Dbt *data, int flags); int put(DbTxn *txnid, Dbt *key, Dbt *data, int flags); @@ -884,5 +884,4 @@ private: Dbc(const Dbc &); Dbc &operator = (const Dbc &); }; - #endif /* !_DB_CXX_H_ */ diff --git a/db2/include/db_ext.h b/db2/include/db_ext.h index b18b10ff7f..f9b3b3a214 100644 --- a/db2/include/db_ext.h +++ b/db2/include/db_ext.h @@ -1,4 +1,4 @@ -/* Do not edit: automatically built by dist/distrib. */ +/* DO NOT EDIT: automatically built by dist/distrib. 
*/ int __db_pgerr __P((DB *, db_pgno_t)); int __db_pgfmt __P((DB *, db_pgno_t)); int __db_addrem_log @@ -25,7 +25,7 @@ int __db_big_print int __db_big_read __P((void *, __db_big_args **)); int __db_ovref_log __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t, - u_int32_t, db_pgno_t, DB_LSN *)); + u_int32_t, db_pgno_t, int32_t, DB_LSN *)); int __db_ovref_print __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); int __db_ovref_read __P((void *, __db_ovref_args **)); @@ -79,7 +79,7 @@ int __db_goff __P((DB *, DBT *, u_int32_t, db_pgno_t, void **, u_int32_t *)); int __db_poff __P((DB *, const DBT *, db_pgno_t *, int (*)(DB *, u_int32_t, PAGE **))); -int __db_ioff __P((DB *, db_pgno_t)); +int __db_ovref __P((DB *, db_pgno_t, int)); int __db_doff __P((DB *, db_pgno_t, int (*)(DB *, PAGE *))); int __db_moff __P((DB *, const DBT *, db_pgno_t)); void __db_loadme __P((void)); diff --git a/db2/include/db_int.h.src b/db2/include/db_int.h.src index ebadb35d36..abd93a6e8e 100644 --- a/db2/include/db_int.h.src +++ b/db2/include/db_int.h.src @@ -4,7 +4,7 @@ * Copyright (c) 1996, 1997 * Sleepycat Software. All rights reserved. * - * @(#)db_int.h.src 10.30 (Sleepycat) 9/23/97 + * @(#)db_int.h.src 10.36 (Sleepycat) 10/31/97 */ #ifndef _DB_INTERNAL_H_ @@ -12,6 +12,7 @@ #include "db.h" /* Standard DB include file. */ #include "queue.h" +#include "os_func.h" #include "os_ext.h" /******************************************************* @@ -64,12 +65,16 @@ #undef SSZA #define SSZA(name, field) ((int)&(((name *)0)->field[0])) +/* Macros to return per-process address, offsets based on shared regions. */ +#define R_ADDR(base, offset) ((void *)((u_int8_t *)((base)->addr) + offset)) +#define R_OFFSET(base, p) ((u_int8_t *)(p) - (u_int8_t *)(base)->addr) + /* Free and free-string macros that overwrite memory during debugging. 
*/ #ifdef DEBUG #undef FREE #define FREE(p, len) { \ memset(p, 0xff, len); \ - free(p); \ + __db_free(p); \ } #undef FREES #define FREES(p) { \ @@ -78,18 +83,18 @@ #else #undef FREE #define FREE(p, len) { \ - free(p); \ + __db_free(p); \ } #undef FREES #define FREES(p) { \ - free(p); \ + __db_free(p); \ } #endif /* Structure used to print flag values. */ typedef struct __fn { u_int32_t mask; /* Flag value. */ - char *name; /* Flag name. */ + const char *name; /* Flag name. */ } FN; /* Set, clear and test flags. */ @@ -163,10 +168,8 @@ typedef struct _db_mutex_t { off_t off; /* Backing file offset. */ u_long pid; /* Lock holder: 0 or process pid. */ #endif -#ifdef MUTEX_STATISTICS - u_long mutex_set_wait; /* Blocking mutex: required waiting. */ - u_long mutex_set_nowait; /* Blocking mutex: without waiting. */ -#endif + u_int32_t mutex_set_wait; /* Granted after wait. */ + u_int32_t mutex_set_nowait; /* Granted without waiting. */ } db_mutex_t; #include "mutex_ext.h" @@ -177,11 +180,10 @@ typedef struct _db_mutex_t { /* Lock/unlock a DB thread. */ #define DB_THREAD_LOCK(dbp) \ (F_ISSET(dbp, DB_AM_THREAD) ? \ - __db_mutex_lock((db_mutex_t *)(dbp)->mutexp, -1, \ - (dbp)->dbenv == NULL ? NULL : (dbp)->dbenv->db_yield) : 0) + __db_mutex_lock((db_mutex_t *)(dbp)->mutexp, -1) : 0) #define DB_THREAD_UNLOCK(dbp) \ (F_ISSET(dbp, DB_AM_THREAD) ? \ - __db_mutex_unlock((db_mutex_t *)(dbp)->mutexp, -1) : 0) + __db_mutex_unlock((db_mutex_t *)(dbp)->mutexp, -1) : 0) /* Btree/recno local statistics structure. */ struct __db_bt_lstat; typedef struct __db_bt_lstat DB_BTREE_LSTAT; @@ -260,7 +262,7 @@ typedef struct __dbpginfo { #define IS_ZERO_LSN(LSN) ((LSN).file == 0) /* Test if we need to log a change. 
*/ -#define DB_LOGGING(dbp) \ +#define DB_LOGGING(dbp) \ (F_ISSET(dbp, DB_AM_LOGGING) && !F_ISSET(dbp, DB_AM_RECOVER)) #ifdef DEBUG diff --git a/db2/include/hash.h b/db2/include/hash.h index cb8ea350f5..ae6d3843c6 100644 --- a/db2/include/hash.h +++ b/db2/include/hash.h @@ -43,7 +43,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * @(#)hash.h 10.6 (Sleepycat) 8/18/97 + * @(#)hash.h 10.7 (Sleepycat) 11/1/97 */ /* Cursor structure definitions. */ @@ -179,8 +179,8 @@ typedef struct htab { /* Memory resident data structure. */ /* Constraints about number of pages and how much data goes on a page. */ #define MAX_PAGES(H) UINT32_T_MAX -#define MINFILL 0.25 -#define ISBIG(H, N) (((N) > ((H)->hdr->pagesize * MINFILL)) ? 1 : 0) +#define MINFILL 4 +#define ISBIG(H, N) (((N) > ((H)->hdr->pagesize / MINFILL)) ? 1 : 0) /* Shorthands for accessing structure */ #define NDX_INVALID 0xFFFF diff --git a/db2/include/hash_auto.h b/db2/include/hash_auto.h index 5ff1229115..2b8aea8d86 100644 --- a/db2/include/hash_auto.h +++ b/db2/include/hash_auto.h @@ -108,7 +108,25 @@ typedef struct _ham_ovfl_args { db_pgno_t start_pgno; u_int32_t npages; db_pgno_t free_pgno; + u_int32_t ovflpoint; DB_LSN metalsn; } __ham_ovfl_args; + +#define DB_ham_copypage (DB_ham_BEGIN + 8) + +typedef struct _ham_copypage_args { + u_int32_t type; + DB_TXN *txnid; + DB_LSN prev_lsn; + u_int32_t fileid; + db_pgno_t pgno; + DB_LSN pagelsn; + db_pgno_t next_pgno; + DB_LSN nextlsn; + db_pgno_t nnext_pgno; + DB_LSN nnextlsn; + DBT page; +} __ham_copypage_args; + #endif diff --git a/db2/include/hash_ext.h b/db2/include/hash_ext.h index 32788c7b8a..5abbb274f0 100644 --- a/db2/include/hash_ext.h +++ b/db2/include/hash_ext.h @@ -1,4 +1,4 @@ -/* Do not edit: automatically built by dist/distrib. */ +/* DO NOT EDIT: automatically built by dist/distrib. 
*/ int __ham_open __P((DB *, DB_INFO *)); int __ham_close __P((DB *)); int __ham_c_iclose __P((DB *, DBC *)); @@ -54,10 +54,17 @@ int __ham_newpgno_read __P((void *, __ham_newpgno_args **)); int __ham_ovfl_log __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t, u_int32_t, db_pgno_t, u_int32_t, db_pgno_t, - DB_LSN *)); + u_int32_t, DB_LSN *)); int __ham_ovfl_print __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); int __ham_ovfl_read __P((void *, __ham_ovfl_args **)); +int __ham_copypage_log + __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t, + u_int32_t, db_pgno_t, DB_LSN *, db_pgno_t, + DB_LSN *, db_pgno_t, DB_LSN *, DBT *)); +int __ham_copypage_print + __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); +int __ham_copypage_read __P((void *, __ham_copypage_args **)); int __ham_init_print __P((DB_ENV *)); int __ham_init_recover __P((DB_ENV *)); int __ham_pgin __P((db_pgno_t, void *, DBT *)); @@ -81,7 +88,7 @@ int __ham_item_first __P((HTAB *, HASH_CURSOR *, db_lockmode_t)); int __ham_item_prev __P((HTAB *, HASH_CURSOR *, db_lockmode_t)); int __ham_item_next __P((HTAB *, HASH_CURSOR *, db_lockmode_t)); void __ham_putitem __P((PAGE *p, const DBT *, int)); -int __ham_del_pair __P((HTAB *, HASH_CURSOR *)); +int __ham_del_pair __P((HTAB *, HASH_CURSOR *, int)); int __ham_replpair __P((HTAB *, HASH_CURSOR *, DBT *, u_int32_t)); void __ham_onpage_replace __P((PAGE *, size_t, u_int32_t, int32_t, int32_t, DBT *)); @@ -118,4 +125,6 @@ int __ham_splitdata_recover __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); int __ham_ovfl_recover __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); +int __ham_copypage_recover + __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); int __ham_stat __P((DB *, FILE *)); diff --git a/db2/include/lock.h b/db2/include/lock.h index 8f9e81c0fa..8a927f076e 100644 --- a/db2/include/lock.h +++ b/db2/include/lock.h @@ -4,7 +4,7 @@ * Copyright (c) 1996, 1997 * Sleepycat Software. All rights reserved. 
* - * @(#)lock.h 10.8 (Sleepycat) 9/23/97 + * @(#)lock.h 10.9 (Sleepycat) 10/25/97 */ typedef struct __db_lockobj DB_LOCKOBJ; @@ -54,8 +54,7 @@ struct __db_lockregion { /* Macros to lock/unlock the region. */ #define LOCK_LOCKREGION(lt) \ - (void)__db_mutex_lock(&(lt)->region->hdr.lock,(lt)->fd, \ - (lt)->dbenv == NULL ? NULL : (lt)->dbenv->db_yield) + (void)__db_mutex_lock(&(lt)->region->hdr.lock, (lt)->fd) #define UNLOCK_LOCKREGION(lt) \ (void)__db_mutex_unlock(&(lt)->region->hdr.lock, (lt)->fd) diff --git a/db2/include/lock_ext.h b/db2/include/lock_ext.h index 59d5072bc4..0d0ba148b6 100644 --- a/db2/include/lock_ext.h +++ b/db2/include/lock_ext.h @@ -1,4 +1,4 @@ -/* Do not edit: automatically built by dist/distrib. */ +/* DO NOT EDIT: automatically built by dist/distrib. */ int __lock_getobj __P((DB_LOCKTAB *, u_int32_t, DBT *, u_int32_t type, DB_LOCKOBJ **)); int __lock_cmp __P((DBT *, DB_LOCKOBJ *)); diff --git a/db2/include/log.h b/db2/include/log.h index a9c82fa04d..a192a38136 100644 --- a/db2/include/log.h +++ b/db2/include/log.h @@ -4,7 +4,7 @@ * Copyright (c) 1996, 1997 * Sleepycat Software. All rights reserved. * - * @(#)log.h 10.9 (Sleepycat) 9/23/97 + * @(#)log.h 10.15 (Sleepycat) 11/2/97 */ #ifndef _LOG_H_ @@ -15,6 +15,8 @@ struct __hdr; typedef struct __hdr HDR; struct __log; typedef struct __log LOG; struct __log_persist; typedef struct __log_persist LOGP; +#define MEGABYTE (1024 * 1024) + #define MAXLFNAME 99999 /* Maximum log file name. */ #define LFNAME "log.%05d" /* Log file name template. */ @@ -23,21 +25,15 @@ struct __log_persist; typedef struct __log_persist LOGP; #define DEFAULT_MAX (10 * 1048576) /* 10 Mb. */ -/* Macros to return per-process address, offsets. */ -#define ADDR(base, offset) ((void *)((u_int8_t *)((base)->addr) + offset)) -#define OFFSET(base, p) ((u_int8_t *)(p) - (u_int8_t *)(base)->addr) - /* Macros to lock/unlock the region and threads. 
*/ #define LOCK_LOGTHREAD(dblp) \ if (F_ISSET(dblp, DB_AM_THREAD)) \ - (void)__db_mutex_lock((dblp)->mutexp, -1, \ - (dblp)->dbenv == NULL ? NULL : (dblp)->dbenv->db_yield) + (void)__db_mutex_lock((dblp)->mutexp, -1) #define UNLOCK_LOGTHREAD(dblp) \ if (F_ISSET(dblp, DB_AM_THREAD)) \ (void)__db_mutex_unlock((dblp)->mutexp, -1); #define LOCK_LOGREGION(dblp) \ - (void)__db_mutex_lock(&((RLAYOUT *)(dblp)->lp)->lock, \ - (dblp)->fd, (dblp)->dbenv == NULL ? NULL : (dblp)->dbenv->db_yield) + (void)__db_mutex_lock(&((RLAYOUT *)(dblp)->lp)->lock, (dblp)->fd) #define UNLOCK_LOGREGION(dblp) \ (void)__db_mutex_unlock(&((RLAYOUT *)(dblp)->lp)->lock, (dblp)->fd) @@ -124,7 +120,7 @@ struct __log { DB_LSN lsn; /* LSN at current file offset. */ DB_LSN c_lsn; /* LSN of the last checkpoint. */ DB_LSN s_lsn; /* LSN of the last sync. */ - DB_LSN span_lsn; /* LSN spanning buffer write. */ + DB_LSN uw_lsn; /* LSN of 1st rec not fully on disk. */ u_int32_t len; /* Length of the last record. */ @@ -132,7 +128,8 @@ struct __log { u_int32_t w_off; /* Current write offset in the file. */ time_t chkpt; /* Time of the last checkpoint. */ - u_int32_t written; /* Bytes written since checkpoint. */ + + DB_LOG_STAT stat; /* Log statistics. */ u_int8_t buf[4 * 1024]; /* Log buffer. */ }; diff --git a/db2/include/log_ext.h b/db2/include/log_ext.h index bc63d9dac8..c32d1d6af6 100644 --- a/db2/include/log_ext.h +++ b/db2/include/log_ext.h @@ -1,4 +1,4 @@ -/* Do not edit: automatically built by dist/distrib. */ +/* DO NOT EDIT: automatically built by dist/distrib. */ int __log_find __P((DB_LOG *, int *)); int __log_valid __P((DB_LOG *, LOG *, int)); int __log_register_log diff --git a/db2/include/mp.h b/db2/include/mp.h index 3b71774484..f68f42b144 100644 --- a/db2/include/mp.h +++ b/db2/include/mp.h @@ -4,7 +4,7 @@ * Copyright (c) 1996, 1997 * Sleepycat Software. All rights reserved. 
* - * @(#)mp.h 10.16 (Sleepycat) 9/23/97 + * @(#)mp.h 10.19 (Sleepycat) 10/25/97 */ struct __bh; typedef struct __bh BH; @@ -22,30 +22,36 @@ struct __mpoolfile; typedef struct __mpoolfile MPOOLFILE; #define DB_CACHESIZE_DEF (128 * 1024) #define DB_CACHESIZE_MIN ( 20 * 1024) -/* Macro to return per-process address, offsets. */ -#define ADDR(base, offset) ((void *)((u_int8_t *)((base)->addr) + offset)) -#define OFFSET(base, p) ((u_int8_t *)(p) - (u_int8_t *)(base)->addr) - #define INVALID 0 /* Invalid shared memory offset. */ #define TEMPORARY "<tmp>" /* Temporary file name. */ /* - * There are two kinds of locks in the mpool code. The first is the region - * lock, used to serialize modifications to all data structures. The second - * is a per-buffer header lock. The locking order is as follows: + * There are three ways we do locking in the mpool code: + * + * Locking a handle mutex to provide concurrency for DB_THREAD operations. + * Locking the region mutex to provide mutual exclusion while reading and + * writing structures in the shared region. + * Locking buffer header mutexes during I/O. + * + * The first will not be further described here. We use the shared mpool + * region lock to provide mutual exclusion while reading/modifying all of + * the data structures, including the buffer headers. We use a per-buffer + * header lock to wait on buffer I/O. The order of locking is as follows: * - * Process searching for a buffer: + * Searching for a buffer: * Acquire the region lock. * Find the buffer header. * Increment the reference count (guarantee the buffer stays). - * If the BH_LOCKED flag is set: + * If the BH_LOCKED flag is set (I/O is going on): * Release the region lock. + * Request the buffer lock. + * The I/O will complete... * Acquire the buffer lock. * Release the buffer lock. * Acquire the region lock. * Return the buffer. * - * Process reading/writing a buffer: + * Reading/writing a buffer: * Acquire the region lock. * Find/create the buffer header. 
* If reading, increment the reference count (guarantee the buffer stays). @@ -69,8 +75,7 @@ struct __mpoolfile; typedef struct __mpoolfile MPOOLFILE; #define LOCKHANDLE(dbmp, mutexp) \ if (F_ISSET(dbmp, MP_LOCKHANDLE)) \ - (void)__db_mutex_lock(mutexp, (dbmp)->fd, \ - (dbmp)->dbenv == NULL ? NULL : (dbmp)->dbenv->db_yield) + (void)__db_mutex_lock(mutexp, (dbmp)->fd) #define UNLOCKHANDLE(dbmp, mutexp) \ if (F_ISSET(dbmp, MP_LOCKHANDLE)) \ (void)__db_mutex_unlock(mutexp, (dbmp)->fd) @@ -78,8 +83,7 @@ struct __mpoolfile; typedef struct __mpoolfile MPOOLFILE; #define LOCKREGION(dbmp) \ if (F_ISSET(dbmp, MP_LOCKREGION)) \ (void)__db_mutex_lock(&((RLAYOUT *)(dbmp)->mp)->lock, \ - (dbmp)->fd, \ - (dbmp)->dbenv == NULL ? NULL : (dbmp)->dbenv->db_yield) + (dbmp)->fd) #define UNLOCKREGION(dbmp) \ if (F_ISSET(dbmp, MP_LOCKREGION)) \ (void)__db_mutex_unlock(&((RLAYOUT *)(dbmp)->mp)->lock, \ @@ -87,8 +91,7 @@ struct __mpoolfile; typedef struct __mpoolfile MPOOLFILE; #define LOCKBUFFER(dbmp, bhp) \ if (F_ISSET(dbmp, MP_LOCKREGION)) \ - (void)__db_mutex_lock(&(bhp)->mutex, (dbmp)->fd, \ - (dbmp)->dbenv == NULL ? NULL : (dbmp)->dbenv->db_yield) + (void)__db_mutex_lock(&(bhp)->mutex, (dbmp)->fd) #define UNLOCKBUFFER(dbmp, bhp) \ if (F_ISSET(dbmp, MP_LOCKREGION)) \ (void)__db_mutex_unlock(&(bhp)->mutex, (dbmp)->fd) @@ -250,8 +253,8 @@ struct __bh { #define BH_WRITE 0x020 /* Page scheduled for writing. */ u_int16_t flags; - SH_TAILQ_ENTRY q; /* LRU list of bucket headers. */ - SH_TAILQ_ENTRY mq; /* MPOOLFILE list of bucket headers. */ + SH_TAILQ_ENTRY q; /* LRU queue. */ + SH_TAILQ_ENTRY hq; /* MPOOL hash bucket queue. */ db_pgno_t pgno; /* Underlying MPOOLFILE page number. */ size_t mf_offset; /* Associated MPOOLFILE offset. */ diff --git a/db2/include/mp_ext.h b/db2/include/mp_ext.h index 3934c130a8..49d86ba2e5 100644 --- a/db2/include/mp_ext.h +++ b/db2/include/mp_ext.h @@ -1,4 +1,4 @@ -/* Do not edit: automatically built by dist/distrib. 
*/ +/* DO NOT EDIT: automatically built by dist/distrib. */ int __memp_bhwrite __P((DB_MPOOL *, MPOOLFILE *, BH *, int *, int *)); int __memp_pgread __P((DB_MPOOLFILE *, BH *, int)); diff --git a/db2/include/mutex_ext.h b/db2/include/mutex_ext.h index ff46b6a404..cb2d4886af 100644 --- a/db2/include/mutex_ext.h +++ b/db2/include/mutex_ext.h @@ -1,4 +1,4 @@ -/* Do not edit: automatically built by dist/distrib. */ +/* DO NOT EDIT: automatically built by dist/distrib. */ void __db_mutex_init __P((db_mutex_t *, off_t)); -int __db_mutex_lock __P((db_mutex_t *, int, int (*)(void))); +int __db_mutex_lock __P((db_mutex_t *, int)); int __db_mutex_unlock __P((db_mutex_t *, int)); diff --git a/db2/include/os_ext.h b/db2/include/os_ext.h index 59d72acf12..e48a1e9407 100644 --- a/db2/include/os_ext.h +++ b/db2/include/os_ext.h @@ -1,19 +1,19 @@ -/* Do not edit: automatically built by dist/distrib. */ +/* DO NOT EDIT: automatically built by dist/distrib. */ int __db_abspath __P((const char *)); -char *__db_rpath __P((const char *)); -int __db_dir __P((DB_ENV *, const char *, char ***, int *)); -void __db_dirf __P((DB_ENV *, char **, int)); +int __os_dirlist __P((const char *, char ***, int *)); +void __os_dirfree __P((char **, int)); int __db_fileid __P((DB_ENV *, const char *, int, u_int8_t *)); -int __db_lseek __P((int, size_t, db_pgno_t, u_long, int)); -int __db_mmap __P((int, size_t, int, int, void *)); -int __db_munmap __P((void *, size_t)); -int __db_oflags __P((int)); -int __db_fdopen __P((const char *, int, int, int, int *)); int __db_fsync __P((int)); +int __os_map __P((int, size_t, int, int, void **)); +int __os_unmap __P((void *, size_t)); +int __db_oflags __P((int)); +int __db_open __P((const char *, int, int, int, int *)); int __db_close __P((int)); +char *__db_rpath __P((const char *)); int __db_read __P((int, void *, size_t, ssize_t *)); int __db_write __P((int, void *, size_t, ssize_t *)); -int __db_sleep __P((u_long, u_long)); -int __db_exists __P((const char *, 
int *)); -int __db_stat __P((DB_ENV *, const char *, int, off_t *, off_t *)); +int __os_seek __P((int, size_t, db_pgno_t, u_long, int)); +int __os_sleep __P((u_long, u_long)); +int __os_exists __P((const char *, int *)); +int __os_ioinfo __P((const char *, int, off_t *, off_t *)); int __db_unlink __P((const char *)); diff --git a/db2/include/os_func.h b/db2/include/os_func.h new file mode 100644 index 0000000000..0a72942903 --- /dev/null +++ b/db2/include/os_func.h @@ -0,0 +1,76 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1997 + * Sleepycat Software. All rights reserved. + * + * @(#)os_func.h 10.2 (Sleepycat) 10/28/97 + */ + +/* Calls which can be replaced by the application. */ +struct __db_jumptab { + void *(*db_calloc) __P((size_t, size_t)); /* DB_FUNC_CALLOC */ + int (*db_close) __P((int)); /* DB_FUNC_CLOSE */ + void (*db_dirfree) __P((char **, int)); /* DB_FUNC_DIRFREE */ + int (*db_dirlist) /* DB_FUNC_DIRLIST */ + __P((const char *, char ***, int *)); + int (*db_exists) /* DB_FUNC_EXISTS */ + __P((const char *, int *)); + void (*db_free) __P((void *)); /* DB_FUNC_FREE */ + int (*db_fsync) __P((int)); /* DB_FUNC_FSYNC */ + int (*db_ioinfo) /* DB_FUNC_IOINFO */ + __P((const char *, int, off_t *, off_t *)); + void *(*db_malloc) __P((size_t)); /* DB_FUNC_MALLOC */ + int (*db_map) /* DB_FUNC_MAP */ + __P((int, size_t, int, int, void **)); + int (*db_open) /* DB_FUNC_OPEN */ + __P((const char *, int, ...)); + ssize_t (*db_read) __P((int, void *, size_t)); /* DB_FUNC_READ */ + void *(*db_realloc) __P((void *, size_t)); /* DB_FUNC_REALLOC */ + int (*db_seek) /* DB_FUNC_SEEK */ + __P((int, size_t, db_pgno_t, u_long, int)); + int (*db_sleep) __P((u_long, u_long)); /* DB_FUNC_SLEEP */ + char *(*db_strdup) __P((const char *)); /* DB_FUNC_STRDUP */ + int (*db_unlink) __P((const char *)); /* DB_FUNC_UNLINK */ + int (*db_unmap) __P((void *, size_t)); /* DB_FUNC_UNMAP */ + ssize_t (*db_write) /* DB_FUNC_WRITE */ + __P((int, const 
void *, size_t)); + int (*db_yield) __P((void)); /* DB_FUNC_YIELD */ +}; + +extern struct __db_jumptab __db_jump; + +/* + * Names used by DB to call through the jump table. + * + * The naming scheme goes like this: if the functionality the application can + * replace is the same as the DB functionality, e.g., calloc, or dirlist, then + * we use the name __db_XXX, and the application is expected to replace the + * complete functionality, which may or may not map directly to an ANSI C or + * POSIX 1003.1 interface. If the functionality that the aplication replaces + * only underlies what the DB os directory exports to other parts of DB, e.g., + * read, then the name __os_XXX is used, and the application can only replace + * the underlying functionality. Under most circumstances, the os directory + * part of DB is the only code that should use the __os_XXX names, all other + * parts of DB should be calling __db_XXX functions. + */ +#define __db_calloc __db_jump.db_calloc +#define __os_close __db_jump.db_close /* __db_close is a wrapper. */ +#define __db_dirfree __db_jump.db_dirfree +#define __db_dirlist __db_jump.db_dirlist +#define __db_exists __db_jump.db_exists +#define __db_free __db_jump.db_free +#define __os_fsync __db_jump.db_fsync /* __db_fsync is a wrapper. */ +#define __db_ioinfo __db_jump.db_ioinfo +#define __db_malloc __db_jump.db_malloc +#define __db_map __db_jump.db_map +#define __os_open __db_jump.db_open /* __db_open is a wrapper. */ +#define __os_read __db_jump.db_read /* __db_read is a wrapper. */ +#define __db_realloc __db_jump.db_realloc +#define __db_seek __db_jump.db_seek +#define __db_sleep __db_jump.db_sleep +#define __db_strdup __db_jump.db_strdup +#define __os_unlink __db_jump.db_unlink /* __db_unlink is a wrapper. */ +#define __db_unmap __db_jump.db_unmap +#define __os_write __db_jump.db_write /* __db_write is a wrapper. 
*/ +#define __db_yield __db_jump.db_yield diff --git a/db2/include/txn.h b/db2/include/txn.h index 8bb3976c1c..c64ac3fc52 100644 --- a/db2/include/txn.h +++ b/db2/include/txn.h @@ -4,7 +4,7 @@ * Copyright (c) 1996, 1997 * Sleepycat Software. All rights reserved. * - * @(#)txn.h 10.10 (Sleepycat) 9/23/97 + * @(#)txn.h 10.11 (Sleepycat) 10/25/97 */ #ifndef _TXN_H_ #define _TXN_H_ @@ -96,15 +96,13 @@ struct __db_txnregion { /* Macros to lock/unlock the region and threads. */ #define LOCK_TXNTHREAD(tmgrp) \ if (F_ISSET(tmgrp, DB_THREAD)) \ - (void)__db_mutex_lock((tmgrp)->mutexp, -1, \ - (tmgrp)->dbenv == NULL ? NULL : (tmgrp)->dbenv->db_yield) + (void)__db_mutex_lock((tmgrp)->mutexp, -1) #define UNLOCK_TXNTHREAD(tmgrp) \ if (F_ISSET(tmgrp, DB_THREAD)) \ (void)__db_mutex_unlock((tmgrp)->mutexp, -1) #define LOCK_TXNREGION(tmgrp) \ - (void)__db_mutex_lock(&(tmgrp)->region->hdr.lock,(tmgrp)->fd, \ - (tmgrp)->dbenv == NULL ? NULL : (tmgrp)->dbenv->db_yield) + (void)__db_mutex_lock(&(tmgrp)->region->hdr.lock, (tmgrp)->fd) #define UNLOCK_TXNREGION(tmgrp) \ (void)__db_mutex_unlock(&(tmgrp)->region->hdr.lock, (tmgrp)->fd) diff --git a/db2/include/txn_ext.h b/db2/include/txn_ext.h index 8ba0b0c44e..9b617bb68c 100644 --- a/db2/include/txn_ext.h +++ b/db2/include/txn_ext.h @@ -1,4 +1,4 @@ -/* Do not edit: automatically built by dist/distrib. */ +/* DO NOT EDIT: automatically built by dist/distrib. */ int __txn_regop_log __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t, u_int32_t)); diff --git a/db2/lock/lock.c b/db2/lock/lock.c index a2a3b208f2..f1223a9fa6 100644 --- a/db2/lock/lock.c +++ b/db2/lock/lock.c @@ -8,7 +8,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)lock.c 10.36 (Sleepycat) 9/24/97"; +static const char sccsid[] = "@(#)lock.c 10.38 (Sleepycat) 10/25/97"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -200,7 +200,7 @@ lock_open(path, flags, mode, dbenv, ltp) /* * Create the lock table structure. 
*/ - if ((lt = (DB_LOCKTAB *)calloc(1, sizeof(DB_LOCKTAB))) == NULL) { + if ((lt = (DB_LOCKTAB *)__db_calloc(1, sizeof(DB_LOCKTAB))) == NULL) { __db_err(dbenv, "%s", strerror(ENOMEM)); return (ENOMEM); } @@ -269,7 +269,7 @@ out: if (lt->region != NULL) (void)__db_rclose(lt->dbenv, lt->fd, lt->region); if (LF_ISSET(DB_CREATE)) (void)lock_unlink(path, 1, lt->dbenv); - free(lt); + __db_free(lt); return (ret); } @@ -505,7 +505,7 @@ lock_close(lt) return (ret); /* Free lock table. */ - free(lt); + __db_free(lt); return (0); } @@ -728,8 +728,7 @@ __lock_get_internal(lt, locker, flags, obj, lock_mode, lockp) */ (void)__db_mutex_init(&newl->mutex, MUTEX_LOCK_OFFSET(lt->region, &newl->mutex)); - (void)__db_mutex_lock(&newl->mutex, lt->fd, - lt->dbenv == NULL ? NULL : lt->dbenv->db_yield); + (void)__db_mutex_lock(&newl->mutex, lt->fd); /* * Now, insert the lock onto its locker's list. @@ -760,8 +759,7 @@ __lock_get_internal(lt, locker, flags, obj, lock_mode, lockp) if (lrp->detect != DB_LOCK_NORUN) ret = lock_detect(lt, 0, lrp->detect); - (void)__db_mutex_lock(&newl->mutex, - lt->fd, lt->dbenv == NULL ? NULL : lt->dbenv->db_yield); + (void)__db_mutex_lock(&newl->mutex, lt->fd); LOCK_LOCKREGION(lt); if (newl->status != DB_LSTAT_PENDING) { @@ -975,11 +973,9 @@ __lock_dump_region(lt, flags) #ifndef HAVE_SPINLOCKS printf("Mutex: off %lu", (u_long)lrp->hdr.lock.off); #endif -#ifdef MUTEX_STATISTICS printf(" waits %lu nowaits %lu", (u_long)lrp->hdr.lock.mutex_set_wait, (u_long)lrp->hdr.lock.mutex_set_nowait); -#endif printf("\n%s:%lu\t%s:%lu\t%s:%lu\t%s:%lu\n", "nconflicts ", (u_long)lrp->nconflicts, "nrequests ", (u_long)lrp->nrequests, diff --git a/db2/lock/lock_deadlock.c b/db2/lock/lock_deadlock.c index f947f901c3..566021fe89 100644 --- a/db2/lock/lock_deadlock.c +++ b/db2/lock/lock_deadlock.c @@ -11,7 +11,7 @@ static const char copyright[] = "@(#) Copyright (c) 1997\n\ Sleepycat Software Inc. 
All rights reserved.\n"; -static const char sccsid[] = "@(#)lock_deadlock.c 10.21 (Sleepycat) 9/6/97"; +static const char sccsid[] = "@(#)lock_deadlock.c 10.25 (Sleepycat) 11/1/97"; #endif #ifndef NO_SYSTEM_INCLUDES @@ -50,16 +50,19 @@ typedef struct { int valid; u_int32_t id; DB_LOCK last_lock; + db_pgno_t pgno; } locker_info; static int __dd_abort __P((DB_ENV *, locker_info *)); -static int __dd_build __P((DB_ENV *, u_int32_t **, int *, locker_info **)); -#ifdef DEBUG -static void __dd_debug __P((DB_ENV *, locker_info *, u_int32_t *, int)); -#endif +static int __dd_build + __P((DB_ENV *, u_int32_t **, u_int32_t *, locker_info **)); static u_int32_t *__dd_find __P((u_int32_t *, locker_info *, u_int32_t)); +#ifdef DEBUG +static void __dd_debug __P((DB_ENV *, locker_info *, u_int32_t *, u_int32_t)); +#endif + int lock_detect(lt, flags, atype) DB_LOCKTAB *lt; @@ -68,8 +71,8 @@ lock_detect(lt, flags, atype) { DB_ENV *dbenv; locker_info *idmap; - u_int32_t *bitmap, *deadlock, killid; - int do_pass, i, nlockers, nentries, ret; + u_int32_t *bitmap, *deadlock, i, killid, nentries, nlockers; + int do_pass, ret; /* Validate arguments. */ if ((ret = @@ -77,17 +80,16 @@ lock_detect(lt, flags, atype) return (ret); /* Check if a detector run is necessary. */ - do_pass = 1; dbenv = lt->dbenv; if (LF_ISSET(DB_LOCK_CONFLICT)) { /* Make a pass every time a lock waits. */ LOCK_LOCKREGION(lt); do_pass = dbenv->lk_info->region->need_dd != 0; UNLOCK_LOCKREGION(lt); - } - if (!do_pass) - return (0); + if (!do_pass) + return (0); + } /* Build the waits-for bitmap. */ if ((ret = __dd_build(dbenv, &bitmap, &nlockers, &idmap)) != 0) @@ -118,8 +120,7 @@ lock_detect(lt, flags, atype) if (killid == BAD_KILLID) { __db_err(dbenv, - "warning: could not find %s", - "locker to abort"); + "warning: could not find locker to abort"); break; } @@ -137,11 +138,8 @@ lock_detect(lt, flags, atype) /* * We are trying to calculate the id of the * locker whose entry is indicated by deadlock. 
- * We know that this is less than nlockers, so - * the cast below is valid. */ - killid = - (u_int32_t)((deadlock - bitmap) / nentries); + killid = (deadlock - bitmap) / nentries; break; case DB_LOCK_YOUNGEST: /* @@ -155,8 +153,7 @@ lock_detect(lt, flags, atype) if (killid == BAD_KILLID) { __db_err(dbenv, - "warning: could not find %s", - "locker to abort"); + "warning: could not find locker to abort"); break; } /* @@ -184,8 +181,8 @@ lock_detect(lt, flags, atype) "warning: unable to abort locker %lx", (u_long)idmap[killid].id); } - free(bitmap); - free(idmap); + __db_free(bitmap); + __db_free(idmap); return (ret); } @@ -197,15 +194,15 @@ lock_detect(lt, flags, atype) static int __dd_build(dbenv, bmp, nlockers, idmap) DB_ENV *dbenv; - u_int32_t **bmp; - int *nlockers; + u_int32_t **bmp, *nlockers; locker_info **idmap; { - DB_LOCKTAB *lt; - DB_LOCKOBJ *op, *lockerp; struct __db_lock *lp; - u_int32_t *bitmap, count, *entryp, i, id, nentries, *tmpmap; + DB_LOCKTAB *lt; + DB_LOCKOBJ *op, *lo, *lockerp; + u_int8_t *pptr; locker_info *id_array; + u_int32_t *bitmap, count, *entryp, i, id, nentries, *tmpmap; int is_first, ret; lt = dbenv->lk_info; @@ -238,24 +235,24 @@ retry: count = lt->region->nlockers; * We can probably save the malloc's between iterations just * reallocing if necessary because count grew by too much. 
*/ - if ((bitmap = (u_int32_t *)calloc((size_t)count, + if ((bitmap = (u_int32_t *)__db_calloc((size_t)count, sizeof(u_int32_t) * nentries)) == NULL) { __db_err(dbenv, "%s", strerror(ENOMEM)); return (ENOMEM); } if ((tmpmap = - (u_int32_t *)calloc(sizeof(u_int32_t), nentries)) == NULL) { + (u_int32_t *)__db_calloc(sizeof(u_int32_t), nentries)) == NULL) { __db_err(dbenv, "%s", strerror(ENOMEM)); - free(bitmap); + __db_free(bitmap); return (ENOMEM); } - if ((id_array = (locker_info *)calloc((size_t)count, + if ((id_array = (locker_info *)__db_calloc((size_t)count, sizeof(locker_info))) == NULL) { __db_err(dbenv, "%s", strerror(ENOMEM)); - free(bitmap); - free(tmpmap); + __db_free(bitmap); + __db_free(tmpmap); return (ENOMEM); } @@ -264,9 +261,9 @@ retry: count = lt->region->nlockers; */ LOCK_LOCKREGION(lt); if (lt->region->nlockers > count) { - free(bitmap); - free(tmpmap); - free(id_array); + __db_free(bitmap); + __db_free(tmpmap); + __db_free(id_array); goto retry; } @@ -326,9 +323,8 @@ retry: count = lt->region->nlockers; lp != NULL; is_first = 0, lp = SH_TAILQ_NEXT(lp, links, __db_lock)) { - if ((ret = __lock_getobj(lt, - lp->holder, NULL, DB_LOCK_LOCKER, &lockerp)) - != 0) { + if ((ret = __lock_getobj(lt, lp->holder, + NULL, DB_LOCK_LOCKER, &lockerp)) != 0) { __db_err(dbenv, "warning unable to find object"); continue; @@ -369,8 +365,16 @@ retry: count = lt->region->nlockers; continue; } lp = SH_LIST_FIRST(&lockerp->heldby, __db_lock); - if (lp != NULL) + if (lp != NULL) { id_array[id].last_lock = LOCK_TO_OFFSET(lt, lp); + lo = (DB_LOCKOBJ *)((u_int8_t *)lp + lp->obj); + pptr = SH_DBT_PTR(&lo->lockobj); + if (lo->lockobj.size >= sizeof(db_pgno_t)) + memcpy(&id_array[id].pgno, pptr, + sizeof(db_pgno_t)); + else + id_array[id].pgno = 0; + } } /* Pass complete, reset the deadlock detector bit. 
*/ @@ -384,21 +388,20 @@ retry: count = lt->region->nlockers; *nlockers = id; *idmap = id_array; *bmp = bitmap; - free(tmpmap); + __db_free(tmpmap); return (0); } static u_int32_t * __dd_find(bmp, idmap, nlockers) - u_int32_t *bmp; + u_int32_t *bmp, nlockers; locker_info *idmap; - u_int32_t nlockers; { u_int32_t i, j, nentries, *mymap, *tmpmap; /* - * For each locker, or in the bits from the lockers - * on which that locker is waiting. + * For each locker, OR in the bits from the lockers on which that + * locker is waiting. */ nentries = ALIGN(nlockers, 32) / 32; for (mymap = bmp, i = 0; i < nlockers; i++, mymap += nentries) { @@ -422,9 +425,9 @@ __dd_abort(dbenv, info) DB_ENV *dbenv; locker_info *info; { + struct __db_lock *lockp; DB_LOCKTAB *lt; DB_LOCKOBJ *lockerp, *sh_obj; - struct __db_lock *lockp; int ret; lt = dbenv->lk_info; @@ -459,19 +462,17 @@ static void __dd_debug(dbenv, idmap, bitmap, nlockers) DB_ENV *dbenv; locker_info *idmap; - u_int32_t *bitmap; - int nlockers; + u_int32_t *bitmap, nlockers; { - u_int32_t *mymap; - int i, j, nentries; + u_int32_t i, j, *mymap, nentries; char *msgbuf; __db_err(dbenv, "Waitsfor array"); __db_err(dbenv, "waiter\twaiting on"); /* - * Alloc space to print 10 bytes per item waited on. + * Allocate space to print 10 bytes per item waited on. */ - if ((msgbuf = (char *)malloc((nlockers + 1) * 10 + 64)) == NULL) { + if ((msgbuf = (char *)__db_malloc((nlockers + 1) * 10 + 64)) == NULL) { __db_err(dbenv, "%s", strerror(ENOMEM)); return; } @@ -480,7 +481,8 @@ __dd_debug(dbenv, idmap, bitmap, nlockers) for (mymap = bitmap, i = 0; i < nlockers; i++, mymap += nentries) { if (!idmap[i].valid) continue; - sprintf(msgbuf, "%lx\t\t", (u_long)idmap[i].id);/* Waiter. */ + sprintf(msgbuf, /* Waiter. 
*/ + "%lx/%lu:\t", (u_long)idmap[i].id, (u_long)idmap[i].pgno); for (j = 0; j < nlockers; j++) if (ISSET_MAP(mymap, j)) sprintf(msgbuf, "%s %lx", msgbuf, @@ -490,6 +492,6 @@ __dd_debug(dbenv, idmap, bitmap, nlockers) __db_err(dbenv, msgbuf); } - free(msgbuf); + __db_free(msgbuf); } #endif diff --git a/db2/log/log.c b/db2/log/log.c index 893c1ee402..17681f8e0f 100644 --- a/db2/log/log.c +++ b/db2/log/log.c @@ -7,7 +7,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)log.c 10.27 (Sleepycat) 9/23/97"; +static const char sccsid[] = "@(#)log.c 10.33 (Sleepycat) 11/2/97"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -67,11 +67,11 @@ log_open(path, flags, mode, dbenv, lpp) } /* Create and initialize the DB_LOG structure. */ - if ((dblp = (DB_LOG *)calloc(1, sizeof(DB_LOG))) == NULL) + if ((dblp = (DB_LOG *)__db_calloc(1, sizeof(DB_LOG))) == NULL) return (ENOMEM); - if (path != NULL && (dblp->dir = strdup(path)) == NULL) { - free(dblp); + if (path != NULL && (dblp->dir = __db_strdup(path)) == NULL) { + __db_free(dblp); return (ENOMEM); } @@ -329,10 +329,12 @@ __log_find(dblp, valp) } /* Get the list of file names. */ - ret = __db_dir(dblp->dbenv, dir, &names, &fcnt); + ret = __db_dirlist(dir, &names, &fcnt); FREES(p); - if (ret != 0) + if (ret != 0) { + __db_err(dblp->dbenv, "%s: %s", dir, strerror(ret)); return (ret); + } /* * Search for a valid log file name, return a value of 0 on @@ -350,7 +352,7 @@ __log_find(dblp, valp) } /* Discard the list. 
*/ - __db_dirf(dblp->dbenv, names, fcnt); + __db_dirfree(names, fcnt); return (ret); } @@ -376,10 +378,10 @@ __log_valid(dblp, lp, cnt) return (ret); fd = -1; - if ((ret = __db_fdopen(p, + if ((ret = __db_open(p, DB_RDONLY | DB_SEQUENTIAL, DB_RDONLY | DB_SEQUENTIAL, 0, &fd)) != 0 || - (ret = __db_lseek(fd, 0, 0, sizeof(HDR), SEEK_SET)) != 0 || + (ret = __db_seek(fd, 0, 0, sizeof(HDR), SEEK_SET)) != 0 || (ret = __db_read(fd, &persist, sizeof(LOGP), &nw)) != 0 || nw != sizeof(LOGP)) { if (ret == 0) @@ -474,3 +476,39 @@ log_unlink(path, force, dbenv) return (__db_runlink(dbenv, DB_APP_LOG, path, DB_DEFAULT_LOG_FILE, force)); } + +/* + * log_stat -- + * Return LOG statistics. + */ +int +log_stat(dblp, gspp, db_malloc) + DB_LOG *dblp; + DB_LOG_STAT **gspp; + void *(*db_malloc) __P((size_t)); +{ + LOG *lp; + + *gspp = NULL; + lp = dblp->lp; + + if ((*gspp = db_malloc == NULL ? + (DB_LOG_STAT *)__db_malloc(sizeof(**gspp)) : + (DB_LOG_STAT *)db_malloc(sizeof(**gspp))) == NULL) + return (ENOMEM); + + /* Copy out the global statistics. */ + LOCK_LOGREGION(dblp); + **gspp = lp->stat; + + (*gspp)->st_magic = lp->persist.magic; + (*gspp)->st_version = lp->persist.version; + (*gspp)->st_mode = lp->persist.mode; + (*gspp)->st_lg_max = lp->persist.lg_max; + + (*gspp)->st_region_nowait = lp->rlayout.lock.mutex_set_nowait; + (*gspp)->st_region_wait = lp->rlayout.lock.mutex_set_wait; + UNLOCK_LOGREGION(dblp); + + return (0); +} diff --git a/db2/log/log_archive.c b/db2/log/log_archive.c index 6904a2c726..140ea31fd1 100644 --- a/db2/log/log_archive.c +++ b/db2/log/log_archive.c @@ -8,7 +8,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)log_archive.c 10.26 (Sleepycat) 9/23/97"; +static const char sccsid[] = "@(#)log_archive.c 10.28 (Sleepycat) 10/28/97"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -68,7 +68,7 @@ log_archive(dblp, listp, flags, db_malloc) * but that's just not possible. 
*/ if (LF_ISSET(DB_ARCH_ABS)) { - __set_errno (0); + errno = 0; if ((pref = getcwd(buf, sizeof(buf))) == NULL) return (errno == 0 ? ENOMEM : errno); } else @@ -84,7 +84,7 @@ log_archive(dblp, listp, flags, db_malloc) if ((ret = log_get(dblp, &stable_lsn, &rec, DB_LAST)) != 0) return (ret); if (F_ISSET(dblp, DB_AM_THREAD)) - free(rec.data); + __db_free(rec.data); fnum = stable_lsn.file; break; case 0: @@ -102,7 +102,7 @@ log_archive(dblp, listp, flags, db_malloc) #define LIST_INCREMENT 64 /* Get some initial space. */ if ((array = - (char **)malloc(sizeof(char *) * (array_size = 10))) == NULL) + (char **)__db_malloc(sizeof(char *) * (array_size = 10))) == NULL) return (ENOMEM); array[0] = NULL; @@ -115,7 +115,7 @@ log_archive(dblp, listp, flags, db_malloc) if (n >= array_size - 1) { array_size += LIST_INCREMENT; - if ((array = (char **)realloc(array, + if ((array = (char **)__db_realloc(array, sizeof(char *) * array_size)) == NULL) { ret = ENOMEM; goto err; @@ -127,7 +127,7 @@ log_archive(dblp, listp, flags, db_malloc) goto err; FREES(name); } else if ((p = __db_rpath(name)) != NULL) { - if ((array[n] = (char *)strdup(p + 1)) == NULL) { + if ((array[n] = (char *)__db_strdup(p + 1)) == NULL) { ret = ENOMEM; goto err; } @@ -158,7 +158,7 @@ log_archive(dblp, listp, flags, db_malloc) err: if (array != NULL) { for (arrayp = array; *arrayp != NULL; ++arrayp) FREES(*arrayp); - free(array); + __db_free(array); } return (ret); } @@ -182,7 +182,7 @@ __build_data(dblp, pref, listp, db_malloc) /* Get some initial space. 
*/ if ((array = - (char **)malloc(sizeof(char *) * (array_size = 10))) == NULL) + (char **)__db_malloc(sizeof(char *) * (array_size = 10))) == NULL) return (ENOMEM); array[0] = NULL; @@ -200,7 +200,7 @@ __build_data(dblp, pref, listp, db_malloc) memcpy(&rectype, rec.data, sizeof(rectype)); if (rectype != DB_log_register) { if (F_ISSET(dblp, DB_AM_THREAD)) { - free(rec.data); + __db_free(rec.data); rec.data = NULL; } continue; @@ -214,25 +214,25 @@ __build_data(dblp, pref, listp, db_malloc) if (n >= array_size - 1) { array_size += LIST_INCREMENT; - if ((array = (char **)realloc(array, + if ((array = (char **)__db_realloc(array, sizeof(char *) * array_size)) == NULL) { ret = ENOMEM; goto lg_free; } } - if ((array[n] = (char *)strdup(argp->name.data)) == NULL) { + if ((array[n] = (char *)__db_strdup(argp->name.data)) == NULL) { ret = ENOMEM; lg_free: if (F_ISSET(&rec, DB_DBT_MALLOC) && rec.data != NULL) - free(rec.data); + __db_free(rec.data); goto err1; } array[++n] = NULL; - free(argp); + __db_free(argp); if (F_ISSET(dblp, DB_AM_THREAD)) { - free(rec.data); + __db_free(rec.data); rec.data = NULL; } } @@ -289,7 +289,7 @@ lg_free: if (F_ISSET(&rec, DB_DBT_MALLOC) && rec.data != NULL) if (ret != 0) goto err2; } else if ((p = __db_rpath(real_name)) != NULL) { - array[last] = (char *)strdup(p + 1); + array[last] = (char *)__db_strdup(p + 1); FREES(real_name); if (array[last] == NULL) goto err2; @@ -321,7 +321,7 @@ err2: /* err1: if (array != NULL) { for (arrayp = array; *arrayp != NULL; ++arrayp) FREES(*arrayp); - free(array); + __db_free(array); } return (ret); } @@ -341,7 +341,7 @@ __absname(pref, name, newnamep) l_name = strlen(name); /* Malloc space for concatenating the two. */ - if ((newname = (char *)malloc(l_pref + l_name + 2)) == NULL) + if ((newname = (char *)__db_malloc(l_pref + l_name + 2)) == NULL) return (ENOMEM); /* Build the name. */ @@ -379,7 +379,7 @@ __usermem(listp, func) * Don't simplify this expression, SunOS compilers don't like it. 
*/ if (func == NULL) - array = (char **)malloc(len); + array = (char **)__db_malloc(len); else array = (char **)func(len); if (array == NULL) @@ -399,7 +399,7 @@ __usermem(listp, func) /* NULL-terminate the list. */ *arrayp = NULL; - free(*listp); + __db_free(*listp); *listp = array; return (0); diff --git a/db2/log/log_auto.c b/db2/log/log_auto.c index ea88a7bff9..d5dbfe1f5f 100644 --- a/db2/log/log_auto.c +++ b/db2/log/log_auto.c @@ -53,7 +53,7 @@ int __log_register_log(logp, txnid, ret_lsnp, flags, + sizeof(u_int32_t) + (uid == NULL ? 0 : uid->size) + sizeof(id) + sizeof(ftype); - if ((logrec.data = (void *)malloc(logrec.size)) == NULL) + if ((logrec.data = (void *)__db_malloc(logrec.size)) == NULL) return (ENOMEM); bp = logrec.data; @@ -94,7 +94,7 @@ int __log_register_log(logp, txnid, ret_lsnp, flags, ret = __log_put(logp, ret_lsnp, (DBT *)&logrec, flags); if (txnid != NULL) txnid->last_lsn = *ret_lsnp; - free(logrec.data); + __db_free(logrec.data); return (ret); } @@ -151,7 +151,7 @@ __log_register_print(notused1, dbtp, lsnp, notused3, notused4) printf("\tid: %lu\n", (u_long)argp->id); printf("\tftype: 0x%lx\n", (u_long)argp->ftype); printf("\n"); - free(argp); + __db_free(argp); return (0); } @@ -166,7 +166,7 @@ __log_register_read(recbuf, argpp) __log_register_args *argp; u_int8_t *bp; - argp = (__log_register_args *)malloc(sizeof(__log_register_args) + + argp = (__log_register_args *)__db_malloc(sizeof(__log_register_args) + sizeof(DB_TXN)); if (argp == NULL) return (ENOMEM); @@ -223,7 +223,7 @@ int __log_unregister_log(logp, txnid, ret_lsnp, flags, lsnp = &txnid->last_lsn; logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN) + sizeof(id); - if ((logrec.data = (void *)malloc(logrec.size)) == NULL) + if ((logrec.data = (void *)__db_malloc(logrec.size)) == NULL) return (ENOMEM); bp = logrec.data; @@ -242,7 +242,7 @@ int __log_unregister_log(logp, txnid, ret_lsnp, flags, ret = __log_put(logp, ret_lsnp, (DBT *)&logrec, flags); if (txnid != NULL) 
txnid->last_lsn = *ret_lsnp; - free(logrec.data); + __db_free(logrec.data); return (ret); } @@ -280,7 +280,7 @@ __log_unregister_print(notused1, dbtp, lsnp, notused3, notused4) (u_long)argp->prev_lsn.offset); printf("\tid: %lu\n", (u_long)argp->id); printf("\n"); - free(argp); + __db_free(argp); return (0); } @@ -295,7 +295,7 @@ __log_unregister_read(recbuf, argpp) __log_unregister_args *argp; u_int8_t *bp; - argp = (__log_unregister_args *)malloc(sizeof(__log_unregister_args) + + argp = (__log_unregister_args *)__db_malloc(sizeof(__log_unregister_args) + sizeof(DB_TXN)); if (argp == NULL) return (ENOMEM); diff --git a/db2/log/log_findckp.c b/db2/log/log_findckp.c index df75e20e03..115a00e8aa 100644 --- a/db2/log/log_findckp.c +++ b/db2/log/log_findckp.c @@ -8,7 +8,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)log_findckp.c 10.11 (Sleepycat) 8/27/97"; +static const char sccsid[] = "@(#)log_findckp.c 10.12 (Sleepycat) 10/25/97"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -77,13 +77,13 @@ __log_findckp(lp, lsnp) next_lsn = last_ckp; do { if (F_ISSET(lp, DB_AM_THREAD)) - free(data.data); + __db_free(data.data); if ((ret = log_get(lp, &next_lsn, &data, DB_SET)) != 0) return (ret); if ((ret = __txn_ckp_read(data.data, &ckp_args)) != 0) { if (F_ISSET(lp, DB_AM_THREAD)) - free(data.data); + __db_free(data.data); return (ret); } if (IS_ZERO_LSN(ckp_lsn)) @@ -100,12 +100,12 @@ __log_findckp(lp, lsnp) } last_ckp = next_lsn; next_lsn = ckp_args->last_ckp; - free(ckp_args); + __db_free(ckp_args); } while (!IS_ZERO_LSN(next_lsn) && log_compare(&last_ckp, &ckp_lsn) > 0); if (F_ISSET(lp, DB_AM_THREAD)) - free(data.data); + __db_free(data.data); /* * At this point, either, next_lsn is ZERO or ckp_lsn is the @@ -118,7 +118,7 @@ __log_findckp(lp, lsnp) if ((ret = log_get(lp, &last_ckp, &data, DB_FIRST)) != 0) return (ret); if (F_ISSET(lp, DB_AM_THREAD)) - free(data.data); + __db_free(data.data); } *lsnp = last_ckp; diff --git a/db2/log/log_get.c 
b/db2/log/log_get.c index 3f6df6c33c..ed35d57f82 100644 --- a/db2/log/log_get.c +++ b/db2/log/log_get.c @@ -7,7 +7,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)log_get.c 10.19 (Sleepycat) 9/23/97"; +static const char sccsid[] = "@(#)log_get.c 10.21 (Sleepycat) 10/25/97"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -170,7 +170,8 @@ __log_get(dblp, alsn, dbt, flags, silent) if (!IS_ZERO_LSN(nlsn)) { /* If at start-of-file, move to the previous file. */ if (nlsn.offset == 0) { - if (nlsn.file == 1) + if (nlsn.file == 1 || + __log_valid(dblp, NULL, nlsn.file - 1) != 0) return (DB_NOTFOUND); --nlsn.file; @@ -215,27 +216,21 @@ retry: goto cksum; } - /* - * Move the file descriptor to the page that has the hdr. We dealt - * with moving to a previous log file in the flags switch code, but - * we don't yet know if we'll need to move to a subsequent file. - * - * Acquire a file descriptor. - */ + /* Acquire a file descriptor. */ if (dblp->c_fd == -1) { if ((ret = __log_name(dblp, nlsn.file, &np)) != 0) goto err1; - if ((ret = __db_fdopen(np, DB_RDONLY | DB_SEQUENTIAL, + if ((ret = __db_open(np, DB_RDONLY | DB_SEQUENTIAL, DB_RDONLY | DB_SEQUENTIAL, 0, &dblp->c_fd)) != 0) { fail = np; goto err1; } - free(np); + __db_free(np); np = NULL; } /* Seek to the header offset and read the header. */ - if ((ret = __db_lseek(dblp->c_fd, 0, 0, nlsn.offset, SEEK_SET)) != 0) { + if ((ret = __db_seek(dblp->c_fd, 0, 0, nlsn.offset, SEEK_SET)) != 0) { fail = "seek"; goto err1; } @@ -289,7 +284,7 @@ retry: } /* Allocate temporary memory to hold the record. 
*/ - if ((tbuf = (char *)malloc(len)) == NULL) { + if ((tbuf = (char *)__db_malloc(len)) == NULL) { ret = ENOMEM; goto err1; } @@ -318,7 +313,7 @@ retry: if ((ret = __db_retcopy(dbt, tbuf, len, &dblp->c_dbt.data, &dblp->c_dbt.ulen, NULL)) != 0) goto err1; - free(tbuf); + __db_free(tbuf); tbuf = NULL; cksum: if (hdr.cksum != __ham_func4(dbt->data, dbt->size)) { @@ -349,8 +344,8 @@ err1: if (!silent) __db_err(dblp->dbenv, "log_get: %s: %s", fail, strerror(ret)); err2: if (np != NULL) - free(np); + __db_free(np); if (tbuf != NULL) - free(tbuf); + __db_free(tbuf); return (ret); } diff --git a/db2/log/log_put.c b/db2/log/log_put.c index 225595f33e..92d9563301 100644 --- a/db2/log/log_put.c +++ b/db2/log/log_put.c @@ -7,7 +7,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)log_put.c 10.14 (Sleepycat) 9/23/97"; +static const char sccsid[] = "@(#)log_put.c 10.20 (Sleepycat) 11/2/97"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -29,9 +29,10 @@ static const char sccsid[] = "@(#)log_put.c 10.14 (Sleepycat) 9/23/97"; #include "common_ext.h" static int __log_fill __P((DB_LOG *, void *, u_int32_t)); +static int __log_flush __P((DB_LOG *, const DB_LSN *)); static int __log_newfd __P((DB_LOG *)); -static int __log_write __P((DB_LOG *, void *, u_int32_t)); static int __log_putr __P((DB_LOG *, const DBT *, u_int32_t)); +static int __log_write __P((DB_LOG *, void *, u_int32_t)); /* * log_put -- @@ -63,11 +64,8 @@ log_put(dblp, lsn, dbt, flags) } LOCK_LOGREGION(dblp); - ret = __log_put(dblp, lsn, dbt, flags); - UNLOCK_LOGREGION(dblp); - return (ret); } @@ -102,14 +100,10 @@ __log_put(dblp, lsn, dbt, flags) "log_put: record larger than maximum file size"); return (EINVAL); } - if (lp->b_off != 0) { - if ((ret = __log_write(dblp, lp->buf, lp->b_off)) != 0) - return (ret); - if ((ret = __db_fsync(dblp->lfd)) != 0) - return (ret); - lp->s_lsn.file = lp->lsn.file; - lp->s_lsn.offset = lp->lsn.offset - 1; - } + + /* Flush the log. 
*/ + if ((ret = __log_flush(dblp, NULL)) != 0) + return (ret); /* * Save the last known offset from the previous file, we'll @@ -117,9 +111,15 @@ __log_put(dblp, lsn, dbt, flags) */ lastoff = lp->lsn.offset; + /* Point the current LSN to the new file. */ ++lp->lsn.file; lp->lsn.offset = 0; + + /* Reset the file write offset. */ lp->w_off = 0; + + /* Reset the first-unwritten LSN for the buffer. */ + lp->uw_lsn = lp->lsn; } else lastoff = 0; @@ -149,56 +149,54 @@ __log_put(dblp, lsn, dbt, flags) * Put out the checkpoint record (above). * Save the LSN of the checkpoint in the shared region. * Append the set of file name information into the log. - * Flush the current buffer contents to disk. - * Sync the log to disk. - * Save the time the checkpoint was written. - * Reset the bytes written since the last checkpoint. */ if (flags == DB_CHECKPOINT) { lp->c_lsn = *lsn; for (fnp = SH_TAILQ_FIRST(&dblp->lp->fq, __fname); fnp != NULL; fnp = SH_TAILQ_NEXT(fnp, q, __fname)) { - t.data = ADDR(dblp, fnp->name_off); + t.data = R_ADDR(dblp, fnp->name_off); t.size = strlen(t.data) + 1; memset(&fid_dbt, 0, sizeof(fid_dbt)); - fid_dbt.data = ADDR(dblp, fnp->fileid_off); + fid_dbt.data = R_ADDR(dblp, fnp->fileid_off); fid_dbt.size = DB_FILE_ID_LEN; if ((ret = __log_register_log(dblp, NULL, &r_unused, 0, &t, &fid_dbt, fnp->id, fnp->s_type)) != 0) return (ret); } - if (lp->b_off != 0 && - (ret = __log_write(dblp, lp->buf, lp->b_off)) != 0) - return (ret); - (void)time(&lp->chkpt); - lp->written = 0; - - if ((ret = __db_fsync(dblp->lfd)) != 0) - return (ret); - lp->s_lsn.file = lp->lsn.file; - lp->s_lsn.offset = lp->lsn.offset - 1; } - /* We always flush on a checkpoint. */ - if (flags == DB_FLUSH || flags == DB_CHECKPOINT) { - if (lp->b_off != 0 && - (ret = __log_write(dblp, lp->buf, lp->b_off)) != 0) + /* + * On a checkpoint or when flush is requested, we: + * Flush the current buffer contents to disk. + * Sync the log to disk. 
+ */ + if (flags == DB_FLUSH || flags == DB_CHECKPOINT) + if ((ret = __log_flush(dblp, NULL)) != 0) return (ret); - if ((ret = __db_fsync(dblp->lfd)) != 0) - return (ret); - lp->s_lsn.file = lp->lsn.file; - lp->s_lsn.offset = lp->lsn.offset - 1; + /* + * On a checkpoint, we: + * Save the time the checkpoint was written. + * Reset the bytes written since the last checkpoint. + */ + if (flags == DB_CHECKPOINT) { + (void)time(&lp->chkpt); + lp->stat.st_wc_bytes = lp->stat.st_wc_mbytes = 0; } /* - * If we just did I/O, i.e., this LSN could have spanned the start of - * the in-core buffer, we remember it so that we can flush correctly - * during a sync. + * When an application calls the log_flush routine, we need to figure + * out if the current buffer needs to be flushed. The problem is that + * if a record spans buffers, it's possible for the record continued + * in the current buffer to have begun in a previous buffer. Each time + * we write a buffer, we update the first-unwritten LSN to point to the + * first LSN after that written buffer. If we have a spanning record, + * correct that value to be the LSN that started it all, here. */ if (lsn->offset < lp->w_off && lsn->offset + lp->len > lp->w_off) - lp->span_lsn = *lsn; + lp->uw_lsn = *lsn; + return (0); } @@ -248,6 +246,24 @@ log_flush(dblp, lsn) DB_LOG *dblp; const DB_LSN *lsn; { + int ret; + + LOCK_LOGREGION(dblp); + ret = __log_flush(dblp, lsn); + UNLOCK_LOGREGION(dblp); + return (ret); +} + +/* + * __log_flush -- + * Write all records less than or equal to the specified LSN; internal + * version. + */ +static int +__log_flush(dblp, lsn) + DB_LOG *dblp; + const DB_LSN *lsn; +{ DB_LSN t_lsn; LOG *lp; int ret; @@ -255,60 +271,64 @@ log_flush(dblp, lsn) ret = 0; lp = dblp->lp; - LOCK_LOGREGION(dblp); - - /* If no LSN specified, flush the entire log. */ + /* + * If no LSN specified, flush the entire log by setting the flush LSN + * to the last LSN written in the log. 
Otherwise, check that the LSN + * isn't a non-existent record for the log. + */ if (lsn == NULL) { t_lsn.file = lp->lsn.file; t_lsn.offset = lp->lsn.offset - lp->len; lsn = &t_lsn; - } - - /* If it's a non-existent record, it's an error. */ - if (lsn->file > lp->lsn.file || - (lsn->file == lp->lsn.file && lsn->offset > lp->lsn.offset)) { - __db_err(dblp->dbenv, "log_flush: LSN past current end-of-log"); - ret = EINVAL; - goto ret1; - } - - /* - * If it's from a previous file, we're done because we sync each - * file when we move to a new one. - */ - if (lsn->file < lp->lsn.file) - goto ret1; + } else + if (lsn->file > lp->lsn.file || + (lsn->file == lp->lsn.file && + lsn->offset > lp->lsn.offset - lp->len)) { + __db_err(dblp->dbenv, + "log_flush: LSN past current end-of-log"); + return (EINVAL); + } /* - * If it's less than the last-sync'd offset, we've already sync'd - * this LSN. + * If the LSN is less than the last-sync'd LSN, we're done. Note, + * the last-sync LSN saved in s_lsn is the LSN of the first byte + * that has not yet been written to disk, so the test is <, not <=. */ - if (lsn->offset <= lp->s_lsn.offset) - goto ret1; + if (lsn->file < lp->s_lsn.file || + (lsn->file == lp->s_lsn.file && lsn->offset < lp->s_lsn.offset)) + return (0); /* * We may need to write the current buffer. We have to write the - * current buffer if the sync LSN is greater than or equal to the - * saved spanning-LSN. + * current buffer if the flush LSN is greater than or equal to the + * first-unwritten LSN (uw_lsn). If we write the buffer, then we + * update the first-unwritten LSN. */ - if (lsn->file >= lp->span_lsn.file && - lsn->offset >= lp->span_lsn.offset) + if (lp->b_off != 0 && + lsn->file >= lp->uw_lsn.file && lsn->offset >= lp->uw_lsn.offset) if ((ret = __log_write(dblp, lp->buf, lp->b_off)) != 0) - goto ret1; + return (ret); - /* Acquire a file descriptor if we don't have one. 
*/ - if (dblp->lfname != dblp->lp->lsn.file && - (ret = __log_newfd(dblp)) != 0) - goto ret1; + /* + * It's possible that this thread may never have written to this log + * file. Acquire a file descriptor if we don't already have one. + */ + if (dblp->lfname != dblp->lp->lsn.file) + if ((ret = __log_newfd(dblp)) != 0) + return (ret); + /* Sync all writes to disk. */ if ((ret = __db_fsync(dblp->lfd)) != 0) - goto ret1; + return (ret); + ++lp->stat.st_scount; - lp->s_lsn.file = lp->lsn.file; - lp->s_lsn.offset = lsn->offset; + /* + * Set the last-synced LSN, the first LSN after the last record + * that we know is on disk. + */ + lp->s_lsn = lp->uw_lsn; -ret1: UNLOCK_LOGREGION(dblp); - return (ret); + return (0); } /* @@ -385,17 +405,32 @@ __log_write(dblp, addr, len) * Seek to the offset in the file (someone may have written it * since we last did). */ - if ((ret = __db_lseek(dblp->lfd, 0, 0, lp->w_off, SEEK_SET)) != 0) + if ((ret = __db_seek(dblp->lfd, 0, 0, lp->w_off, SEEK_SET)) != 0) return (ret); if ((ret = __db_write(dblp->lfd, addr, len, &nw)) != 0) return (ret); if (nw != (int32_t)len) return (EIO); - /* Update the seek offset and reset the buffer offset. */ + /* + * Reset the buffer offset, update the seek offset, and update the + * first-unwritten LSN. + */ lp->b_off = 0; lp->w_off += len; - lp->written += len; + lp->uw_lsn.file = lp->lsn.file; + lp->uw_lsn.offset = lp->w_off; + + /* Update written statistics. 
*/ + if ((lp->stat.st_w_bytes += len) >= MEGABYTE) { + lp->stat.st_w_bytes -= MEGABYTE; + ++lp->stat.st_w_mbytes; + } + if ((lp->stat.st_wc_bytes += len) >= MEGABYTE) { + lp->stat.st_wc_bytes -= MEGABYTE; + ++lp->stat.st_wc_mbytes; + } + ++lp->stat.st_wcount; return (0); } @@ -415,11 +450,8 @@ log_file(dblp, lsn, namep, len) char *p; LOCK_LOGREGION(dblp); - ret = __log_name(dblp, lsn->file, &p); - UNLOCK_LOGREGION(dblp); - if (ret != 0) return (ret); @@ -429,7 +461,7 @@ log_file(dblp, lsn, namep, len) return (ENOMEM); } (void)strcpy(namep, p); - free(p); + __db_free(p); return (0); } @@ -455,7 +487,7 @@ __log_newfd(dblp) dblp->lfname = dblp->lp->lsn.file; if ((ret = __log_name(dblp, dblp->lfname, &p)) != 0) return (ret); - if ((ret = __db_fdopen(p, + if ((ret = __db_open(p, DB_CREATE | DB_SEQUENTIAL, DB_CREATE | DB_SEQUENTIAL, dblp->lp->persist.mode, &dblp->lfd)) != 0) @@ -472,14 +504,14 @@ __log_newfd(dblp) * PUBLIC: int __log_name __P((DB_LOG *, int, char **)); */ int -__log_name(dblp, fileno, namep) +__log_name(dblp, filenumber, namep) DB_LOG *dblp; char **namep; - int fileno; + int filenumber; { char name[sizeof(LFNAME) + 10]; - (void)snprintf(name, sizeof(name), LFNAME, fileno); + (void)snprintf(name, sizeof(name), LFNAME, filenumber); return (__db_appname(dblp->dbenv, DB_APP_LOG, dblp->dir, name, NULL, namep)); } diff --git a/db2/log/log_rec.c b/db2/log/log_rec.c index f49a7f16ea..92b8203990 100644 --- a/db2/log/log_rec.c +++ b/db2/log/log_rec.c @@ -40,7 +40,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)log_rec.c 10.13 (Sleepycat) 8/27/97"; +static const char sccsid[] = "@(#)log_rec.c 10.14 (Sleepycat) 10/25/97"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -99,7 +99,7 @@ __log_register_recover(logp, dbtp, lsnp, redo, info) out: F_CLR(logp, DB_AM_RECOVER); if (argp != NULL) - free(argp); + __db_free(argp); return (ret); } @@ -150,7 +150,7 @@ __log_unregister_recover(logp, dbtp, lsnp, redo, info) out: F_CLR(logp, 
DB_AM_RECOVER); if (argp != NULL) - free(argp); + __db_free(argp); return (ret); } @@ -227,14 +227,14 @@ __log_add_logid(logp, dbp, ndx) */ if (logp->dbentry_cnt <= ndx) { if (logp->dbentry_cnt == 0) { - logp->dbentry = - (DB_ENTRY *)malloc(DB_GROW_SIZE * sizeof(DB_ENTRY)); + logp->dbentry = (DB_ENTRY *) + __db_malloc(DB_GROW_SIZE * sizeof(DB_ENTRY)); if (logp->dbentry == NULL) { ret = ENOMEM; goto err; } } else { - temp_entryp = (DB_ENTRY *)realloc(logp->dbentry, + temp_entryp = (DB_ENTRY *)__db_realloc(logp->dbentry, (DB_GROW_SIZE + logp->dbentry_cnt) * sizeof(DB_ENTRY)); if (temp_entryp == NULL) { diff --git a/db2/log/log_register.c b/db2/log/log_register.c index 859b1e5bcb..2dab361616 100644 --- a/db2/log/log_register.c +++ b/db2/log/log_register.c @@ -7,7 +7,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)log_register.c 10.11 (Sleepycat) 9/15/97"; +static const char sccsid[] = "@(#)log_register.c 10.12 (Sleepycat) 9/29/97"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -72,7 +72,7 @@ log_register(dblp, dbp, name, type, idp) if (fid <= fnp->id) fid = fnp->id + 1; if (!memcmp(dbp->lock.fileid, - ADDR(dblp, fnp->fileid_off), DB_FILE_ID_LEN)) { + R_ADDR(dblp, fnp->fileid_off), DB_FILE_ID_LEN)) { ++fnp->ref; fid = fnp->id; if (!F_ISSET(dblp, DB_AM_RECOVER) && @@ -95,13 +95,13 @@ log_register(dblp, dbp, name, type, idp) * XXX Now that uids are fixed size, we can put them in the fnp * structure. */ - fnp->fileid_off = OFFSET(dblp, fidp); + fnp->fileid_off = R_OFFSET(dblp, fidp); memcpy(fidp, dbp->lock.fileid, DB_FILE_ID_LEN); len = strlen(name) + 1; if ((ret = __db_shalloc(dblp->addr, len, 0, &namep)) != 0) goto err; - fnp->name_off = OFFSET(dblp, namep); + fnp->name_off = R_OFFSET(dblp, namep); memcpy(namep, name, len); SH_TAILQ_INSERT_HEAD(&dblp->lp->fq, fnp, q, __fname); @@ -185,8 +185,8 @@ log_unregister(dblp, fid) } /* Free the unique file information, name and structure. 
*/ - __db_shalloc_free(dblp->addr, ADDR(dblp, fnp->fileid_off)); - __db_shalloc_free(dblp->addr, ADDR(dblp, fnp->name_off)); + __db_shalloc_free(dblp->addr, R_ADDR(dblp, fnp->fileid_off)); + __db_shalloc_free(dblp->addr, R_ADDR(dblp, fnp->name_off)); SH_TAILQ_REMOVE(&dblp->lp->fq, fnp, q, __fname); __db_shalloc_free(dblp->addr, fnp); diff --git a/db2/mp/mp_bh.c b/db2/mp/mp_bh.c index fb6bc96ae7..a707603eec 100644 --- a/db2/mp/mp_bh.c +++ b/db2/mp/mp_bh.c @@ -7,7 +7,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)mp_bh.c 10.16 (Sleepycat) 9/23/97"; +static const char sccsid[] = "@(#)mp_bh.c 10.21 (Sleepycat) 10/25/97"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -94,10 +94,10 @@ __memp_bhwrite(dbmp, mfp, bhp, restartp, wrotep) * files that we have previously tried (and failed) to open. */ dbt.size = mfp->pgcookie_len; - dbt.data = ADDR(dbmp, mfp->pgcookie_off); - if (__memp_fopen(dbmp, ADDR(dbmp, mfp->path_off), + dbt.data = R_ADDR(dbmp, mfp->pgcookie_off); + if (__memp_fopen(dbmp, R_ADDR(dbmp, mfp->path_off), mfp->ftype, 0, 0, mfp->stat.st_pagesize, - mfp->lsn_off, &dbt, ADDR(dbmp, mfp->fileid_off), 0, &dbmfp) != 0) + mfp->lsn_off, &dbt, R_ADDR(dbmp, mfp->fileid_off), 0, &dbmfp) != 0) return (0); found: return (__memp_pgwrite(dbmfp, bhp, restartp, wrotep)); @@ -137,7 +137,7 @@ __memp_pgread(dbmfp, bhp, can_create) ret = 0; LOCKHANDLE(dbmp, dbmfp->mutexp); if (dbmfp->fd == -1 || (ret = - __db_lseek(dbmfp->fd, pagesize, bhp->pgno, 0, SEEK_SET)) != 0) { + __db_seek(dbmfp->fd, pagesize, bhp->pgno, 0, SEEK_SET)) != 0) { if (!can_create) { if (dbmfp->fd == -1) ret = EINVAL; @@ -230,6 +230,7 @@ __memp_pgwrite(dbmfp, bhp, restartp, wrotep) dbmp = dbmfp->dbmp; dbenv = dbmp->dbenv; + mp = dbmp->mp; mfp = dbmfp->mfp; if (restartp != NULL) @@ -277,8 +278,7 @@ __memp_pgwrite(dbmfp, bhp, restartp, wrotep) } /* Write the page out. 
*/ - if ((ret = - __db_lseek(dbmfp->fd, pagesize, bhp->pgno, 0, SEEK_SET)) != 0) + if ((ret = __db_seek(dbmfp->fd, pagesize, bhp->pgno, 0, SEEK_SET)) != 0) fail = "seek"; else if ((ret = __db_write(dbmfp->fd, bhp->buf, pagesize, &nw)) != 0) fail = "write"; @@ -309,15 +309,23 @@ __memp_pgwrite(dbmfp, bhp, restartp, wrotep) /* Clean up the flags based on a successful write. */ F_SET(bhp, BH_CALLPGIN); F_CLR(bhp, BH_DIRTY | BH_LOCKED); + + ++mp->stat.st_page_clean; + --mp->stat.st_page_dirty; + UNLOCKBUFFER(dbmp, bhp); /* - * If we wrote a buffer which a checkpoint is waiting for, update + * If we write a buffer for which a checkpoint is waiting, update * the count of pending buffers (both in the mpool as a whole and * for this file). If the count for this file goes to zero, flush * the writes. * * XXX: + * Don't lock the region around the sync, fsync(2) has no atomicity + * issues. + * + * XXX: * We ignore errors from the sync -- it makes no sense to return an * error to the calling process, so set a flag causing the sync to * be retried later. @@ -325,21 +333,15 @@ __memp_pgwrite(dbmfp, bhp, restartp, wrotep) * If the buffer we wrote has a LSN larger than the current largest * we've written for this checkpoint, update the saved value. */ - mp = dbmp->mp; if (F_ISSET(bhp, BH_WRITE)) { if (log_compare(&lsn, &mp->lsn) > 0) mp->lsn = lsn; F_CLR(bhp, BH_WRITE); --mp->lsn_cnt; - if (--mfp->lsn_cnt == 0) { - /* - * Don't lock -- there are no atomicity issues for - * fsync(2). - */ - if (__db_fsync(dbmfp->fd) != 0) - F_SET(mp, MP_LSN_RETRY); - } + + if (--mfp->lsn_cnt == 0 && __db_fsync(dbmfp->fd) != 0) + F_SET(mp, MP_LSN_RETRY); } /* Update I/O statistics. 
*/ @@ -391,7 +393,7 @@ __memp_pg(dbmfp, bhp, is_pgin) dbtp = NULL; else { dbt.size = mfp->pgcookie_len; - dbt.data = ADDR(dbmp, mfp->pgcookie_off); + dbt.data = R_ADDR(dbmp, mfp->pgcookie_off); dbtp = &dbt; } UNLOCKHANDLE(dbmp, dbmp->mutexp); @@ -433,19 +435,21 @@ __memp_bhfree(dbmp, mfp, bhp, free_mem) { size_t off; - /* Delete the buffer header from the MPOOL hash list. */ - off = BUCKET(dbmp->mp, OFFSET(dbmp, mfp), bhp->pgno); - SH_TAILQ_REMOVE(&dbmp->htab[off], bhp, mq, __bh); + /* Delete the buffer header from the hash bucket queue. */ + off = BUCKET(dbmp->mp, R_OFFSET(dbmp, mfp), bhp->pgno); + SH_TAILQ_REMOVE(&dbmp->htab[off], bhp, hq, __bh); - /* Delete the buffer header from the LRU chain. */ + /* Delete the buffer header from the LRU queue. */ SH_TAILQ_REMOVE(&dbmp->mp->bhq, bhp, q, __bh); /* * If we're not reusing it immediately, free the buffer header * and data for real. */ - if (free_mem) + if (free_mem) { __db_shalloc_free(dbmp->addr, bhp); + --dbmp->mp->stat.st_page_clean; + } } /* @@ -474,13 +478,13 @@ __memp_upgrade(dbmp, dbmfp, mfp) return (1); /* Try the open. */ - if (__db_fdopen(ADDR(dbmp, mfp->path_off), 0, 0, 0, &fd) != 0) { + if (__db_open(R_ADDR(dbmp, mfp->path_off), 0, 0, 0, &fd) != 0) { F_SET(dbmfp, MP_UPGRADE_FAIL); return (1); } /* Swap the descriptors and set the upgrade flag. */ - (void)close(dbmfp->fd); + (void)__db_close(dbmfp->fd); dbmfp->fd = fd; F_SET(dbmfp, MP_UPGRADE); diff --git a/db2/mp/mp_fget.c b/db2/mp/mp_fget.c index a0364e92c3..3f99e60505 100644 --- a/db2/mp/mp_fget.c +++ b/db2/mp/mp_fget.c @@ -7,7 +7,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)mp_fget.c 10.25 (Sleepycat) 9/23/97"; +static const char sccsid[] = "@(#)mp_fget.c 10.30 (Sleepycat) 10/25/97"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -87,14 +87,14 @@ memp_fget(dbmfp, pgnoaddr, flags, addrp) * We want to switch threads as often as possible. Sleep every time * we get a new page to make it more likely. 
*/ - if (__sleep_on_every_page_get && (dbmp->dbenv == NULL || - dbmp->dbenv->db_yield == NULL || dbmp->dbenv->db_yield() != 0)) + if (__sleep_on_every_page_get && + (__db_yield == NULL || __db_yield() != 0)) __db_sleep(0, 1); #endif mp = dbmp->mp; mfp = dbmfp->mfp; - mf_offset = OFFSET(dbmp, mfp); + mf_offset = R_OFFSET(dbmp, mfp); addr = NULL; bhp = NULL; b_incr = b_inserted = readonly_alloc = ret = 0; @@ -137,7 +137,7 @@ memp_fget(dbmfp, pgnoaddr, flags, addrp) } } if (!readonly_alloc) { - addr = ADDR(dbmfp, *pgnoaddr * mfp->stat.st_pagesize); + addr = R_ADDR(dbmfp, *pgnoaddr * mfp->stat.st_pagesize); ++mp->stat.st_map; ++mfp->stat.st_map; @@ -159,9 +159,12 @@ memp_fget(dbmfp, pgnoaddr, flags, addrp) */ if (dbmfp->fd == -1) size = 0; - else if ((ret = __db_stat(dbmp->dbenv, - dbmfp->path, dbmfp->fd, &size, NULL)) != 0) + else if ((ret = + __db_ioinfo(dbmfp->path, dbmfp->fd, &size, NULL)) != 0) { + __db_err(dbmp->dbenv, + "%s: %s", dbmfp->path, strerror(ret)); goto err; + } *pgnoaddr = size == 0 ? 0 : (size - 1) / mfp->stat.st_pagesize; @@ -190,26 +193,29 @@ memp_fget(dbmfp, pgnoaddr, flags, addrp) goto found; } - /* If we haven't checked the BH list yet, do the search. */ + /* If we haven't checked the BH hash bucket queue, do the search. 
*/ if (!LF_ISSET(DB_MPOOL_LAST | DB_MPOOL_NEW)) { - ++mp->stat.st_hash_searches; bucket = BUCKET(mp, mf_offset, *pgnoaddr); for (cnt = 0, bhp = SH_TAILQ_FIRST(&dbmp->htab[bucket], __bh); - bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, mq, __bh)) { + bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, hq, __bh)) { ++cnt; if (bhp->pgno == *pgnoaddr && bhp->mf_offset == mf_offset) { addr = bhp->buf; + ++mp->stat.st_hash_searches; if (cnt > mp->stat.st_hash_longest) mp->stat.st_hash_longest = cnt; mp->stat.st_hash_examined += cnt; goto found; } } - if (cnt > mp->stat.st_hash_longest) - mp->stat.st_hash_longest = cnt; - mp->stat.st_hash_examined += cnt; + if (cnt != 0) { + ++mp->stat.st_hash_searches; + if (cnt > mp->stat.st_hash_longest) + mp->stat.st_hash_longest = cnt; + mp->stat.st_hash_examined += cnt; + } } /* @@ -239,8 +245,9 @@ memp_fget(dbmfp, pgnoaddr, flags, addrp) * our region lock without screwing up the world. */ bucket = BUCKET(mp, mf_offset, *pgnoaddr); - SH_TAILQ_INSERT_HEAD(&dbmp->htab[bucket], bhp, mq, __bh); + SH_TAILQ_INSERT_HEAD(&dbmp->htab[bucket], bhp, hq, __bh); SH_TAILQ_INSERT_TAIL(&mp->bhq, bhp, q); + ++mp->stat.st_page_clean; b_inserted = 1; /* Set the page number, and associated MPOOLFILE. */ @@ -281,7 +288,8 @@ reread: if ((ret = __memp_pgread(dbmfp, * !!! * The __memp_pgread call discarded and reacquired the region * lock. Because the buffer reference count was incremented - * before the region lock was discarded the buffer didn't move. + * before the region lock was discarded the buffer can't move + * and its contents can't change. */ ++mp->stat.st_cache_miss; ++mfp->stat.st_cache_miss; @@ -305,7 +313,8 @@ found: /* Increment the reference count. */ * BH_LOCKED -- * I/O in progress, wait for it to finish. Because the buffer * reference count was incremented before the region lock was - * discarded we know the buffer didn't move. + * discarded we know the buffer can't move and its contents + * can't change. 
*/ if (F_ISSET(bhp, BH_LOCKED)) { UNLOCKREGION(dbmp); diff --git a/db2/mp/mp_fopen.c b/db2/mp/mp_fopen.c index 5ab807701c..de59c9ea9b 100644 --- a/db2/mp/mp_fopen.c +++ b/db2/mp/mp_fopen.c @@ -7,7 +7,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)mp_fopen.c 10.27 (Sleepycat) 9/23/97"; +static const char sccsid[] = "@(#)mp_fopen.c 10.30 (Sleepycat) 10/25/97"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -92,7 +92,7 @@ __memp_fopen(dbmp, path, /* Allocate and initialize the per-process structure. */ if ((dbmfp = - (DB_MPOOLFILE *)calloc(1, sizeof(DB_MPOOLFILE))) == NULL) { + (DB_MPOOLFILE *)__db_calloc(1, sizeof(DB_MPOOLFILE))) == NULL) { __db_err(dbenv, "%s: %s", path == NULL ? TEMPORARY : path, strerror(ENOMEM)); return (ENOMEM); @@ -120,7 +120,7 @@ __memp_fopen(dbmp, path, /* Open the file. */ - if ((ret = __db_fdopen(dbmfp->path, + if ((ret = __db_open(dbmfp->path, LF_ISSET(DB_CREATE | DB_RDONLY), DB_CREATE | DB_RDONLY, mode, &dbmfp->fd)) != 0) { __db_err(dbenv, "%s: %s", dbmfp->path, strerror(ret)); @@ -128,9 +128,11 @@ __memp_fopen(dbmp, path, } /* Don't permit files that aren't a multiple of the pagesize. 
*/ - if ((ret = __db_stat(dbenv, - dbmfp->path, dbmfp->fd, &size, NULL)) != 0) + if ((ret = + __db_ioinfo(dbmfp->path, dbmfp->fd, &size, NULL)) != 0) { + __db_err(dbenv, "%s: %s", dbmfp->path, strerror(ret)); goto err; + } if (size % pagesize) { __db_err(dbenv, "%s: file size not a multiple of the pagesize", @@ -198,7 +200,7 @@ __memp_fopen(dbmp, path, dbmfp->addr = NULL; if (mfp->can_mmap) { dbmfp->len = size; - if (__db_mmap(dbmfp->fd, dbmfp->len, 1, 1, &dbmfp->addr) != 0) { + if (__db_map(dbmfp->fd, dbmfp->len, 1, 1, &dbmfp->addr) != 0) { mfp->can_mmap = 0; dbmfp->addr = NULL; } @@ -264,7 +266,7 @@ __memp_mf_open(dbmp, dbmfp, for (mfp = SH_TAILQ_FIRST(&dbmp->mp->mpfq, __mpoolfile); mfp != NULL; mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile)) if (!memcmp(fileid, - ADDR(dbmp, mfp->fileid_off), DB_FILE_ID_LEN)) { + R_ADDR(dbmp, mfp->fileid_off), DB_FILE_ID_LEN)) { if (ftype != mfp->ftype || pagesize != mfp->stat.st_pagesize) { __db_err(dbmp->dbenv, @@ -325,10 +327,10 @@ alloc: if ((ret = __memp_ralloc(dbmp, sizeof(MPOOLFILE), NULL, &mfp)) != 0) if (0) { err: if (mfp->path_off != 0) __db_shalloc_free(dbmp->addr, - ADDR(dbmp, mfp->path_off)); + R_ADDR(dbmp, mfp->path_off)); if (!istemp) __db_shalloc_free(dbmp->addr, - ADDR(dbmp, mfp->fileid_off)); + R_ADDR(dbmp, mfp->fileid_off)); if (mfp != NULL) __db_shalloc_free(dbmp->addr, mfp); mfp = NULL; @@ -367,7 +369,7 @@ memp_fclose(dbmfp) /* Discard any mmap information. */ if (dbmfp->addr != NULL && - (ret = __db_munmap(dbmfp->addr, dbmfp->len)) != 0) + (ret = __db_unmap(dbmfp->addr, dbmfp->len)) != 0) __db_err(dbmp->dbenv, "%s: %s", dbmfp->path, strerror(ret)); /* Close the file; temporary files may not yet have been created. */ @@ -423,7 +425,7 @@ __memp_mf_close(dbmp, dbmfp) * fairly expensive to reintegrate the buffers back into the region for * no purpose. 
*/ - mf_offset = OFFSET(dbmp, mfp); + mf_offset = R_OFFSET(dbmp, mfp); for (bhp = SH_TAILQ_FIRST(&mp->bhq, __bh); bhp != NULL; bhp = nbhp) { nbhp = SH_TAILQ_NEXT(bhp, q, __bh); @@ -436,6 +438,10 @@ __memp_mf_close(dbmp, dbmfp) #endif if (bhp->mf_offset == mf_offset) { + if (F_ISSET(bhp, BH_DIRTY)) { + ++mp->stat.st_page_clean; + --mp->stat.st_page_dirty; + } __memp_bhfree(dbmp, mfp, bhp, 0); SH_TAILQ_INSERT_HEAD(&mp->bhfq, bhp, q, __bh); } @@ -446,11 +452,11 @@ __memp_mf_close(dbmp, dbmfp) /* Free the space. */ __db_shalloc_free(dbmp->addr, mfp); - __db_shalloc_free(dbmp->addr, ADDR(dbmp, mfp->path_off)); + __db_shalloc_free(dbmp->addr, R_ADDR(dbmp, mfp->path_off)); if (mfp->fileid_off != 0) - __db_shalloc_free(dbmp->addr, ADDR(dbmp, mfp->fileid_off)); + __db_shalloc_free(dbmp->addr, R_ADDR(dbmp, mfp->fileid_off)); if (mfp->pgcookie_off != 0) - __db_shalloc_free(dbmp->addr, ADDR(dbmp, mfp->pgcookie_off)); + __db_shalloc_free(dbmp->addr, R_ADDR(dbmp, mfp->pgcookie_off)); ret1: UNLOCKREGION(dbmp); return (0); diff --git a/db2/mp/mp_fput.c b/db2/mp/mp_fput.c index 9ea7cd9d0d..892f179d3a 100644 --- a/db2/mp/mp_fput.c +++ b/db2/mp/mp_fput.c @@ -7,7 +7,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)mp_fput.c 10.12 (Sleepycat) 9/23/97"; +static const char sccsid[] = "@(#)mp_fput.c 10.14 (Sleepycat) 10/5/97"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -35,10 +35,12 @@ memp_fput(dbmfp, pgaddr, flags) { BH *bhp; DB_MPOOL *dbmp; + MPOOL *mp; MPOOLFILE *mfp; int wrote, ret; dbmp = dbmfp->dbmp; + mp = dbmp->mp; /* Validate arguments. */ if (flags) { @@ -82,10 +84,16 @@ memp_fput(dbmfp, pgaddr, flags) LOCKREGION(dbmp); /* Set/clear the page bits. 
*/ - if (LF_ISSET(DB_MPOOL_CLEAN)) + if (LF_ISSET(DB_MPOOL_CLEAN) && F_ISSET(bhp, BH_DIRTY)) { + ++mp->stat.st_page_clean; + --mp->stat.st_page_dirty; F_CLR(bhp, BH_DIRTY); - if (LF_ISSET(DB_MPOOL_DIRTY)) + } + if (LF_ISSET(DB_MPOOL_DIRTY) && !F_ISSET(bhp, BH_DIRTY)) { + --mp->stat.st_page_clean; + ++mp->stat.st_page_dirty; F_SET(bhp, BH_DIRTY); + } if (LF_ISSET(DB_MPOOL_DISCARD)) F_SET(bhp, BH_DISCARD); @@ -108,11 +116,11 @@ memp_fput(dbmfp, pgaddr, flags) } /* Move the buffer to the head/tail of the LRU chain. */ - SH_TAILQ_REMOVE(&dbmp->mp->bhq, bhp, q, __bh); + SH_TAILQ_REMOVE(&mp->bhq, bhp, q, __bh); if (F_ISSET(bhp, BH_DISCARD)) - SH_TAILQ_INSERT_HEAD(&dbmp->mp->bhq, bhp, q, __bh); + SH_TAILQ_INSERT_HEAD(&mp->bhq, bhp, q, __bh); else - SH_TAILQ_INSERT_TAIL(&dbmp->mp->bhq, bhp, q); + SH_TAILQ_INSERT_TAIL(&mp->bhq, bhp, q); /* * If this buffer is scheduled for writing because of a checkpoint, @@ -125,14 +133,14 @@ memp_fput(dbmfp, pgaddr, flags) if (F_ISSET(bhp, BH_DIRTY)) { if (__memp_bhwrite(dbmp, dbmfp->mfp, bhp, NULL, &wrote) != 0 || !wrote) - F_SET(dbmp->mp, MP_LSN_RETRY); + F_SET(mp, MP_LSN_RETRY); } else { F_CLR(bhp, BH_WRITE); - mfp = ADDR(dbmp, bhp->mf_offset); + mfp = R_ADDR(dbmp, bhp->mf_offset); --mfp->lsn_cnt; - --dbmp->mp->lsn_cnt; + --mp->lsn_cnt; } UNLOCKREGION(dbmp); diff --git a/db2/mp/mp_fset.c b/db2/mp/mp_fset.c index a3a3dcef9c..a7d2706008 100644 --- a/db2/mp/mp_fset.c +++ b/db2/mp/mp_fset.c @@ -7,7 +7,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)mp_fset.c 10.9 (Sleepycat) 9/20/97"; +static const char sccsid[] = "@(#)mp_fset.c 10.10 (Sleepycat) 10/5/97"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -34,9 +34,13 @@ memp_fset(dbmfp, pgaddr, flags) { BH *bhp; DB_MPOOL *dbmp; + MPOOL *mp; + MPOOLFILE *mfp; int ret; dbmp = dbmfp->dbmp; + mfp = dbmfp->mfp; + mp = dbmp->mp; /* Validate arguments. 
*/ if (flags != 0) { @@ -60,10 +64,16 @@ memp_fset(dbmfp, pgaddr, flags) LOCKREGION(dbmp); - if (LF_ISSET(DB_MPOOL_DIRTY)) - F_SET(bhp, BH_DIRTY); - if (LF_ISSET(DB_MPOOL_CLEAN)) + if (LF_ISSET(DB_MPOOL_CLEAN) && F_ISSET(bhp, BH_DIRTY)) { + ++mp->stat.st_page_clean; + --mp->stat.st_page_dirty; F_CLR(bhp, BH_DIRTY); + } + if (LF_ISSET(DB_MPOOL_DIRTY) && !F_ISSET(bhp, BH_DIRTY)) { + --mp->stat.st_page_clean; + ++mp->stat.st_page_dirty; + F_SET(bhp, BH_DIRTY); + } if (LF_ISSET(DB_MPOOL_DISCARD)) F_SET(bhp, BH_DISCARD); diff --git a/db2/mp/mp_open.c b/db2/mp/mp_open.c index f622b1ed26..4c19739ebd 100644 --- a/db2/mp/mp_open.c +++ b/db2/mp/mp_open.c @@ -7,7 +7,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)mp_open.c 10.13 (Sleepycat) 9/23/97"; +static const char sccsid[] = "@(#)mp_open.c 10.15 (Sleepycat) 10/25/97"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -54,7 +54,7 @@ memp_open(path, flags, mode, dbenv, retp) cachesize = dbenv == NULL ? 0 : dbenv->mp_size; /* Create and initialize the DB_MPOOL structure. */ - if ((dbmp = (DB_MPOOL *)calloc(1, sizeof(DB_MPOOL))) == NULL) + if ((dbmp = (DB_MPOOL *)__db_calloc(1, sizeof(DB_MPOOL))) == NULL) return (ENOMEM); LIST_INIT(&dbmp->dbregq); TAILQ_INIT(&dbmp->dbmfq); @@ -62,8 +62,7 @@ memp_open(path, flags, mode, dbenv, retp) dbmp->dbenv = dbenv; /* Decide if it's possible for anyone else to access the pool. 
*/ - if ((dbenv == NULL && path == NULL) || - (dbenv != NULL && F_ISSET(dbenv, DB_MPOOL_PRIVATE))) + if ((dbenv == NULL && path == NULL) || LF_ISSET(DB_MPOOL_PRIVATE)) F_SET(dbmp, MP_ISPRIVATE); /* @@ -183,7 +182,7 @@ memp_register(dbmp, ftype, pgin, pgout) { DB_MPREG *mpr; - if ((mpr = (DB_MPREG *)malloc(sizeof(DB_MPREG))) == NULL) + if ((mpr = (DB_MPREG *)__db_malloc(sizeof(DB_MPREG))) == NULL) return (ENOMEM); mpr->ftype = ftype; diff --git a/db2/mp/mp_pr.c b/db2/mp/mp_pr.c index 7794cfa7f3..01f0920df4 100644 --- a/db2/mp/mp_pr.c +++ b/db2/mp/mp_pr.c @@ -7,7 +7,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)mp_pr.c 10.13 (Sleepycat) 8/27/97"; +static const char sccsid[] = "@(#)mp_pr.c 10.18 (Sleepycat) 11/1/97"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -53,7 +53,7 @@ memp_stat(dbmp, gspp, fspp, db_malloc) *gspp = NULL; if ((*gspp = db_malloc == NULL ? - (DB_MPOOL_STAT *)malloc(sizeof(**gspp)) : + (DB_MPOOL_STAT *)__db_malloc(sizeof(**gspp)) : (DB_MPOOL_STAT *)db_malloc(sizeof(**gspp))) == NULL) return (ENOMEM); @@ -62,6 +62,10 @@ memp_stat(dbmp, gspp, fspp, db_malloc) /* Copy out the global statistics. */ **gspp = dbmp->mp->stat; (*gspp)->st_hash_buckets = dbmp->mp->htab_buckets; + (*gspp)->st_region_wait = + dbmp->mp->rlayout.lock.mutex_set_wait; + (*gspp)->st_region_nowait = + dbmp->mp->rlayout.lock.mutex_set_nowait; UNLOCKREGION(dbmp); } @@ -85,7 +89,7 @@ memp_stat(dbmp, gspp, fspp, db_malloc) /* Allocate space for the pointers. */ len = (len + 1) * sizeof(DB_MPOOL_FSTAT *); if ((*fspp = db_malloc == NULL ? 
- (DB_MPOOL_FSTAT **)malloc(len) : + (DB_MPOOL_FSTAT **)__db_malloc(len) : (DB_MPOOL_FSTAT **)db_malloc(len)) == NULL) return (ENOMEM); @@ -96,11 +100,11 @@ memp_stat(dbmp, gspp, fspp, db_malloc) mfp = SH_TAILQ_FIRST(&dbmp->mp->mpfq, __mpoolfile); mfp != NULL; ++tfsp, mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile)) { - name = ADDR(dbmp, mfp->path_off); + name = R_ADDR(dbmp, mfp->path_off); nlen = strlen(name); len = sizeof(DB_MPOOL_FSTAT) + nlen + 1; if ((*tfsp = db_malloc == NULL ? - (DB_MPOOL_FSTAT *)malloc(len) : + (DB_MPOOL_FSTAT *)__db_malloc(len) : (DB_MPOOL_FSTAT *)db_malloc(len)) == NULL) return (ENOMEM); **tfsp = mfp->stat; @@ -200,18 +204,19 @@ __memp_pmp(fp, dbmp, mp, data) (void)fprintf(fp, "references: %lu; cachesize: %lu\n", (u_long)mp->rlayout.refcnt, (u_long)mp->stat.st_cachesize); (void)fprintf(fp, - " %lu pages created\n", mp->stat.st_page_create); + " %lu pages created\n", (u_long)mp->stat.st_page_create); (void)fprintf(fp, - " %lu mmap pages returned\n", mp->stat.st_map); + " %lu mmap pages returned\n", (u_long)mp->stat.st_map); (void)fprintf(fp, " %lu I/O's (%lu read, %lu written)\n", - mp->stat.st_page_in + mp->stat.st_page_out, - mp->stat.st_page_in, mp->stat.st_page_out); + (u_long)mp->stat.st_page_in + mp->stat.st_page_out, + (u_long)mp->stat.st_page_in, (u_long)mp->stat.st_page_out); if (mp->stat.st_cache_hit + mp->stat.st_cache_miss != 0) (void)fprintf(fp, " %.0f%% cache hit rate (%lu hit, %lu miss)\n", ((double)mp->stat.st_cache_hit / (mp->stat.st_cache_hit + mp->stat.st_cache_miss)) * 100, - mp->stat.st_cache_hit, mp->stat.st_cache_miss); + (u_long)mp->stat.st_cache_hit, + (u_long)mp->stat.st_cache_miss); /* Display the MPOOLFILE structures. 
*/ for (cnt = 0, mfp = SH_TAILQ_FIRST(&dbmp->mp->mpfq, __mpoolfile); @@ -230,17 +235,18 @@ __memp_pmp(fp, dbmp, mp, data) (void)fprintf(fp, "%s\nHASH table of BH's (%lu buckets):\n", DB_LINE, (u_long)mp->htab_buckets); (void)fprintf(fp, - "longest chain searched %lu\n", mp->stat.st_hash_longest); + "longest chain searched %lu\n", (u_long)mp->stat.st_hash_longest); (void)fprintf(fp, "average chain searched %lu (total/calls: %lu/%lu)\n", - mp->stat.st_hash_examined / + (u_long)mp->stat.st_hash_examined / (mp->stat.st_hash_searches ? mp->stat.st_hash_searches : 1), - mp->stat.st_hash_examined, mp->stat.st_hash_searches); + (u_long)mp->stat.st_hash_examined, + (u_long)mp->stat.st_hash_searches); for (htabp = dbmp->htab, bucket = 0; bucket < mp->htab_buckets; ++htabp, ++bucket) { if (SH_TAILQ_FIRST(&dbmp->htab[bucket], __bh) != NULL) (void)fprintf(fp, "%lu:\n", (u_long)bucket); for (bhp = SH_TAILQ_FIRST(&dbmp->htab[bucket], __bh); - bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, mq, __bh)) + bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, hq, __bh)) __memp_pbh(fp, dbmp, bhp, data); } @@ -249,7 +255,7 @@ __memp_pmp(fp, dbmp, mp, data) for (sep = "\n ", bhp = SH_TAILQ_FIRST(&dbmp->mp->bhq, __bh); bhp != NULL; sep = ", ", bhp = SH_TAILQ_NEXT(bhp, q, __bh)) (void)fprintf(fp, "%s%lu/%lu", sep, - (u_long)bhp->pgno, (u_long)OFFSET(dbmp, bhp)); + (u_long)bhp->pgno, (u_long)R_OFFSET(dbmp, bhp)); (void)fprintf(fp, "\n"); } @@ -263,16 +269,18 @@ __memp_pmf(fp, mfp, data) MPOOLFILE *mfp; int data; { - (void)fprintf(fp, " %lu pages created\n", mfp->stat.st_page_create); + (void)fprintf(fp, " %lu pages created\n", + (u_long)mfp->stat.st_page_create); (void)fprintf(fp, " %lu I/O's (%lu read, %lu written)\n", - mfp->stat.st_page_in + mfp->stat.st_page_out, - mfp->stat.st_page_in, mfp->stat.st_page_out); + (u_long)mfp->stat.st_page_in + mfp->stat.st_page_out, + (u_long)mfp->stat.st_page_in, (u_long)mfp->stat.st_page_out); if (mfp->stat.st_cache_hit + mfp->stat.st_cache_miss != 0) (void)fprintf(fp, " 
%.0f%% cache hit rate (%lu hit, %lu miss)\n", ((double)mfp->stat.st_cache_hit / (mfp->stat.st_cache_hit + mfp->stat.st_cache_miss)) * 100, - mfp->stat.st_cache_hit, mfp->stat.st_cache_miss); + (u_long)mfp->stat.st_cache_hit, + (u_long)mfp->stat.st_cache_miss); if (!data) return; @@ -298,7 +306,7 @@ __memp_pbh(fp, dbmp, bhp, data) return; (void)fprintf(fp, " BH @ %lu (mf: %lu): page %lu; ref %lu", - (u_long)OFFSET(dbmp, bhp), + (u_long)R_OFFSET(dbmp, bhp), (u_long)bhp->mf_offset, (u_long)bhp->pgno, (u_long)bhp->ref); sep = "; "; if (F_ISSET(bhp, BH_DIRTY)) { diff --git a/db2/mp/mp_region.c b/db2/mp/mp_region.c index a5c52123b9..6b2f93125c 100644 --- a/db2/mp/mp_region.c +++ b/db2/mp/mp_region.c @@ -7,7 +7,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)mp_region.c 10.11 (Sleepycat) 8/2/97"; +static const char sccsid[] = "@(#)mp_region.c 10.16 (Sleepycat) 10/25/97"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -51,7 +51,7 @@ __memp_ralloc(dbmp, len, offsetp, retp) nomore = 0; alloc: if ((ret = __db_shalloc(dbmp->addr, len, MUTEX_ALIGNMENT, &p)) == 0) { if (offsetp != NULL) - *offsetp = OFFSET(dbmp, p); + *offsetp = R_OFFSET(dbmp, p); *(void **)retp = p; return (0); } @@ -68,7 +68,7 @@ alloc: if ((ret = __db_shalloc(dbmp->addr, len, MUTEX_ALIGNMENT, &p)) == 0) { if (__db_shsizeof(bhp) == len) { SH_TAILQ_REMOVE(&mp->bhfq, bhp, q, __bh); if (offsetp != NULL) - *offsetp = OFFSET(dbmp, bhp); + *offsetp = R_OFFSET(dbmp, bhp); *(void **)retp = bhp; return (0); } @@ -82,6 +82,7 @@ alloc: if ((ret = __db_shalloc(dbmp->addr, len, MUTEX_ALIGNMENT, &p)) == 0) { SH_TAILQ_REMOVE(&mp->bhfq, bhp, q, __bh); __db_shalloc_free(dbmp->addr, bhp); + --mp->stat.st_page_clean; /* * Retry as soon as we've freed up sufficient space. If we @@ -104,7 +105,7 @@ retry: /* Find a buffer we can flush; pure LRU. */ continue; /* Find the associated MPOOLFILE. */ - mfp = ADDR(dbmp, bhp->mf_offset); + mfp = R_ADDR(dbmp, bhp->mf_offset); /* * Write the page if it's dirty. 
@@ -135,8 +136,7 @@ retry: /* Find a buffer we can flush; pure LRU. */ else { if (restart) goto retry; - else - continue; + continue; } } else ++mp->stat.st_ro_evict; @@ -150,7 +150,7 @@ retry: /* Find a buffer we can flush; pure LRU. */ __memp_bhfree(dbmp, mfp, bhp, 0); if (offsetp != NULL) - *offsetp = OFFSET(dbmp, bhp); + *offsetp = R_OFFSET(dbmp, bhp); *(void **)retp = bhp; return (0); } @@ -225,9 +225,13 @@ retry: if (LF_ISSET(DB_CREATE)) { * be possible for DB_THREAD to be set if HAVE_SPINLOCKS aren't * defined. */ - if (F_ISSET(dbmp, MP_ISPRIVATE)) - ret = (dbmp->maddr = malloc(rlen)) == NULL ? ENOMEM : 0; - else + if (F_ISSET(dbmp, MP_ISPRIVATE)) { + if ((dbmp->maddr = __db_malloc(rlen)) == NULL) + ret = ENOMEM; + else + ret = __db_rinit(dbmp->dbenv, + dbmp->maddr, 0, rlen, 0); + } else ret = __db_rcreate(dbmp->dbenv, DB_APP_NONE, path, DB_DEFAULT_MPOOL_FILE, mode, rlen, &fd, &dbmp->maddr); @@ -259,7 +263,10 @@ retry: if (LF_ISSET(DB_CREATE)) { 0, &dbmp->htab)) != 0) goto err; __db_hashinit(dbmp->htab, mp->htab_buckets); - mp->htab = OFFSET(dbmp, dbmp->htab); + mp->htab = R_OFFSET(dbmp, dbmp->htab); + + ZERO_LSN(mp->lsn); + mp->lsn_cnt = 0; memset(&mp->stat, 0, sizeof(mp->stat)); mp->stat.st_cachesize = cachesize; @@ -303,7 +310,7 @@ retry: if (LF_ISSET(DB_CREATE)) { * Get the hash table address; it's on the shared page, so we have * to lock first. 
*/ - dbmp->htab = ADDR(dbmp, dbmp->mp->htab); + dbmp->htab = R_ADDR(dbmp, dbmp->mp->htab); dbmp->fd = fd; @@ -333,7 +340,7 @@ __memp_rclose(dbmp) DB_MPOOL *dbmp; { if (F_ISSET(dbmp, MP_ISPRIVATE)) { - free(dbmp->maddr); + __db_free(dbmp->maddr); return (0); } return (__db_rclose(dbmp->dbenv, dbmp->fd, dbmp->maddr)); diff --git a/db2/mp/mp_sync.c b/db2/mp/mp_sync.c index 65b2a18267..2f042df9e1 100644 --- a/db2/mp/mp_sync.c +++ b/db2/mp/mp_sync.c @@ -7,13 +7,14 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)mp_sync.c 10.9 (Sleepycat) 8/29/97"; +static const char sccsid[] = "@(#)mp_sync.c 10.15 (Sleepycat) 11/1/97"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES #include <sys/types.h> #include <errno.h> +#include <stdlib.h> #include <string.h> #endif @@ -23,6 +24,8 @@ static const char sccsid[] = "@(#)mp_sync.c 10.9 (Sleepycat) 8/29/97"; #include "mp.h" #include "common_ext.h" +static int __bhcmp __P((const void *, const void *)); + /* * memp_sync -- * Mpool sync function. @@ -32,27 +35,40 @@ memp_sync(dbmp, lsnp) DB_MPOOL *dbmp; DB_LSN *lsnp; { - BH *bhp; + BH *bhp, **bharray; DB_ENV *dbenv; MPOOL *mp; MPOOLFILE *mfp; - int can_write, wrote, lsn_cnt, restart, ret; + int ar_cnt, cnt, nalloc, next, notused, ret, wrote; dbenv = dbmp->dbenv; - if (dbmp->dbenv->lg_info == NULL) { - __db_err(dbenv, "memp_sync requires logging"); + if (dbenv->lg_info == NULL) { + __db_err(dbenv, "memp_sync: requires logging"); return (EINVAL); } + /* + * We try and write the buffers in page order so that the underlying + * filesystem doesn't have to seek and can write contiguous blocks, + * plus, we don't want to hold the region lock while we write the + * buffers. Get memory to hold the buffer pointers. Get a good-size + * block, too, because we realloc while holding the region lock if we + * run out. 
+ */ + if ((bharray = + (BH **)__db_malloc((nalloc = 1024) * sizeof(BH *))) == NULL) + return (ENOMEM); + LOCKREGION(dbmp); /* - * If the application is asking about a previous call, and we haven't - * found any buffers that the application holding the pin couldn't - * write, return yes or no based on the current count. Note, if the - * application is asking about a LSN *smaller* than one we've already - * handled, then we return based on the count for that LSN. + * If the application is asking about a previous call to memp_sync(), + * and we haven't found any buffers that the application holding the + * pin couldn't write, return yes or no based on the current count. + * Note, if the application is asking about a LSN *smaller* than one + * we've already handled or are currently handling, then we return a + * result based on the count for the larger LSN. */ mp = dbmp->mp; if (!F_ISSET(mp, MP_LSN_RETRY) && log_compare(lsnp, &mp->lsn) <= 0) { @@ -61,9 +77,7 @@ memp_sync(dbmp, lsnp) ret = 0; } else ret = DB_INCOMPLETE; - - UNLOCKREGION(dbmp); - return (ret); + goto done; } /* Else, it's a new checkpoint. */ @@ -74,7 +88,7 @@ memp_sync(dbmp, lsnp) * for which we were already doing a checkpoint. (BTW, I don't expect * to see multiple LSN's from the same or multiple processes, but You * Just Never Know. Responding as if they all called with the largest - * of the LSNs specified makes everything work. + * of the LSNs specified makes everything work.) * * We don't currently use the LSN we save. We could potentially save * the last-written LSN in each buffer header and use it to determine @@ -93,64 +107,127 @@ memp_sync(dbmp, lsnp) /* * Walk the list of buffers and mark all dirty buffers to be written - * and all pinned buffers to be potentially written. We do this in - * single fell swoop while holding the region locked so that processes - * can't make new buffers dirty, causing us to never finish. 
Since - * the application may have restarted the sync, clear any BH_WRITE - * flags that appear to be left over. + * and all pinned buffers to be potentially written (we can't know if + * we'll need to write them until the holding process returns them to + * the cache). We do this in one pass while holding the region locked + * so that processes can't make new buffers dirty, causing us to never + * finish. Since the application may have restarted the sync, clear + * any BH_WRITE flags that appear to be left over from previous calls. + * + * Keep a count of the total number of buffers we need to write in + * MPOOL->lsn_cnt, and for each file, in MPOOLFILE->lsn_count. */ - can_write = lsn_cnt = 0; - for (lsn_cnt = 0, bhp = SH_TAILQ_FIRST(&mp->bhq, __bh); + ar_cnt = 0; + for (bhp = SH_TAILQ_FIRST(&mp->bhq, __bh); bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, q, __bh)) if (F_ISSET(bhp, BH_DIRTY) || bhp->ref != 0) { F_SET(bhp, BH_WRITE); - if (bhp->ref == 0) - can_write = 1; + ++mp->lsn_cnt; - mfp = ADDR(dbmp, bhp->mf_offset); + mfp = R_ADDR(dbmp, bhp->mf_offset); ++mfp->lsn_cnt; - ++lsn_cnt; + /* + * If the buffer isn't in use, we should be able to + * write it immediately, so save a reference to it. + */ + if (bhp->ref == 0) { + if (ar_cnt == nalloc) { + nalloc *= 2; + if ((bharray = + (BH **)__db_realloc(bharray, + nalloc * sizeof(BH *))) == NULL) { + ret = ENOMEM; + goto err; + } + } + bharray[ar_cnt++] = bhp; + } } else F_CLR(bhp, BH_WRITE); - mp->lsn_cnt = lsn_cnt; - - /* If there no buffers we can write, we're done. */ - if (!can_write) { - UNLOCKREGION(dbmp); - return (mp->lsn_cnt ? DB_INCOMPLETE : 0); + /* If there no buffers we can write immediately, we're done. */ + if (ar_cnt == 0) { + ret = mp->lsn_cnt ? DB_INCOMPLETE : 0; + goto done; } - /* - * Write any buffers that we can. Restart the walk after each write, - * __memp_pgwrite() discards and reacquires the region lock during I/O. 
- */ -retry: for (bhp = SH_TAILQ_FIRST(&mp->bhq, __bh); - bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, q, __bh)) { - /* Ignore pinned or locked buffers. */ - if (!F_ISSET(bhp, BH_WRITE) || - bhp->ref != 0 || F_ISSET(bhp, BH_LOCKED)) - continue; + /* Lock down the buffers and their contents. */ + for (cnt = 0; cnt < ar_cnt; ++cnt) + ++bharray[cnt]->ref; - mfp = ADDR(dbmp, bhp->mf_offset); - if ((ret = - __memp_bhwrite(dbmp, mfp, bhp, &restart, &wrote)) != 0) - goto err; - if (wrote) { - if (restart) - goto retry; + UNLOCKREGION(dbmp); + + /* Sort the buffers we're going to write. */ + qsort(bharray, ar_cnt, sizeof(BH *), __bhcmp); + + LOCKREGION(dbmp); + + /* Walk the array, writing buffers. */ + for (next = 0; next < ar_cnt; ++next) { + /* + * It's possible for a thread to have gotten the buffer since + * we listed it for writing. If the reference count is still + * 1, we're the only ones using the buffer, go ahead and write. + * If it's >1, then skip the buffer and assume that it will be + * written when it's returned to the cache. + */ + if (bharray[next]->ref > 1) { + --bharray[next]->ref; continue; } - __db_err(dbenv, "%s: unable to flush page: %lu", - ADDR(dbmp, mfp->path_off), (u_long)bhp->pgno); - ret = EPERM; - goto err; + + /* Write the buffer. */ + mfp = R_ADDR(dbmp, bharray[next]->mf_offset); + ret = + __memp_bhwrite(dbmp, mfp, bharray[next], &notused, &wrote); + + /* Release the buffer. */ + --bharray[next]->ref; + + /* If there's an error, release the rest of the buffers. */ + if (ret != 0 || !wrote) { + while (++next < ar_cnt) + --bharray[next]->ref; + + if (ret != 0) + goto err; + + /* + * Any process syncing the shared memory buffer pool + * had better be able to write to any underlying file. + * Be understanding, but firm, on this point. + */ + if (!wrote) { + __db_err(dbenv, "%s: unable to flush page: %lu", + R_ADDR(dbmp, mfp->path_off), + (u_long)bharray[next]->pgno); + ret = EPERM; + goto err; + } + } } ret = mp->lsn_cnt ?
DB_INCOMPLETE : 0; -err: UNLOCKREGION(dbmp); +done: + if (0) { +err: /* + * On error, clear: + * MPOOL->lsn_cnt (the total sync count) + * MPOOLFILE->lsn_cnt (the per-file sync count) + * BH_WRITE flag (the scheduled for writing flag) + */ + mp->lsn_cnt = 0; + for (mfp = SH_TAILQ_FIRST(&dbmp->mp->mpfq, __mpoolfile); + mfp != NULL; mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile)) + mfp->lsn_cnt = 0; + for (bhp = SH_TAILQ_FIRST(&mp->bhq, __bh); + bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, q, __bh)) + F_CLR(bhp, BH_WRITE); + } + UNLOCKREGION(dbmp); + __db_free(bharray); return (ret); } @@ -162,10 +239,10 @@ int memp_fsync(dbmfp) DB_MPOOLFILE *dbmfp; { - BH *bhp; + BH *bhp, **bharray; DB_MPOOL *dbmp; size_t mf_offset; - int pincnt, restart, ret, wrote; + int ar_cnt, cnt, nalloc, next, pincnt, notused, ret, wrote; /* * If this handle doesn't have a file descriptor that's open for @@ -175,35 +252,205 @@ memp_fsync(dbmfp) if (F_ISSET(dbmfp, MP_READONLY | MP_PATH_TEMP)) return (0); - dbmp = dbmfp->dbmp; ret = 0; + dbmp = dbmfp->dbmp; + mf_offset = R_OFFSET(dbmp, dbmfp->mfp); - mf_offset = OFFSET(dbmp, dbmfp->mfp); + /* + * We try and write the buffers in page order so that the underlying + * filesystem doesn't have to seek and can write contiguous blocks, + * plus, we don't want to hold the region lock while we write the + * buffers. Get memory to hold the buffer pointers. Get a good-size + * block, too, because we realloc while holding the region lock if we + * run out. + */ + nalloc = 1024; + if ((bharray = + (BH **)__db_malloc((size_t)nalloc * sizeof(BH *))) == NULL) + return (ENOMEM); LOCKREGION(dbmp); /* - * Walk the list of buffer headers for the MPOOLFILE, and write out any - * dirty buffers that we can. + * Walk the LRU list of buffer headers, and get a list of buffers to + * write for this MPOOLFILE. 
*/ -retry: pincnt = 0; + ar_cnt = pincnt = 0; for (bhp = SH_TAILQ_FIRST(&dbmp->mp->bhq, __bh); - bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, q, __bh)) - if (F_ISSET(bhp, BH_DIRTY) && bhp->mf_offset == mf_offset) { - if (bhp->ref != 0 || F_ISSET(bhp, BH_LOCKED)) { - ++pincnt; - continue; - } - if ((ret = - __memp_pgwrite(dbmfp, bhp, &restart, &wrote)) != 0) + bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, q, __bh)) { + if (!F_ISSET(bhp, BH_DIRTY) || bhp->mf_offset != mf_offset) + continue; + if (bhp->ref != 0 || F_ISSET(bhp, BH_LOCKED)) { + ++pincnt; + continue; + } + + if (ar_cnt == nalloc) { + nalloc *= 2; + if ((bharray = (BH **)__db_realloc(bharray, + nalloc * sizeof(BH *))) == NULL) { + ret = ENOMEM; goto err; - if (!wrote) - ++pincnt; - if (restart) - goto retry; + } + } + + bharray[ar_cnt++] = bhp; + } + + /* Lock down the buffers and their contents. */ + for (cnt = 0; cnt < ar_cnt; ++cnt) + ++bharray[cnt]->ref; + + UNLOCKREGION(dbmp); + + /* Sort the buffers we're going to write. */ + qsort(bharray, ar_cnt, sizeof(BH *), __bhcmp); + + LOCKREGION(dbmp); + + /* Walk the array, writing buffers. */ + for (next = 0; next < ar_cnt; ++next) { + /* + * It's possible for a thread to have gotten the buffer since + * we listed it for writing. If the reference count is still + * 1, we're the only ones using the buffer, go ahead and write. + * If it's >1, then skip the buffer and assume that it will be + * written when it's returned to the cache. + */ + if (bharray[next]->ref > 1) { + ++pincnt; + + --bharray[next]->ref; + continue; } + /* Write the buffer. */ + ret = __memp_pgwrite(dbmfp, bharray[next], &notused, &wrote); + + /* Release the buffer. */ + --bharray[next]->ref; + + /* If there's an error, release the rest of the buffers.
*/ + if (ret != 0) { + while (++next < ar_cnt) + --bharray[next]->ref; + goto err; + } + if (!wrote) + ++pincnt; + } + +err: UNLOCKREGION(dbmp); + + __db_free(bharray); + + /* + * Sync the underlying file as the last thing we do, so that the OS + * has maximal opportunity to flush buffers before we request it. + * + * XXX: + * Don't lock the region around the sync, fsync(2) has no atomicity + * issues. + */ + if (ret == 0) + return (pincnt == 0 ? __db_fsync(dbmfp->fd) : DB_INCOMPLETE); + return (ret); + +} + +/* + * memp_trickle -- + * Keep a specified percentage of the buffers clean. + */ +int +memp_trickle(dbmp, pct, nwrotep) + DB_MPOOL *dbmp; + int pct, *nwrotep; +{ + BH *bhp; + MPOOL *mp; + MPOOLFILE *mfp; + u_long total; + int notused, ret, wrote; + + mp = dbmp->mp; + if (nwrotep != NULL) + *nwrotep = 0; + + if (pct < 1 || pct > 100) + return (EINVAL); + + LOCKREGION(dbmp); + + /* + * If there are sufficient clean buffers, or no buffers or no dirty + * buffers, we're done. + * + * XXX + * Using st_page_clean and st_page_dirty is our only choice at the + * moment, but it's not as correct as we might like in the presence + * of pools with more than one buffer size, as a free 512-byte buffer + * isn't the same as a free 8K buffer. + */ +loop: total = mp->stat.st_page_clean + mp->stat.st_page_dirty; + if (total == 0 || mp->stat.st_page_dirty == 0 || + (mp->stat.st_page_clean * 100) / total >= (u_long)pct) { + UNLOCKREGION(dbmp); + return (0); + } + + /* Loop until we write a buffer. */ + for (bhp = SH_TAILQ_FIRST(&mp->bhq, __bh); + bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, q, __bh)) { + if (bhp->ref != 0 || + !F_ISSET(bhp, BH_DIRTY) || F_ISSET(bhp, BH_LOCKED)) + continue; + + mfp = R_ADDR(dbmp, bhp->mf_offset); + if ((ret = + __memp_bhwrite(dbmp, mfp, bhp, &notused, &wrote)) != 0) + goto err; + + /* + * Any process syncing the shared memory buffer pool + * had better be able to write to any underlying file. + * Be understanding, but firm, on this point.
+ */ + if (!wrote) { + __db_err(dbmp->dbenv, "%s: unable to flush page: %lu", + R_ADDR(dbmp, mfp->path_off), (u_long)bhp->pgno); + ret = EPERM; + goto err; + } + + ++mp->stat.st_page_trickle; + if (nwrotep != NULL) + ++*nwrotep; + goto loop; + } + + /* No more buffers to write. */ + return (0); + err: UNLOCKREGION(dbmp); + return (ret); +} + +static int +__bhcmp(p1, p2) + const void *p1, *p2; +{ + BH *bhp1, *bhp2; + + bhp1 = *(BH **)p1; + bhp2 = *(BH **)p2; + + /* Sort by file (shared memory pool offset). */ + if (bhp1->mf_offset < bhp2->mf_offset) + return (-1); + if (bhp1->mf_offset > bhp2->mf_offset) + return (1); - return (ret == 0 ? (pincnt ? DB_INCOMPLETE : 0) : ret); + /* Sort by page in file. */ + return (bhp1->pgno < bhp2->pgno ? -1 : 1); } diff --git a/db2/mutex/mutex.c b/db2/mutex/mutex.c index 5315b2d3fe..7c8ea6ebd1 100644 --- a/db2/mutex/mutex.c +++ b/db2/mutex/mutex.c @@ -8,7 +8,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)mutex.c 10.25 (Sleepycat) 9/23/97"; +static const char sccsid[] = "@(#)mutex.c 10.28 (Sleepycat) 10/31/97"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -67,6 +67,10 @@ static const char sccsid[] = "@(#)mutex.c 10.25 (Sleepycat) 9/23/97"; #define TSL_UNSET(x) _lock_clear(x) #endif +#ifdef HAVE_ASSEM_SCO_CC +#include "sco.cc" +#endif + #ifdef HAVE_ASSEM_SPARC_GCC #include "sparc.gcc" #endif @@ -138,13 +142,12 @@ __db_mutex_init(mp, off) * __db_mutex_lock * Lock on a mutex, logically blocking if necessary. 
* - * PUBLIC: int __db_mutex_lock __P((db_mutex_t *, int, int (*)(void))); + * PUBLIC: int __db_mutex_lock __P((db_mutex_t *, int)); */ int -__db_mutex_lock(mp, fd, yield) +__db_mutex_lock(mp, fd) db_mutex_t *mp; int fd; - int (*yield) __P((void)); { u_long usecs; @@ -166,17 +169,15 @@ __db_mutex_lock(mp, fd, yield) } mp->pid = getpid(); #endif -#ifdef MUTEX_STATISTICS if (usecs == MS(10)) ++mp->mutex_set_nowait; else ++mp->mutex_set_wait; -#endif return (0); } /* Yield the processor; wait 10ms initially, up to 1 second. */ - if (yield == NULL || yield() != 0) { + if (__db_yield == NULL || __db_yield() != 0) { (void)__db_sleep(0, usecs); if ((usecs <<= 1) > SECOND) usecs = SECOND; @@ -200,7 +201,7 @@ __db_mutex_lock(mp, fd, yield) * up to 1 second. */ for (usecs = MS(10); mp->pid != 0;) - if (yield == NULL || yield() != 0) { + if (__db_yield == NULL || __db_yield() != 0) { (void)__db_sleep(0, usecs); if ((usecs <<= 1) > SECOND) usecs = SECOND; @@ -234,10 +235,6 @@ __db_mutex_lock(mp, fd, yield) if (locked) break; } - -#ifdef MUTEX_STATISTICS - ++mp->mutex_set_wait; -#endif return (0); #endif /* !HAVE_SPINLOCKS */ } diff --git a/db2/os/db_os_abs.c b/db2/os/db_os_abs.c deleted file mode 100644 index 8795205839..0000000000 --- a/db2/os/db_os_abs.c +++ /dev/null @@ -1,82 +0,0 @@ -/*- - * See the file LICENSE for redistribution information. - * - * Copyright (c) 1997 - * Sleepycat Software. All rights reserved. - */ - -#include "config.h" - -#ifndef lint -static const char sccsid[] = "@(#)db_os_abs.c 10.5 (Sleepycat) 7/5/97"; -#endif /* not lint */ - -#ifndef NO_SYSTEM_INCLUDES -#include <string.h> -#endif - -#include "db_int.h" -#include "os_ext.h" - -/* - * __db_abspath -- - * Return if a path is an absolute path. - * - * PUBLIC: int __db_abspath __P((const char *)); - */ -int -__db_abspath(path) - const char *path; -{ -#ifdef _WIN32 - /* - * !!! - * Check for drive specifications, e.g., "C:". 
In addition, the path - * separator used by the win32 DB (PATH_SEPARATOR) is \; look for both - * / and \ since these are user-input paths. - */ - if (isalpha(path[0]) && path[1] == ':') - path += 2; - return (path[0] == '/' || path[0] == '\\'); -#else -#ifdef macintosh - /* - * !!! - * Absolute pathnames always start with a volume name, which must be - * followed by a colon, thus they are of the form: - * volume: or volume:dir1:dir2:file - * - * Relative pathnames are either a single name without colons or a - * path starting with a colon, thus of the form: - * file or :file or :dir1:dir2:file - */ - return (strchr(path, ':') != NULL && path[0] != ':'); -#else - return (path[0] == '/'); -#endif -#endif -} - -/* - * __db_rpath -- - * Return the last path separator in the path or NULL if none found. - * - * PUBLIC: char *__db_rpath __P((const char *)); - */ -char * -__db_rpath(path) - const char *path; -{ - const char *s, *last; - - last = NULL; - if (PATH_SEPARATOR[1] != '\0') { - for (s = path; s[0] != '\0'; ++s) - if (strchr(PATH_SEPARATOR, s[0]) != NULL) - last = s; - } else - for (s = path; s[0] != '\0'; ++s) - if (s[0] == PATH_SEPARATOR[0]) - last = s; - return ((char *)last); -} diff --git a/db2/os/db_os_dir.c b/db2/os/db_os_dir.c deleted file mode 100644 index 1206e3faa7..0000000000 --- a/db2/os/db_os_dir.c +++ /dev/null @@ -1,138 +0,0 @@ -/*- - * See the file LICENSE for redistribution information. - * - * Copyright (c) 1997 - * Sleepycat Software. All rights reserved. 
- */ - -#include "config.h" - -#ifndef lint -static const char sccsid[] = "@(#)db_os_dir.c 10.10 (Sleepycat) 9/17/97"; -#endif /* not lint */ - -#ifndef NO_SYSTEM_INCLUDES -#include <sys/types.h> - -#if HAVE_DIRENT_H -# include <dirent.h> -# define NAMLEN(dirent) strlen((dirent)->d_name) -#else -# define dirent direct -# define NAMLEN(dirent) (dirent)->d_namlen -# if HAVE_SYS_NDIR_H -# include <sys/ndir.h> -# endif -# if HAVE_SYS_DIR_H -# include <sys/dir.h> -# endif -# if HAVE_NDIR_H -# include <ndir.h> -# endif -#endif - -#include <errno.h> -#include <stdlib.h> -#include <string.h> -#include <unistd.h> -#endif - -#include "db_int.h" -#include "os_ext.h" -#include "common_ext.h" - -/* - * __db_dir -- - * Return a list of the files in a directory. - * - * PUBLIC: int __db_dir __P((DB_ENV *, const char *, char ***, int *)); - */ -int -__db_dir(dbenv, dir, namesp, cntp) - DB_ENV *dbenv; - const char *dir; - char ***namesp; - int *cntp; -{ - int arraysz, cnt; - char **names; -#ifdef _WIN32 - struct _finddata_t fdata; - long dirhandle; - int finished; - char filespec[MAX_PATH]; - - (void)snprintf(filespec, sizeof(filespec), "%s/*", dir); - if ((dirhandle = _findfirst(filespec, &fdata)) == -1) { - __db_err(dbenv, "%s: %s", filespec, strerror(errno)); - return (errno); - } - - names = NULL; - finished = 0; - for (arraysz = cnt = 0; finished != 1; ++cnt) { - if (cnt >= arraysz) { - arraysz += 100; - names = (char **)(names == NULL ? 
- malloc(arraysz * sizeof(names[0])) : - realloc(names, arraysz * sizeof(names[0]))); - if (names == NULL) - goto nomem; - } - if ((names[cnt] = (char *)strdup(fdata.name)) == NULL) - goto nomem; - if (_findnext(dirhandle,&fdata) != 0) - finished = 1; - } - _findclose(dirhandle); -#else /* !_WIN32 */ - struct dirent *dp; - DIR *dirp; - - if ((dirp = opendir(dir)) == NULL) { - __db_err(dbenv, "%s: %s", dir, strerror(errno)); - return (errno); - } - names = NULL; - for (arraysz = cnt = 0; (dp = readdir(dirp)) != NULL; ++cnt) { - if (cnt >= arraysz) { - arraysz += 100; - names = (char **)(names == NULL ? - malloc(arraysz * sizeof(names[0])) : - realloc(names, arraysz * sizeof(names[0]))); - if (names == NULL) - goto nomem; - } - if ((names[cnt] = (char *)strdup(dp->d_name)) == NULL) - goto nomem; - } - (void)closedir(dirp); -#endif /* !_WIN32 */ - - *namesp = names; - *cntp = cnt; - return (0); - -nomem: if (names != NULL) - __db_dirf(dbenv, names, cnt); - __db_err(dbenv, "%s", strerror(ENOMEM)); - return (ENOMEM); -} - -/* - * __db_dirf -- - * Free the list of files. - * - * PUBLIC: void __db_dirf __P((DB_ENV *, char **, int)); - */ -void -__db_dirf(dbenv, names, cnt) - DB_ENV *dbenv; - char **names; - int cnt; -{ - dbenv = dbenv; /* XXX: Shut the compiler up. */ - while (cnt > 0) - free(names[--cnt]); - free (names); -} diff --git a/db2/os/db_os_lseek.c b/db2/os/db_os_lseek.c deleted file mode 100644 index cecf0e156b..0000000000 --- a/db2/os/db_os_lseek.c +++ /dev/null @@ -1,60 +0,0 @@ -/*- - * See the file LICENSE for redistribution information. - * - * Copyright (c) 1997 - * Sleepycat Software. All rights reserved. 
- */ - -#include "config.h" - -#ifndef lint -static const char sccsid[] = "@(#)db_os_lseek.c 10.3 (Sleepycat) 6/28/97"; -#endif /* not lint */ - -#ifndef NO_SYSTEM_INCLUDES -#include <sys/types.h> - -#include <errno.h> -#include <unistd.h> -#endif - -#include "db_int.h" -#include "os_ext.h" - -/* - * __db_lseek -- - * Seek to a page/byte offset in the file. - * - * PUBLIC: int __db_lseek __P((int, size_t, db_pgno_t, u_long, int)); - */ -int -__db_lseek(fd, pgsize, pageno, relative, whence) - int fd; - size_t pgsize; - db_pgno_t pageno; - u_long relative; - int whence; -{ - /* 64-bit offsets are done differently by different vendors. */ -#undef __LSEEK_SET -#ifdef HAVE_LLSEEK -#define __LSEEK_SET - offset_t offset; /* Solaris. */ - - offset = pgsize * pageno + relative; - return (llseek(fd, offset, whence) == -1 ? errno : 0); -#endif -#ifdef HAVE_LSEEKI -#define __LSEEK_SET - __int64 offset; /* WNT */ - - offset = pgsize * pageno + relative; - return (_lseeki64(fd, offset, whence) == -1 ? errno : 0); -#endif -#ifndef __LSEEK_SET - off_t offset; /* Default. */ - - offset = pgsize * pageno + relative; - return (lseek(fd, offset, whence) == -1 ? errno : 0); -#endif -} diff --git a/db2/os/db_os_mmap.c b/db2/os/db_os_mmap.c deleted file mode 100644 index 0cd8fad0b0..0000000000 --- a/db2/os/db_os_mmap.c +++ /dev/null @@ -1,106 +0,0 @@ -/*- - * See the file LICENSE for redistribution information. - * - * Copyright (c) 1996, 1997 - * Sleepycat Software. All rights reserved. - */ - -#include "config.h" - -#ifndef lint -static const char sccsid[] = "@(#)db_os_mmap.c 10.4 (Sleepycat) 6/28/97"; -#endif /* not lint */ - -#ifndef NO_SYSTEM_INCLUDES -#include <sys/types.h> -#include <sys/mman.h> - -#include <errno.h> -#endif - -#include "db_int.h" -#include "os_ext.h" - -/* - * __db_mmap -- - * Map in some shared memory backed by a file descriptor. 
- * - * PUBLIC: int __db_mmap __P((int, size_t, int, int, void *)); - */ -int -__db_mmap(fd, len, is_private, rdonly, addr) - int fd, is_private, rdonly; - size_t len; - void *addr; -{ -#ifdef _WIN32 - /* We have not implemented copy-on-write here */ - void * pMemory = 0; - HANDLE hFile = (HANDLE)_get_osfhandle(fd); - HANDLE hMemory = CreateFileMapping( - hFile, - 0, - (rdonly ? PAGE_READONLY : PAGE_READWRITE), - 0, - len, /* This code fails if the library is ever compiled on a 64-bit machine */ - 0 - ); - if (NULL == hMemory) - { - return errno; - } - pMemory = MapViewOfFile( - hMemory, - (rdonly ? FILE_MAP_READ : FILE_MAP_ALL_ACCESS), - 0, - 0, - len - ); - CloseHandle(hMemory); - *(void **)addr = pMemory; - return 0; - -#else /* !_WIN32 */ - - void *p; - int flags, prot; - - flags = is_private ? MAP_PRIVATE : MAP_SHARED; -#ifdef MAP_HASSEMAPHORE - flags += MAP_HASSEMAPHORE; -#endif - prot = PROT_READ | (rdonly ? 0 : PROT_WRITE); - -#ifndef MAP_FAILED /* XXX: Mmap(2) failure return. */ -#define MAP_FAILED -1 -#endif - if ((p = - mmap(NULL, len, prot, flags, fd, (off_t)0)) == (void *)MAP_FAILED) - return (errno); - - *(void **)addr = p; - return (0); -#endif /* _WIN32 */ -} - -/* - * __db_unmap -- - * Release the specified shared memory. - * - * PUBLIC: int __db_munmap __P((void *, size_t)); - */ -int -__db_munmap(addr, len) - void *addr; - size_t len; -{ - /* - * !!! - * The argument len is always the same length as was mapped. - */ -#ifdef _WIN32 - return (!UnmapViewOfFile(addr) ? errno : 0); -#else - return (munmap(addr, len) ? errno : 0); -#endif -} diff --git a/db2/os/os_abs.c b/db2/os/os_abs.c new file mode 100644 index 0000000000..872e46d058 --- /dev/null +++ b/db2/os/os_abs.c @@ -0,0 +1,31 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1997 + * Sleepycat Software. All rights reserved. 
+ */ + +#include "config.h" + +#ifndef lint +static const char sccsid[] = "@(#)os_abs.c 10.7 (Sleepycat) 10/24/97"; +#endif /* not lint */ + +#ifndef NO_SYSTEM_INCLUDES +#include <sys/types.h> +#endif + +#include "db_int.h" + +/* + * __db_abspath -- + * Return if a path is an absolute path. + * + * PUBLIC: int __db_abspath __P((const char *)); + */ +int +__db_abspath(path) + const char *path; +{ + return (path[0] == '/'); +} diff --git a/db2/os/os_dir.c b/db2/os/os_dir.c new file mode 100644 index 0000000000..10fb8b6739 --- /dev/null +++ b/db2/os/os_dir.c @@ -0,0 +1,100 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1997 + * Sleepycat Software. All rights reserved. + */ + +#include "config.h" + +#ifndef lint +static const char sccsid[] = "@(#)os_dir.c 10.13 (Sleepycat) 10/28/97"; +#endif /* not lint */ + +#ifndef NO_SYSTEM_INCLUDES +#include <sys/types.h> + +#if HAVE_DIRENT_H +# include <dirent.h> +# define NAMLEN(dirent) strlen((dirent)->d_name) +#else +# define dirent direct +# define NAMLEN(dirent) (dirent)->d_namlen +# if HAVE_SYS_NDIR_H +# include <sys/ndir.h> +# endif +# if HAVE_SYS_DIR_H +# include <sys/dir.h> +# endif +# if HAVE_NDIR_H +# include <ndir.h> +# endif +#endif + +#include <errno.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#endif + +#include "db_int.h" +#include "common_ext.h" + +/* + * __os_dirlist -- + * Return a list of the files in a directory. + * + * PUBLIC: int __os_dirlist __P((const char *, char ***, int *)); + */ +int +__os_dirlist(dir, namesp, cntp) + const char *dir; + char ***namesp; + int *cntp; +{ + struct dirent *dp; + DIR *dirp; + int arraysz, cnt; + char **names; + + if ((dirp = opendir(dir)) == NULL) + return (errno); + names = NULL; + for (arraysz = cnt = 0; (dp = readdir(dirp)) != NULL; ++cnt) { + if (cnt >= arraysz) { + arraysz += 100; + names = (char **)(names == NULL ? 
+ __db_malloc(arraysz * sizeof(names[0])) : + __db_realloc(names, arraysz * sizeof(names[0]))); + if (names == NULL) + goto nomem; + } + if ((names[cnt] = (char *)__db_strdup(dp->d_name)) == NULL) + goto nomem; + } + (void)closedir(dirp); + + *namesp = names; + *cntp = cnt; + return (0); + +nomem: if (names != NULL) + __os_dirfree(names, cnt); + return (ENOMEM); +} + +/* + * __os_dirfree -- + * Free the list of files. + * + * PUBLIC: void __os_dirfree __P((char **, int)); + */ +void +__os_dirfree(names, cnt) + char **names; + int cnt; +{ + while (cnt > 0) + __db_free(names[--cnt]); + __db_free(names); +} diff --git a/db2/os/db_os_fid.c b/db2/os/os_fid.c index 960d580bad..6820b88786 100644 --- a/db2/os/db_os_fid.c +++ b/db2/os/os_fid.c @@ -8,7 +8,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)db_os_fid.c 10.8 (Sleepycat) 8/27/97"; +static const char sccsid[] = "@(#)os_fid.c 10.9 (Sleepycat) 10/24/97"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -22,8 +22,6 @@ static const char sccsid[] = "@(#)db_os_fid.c 10.8 (Sleepycat) 8/27/97"; #endif #include "db_int.h" -#include "db_page.h" -#include "os_ext.h" #include "common_ext.h" /* @@ -39,58 +37,11 @@ __db_fileid(dbenv, fname, timestamp, fidp) int timestamp; u_int8_t *fidp; { + struct stat sb; size_t i; time_t now; u_int8_t *p; -#ifdef _WIN32 - /* - * The documentation for GetFileInformationByHandle() states that the - * inode-type numbers are not constant between processes. Actually, - * they are, they're the NTFS MFT indexes. So, this works on NTFS, - * but perhaps not on other platforms, and perhaps not over a network. - * Can't think of a better solution right now. - */ - int fd = 0; - HANDLE fh = 0; - BY_HANDLE_FILE_INFORMATION fi; - BOOL retval = FALSE; - - /* Clear the buffer. 
*/ - memset(fidp, 0, DB_FILE_ID_LEN); - - /* first we open the file, because we're not given a handle to it */ - fd = open(fname,_O_RDONLY,_S_IREAD); - if (-1 == fd) { - /* If we can't open it, we're in trouble */ - return (errno); - } - - /* File open, get its info */ - fh = (HANDLE)_get_osfhandle(fd); - if ((HANDLE)(-1) != fh) { - retval = GetFileInformationByHandle(fh,&fi); - } - close(fd); - - /* - * We want the three 32-bit words which tell us the volume ID and - * the file ID. We make a crude attempt to copy the bytes over to - * the callers buffer. - * - * DBDB: really we should ensure that the bytes get packed the same - * way on all compilers, platforms etc. - */ - if ( ((HANDLE)(-1) != fh) && (TRUE == retval) ) { - memcpy(fidp, &fi.nFileIndexLow, sizeof(u_int32_t)); - fidp += sizeof(u_int32_t); - memcpy(fidp, &fi.nFileIndexHigh, sizeof(u_int32_t)); - fidp += sizeof(u_int32_t); - memcpy(fidp, &fi.dwVolumeSerialNumber, sizeof(u_int32_t)); - } -#else - struct stat sb; - /* Clear the buffer. */ memset(fidp, 0, DB_FILE_ID_LEN); @@ -115,7 +66,7 @@ __db_fileid(dbenv, fname, timestamp, fidp) for (p = (u_int8_t *)&sb.st_dev + sizeof(sb.st_dev), i = 0; i < sizeof(sb.st_dev); ++i) *fidp++ = *--p; -#endif + if (timestamp) { (void)time(&now); for (p = (u_int8_t *)&now + diff --git a/db2/os/os_fsync.c b/db2/os/os_fsync.c new file mode 100644 index 0000000000..7b001ceeb0 --- /dev/null +++ b/db2/os/os_fsync.c @@ -0,0 +1,34 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1997 + * Sleepycat Software. All rights reserved. + */ + +#include "config.h" + +#ifndef lint +static const char sccsid[] = "@(#)os_fsync.c 10.3 (Sleepycat) 10/25/97"; +#endif /* not lint */ + +#ifndef NO_SYSTEM_INCLUDES +#include <sys/types.h> + +#include <errno.h> +#include <unistd.h> +#endif + +#include "db_int.h" + +/* + * __db_fsync -- + * Flush a file descriptor. 
+ * + * PUBLIC: int __db_fsync __P((int)); + */ +int +__db_fsync(fd) + int fd; +{ + return (__os_fsync(fd) ? errno : 0); +} diff --git a/db2/os/os_func.c b/db2/os/os_func.c new file mode 100644 index 0000000000..afd40f4624 --- /dev/null +++ b/db2/os/os_func.c @@ -0,0 +1,153 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1997 + * Sleepycat Software. All rights reserved. + */ + +#include "config.h" + +#ifndef lint +static const char sccsid[] = "@(#)os_func.c 10.4 (Sleepycat) 10/28/97"; +#endif /* not lint */ + +#ifndef NO_SYSTEM_INCLUDES +#include <sys/types.h> + +#include <errno.h> +#endif + +#include "db_int.h" + +/* + * XXX + * We provide our own extern declarations so that we don't collide with + * systems that get them wrong, e.g., SunOS. + */ +#ifdef _WIN32 +#define fsync _commit +#define imported __declspec(dllimport) +#else +#define imported +#endif + +imported extern void *calloc __P((size_t, size_t)); +imported extern int close __P((int)); +imported extern void free __P((void *)); +imported extern int fsync __P((int)); +imported extern void *malloc __P((size_t)); +imported extern int open __P((const char *, int, ...)); +imported extern ssize_t read __P((int, void *, size_t)); +imported extern char *strdup __P((const char *)); +imported extern void *realloc __P((void *, size_t)); +imported extern int unlink __P((const char *)); +imported extern ssize_t write __P((int, const void *, size_t)); + +/* + * __db_jump -- + * This list of interfaces that applications can replace. In some + * cases, the user is permitted to replace the standard ANSI C or + * POSIX 1003.1 call, e.g., calloc or read. In others, we provide + * a local interface to the functionality, e.g., __os_map. 
+ */ +struct __db_jumptab __db_jump = { + calloc, /* DB_FUNC_CALLOC */ + close, /* DB_FUNC_CLOSE */ + __os_dirfree, /* DB_FUNC_DIRFREE */ + __os_dirlist, /* DB_FUNC_DIRLIST */ + __os_exists, /* DB_FUNC_EXISTS */ + free, /* DB_FUNC_FREE */ + fsync, /* DB_FUNC_FSYNC */ + __os_ioinfo, /* DB_FUNC_IOINFO */ + malloc, /* DB_FUNC_MALLOC */ + __os_map, /* DB_FUNC_MAP */ + open, /* DB_FUNC_OPEN */ + read, /* DB_FUNC_READ */ + realloc, /* DB_FUNC_REALLOC */ + __os_seek, /* DB_FUNC_SEEK */ + __os_sleep, /* DB_FUNC_SLEEP */ + strdup, /* DB_FUNC_STRDUP */ + unlink, /* DB_FUNC_UNLINK */ + __os_unmap, /* DB_FUNC_UNMAP */ + write, /* DB_FUNC_WRITE */ + NULL /* DB_FUNC_YIELD */ +}; + +/* + * db_jump_set -- + * Replace an interface. + */ +int +db_jump_set(func, which) + void *func; + int which; +{ + switch (which) { + case DB_FUNC_CALLOC: + __db_calloc = (void *(*) __P((size_t, size_t)))func; + break; + case DB_FUNC_CLOSE: + __os_close = (int (*) __P((int)))func; + break; + case DB_FUNC_DIRFREE: + __db_dirfree = (void (*) __P((char **, int)))func; + break; + case DB_FUNC_DIRLIST: + __db_dirlist = + (int (*) __P((const char *, char ***, int *)))func; + break; + case DB_FUNC_EXISTS: + __db_exists = (int (*) __P((const char *, int *)))func; + break; + case DB_FUNC_FREE: + __db_free = (void (*) __P((void *)))func; + break; + case DB_FUNC_FSYNC: + __os_fsync = (int (*) __P((int)))func; + break; + case DB_FUNC_IOINFO: + __db_ioinfo = + (int (*) __P((const char *, int, off_t *, off_t *)))func; + break; + case DB_FUNC_MALLOC: + __db_malloc = (void *(*) __P((size_t)))func; + break; + case DB_FUNC_MAP: + __db_map = (int (*) __P((int, size_t, int, int, void **)))func; + break; + case DB_FUNC_OPEN: + __os_open = (int (*) __P((const char *, int, ...)))func; + break; + case DB_FUNC_READ: + __os_read = (ssize_t (*) __P((int, void *, size_t)))func; + break; + case DB_FUNC_REALLOC: + __db_realloc = (void *(*) __P((void *, size_t)))func; + break; + case DB_FUNC_SEEK: + __db_seek = + (int (*) 
__P((int, size_t, db_pgno_t, u_long, int)))func; + break; + case DB_FUNC_SLEEP: + __db_sleep = (int (*) __P((u_long, u_long)))func; + break; + case DB_FUNC_STRDUP: + __db_strdup = (char *(*) __P((const char *)))func; + break; + case DB_FUNC_UNLINK: + __os_unlink = (int (*) __P((const char *)))func; + break; + case DB_FUNC_UNMAP: + __db_unmap = (int (*) __P((void *, size_t)))func; + break; + case DB_FUNC_WRITE: + __os_write = (ssize_t (*) __P((int, const void *, size_t)))func; + break; + case DB_FUNC_YIELD: + __db_yield = (int (*) __P((void)))func; + break; + default: + return (EINVAL); + } + return (0); +} diff --git a/db2/os/os_map.c b/db2/os/os_map.c new file mode 100644 index 0000000000..b1553188dc --- /dev/null +++ b/db2/os/os_map.c @@ -0,0 +1,71 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 1997 + * Sleepycat Software. All rights reserved. + */ + +#include "config.h" + +#ifndef lint +static const char sccsid[] = "@(#)os_map.c 10.7 (Sleepycat) 10/25/97"; +#endif /* not lint */ + +#ifndef NO_SYSTEM_INCLUDES +#include <sys/types.h> +#include <sys/mman.h> + +#include <errno.h> +#endif + +#include "db_int.h" + +/* + * __os_map -- + * Map in some shared memory backed by a file descriptor. + * + * PUBLIC: int __os_map __P((int, size_t, int, int, void **)); + */ +int +__os_map(fd, len, is_private, is_rdonly, addr) + int fd, is_private, is_rdonly; + size_t len; + void **addr; +{ + void *p; + int flags, prot; + + flags = is_private ? MAP_PRIVATE : MAP_SHARED; +#ifdef MAP_HASSEMAPHORE + flags |= MAP_HASSEMAPHORE; +#endif + prot = PROT_READ | (is_rdonly ? 0 : PROT_WRITE); + +#ifndef MAP_FAILED /* XXX: Mmap(2) failure return. */ +#define MAP_FAILED -1 +#endif + if ((p = + mmap(NULL, len, prot, flags, fd, (off_t)0)) == (void *)MAP_FAILED) + return (errno); + + *addr = p; + return (0); +} + +/* + * __os_unmap -- + * Release the specified shared memory. 
+ * + * PUBLIC: int __os_unmap __P((void *, size_t)); + */ +int +__os_unmap(addr, len) + void *addr; + size_t len; +{ + /* + * !!! + * The argument len is always the same length as was mapped. + */ + return (munmap(addr, len) ? errno : 0); +} diff --git a/db2/os/os_oflags.c b/db2/os/os_oflags.c new file mode 100644 index 0000000000..3656eef1c4 --- /dev/null +++ b/db2/os/os_oflags.c @@ -0,0 +1,48 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1997 + * Sleepycat Software. All rights reserved. + */ + +#include "config.h" + +#ifndef lint +static const char sccsid[] = "@(#)os_oflags.c 10.2 (Sleepycat) 10/24/97"; +#endif /* not lint */ + +#ifndef NO_SYSTEM_INCLUDES +#include <sys/types.h> + +#include <fcntl.h> +#endif + +#include "db_int.h" + +/* + * __db_oflags -- + * Convert open(2) flags to DB flags. + * + * PUBLIC: int __db_oflags __P((int)); + */ +int +__db_oflags(oflags) + int oflags; +{ + int dbflags; + + /* + * XXX + * Convert POSIX 1003.1 open(2) flags to DB flags. Not an exact + * science as most POSIX implementations don't have a flag value + * for O_RDONLY, it's simply the lack of a write flag. + */ + dbflags = 0; + if (oflags & O_CREAT) + dbflags |= DB_CREATE; + if (!(oflags & (O_RDWR | O_WRONLY)) || oflags & O_RDONLY) + dbflags |= DB_RDONLY; + if (oflags & O_TRUNC) + dbflags |= DB_TRUNCATE; + return (dbflags); +} diff --git a/db2/os/db_os_open.c b/db2/os/os_open.c index 1d67ef9508..05784e4810 100644 --- a/db2/os/db_os_open.c +++ b/db2/os/os_open.c @@ -8,7 +8,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)db_os_open.c 10.14 (Sleepycat) 7/5/97"; +static const char sccsid[] = "@(#)os_open.c 10.19 (Sleepycat) 10/28/97"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -20,44 +20,15 @@ static const char sccsid[] = "@(#)db_os_open.c 10.14 (Sleepycat) 7/5/97"; #endif #include "db_int.h" -#include "os_ext.h" /* - * __db_oflags -- - * Convert open(2) flags to DB flags. 
- * - * PUBLIC: int __db_oflags __P((int)); - */ -int -__db_oflags(oflags) - int oflags; -{ - int dbflags; - - /* - * XXX - * Convert POSIX 1003.1 open(2) flags to DB flags. Not an exact - * science as most POSIX implementations don't have a flag value - * for O_RDONLY, it's simply the lack of a write flag. - */ - dbflags = 0; - if (oflags & O_CREAT) - dbflags |= DB_CREATE; - if (!(oflags & (O_RDWR | O_WRONLY)) || oflags & O_RDONLY) - dbflags |= DB_RDONLY; - if (oflags & O_TRUNC) - dbflags |= DB_TRUNCATE; - return (dbflags); -} - -/* - * __db_fdopen -- + * __db_open -- * Open a file descriptor. * - * PUBLIC: int __db_fdopen __P((const char *, int, int, int, int *)); + * PUBLIC: int __db_open __P((const char *, int, int, int, int *)); */ int -__db_fdopen(name, arg_flags, ok_flags, mode, fdp) +__db_open(name, arg_flags, ok_flags, mode, fdp) const char *name; int arg_flags, ok_flags, mode, *fdp; { @@ -95,13 +66,13 @@ __db_fdopen(name, arg_flags, ok_flags, mode, fdp) flags |= O_TRUNC; /* Open the file. */ - if ((fd = open(name, flags, mode)) == -1) + if ((fd = __os_open(name, flags, mode)) == -1) return (errno); #ifndef _WIN32 /* Delete any temporary file; done for Win32 by _O_TEMPORARY. */ if (arg_flags & DB_TEMPORARY) - (void)unlink(name); + (void)__os_unlink(name); #endif #if !defined(_WIN32) && !defined(macintosh) @@ -112,7 +83,7 @@ __db_fdopen(name, arg_flags, ok_flags, mode, fdp) if (fcntl(fd, F_SETFD, 1) == -1) { int ret = errno; - (void)__db_close(fd); + (void)__os_close(fd); return (ret); } #endif @@ -121,19 +92,6 @@ __db_fdopen(name, arg_flags, ok_flags, mode, fdp) } /* - * __db_fsync -- - * Flush a file descriptor. - * - * PUBLIC: int __db_fsync __P((int)); - */ -int -__db_fsync(fd) - int fd; -{ - return (fsync(fd) ? errno : 0); -} - -/* * __db_close -- * Close a file descriptor. * @@ -143,5 +101,5 @@ int __db_close(fd) int fd; { - return (close(fd) ? errno : 0); + return (__os_close(fd) ? 
errno : 0); } diff --git a/db2/os/os_rpath.c b/db2/os/os_rpath.c new file mode 100644 index 0000000000..44fd4ec9f4 --- /dev/null +++ b/db2/os/os_rpath.c @@ -0,0 +1,42 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1997 + * Sleepycat Software. All rights reserved. + */ + +#include "config.h" + +#ifndef lint +static const char sccsid[] = "@(#)os_rpath.c 10.2 (Sleepycat) 10/24/97"; +#endif /* not lint */ + +#ifndef NO_SYSTEM_INCLUDES +#include <string.h> +#endif + +#include "db_int.h" + +/* + * __db_rpath -- + * Return the last path separator in the path or NULL if none found. + * + * PUBLIC: char *__db_rpath __P((const char *)); + */ +char * +__db_rpath(path) + const char *path; +{ + const char *s, *last; + + last = NULL; + if (PATH_SEPARATOR[1] != '\0') { + for (s = path; s[0] != '\0'; ++s) + if (strchr(PATH_SEPARATOR, s[0]) != NULL) + last = s; + } else + for (s = path; s[0] != '\0'; ++s) + if (s[0] == PATH_SEPARATOR[0]) + last = s; + return ((char *)last); +} diff --git a/db2/os/db_os_rw.c b/db2/os/os_rw.c index 5a6c2196fd..48f7fdc5b1 100644 --- a/db2/os/db_os_rw.c +++ b/db2/os/os_rw.c @@ -8,7 +8,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)db_os_rw.c 10.4 (Sleepycat) 6/28/97"; +static const char sccsid[] = "@(#)os_rw.c 10.6 (Sleepycat) 10/25/97"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -19,7 +19,6 @@ static const char sccsid[] = "@(#)db_os_rw.c 10.4 (Sleepycat) 6/28/97"; #endif #include "db_int.h" -#include "os_ext.h" /* * __db_read -- @@ -40,7 +39,7 @@ __db_read(fd, addr, len, nrp) for (taddr = addr, offset = 0; offset < len; taddr += nr, offset += nr) { - if ((nr = read(fd, taddr, len - offset)) < 0) + if ((nr = __os_read(fd, taddr, len - offset)) < 0) return (errno); if (nr == 0) break; @@ -68,7 +67,7 @@ __db_write(fd, addr, len, nwp) for (taddr = addr, offset = 0; offset < len; taddr += nw, offset += nw) - if ((nw = write(fd, taddr, len - offset)) < 0) + if ((nw = __os_write(fd, 
taddr, len - offset)) < 0) return (errno); *nwp = len; return (0); diff --git a/db2/os/os_seek.c b/db2/os/os_seek.c new file mode 100644 index 0000000000..e27044b626 --- /dev/null +++ b/db2/os/os_seek.c @@ -0,0 +1,42 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1997 + * Sleepycat Software. All rights reserved. + */ + +#include "config.h" + +#ifndef lint +static const char sccsid[] = "@(#)os_seek.c 10.6 (Sleepycat) 10/25/97"; +#endif /* not lint */ + +#ifndef NO_SYSTEM_INCLUDES +#include <sys/types.h> + +#include <errno.h> +#include <unistd.h> +#endif + +#include "db_int.h" + +/* + * __os_seek -- + * Seek to a page/byte offset in the file. + * + * PUBLIC: int __os_seek __P((int, size_t, db_pgno_t, u_long, int)); + */ +int +__os_seek(fd, pgsize, pageno, relative, whence) + int fd; + size_t pgsize; + db_pgno_t pageno; + u_long relative; + int whence; +{ + off_t offset; + + offset = pgsize * pageno + relative; + + return (lseek(fd, offset, whence) == -1 ? errno : 0); +} diff --git a/db2/os/db_os_sleep.c b/db2/os/os_sleep.c index 5591789f51..2d2cb71f6d 100644 --- a/db2/os/db_os_sleep.c +++ b/db2/os/os_sleep.c @@ -8,7 +8,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)db_os_sleep.c 10.6 (Sleepycat) 6/28/97"; +static const char sccsid[] = "@(#)os_sleep.c 10.8 (Sleepycat) 10/25/97"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -28,21 +28,18 @@ static const char sccsid[] = "@(#)db_os_sleep.c 10.6 (Sleepycat) 6/28/97"; #endif #include "db_int.h" -#include "os_ext.h" /* - * __db_sleep -- + * __os_sleep -- * Yield the processor for a period of time. * - * PUBLIC: int __db_sleep __P((u_long, u_long)); + * PUBLIC: int __os_sleep __P((u_long, u_long)); */ int -__db_sleep(secs, usecs) +__os_sleep(secs, usecs) u_long secs, usecs; /* Seconds and microseconds. */ { -#ifndef _WIN32 struct timeval t; -#endif /* Don't require that the values be normalized. 
*/ for (; usecs >= 1000000; ++secs, usecs -= 1000000); @@ -51,12 +48,7 @@ __db_sleep(secs, usecs) * It's important that we yield the processor here so that other * processes or threads are permitted to run. */ -#ifdef _WIN32 - Sleep(secs * 1000 + usecs / 1000); - return (0); -#else t.tv_sec = secs; t.tv_usec = usecs; return (select(0, NULL, NULL, NULL, &t) == -1 ? errno : 0); -#endif } diff --git a/db2/os/db_os_stat.c b/db2/os/os_stat.c index 7929b6b754..ee84ab0588 100644 --- a/db2/os/db_os_stat.c +++ b/db2/os/os_stat.c @@ -8,7 +8,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)db_os_stat.c 10.6 (Sleepycat) 7/2/97"; +static const char sccsid[] = "@(#)os_stat.c 10.8 (Sleepycat) 10/25/97"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -20,17 +20,16 @@ static const char sccsid[] = "@(#)db_os_stat.c 10.6 (Sleepycat) 7/2/97"; #endif #include "db_int.h" -#include "os_ext.h" #include "common_ext.h" /* - * __db_exists -- + * __os_exists -- * Return if the file exists. * - * PUBLIC: int __db_exists __P((const char *, int *)); + * PUBLIC: int __os_exists __P((const char *, int *)); */ int -__db_exists(path, isdirp) +__os_exists(path, isdirp) const char *path; int *isdirp; { @@ -44,25 +43,22 @@ __db_exists(path, isdirp) } /* - * __db_stat -- + * __os_ioinfo -- * Return file size and I/O size; abstracted to make it easier * to replace. * - * PUBLIC: int __db_stat __P((DB_ENV *, const char *, int, off_t *, off_t *)); + * PUBLIC: int __os_ioinfo __P((const char *, int, off_t *, off_t *)); */ int -__db_stat(dbenv, path, fd, sizep, iop) - DB_ENV *dbenv; +__os_ioinfo(path, fd, sizep, iop) const char *path; int fd; off_t *sizep, *iop; { struct stat sb; - if (fstat(fd, &sb) == -1) { - __db_err(dbenv, "%s: fstat: %s", path, strerror(errno)); + if (fstat(fd, &sb) == -1) return (errno); - } /* Return the size of the file. 
*/ if (sizep != NULL) diff --git a/db2/os/db_os_unlink.c b/db2/os/os_unlink.c index 872beba3cf..473ce77d39 100644 --- a/db2/os/db_os_unlink.c +++ b/db2/os/os_unlink.c @@ -8,7 +8,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)db_os_unlink.c 10.2 (Sleepycat) 6/28/97"; +static const char sccsid[] = "@(#)os_unlink.c 10.4 (Sleepycat) 10/28/97"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -19,7 +19,6 @@ static const char sccsid[] = "@(#)db_os_unlink.c 10.2 (Sleepycat) 6/28/97"; #endif #include "db_int.h" -#include "os_ext.h" /* * __db_unlink -- @@ -31,5 +30,5 @@ int __db_unlink(path) const char *path; { - return (unlink(path) == -1 ? errno : 0); + return (__os_unlink(path) == -1 ? errno : 0); } diff --git a/db2/progs/db_deadlock/db_deadlock.c b/db2/progs/db_deadlock/db_deadlock.c index ec2b53dee7..473e5b9cb2 100644 --- a/db2/progs/db_deadlock/db_deadlock.c +++ b/db2/progs/db_deadlock/db_deadlock.c @@ -11,7 +11,7 @@ static const char copyright[] = "@(#) Copyright (c) 1997\n\ Sleepycat Software Inc. 
All rights reserved.\n"; -static const char sccsid[] = "@(#)db_deadlock.c 10.15 (Sleepycat) 9/4/97"; +static const char sccsid[] = "@(#)db_deadlock.c 10.16 (Sleepycat) 10/14/97"; #endif #ifndef NO_SYSTEM_INCLUDES @@ -53,13 +53,13 @@ main(argc, argv) DB_ENV *dbenv; u_int32_t atype; time_t now; - long seconds; + long usecs; int ch, flags, verbose; char *home, *logfile; atype = DB_LOCK_DEFAULT; home = logfile = NULL; - seconds = 0; + usecs = 0; flags = verbose = 0; while ((ch = getopt(argc, argv, "a:h:L:t:vw")) != EOF) switch (ch) { @@ -85,7 +85,8 @@ main(argc, argv) logfile = optarg; break; case 't': - get_long(optarg, 1, LONG_MAX, &seconds); + get_long(optarg, 1, LONG_MAX, &usecs); + usecs *= 1000000; break; case 'v': verbose = 1; @@ -103,16 +104,17 @@ main(argc, argv) if (argc != 0) usage(); - if (seconds == 0 && !LF_ISSET(DB_LOCK_CONFLICT)) { + if (usecs == 0 && !LF_ISSET(DB_LOCK_CONFLICT)) { warnx("at least one of -t and -w must be specified"); usage(); } /* - * We detect every second when we're running in DB_LOCK_CONFLICT mode. + * We detect every 100ms (100000 us) when we're running in + * DB_LOCK_CONFLICT mode. */ - if (seconds == 0) - seconds = 1; + if (usecs == 0) + usecs = 100000; /* Initialize the deadlock detector by opening the lock manager. */ dbenv = db_init(home, verbose); @@ -125,14 +127,14 @@ main(argc, argv) while (!interrupted) { if (dbenv->db_verbose != 0) { time(&now); - __db_err(dbenv, "Running at %s", ctime(&now)); + __db_err(dbenv, "Running at %.24s", ctime(&now)); } if ((errno = lock_detect(dbenv->lk_info, flags, atype)) != 0) break; - /* Make a pass every "seconds" seconds. */ - (void)__db_sleep(seconds, 0); + /* Make a pass every "usecs" usecs. 
*/ + (void)__db_sleep(0, usecs); } if (logfile != NULL) diff --git a/db2/progs/db_dump/db_dump.c b/db2/progs/db_dump/db_dump.c index a0f60c69a6..c09719059b 100644 --- a/db2/progs/db_dump/db_dump.c +++ b/db2/progs/db_dump/db_dump.c @@ -31,6 +31,8 @@ static const char sccsid[] = "@(#)db_dump.c 10.16 (Sleepycat) 8/27/97"; #include "hash.h" #include "clib_ext.h" +#undef stat + void configure __P((char *)); DB_ENV *db_init __P((char *)); void dbt_dump __P((DBT *)); diff --git a/db2/progs/db_load/db_load.c b/db2/progs/db_load/db_load.c index a1ebfa8a11..6597f10e10 100644 --- a/db2/progs/db_load/db_load.c +++ b/db2/progs/db_load/db_load.c @@ -11,7 +11,7 @@ static const char copyright[] = "@(#) Copyright (c) 1997\n\ Sleepycat Software Inc. All rights reserved.\n"; -static const char sccsid[] = "@(#)db_load.c 10.13 (Sleepycat) 9/15/97"; +static const char sccsid[] = "@(#)db_load.c 10.14 (Sleepycat) 10/19/97"; #endif #ifndef NO_SYSTEM_INCLUDES @@ -352,7 +352,7 @@ dbt_rprint(dbtp) escape = 1; continue; } - if (++len >= dbtp->ulen - 10) { + if (len >= dbtp->ulen - 10) { dbtp->ulen *= 2; if ((dbtp->data = (void *)realloc(dbtp->data, dbtp->ulen)) == NULL) { @@ -361,6 +361,7 @@ dbt_rprint(dbtp) } p = (u_int8_t *)dbtp->data + len; } + ++len; *p++ = c1; } dbtp->size = len; @@ -420,7 +421,7 @@ dbt_rdump(dbtp) } if ((c2 = getchar()) == EOF) err(1, "unexpected end of key/data pair"); - if (++len >= dbtp->ulen - 10) { + if (len >= dbtp->ulen - 10) { dbtp->ulen *= 2; if ((dbtp->data = (void *)realloc(dbtp->data, dbtp->ulen)) == NULL) { @@ -429,6 +430,7 @@ dbt_rdump(dbtp) } p = (u_int8_t *)dbtp->data + len; } + ++len; *p++ = digitize(c1) << 4 | digitize(c2); } dbtp->size = len; diff --git a/db2/progs/db_recover/db_recover.c b/db2/progs/db_recover/db_recover.c index 55b9b49a79..5a39d320f8 100644 --- a/db2/progs/db_recover/db_recover.c +++ b/db2/progs/db_recover/db_recover.c @@ -11,7 +11,7 @@ static const char copyright[] = "@(#) Copyright (c) 1997\n\ Sleepycat Software Inc. 
All rights reserved.\n"; -static const char sccsid[] = "@(#)db_recover.c 10.15 (Sleepycat) 9/21/97"; +static const char sccsid[] = "@(#)db_recover.c 10.16 (Sleepycat) 10/28/97"; #endif #ifndef NO_SYSTEM_INCLUDES @@ -82,7 +82,7 @@ main(argc, argv) (u_long)dbenv->tx_info->region->last_ckp.offset); } - exit (db_appexit(dbenv)); + return (db_appexit(dbenv)); } DB_ENV * diff --git a/db2/progs/db_stat/db_stat.c b/db2/progs/db_stat/db_stat.c index 1a989f4df3..b1f1615fa9 100644 --- a/db2/progs/db_stat/db_stat.c +++ b/db2/progs/db_stat/db_stat.c @@ -11,7 +11,7 @@ static const char copyright[] = "@(#) Copyright (c) 1997\n\ Sleepycat Software Inc. All rights reserved.\n"; -static const char sccsid[] = "@(#)db_stat.c 8.20 (Sleepycat) 8/27/97"; +static const char sccsid[] = "@(#)db_stat.c 8.26 (Sleepycat) 11/2/97"; #endif #ifndef NO_SYSTEM_INCLUDES @@ -29,18 +29,22 @@ static const char sccsid[] = "@(#)db_stat.c 8.20 (Sleepycat) 8/27/97"; #include "db_int.h" #include "clib_ext.h" +#undef stat + +#define MB 1048576 #define DIVIDER "=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=" -typedef enum { T_NOTSET, T_DB, T_MPOOL, T_TXN } test_t; +typedef enum { T_NOTSET, T_DB, T_LOG, T_MPOOL, T_TXN } test_t; -void bstat __P((DB *)); +void btree_stats __P((DB *)); DB_ENV *db_init __P((char *, test_t)); -void hstat __P((DB *)); +void hash_stats __P((DB *)); int main __P((int, char *[])); -void mstat __P((DB_ENV *)); +void log_stats __P((DB_ENV *)); +void mpool_stats __P((DB_ENV *)); void onint __P((int)); void prflags __P((u_int32_t, const FN *)); -void tstat __P((DB_ENV *)); +void txn_stats __P((DB_ENV *)); int txn_compare __P((const void *, const void *)); void usage __P((void)); @@ -63,7 +67,7 @@ main(argc, argv) ttype = T_NOTSET; db = home = NULL; - while ((ch = getopt(argc, argv, "d:h:mt")) != EOF) + while ((ch = getopt(argc, argv, "d:h:lmt")) != EOF) switch (ch) { case 'd': db = optarg; @@ -72,6 +76,9 @@ main(argc, argv) case 'h': home = optarg; break; + case 'l': + ttype = T_LOG; + 
break; case 'm': ttype = T_MPOOL; break; @@ -100,10 +107,10 @@ main(argc, argv) switch (dbp->type) { case DB_BTREE: case DB_RECNO: - bstat(dbp); + btree_stats(dbp); break; case DB_HASH: - hstat(dbp); + hash_stats(dbp); break; case DB_UNKNOWN: abort(); /* Impossible. */ @@ -111,11 +118,14 @@ main(argc, argv) } (void)dbp->close(dbp, 0); break; + case T_LOG: + log_stats(dbenv); + break; case T_MPOOL: - mstat(dbenv); + mpool_stats(dbenv); break; case T_TXN: - tstat(dbenv); + txn_stats(dbenv); break; case T_NOTSET: abort(); /* Impossible. */ @@ -133,11 +143,11 @@ main(argc, argv) } /* - * bstat -- + * btree_stats -- * Display btree/recno statistics. */ void -bstat(dbp) +btree_stats(dbp) DB *dbp; { static const FN fn[] = { @@ -156,6 +166,8 @@ bstat(dbp) (t == 0 ? 0 : \ (((double)((t * sp->bt_pagesize) - f) / (t * sp->bt_pagesize)) * 100)) + printf("%#lx\tBtree magic number.\n", (u_long)sp->bt_magic); + printf("%lu\tBtree version number.\n", (u_long)sp->bt_version); prflags(sp->bt_flags, fn); if (dbp->type == DB_BTREE) { #ifdef NOT_IMPLEMENTED @@ -213,22 +225,56 @@ printf("%lu\tNumber of bytes free in tree overflow pages (%.0f%% ff).\n", } /* - * hstat -- + * hash_stats -- * Display hash statistics. */ void -hstat(dbp) +hash_stats(dbp) DB *dbp; { return; } /* - * mstat -- + * log_stats -- + * Display log statistics. 
+ */ +void +log_stats(dbenv) + DB_ENV *dbenv; +{ + DB_LOG_STAT *sp; + + if (log_stat(dbenv->lg_info, &sp, NULL)) + err(1, NULL); + + printf("%#lx\tLog magic number.\n", (u_long)sp->st_magic); + printf("%lu\tLog version number.\n", (u_long)sp->st_version); + printf("%#o\tLog file mode.\n", sp->st_mode); + if (sp->st_lg_max % MB == 0) + printf("%luMb\tLog file size.\n", (u_long)sp->st_lg_max / MB); + else if (sp->st_lg_max % 1024 == 0) + printf("%luKb\tLog file size.\n", (u_long)sp->st_lg_max / 1024); + else + printf("%lu\tLog file size.\n", (u_long)sp->st_lg_max); + printf("%luMb\tLog bytes written (+%lu bytes).\n", + (u_long)sp->st_w_mbytes, (u_long)sp->st_w_bytes); + printf("%luMb\tLog bytes written since last checkpoint (+%lu bytes).\n", + (u_long)sp->st_wc_mbytes, (u_long)sp->st_wc_bytes); + printf("%lu\tTotal log file writes.\n", (u_long)sp->st_wcount); + printf("%lu\tTotal log file flushes.\n", (u_long)sp->st_scount); + printf("%lu\tThe number of region locks granted without waiting.\n", + (u_long)sp->st_region_nowait); + printf("%lu\tThe number of region locks granted after waiting.\n", + (u_long)sp->st_region_wait); +} + +/* + * mpool_stats -- * Display mpool statistics. 
*/ void -mstat(dbenv) +mpool_stats(dbenv) DB_ENV *dbenv; { DB_MPOOL_FSTAT **fsp; @@ -239,62 +285,75 @@ mstat(dbenv) printf("%lu\tCache size (%luK).\n", (u_long)gsp->st_cachesize, (u_long)gsp->st_cachesize / 1024); - printf("%lu\tRequested pages found in the cache", gsp->st_cache_hit); + printf("%lu\tRequested pages found in the cache", + (u_long)gsp->st_cache_hit); if (gsp->st_cache_hit + gsp->st_cache_miss != 0) printf(" (%.0f%%)", ((double)gsp->st_cache_hit / (gsp->st_cache_hit + gsp->st_cache_miss)) * 100); printf(".\n"); printf("%lu\tRequested pages mapped into the process' address space.\n", - gsp->st_map); + (u_long)gsp->st_map); printf("%lu\tRequested pages not found in the cache.\n", - gsp->st_cache_miss); - printf("%lu\tPages created in the cache.\n", gsp->st_page_create); - printf("%lu\tPages read into the cache.\n", gsp->st_page_in); + (u_long)gsp->st_cache_miss); + printf("%lu\tPages created in the cache.\n", + (u_long)gsp->st_page_create); + printf("%lu\tPages read into the cache.\n", (u_long)gsp->st_page_in); printf("%lu\tPages written from the cache to the backing file.\n", - gsp->st_page_out); - printf("%lu\tRead-only pages forced from the cache.\n", - gsp->st_ro_evict); - printf("%lu\tRead-write pages forced from the cache.\n", - gsp->st_rw_evict); + (u_long)gsp->st_page_out); + printf("%lu\tClean pages forced from the cache.\n", + (u_long)gsp->st_ro_evict); + printf("%lu\tDirty pages forced from the cache.\n", + (u_long)gsp->st_rw_evict); + printf("%lu\tDirty buffers written by trickle-sync thread.\n", + (u_long)gsp->st_page_trickle); + printf("%lu\tCurrent clean buffer count.\n", + (u_long)gsp->st_page_clean); + printf("%lu\tCurrent dirty buffer count.\n", + (u_long)gsp->st_page_dirty); printf("%lu\tNumber of hash buckets used for page location.\n", - gsp->st_hash_buckets); + (u_long)gsp->st_hash_buckets); printf("%lu\tTotal number of times hash chains searched for a page.\n", - gsp->st_hash_searches); + (u_long)gsp->st_hash_searches); 
printf("%lu\tThe longest hash chain searched for a page.\n", - gsp->st_hash_longest); + (u_long)gsp->st_hash_longest); printf( "%lu\tTotal number of hash buckets examined for page location.\n", - gsp->st_hash_examined); + (u_long)gsp->st_hash_examined); + printf("%lu\tThe number of region locks granted without waiting.\n", + (u_long)gsp->st_region_nowait); + printf("%lu\tThe number of region locks granted after waiting.\n", + (u_long)gsp->st_region_wait); for (; fsp != NULL && *fsp != NULL; ++fsp) { printf("%s\n", DIVIDER); printf("%s\n", (*fsp)->file_name); printf("%lu\tPage size.\n", (u_long)(*fsp)->st_pagesize); printf("%lu\tRequested pages found in the cache", - (*fsp)->st_cache_hit); + (u_long)(*fsp)->st_cache_hit); if ((*fsp)->st_cache_hit + (*fsp)->st_cache_miss != 0) printf(" (%.0f%%)", ((double)(*fsp)->st_cache_hit / ((*fsp)->st_cache_hit + (*fsp)->st_cache_miss)) * 100); printf(".\n"); printf("%lu\tRequested pages mapped into the process' address space.\n", - (*fsp)->st_map); + (u_long)(*fsp)->st_map); printf("%lu\tRequested pages not found in the cache.\n", - (*fsp)->st_cache_miss); + (u_long)(*fsp)->st_cache_miss); printf("%lu\tPages created in the cache.\n", - (*fsp)->st_page_create); - printf("%lu\tPages read into the cache.\n", (*fsp)->st_page_in); + (u_long)(*fsp)->st_page_create); + printf("%lu\tPages read into the cache.\n", + (u_long)(*fsp)->st_page_in); printf("%lu\tPages written from the cache to the backing file.\n", - (*fsp)->st_page_out); + (u_long)(*fsp)->st_page_out); } } /* - * tstat -- + * txn_stats -- * Display transaction statistics. */ void -tstat(dbenv) +txn_stats(dbenv) DB_ENV *dbenv; { DB_TXN_STAT *tstat; @@ -311,7 +370,7 @@ tstat(dbenv) p = tstat->st_pending_ckp.file == 0 ? "No pending checkpoint LSN." 
: "File/offset for last pending checkpoint LSN."; - printf("%lu/%lu\t%s.\n", + printf("%lu/%lu\t%s\n", (u_long)tstat->st_pending_ckp.file, (u_long)tstat->st_pending_ckp.offset, p); if (tstat->st_time_ckp == 0) @@ -391,27 +450,58 @@ db_init(home, ttype) DB_ENV *dbenv; int flags; + if ((dbenv = (DB_ENV *)malloc(sizeof(DB_ENV))) == NULL) { + errno = ENOMEM; + err(1, NULL); + } + + /* + * Try and use the shared regions when reporting statistics on the + * DB databases, so our information is as up-to-date as possible, + * even if the mpool cache hasn't been flushed. If that fails, we + * turn off the DB_INIT_MPOOL flag and try again. + */ flags = DB_USE_ENVIRON; switch (ttype) { + case T_DB: case T_MPOOL: flags |= DB_INIT_MPOOL; break; + case T_LOG: + flags |= DB_INIT_LOG; + break; case T_TXN: flags |= DB_INIT_TXN; break; - default: - break; + case T_NOTSET: + abort(); + /* NOTREACHED */ } - if ((dbenv = (DB_ENV *)calloc(sizeof(DB_ENV), 1)) == NULL) { - errno = ENOMEM; - err(1, NULL); + /* + * If it works, we're done. Set the error output options so that + * future errors are correctly reported. + */ + memset(dbenv, 0, sizeof(*dbenv)); + if ((errno = db_appinit(home, NULL, dbenv, flags)) == 0) { + dbenv->db_errfile = stderr; + dbenv->db_errpfx = progname; + return (dbenv); } + + /* Turn off the DB_INIT_MPOOL flag if it's a database. */ + if (ttype == T_DB) + flags &= ~DB_INIT_MPOOL; + + /* Set the error output options -- this time we want a message. */ + memset(dbenv, 0, sizeof(*dbenv)); dbenv->db_errfile = stderr; dbenv->db_errpfx = progname; + /* Try again, and it's fatal if we fail. 
*/ if ((errno = db_appinit(home, NULL, dbenv, flags)) != 0) err(1, "db_appinit"); + return (dbenv); } @@ -430,6 +520,6 @@ onint(signo) void usage() { - fprintf(stderr, "usage: db_stat [-mt] [-d file] [-h home]\n"); + fprintf(stderr, "usage: db_stat [-mlt] [-d file] [-h home]\n"); exit (1); } diff --git a/db2/txn/txn.c b/db2/txn/txn.c index 9a0d626c3e..55423f0470 100644 --- a/db2/txn/txn.c +++ b/db2/txn/txn.c @@ -43,7 +43,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)txn.c 10.30 (Sleepycat) 9/23/97"; +static const char sccsid[] = "@(#)txn.c 10.35 (Sleepycat) 11/2/97"; #endif /* not lint */ @@ -187,7 +187,7 @@ retry1: if ((ret = __db_ropen(dbenv, DB_APP_NONE, path, DEFAULT_TXN_FILE, } /* Now, create the transaction manager structure and set its fields. */ - if ((tmgrp = (DB_TXNMGR *)malloc(sizeof(DB_TXNMGR))) == NULL) { + if ((tmgrp = (DB_TXNMGR *)__db_malloc(sizeof(DB_TXNMGR))) == NULL) { __db_err(dbenv, "txn_open: %s", strerror(ENOMEM)); ret = ENOMEM; goto out; @@ -205,7 +205,7 @@ retry1: if ((ret = __db_ropen(dbenv, DB_APP_NONE, path, DEFAULT_TXN_FILE, TAILQ_INIT(&tmgrp->txn_chain); if (LF_ISSET(DB_THREAD)) { LOCK_TXNREGION(tmgrp); - if ((ret = __db_shalloc(tmgrp->mem, sizeof(db_mutex_t), + if ((ret = __db_shalloc(tmgrp->mem, sizeof(db_mutex_t), MUTEX_ALIGNMENT, &tmgrp->mutexp)) == 0) __db_mutex_init(tmgrp->mutexp, -1); UNLOCK_TXNREGION(tmgrp); @@ -225,7 +225,7 @@ out: if (txn_regionp != NULL) __db_shalloc_free(tmgrp->mem, tmgrp->mutexp); UNLOCK_TXNREGION(tmgrp); } - free(tmgrp); + __db_free(tmgrp); } return (ret); } @@ -254,7 +254,7 @@ txn_begin(tmgrp, parent, txnpp) if ((ret = __db_shalloc(tmgrp->mem, sizeof(TXN_DETAIL), 0, &txnp)) != 0 && ret == ENOMEM && (ret = __txn_grow_region(tmgrp)) == 0) ret = __db_shalloc(tmgrp->mem, sizeof(TXN_DETAIL), 0, &txnp); - + if (ret != 0) goto err; @@ -262,7 +262,7 @@ txn_begin(tmgrp, parent, txnpp) if (tmgrp->region->last_txnid == TXN_INVALID) return (EINVAL); - if ((retp = (DB_TXN 
*)malloc(sizeof(DB_TXN))) == NULL) { + if ((retp = (DB_TXN *)__db_malloc(sizeof(DB_TXN))) == NULL) { __db_err(tmgrp->dbenv, "txn_begin : %s", strerror(ENOMEM)); ret = ENOMEM; goto err1; @@ -297,7 +297,7 @@ txn_begin(tmgrp, parent, txnpp) txnp, links, __txn_detail); __db_shalloc_free(tmgrp->mem, txnp); UNLOCK_TXNREGION(tmgrp); - free (retp); + __db_free(retp); return (ret); } @@ -433,7 +433,7 @@ txn_close(tmgrp) ret = t_ret; if (ret == 0) - free (tmgrp); + __db_free(tmgrp); return (ret); } @@ -561,7 +561,7 @@ __txn_undo(txnp) ret = mgr->recover(logp, &rdbt, &key_lsn, TXN_UNDO, NULL); if (F_ISSET(logp, DB_AM_THREAD) && rdbt.data != NULL) { - free(rdbt.data); + __db_free(rdbt.data); rdbt.data = NULL; } } @@ -590,7 +590,7 @@ txn_checkpoint(mgr, kbytes, minutes) TXN_DETAIL *txnp; DB_LSN ckp_lsn, last_ckp; DB_LOG *dblp; - u_int32_t bytes_written; + u_int32_t kbytes_written; time_t last_ckp_time, now; int ret; @@ -616,10 +616,12 @@ txn_checkpoint(mgr, kbytes, minutes) if (kbytes != 0) { dblp = mgr->dbenv->lg_info; LOCK_LOGREGION(dblp); - bytes_written = dblp->lp->written; + kbytes_written = + dblp->lp->stat.st_wc_mbytes * 1024 + + dblp->lp->stat.st_wc_bytes / 1024; ckp_lsn = dblp->lp->lsn; UNLOCK_LOGREGION(dblp); - if (bytes_written >= (u_int32_t)(kbytes * 1024)) + if (kbytes_written >= (u_int32_t)kbytes) goto do_ckp; } @@ -726,12 +728,14 @@ __txn_grow_region(tp) DB_TXNMGR *tp; { size_t incr; + off_t mutex_offset; u_int32_t oldmax; u_int8_t *curaddr; int ret; oldmax = tp->region->maxtxns; incr = oldmax * sizeof(DB_TXN); + mutex_offset = (u_int8_t *)tp->mutexp - (u_int8_t *)tp->region; if ((ret = __db_rgrow(tp->dbenv, tp->fd, incr)) != 0) return (ret); @@ -744,6 +748,7 @@ __txn_grow_region(tp) curaddr = (u_int8_t *)tp->region + tp->reg_size; tp->mem = &tp->region[1]; tp->reg_size += incr; + tp->mutexp = (db_mutex_t *)((u_int8_t *)tp->region + mutex_offset); *((size_t *)curaddr) = incr - sizeof(size_t); curaddr += sizeof(size_t); @@ -776,7 +781,7 @@ txn_stat(mgr, statp, 
db_malloc) */ nbytes = sizeof(DB_TXN_STAT) + sizeof(DB_TXN_ACTIVE) * (nactive + 200); if (db_malloc == NULL) - stats = (DB_TXN_STAT *)malloc(nbytes); + stats = (DB_TXN_STAT *)__db_malloc(nbytes); else stats = (DB_TXN_STAT *)db_malloc(nbytes); diff --git a/db2/txn/txn_auto.c b/db2/txn/txn_auto.c index baef7333c7..9edbc03eab 100644 --- a/db2/txn/txn_auto.c +++ b/db2/txn/txn_auto.c @@ -46,7 +46,7 @@ int __txn_regop_log(logp, txnid, ret_lsnp, flags, lsnp = &txnid->last_lsn; logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN) + sizeof(opcode); - if ((logrec.data = (void *)malloc(logrec.size)) == NULL) + if ((logrec.data = (void *)__db_malloc(logrec.size)) == NULL) return (ENOMEM); bp = logrec.data; @@ -65,7 +65,7 @@ int __txn_regop_log(logp, txnid, ret_lsnp, flags, ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags); if (txnid != NULL) txnid->last_lsn = *ret_lsnp; - free(logrec.data); + __db_free(logrec.data); return (ret); } @@ -103,7 +103,7 @@ __txn_regop_print(notused1, dbtp, lsnp, notused3, notused4) (u_long)argp->prev_lsn.offset); printf("\topcode: %lu\n", (u_long)argp->opcode); printf("\n"); - free(argp); + __db_free(argp); return (0); } @@ -118,7 +118,7 @@ __txn_regop_read(recbuf, argpp) __txn_regop_args *argp; u_int8_t *bp; - argp = (__txn_regop_args *)malloc(sizeof(__txn_regop_args) + + argp = (__txn_regop_args *)__db_malloc(sizeof(__txn_regop_args) + sizeof(DB_TXN)); if (argp == NULL) return (ENOMEM); @@ -167,7 +167,7 @@ int __txn_ckp_log(logp, txnid, ret_lsnp, flags, logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN) + sizeof(*ckp_lsn) + sizeof(*last_ckp); - if ((logrec.data = (void *)malloc(logrec.size)) == NULL) + if ((logrec.data = (void *)__db_malloc(logrec.size)) == NULL) return (ENOMEM); bp = logrec.data; @@ -194,7 +194,7 @@ int __txn_ckp_log(logp, txnid, ret_lsnp, flags, ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags); if (txnid != NULL) txnid->last_lsn = *ret_lsnp; - free(logrec.data); + __db_free(logrec.data); return 
(ret); } @@ -235,7 +235,7 @@ __txn_ckp_print(notused1, dbtp, lsnp, notused3, notused4) printf("\tlast_ckp: [%lu][%lu]\n", (u_long)argp->last_ckp.file, (u_long)argp->last_ckp.offset); printf("\n"); - free(argp); + __db_free(argp); return (0); } @@ -250,7 +250,7 @@ __txn_ckp_read(recbuf, argpp) __txn_ckp_args *argp; u_int8_t *bp; - argp = (__txn_ckp_args *)malloc(sizeof(__txn_ckp_args) + + argp = (__txn_ckp_args *)__db_malloc(sizeof(__txn_ckp_args) + sizeof(DB_TXN)); if (argp == NULL) return (ENOMEM); diff --git a/db2/txn/txn_rec.c b/db2/txn/txn_rec.c index c172d874d9..679cffb567 100644 --- a/db2/txn/txn_rec.c +++ b/db2/txn/txn_rec.c @@ -40,7 +40,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)txn_rec.c 10.5 (Sleepycat) 8/27/97"; +static const char sccsid[] = "@(#)txn_rec.c 10.6 (Sleepycat) 10/25/97"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -97,7 +97,7 @@ __txn_regop_recover(logp, dbtp, lsnp, redo, info) } *lsnp = argp->prev_lsn; - free (argp); + __db_free(argp); return (0); } @@ -126,6 +126,6 @@ __txn_ckp_recover(logp, dbtp, lsnp, redo, info) return (ret); *lsnp = argp->last_ckp; - free(argp); + __db_free(argp); return (DB_TXN_CKP); } |