diff options
Diffstat (limited to 'db2/common/db_region.c')
-rw-r--r-- | db2/common/db_region.c | 129 |
1 files changed, 70 insertions, 59 deletions
diff --git a/db2/common/db_region.c b/db2/common/db_region.c index 284af6176a..12abfa524d 100644 --- a/db2/common/db_region.c +++ b/db2/common/db_region.c @@ -8,7 +8,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "@(#)db_region.c 10.46 (Sleepycat) 5/26/98"; +static const char sccsid[] = "@(#)db_region.c 10.53 (Sleepycat) 11/10/98"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -46,7 +46,7 @@ __db_rattach(infop) ret = retry_cnt = 0; /* Round off the requested size to the next page boundary. */ - DB_ROUNDOFF(infop->size); + DB_ROUNDOFF(infop->size, DB_VMPAGESIZE); /* Some architectures have hard limits on the maximum region size. */ #ifdef DB_REGIONSIZE_MAX @@ -61,7 +61,7 @@ loop: infop->addr = NULL; infop->fd = -1; infop->segid = INVALID_SEGID; if (infop->name != NULL) { - FREES(infop->name); + __os_freestr(infop->name); infop->name = NULL; } F_CLR(infop, REGION_CANGROW | REGION_CREATED); @@ -74,6 +74,11 @@ loop: infop->addr = NULL; * (Theoretically, we could probably get a file descriptor to lock * other types of shared regions, but I don't see any reason to * bother.) + * + * Since we may be using shared memory regions, e.g., shmget(2), + * and not mmap of regular files, the backing file may be only a + * few tens of bytes in length. So, this depends on the ability + * to fcntl lock file offsets much larger than the physical file. */ malloc_possible = 0; #endif @@ -91,15 +96,16 @@ loop: infop->addr = NULL; * than either anonymous memory or a shared file. */ if (malloc_possible && F_ISSET(infop, REGION_PRIVATE)) { - if ((infop->addr = __db_malloc(infop->size)) == NULL) - return (ENOMEM); + if ((ret = __os_malloc(infop->size, NULL, &infop->addr)) != 0) + return (ret); /* - * It's sometimes significantly faster to page-fault in all - * of the region's pages before we run the application, as - * we can see fairly nasty side-effects when we page-fault - * while holding various locks, i.e., the lock takes a long - * time, and other threads convoy behind the lock holder. + * It's sometimes significantly faster to page-fault in all of + * the region's pages before we run the application, as we see + * nasty side-effects when we page-fault while holding various + * locks, i.e., the lock takes a long time to acquire because + * of the underlying page fault, and the other threads convoy + * behind the lock holder. */ if (DB_GLOBAL(db_region_init)) for (p = infop->addr; @@ -159,7 +165,7 @@ loop: infop->addr = NULL; * 3. Memory backed by a regular file (mmap(2)). * * We instantiate a backing file in all cases, which contains at least - * the RLAYOUT structure, and in case #4, contains the actual region. + * the RLAYOUT structure, and in case #3, contains the actual region. * This is necessary for a couple of reasons: * * First, the mpool region uses temporary files to name regions, and @@ -218,7 +224,7 @@ loop: infop->addr = NULL; * And yes, this makes me want to take somebody and kill them, * but I can't think of any other solution. */ - if ((ret = __db_ioinfo(infop->name, + if ((ret = __os_ioinfo(infop->name, infop->fd, &mbytes, &bytes, NULL)) != 0) goto errmsg; size = mbytes * MEGABYTE + bytes; @@ -233,7 +239,7 @@ loop: infop->addr = NULL; if (size < sizeof(RLAYOUT)) goto retry; if ((ret = - __db_read(infop->fd, &rl, sizeof(rl), &nr)) != 0) + __os_read(infop->fd, &rl, sizeof(rl), &nr)) != 0) goto retry; if (rl.valid != DB_REGIONMAGIC) goto retry; @@ -284,6 +290,7 @@ loop: infop->addr = NULL; } else goto err; } + region_init: /* * Initialize the common region information. @@ -321,6 +328,7 @@ region_init: rlp->refcnt = 1; rlp->size = infop->size; db_version(&rlp->majver, &rlp->minver, &rlp->patch); + rlp->panic = 0; rlp->segid = infop->segid; rlp->flags = 0; if (F_ISSET(infop, REGION_ANONYMOUS)) @@ -347,13 +355,19 @@ region_init: * the file. */ if (F_ISSET(infop, REGION_ANONYMOUS)) { - if ((ret = __db_seek(infop->fd, 0, 0, 0, 0, 0)) != 0) + if ((ret = __os_seek(infop->fd, 0, 0, 0, 0, 0)) != 0) goto err; if ((ret = - __db_write(infop->fd, rlp, sizeof(*rlp), &nw)) != 0) + __os_write(infop->fd, rlp, sizeof(*rlp), &nw)) != 0) goto err; } } else { + /* Check to see if the region has had catastrophic failure. */ + if (rlp->panic) { + ret = DB_RUNRECOVERY; + goto err; + } + /* * Check the valid flag to ensure the region is initialized. * If the valid flag has not been set, the mutex may not have @@ -380,18 +394,6 @@ region_init: } /* - * Problem #2: We want a bigger region than has previously been - * created. Detected by checking if the region is smaller than - * our caller requested. If it is, we grow the region, (which - * does the detach and re-attach for us). - */ - if (grow_region != 0 && - (ret = __db_rgrow(infop, grow_region)) != 0) { - (void)__db_mutex_unlock(&rlp->lock, infop->fd); - goto err; - } - - /* * Problem #3: when we checked the size of the file, it was * still growing as part of creation. Detected by the fact * that infop->size isn't the same size as the region. @@ -419,16 +421,16 @@ retry: /* Discard the region. */ /* Discard the backing file. */ if (infop->fd != -1) { - (void)__db_close(infop->fd); + (void)__os_close(infop->fd); infop->fd = -1; if (F_ISSET(infop, REGION_CREATED)) - (void)__db_unlink(infop->name); + (void)__os_unlink(infop->name); } /* Discard the name. */ if (infop->name != NULL) { - FREES(infop->name); + __os_freestr(infop->name); infop->name = NULL; } @@ -438,7 +440,7 @@ retry: /* Discard the region. */ */ if (ret == 0) { if (++retry_cnt <= 3) { - __db_sleep(retry_cnt * 2, 0); + __os_sleep(retry_cnt * 2, 0); goto loop; } ret = EAGAIN; @@ -481,10 +483,11 @@ retry: /* Discard the region. */ F_SET(infop, REGION_REMOVED); F_CLR(infop, REGION_CANGROW); - (void)__db_close(infop->fd); - (void)__db_unlink(infop->name); + (void)__os_close(infop->fd); + (void)__os_unlink(infop->name); } } + return (ret); } @@ -514,7 +517,7 @@ __db_rdetach(infop) * action required is freeing the memory. */ if (F_ISSET(infop, REGION_MALLOC)) { - __db_free(infop->addr); + __os_free(infop->addr, 0); goto done; } @@ -549,7 +552,7 @@ __db_rdetach(infop) (void)__db_mutex_unlock(&rlp->lock, infop->fd); /* Close the backing file descriptor. */ - (void)__db_close(infop->fd); + (void)__os_close(infop->fd); infop->fd = -1; /* Discard our mapping of the region. */ @@ -561,13 +564,13 @@ __db_rdetach(infop) if ((t_ret = __db_unlinkregion(infop->name, infop) != 0) && ret == 0) ret = t_ret; - if ((t_ret = __db_unlink(infop->name) != 0) && ret == 0) + if ((t_ret = __os_unlink(infop->name) != 0) && ret == 0) ret = t_ret; } done: /* Discard the name. */ if (infop->name != NULL) { - FREES(infop->name); + __os_freestr(infop->name); infop->name = NULL; } @@ -629,8 +632,8 @@ __db_runlink(infop, force) * (REGION_PRIVATE) ones, regardless of whether or not it's used to * back the region. If that file doesn't exist, we're done. */ - if (__db_exists(name, NULL) != 0) { - FREES(name); + if (__os_exists(name, NULL) != 0) { + __os_freestr(name); return (0); } @@ -641,12 +644,12 @@ __db_runlink(infop, force) */ if ((ret = __db_open(name, DB_RDONLY, DB_RDONLY, 0, &fd)) != 0) goto errmsg; - if ((ret = __db_ioinfo(name, fd, &mbytes, &bytes, NULL)) != 0) + if ((ret = __os_ioinfo(name, fd, &mbytes, &bytes, NULL)) != 0) goto errmsg; size = mbytes * MEGABYTE + bytes; if (size <= sizeof(RLAYOUT)) { - if ((ret = __db_read(fd, &rl, sizeof(rl), &nr)) != 0) + if ((ret = __os_read(fd, &rl, sizeof(rl), &nr)) != 0) goto errmsg; if (rl.valid != DB_REGIONMAGIC) { __db_err(infop->dbenv, @@ -673,16 +676,16 @@ __db_runlink(infop, force) * because some architectures (e.g., Win32) won't unlink a file if * open file descriptors remain. */ - (void)__db_close(fd); - if ((t_ret = __db_unlink(name)) != 0 && ret == 0) + (void)__os_close(fd); + if ((t_ret = __os_unlink(name)) != 0 && ret == 0) ret = t_ret; if (0) { errmsg: __db_err(infop->dbenv, "%s: %s", name, strerror(ret)); -err: (void)__db_close(fd); +err: (void)__os_close(fd); } - FREES(name); + __os_freestr(name); return (ret); } @@ -715,7 +718,7 @@ __db_rgrow(infop, new_size) * determine the additional space required. */ rlp = (RLAYOUT *)infop->addr; - DB_ROUNDOFF(new_size); + DB_ROUNDOFF(new_size, DB_VMPAGESIZE); increment = new_size - rlp->size; if ((ret = __db_growregion(infop, increment)) != 0) @@ -745,7 +748,7 @@ __db_growregion(infop, increment) char buf[DB_VMPAGESIZE]; /* Seek to the end of the region. */ - if ((ret = __db_seek(infop->fd, 0, 0, 0, 0, SEEK_END)) != 0) + if ((ret = __os_seek(infop->fd, 0, 0, 0, 0, SEEK_END)) != 0) goto err; /* Write nuls to the new bytes. */ @@ -760,7 +763,7 @@ __db_growregion(infop, increment) /* Extend the region by writing each new page. */ for (i = 0; i < increment; i += DB_VMPAGESIZE) { if ((ret = - __db_write(infop->fd, buf, sizeof(buf), &nw)) != 0) + __os_write(infop->fd, buf, sizeof(buf), &nw)) != 0) goto err; if (nw != sizeof(buf)) goto eio; @@ -776,36 +779,44 @@ __db_growregion(infop, increment) */ pages = (increment - DB_VMPAGESIZE) / MEGABYTE; relative = (increment - DB_VMPAGESIZE) % MEGABYTE; - if ((ret = __db_seek(infop->fd, + if ((ret = __os_seek(infop->fd, MEGABYTE, pages, relative, 0, SEEK_CUR)) != 0) goto err; - if ((ret = __db_write(infop->fd, buf, sizeof(buf), &nw)) != 0) + if ((ret = __os_write(infop->fd, buf, sizeof(buf), &nw)) != 0) goto err; if (nw != sizeof(buf)) goto eio; /* - * It's sometimes significantly faster to page-fault in all - * of the region's pages before we run the application, as - * we can see fairly nasty side-effects when we page-fault - * while holding various locks, i.e., the lock takes a long - * time, and other threads convoy behind the lock holder. + * It's sometimes significantly faster to page-fault in all of + * the region's pages before we run the application, as we see + * nasty side-effects when we page-fault while holding various + * locks, i.e., the lock takes a long time to acquire because + * of the underlying page fault, and the other threads convoy + * behind the lock holder. + * + * We also use REGION_INIT to guarantee that there is enough + * disk space for the region, so we also write a byte to each + * page. Reading the byte is insufficient as some systems + * (e.g., Solaris) do not instantiate disk pages to satisfy + * a read, and so we don't know if there is enough disk space + * or not. */ if (DB_GLOBAL(db_region_init)) { pages = increment / MEGABYTE; relative = increment % MEGABYTE; - if ((ret = __db_seek(infop->fd, + if ((ret = __os_seek(infop->fd, MEGABYTE, pages, relative, 1, SEEK_END)) != 0) goto err; - /* Read a byte from each page. */ + /* Write a byte to each page. */ for (i = 0; i < increment; i += DB_VMPAGESIZE) { if ((ret = - __db_read(infop->fd, buf, 1, &nr)) != 0) + __os_write(infop->fd, buf, 1, &nr)) != 0) goto err; if (nr != 1) goto eio; - if ((ret = __db_seek(infop->fd, + if ((ret = __os_seek(infop->fd, 0, 0, DB_VMPAGESIZE - 1, 0, SEEK_CUR)) != 0) goto err; } |