diff options
Diffstat (limited to 'rt/aio_misc.c')
-rw-r--r-- | rt/aio_misc.c | 588 |
1 files changed, 414 insertions, 174 deletions
diff --git a/rt/aio_misc.c b/rt/aio_misc.c index e4bb12c500..6ea30c2158 100644 --- a/rt/aio_misc.c +++ b/rt/aio_misc.c @@ -21,7 +21,6 @@ #include <aio.h> #include <errno.h> #include <pthread.h> -#include <semaphore.h> #include <stdlib.h> #include <unistd.h> #include <sys/stat.h> @@ -29,40 +28,199 @@ #include "aio_misc.h" -/* We need a list of pending operations. This is sorted according to - the priority given in the aio_reqprio member. */ -aiocb_union *__aio_requests; +/* Pool of request list entries. */ +static struct requestlist **pool; -/* Since the list is global we need a semaphore protecting it. */ -sem_t __aio_requests_sema; +/* Number of total and allocated pool entries. */ +static size_t pool_tab_size; +static size_t pool_size; +/* We implement a two dimensional array but allocate each row separately. + The macro below determines how many entries should be used per row. + It should better be a power of two. */ +#define ENTRIES_PER_ROW 16 -/* The initialization function. It gets automatically called if any - aio_* function is used in the program. */ -static void -__attribute__ ((unused)) -aio_initialize (void) +/* The row table is incremented in units of this. */ +#define ROW_STEP 8 + +/* List of available entries. */ +static struct requestlist *freelist; + +/* List of request waiting to be processed. */ +static struct requestlist *runlist; + +/* Structure list of all currently processed requests. */ +static struct requestlist *requests; + +/* Number of threads currently running. */ +static int nthreads; + + +/* These are the values used to optimize the use of AIO. The user can + overwrite them by using the `aio_init' function. */ +static struct aioinit optim = +{ + 20, /* int aio_threads; Maximal number of threads. */ + 256, /* int aio_num; Number of expected simultanious requests. */ + 0, + 0, + 0, + 0, + { 0, } +}; + + +/* Since the list is global we need a mutex protecting it. 
*/ +pthread_mutex_t __aio_requests_mutex = PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP; + + +/* Functions to handle request list pool. */ +static struct requestlist * +get_elem (void) { - /* Initialize the semaphore. We allow exactly one user at a time. */ - sem_init (&__aio_requests_sema, 0, 1); + struct requestlist *result; + + if (freelist == NULL) + { + struct requestlist *new_row; + size_t new_size; + + /* Compute new size. */ + new_size = pool_size ? pool_size + ENTRIES_PER_ROW : optim.aio_num; + + if ((new_size / ENTRIES_PER_ROW) >= pool_tab_size) + { + size_t new_tab_size = new_size / ENTRIES_PER_ROW; + struct requestlist **new_tab; + + new_tab = (struct requestlist **) + realloc (pool, (new_tab_size * sizeof (struct requestlist *))); + + if (new_tab == NULL) + return NULL; + + pool_tab_size = new_tab_size; + pool = new_tab; + } + + if (pool_size == 0) + { + size_t cnt; + + new_row = (struct requestlist *) + calloc (new_size, sizeof (struct requestlist)); + + if (new_row == NULL) + return NULL; + + for (cnt = 0; cnt < new_size / ENTRIES_PER_ROW; ++cnt) + pool[cnt] = &new_row[cnt * ENTRIES_PER_ROW]; + } + else + { + /* Allocat one new row. */ + new_row = (struct requestlist *) + calloc (ENTRIES_PER_ROW, sizeof (struct requestlist)); + if (new_row == NULL) + return NULL; + + pool[new_size / ENTRIES_PER_ROW] = new_row; + } + + /* Put all the new entries in the freelist. 
*/ + do + { + new_row->next_prio = freelist; + freelist = new_row++; + } + while (++pool_size < new_size); + } + + result = freelist; + freelist = freelist->next_prio; + + return result; } -text_set_element (__libc_subinit, aio_initialize); + +void +__aio_free_req (struct requestlist *elem) +{ + elem->running = no; + elem->next_prio = freelist; + freelist = elem; +} + + +struct requestlist * +__aio_find_req (aiocb_union *elem) +{ + struct requestlist *runp = requests; + int fildes = elem->aiocb.aio_fildes; + + while (runp != NULL && runp->aiocbp->aiocb.aio_fildes < fildes) + runp = runp->next_fd; + + if (runp != NULL) + if (runp->aiocbp->aiocb.aio_fildes != fildes) + runp = NULL; + else + while (runp != NULL && runp->aiocbp != elem) + runp = runp->next_prio; + + return runp; +} + + +struct requestlist * +__aio_find_req_fd (int fildes) +{ + struct requestlist *runp = requests; + + while (runp != NULL && runp->aiocbp->aiocb.aio_fildes < fildes) + runp = runp->next_fd; + + return (runp != NULL && runp->aiocbp->aiocb.aio_fildes == fildes + ? runp : NULL); +} /* The thread handler. */ static void *handle_fildes_io (void *arg); +/* User optimization. */ +void +__aio_init (const struct aioinit *init) +{ + /* Get the mutex. */ + pthread_mutex_lock (&__aio_requests_mutex); + + /* Only allow writing new values if the table is not yet allocated. */ + if (pool == NULL) + { + optim.aio_threads = init->aio_threads < 1 ? 1 : init->aio_threads; + optim.aio_num = (init->aio_num < ENTRIES_PER_ROW + ? ENTRIES_PER_ROW + : init->aio_num & ~ENTRIES_PER_ROW); + } + + /* Release the mutex. */ + pthread_mutex_unlock (&__aio_requests_mutex); +} +weak_alias (__aio_init, aio_init) + + /* The main function of the async I/O handling. It enqueues requests and if necessary starts and handles threads. 
*/ -int -__aio_enqueue_request (aiocb_union *aiocbp, int operation, int require_lock) +struct requestlist * +__aio_enqueue_request (aiocb_union *aiocbp, int operation) { - int result; + int result = 0; int policy, prio; struct sched_param param; - aiocb_union *runp; + struct requestlist *last, *runp, *newp; + int running = no; if (aiocbp->aiocb.aio_reqprio < 0 || aiocbp->aiocb.aio_reqprio > AIO_PRIO_DELTA_MAX) @@ -71,94 +229,160 @@ __aio_enqueue_request (aiocb_union *aiocbp, int operation, int require_lock) __set_errno (EINVAL); aiocbp->aiocb.__error_code = EINVAL; aiocbp->aiocb.__return_value = -1; - return -1; - } - - if (pthread_getschedparam (pthread_self (), &policy, &param) < 0) - { - /* Something went wrong. */ - aiocbp->aiocb.__error_code = errno; - aiocbp->aiocb.__return_value = -1; - return -1; + return NULL; } /* Compute priority for this request. */ + pthread_getschedparam (pthread_self (), &policy, &param); prio = param.sched_priority - aiocbp->aiocb.aio_reqprio; + /* Get the mutex. */ + pthread_mutex_lock (&__aio_requests_mutex); - /* Get the semaphore. */ - if (require_lock) - sem_wait (&__aio_requests_sema); - - runp = __aio_requests; + last = NULL; + runp = requests; /* First look whether the current file descriptor is currently worked with. */ - while (runp != NULL && runp->aiocb.aio_fildes < aiocbp->aiocb.aio_fildes) - runp = (aiocb_union *) runp->aiocb.__next_fd; + while (runp != NULL + && runp->aiocbp->aiocb.aio_fildes < aiocbp->aiocb.aio_fildes) + { + last = runp; + runp = runp->next_fd; + } - if (runp != NULL) + /* Get a new element for the waiting list. 
*/ + newp = get_elem (); + if (newp == NULL) + { + __set_errno (EAGAIN); + pthread_mutex_unlock (&__aio_requests_mutex); + return NULL; + } + newp->aiocbp = aiocbp; + newp->waiting = NULL; + + aiocbp->aiocb.__abs_prio = prio; + aiocbp->aiocb.__policy = policy; + aiocbp->aiocb.aio_lio_opcode = operation; + aiocbp->aiocb.__error_code = EINPROGRESS; + aiocbp->aiocb.__return_value = 0; + + if (runp != NULL + && runp->aiocbp->aiocb.aio_fildes == aiocbp->aiocb.aio_fildes) { /* The current file descriptor is worked on. It makes no sense - to start another thread since this new thread would have to - wait for the previous one to terminate. Simply enqueue it - after the running one according to the priority. */ - while (runp->aiocb.__next_prio != NULL - && runp->aiocb.__next_prio->__abs_prio >= prio) - runp = (aiocb_union *) runp->aiocb.__next_prio; - - aiocbp->aiocb.__next_prio = runp->aiocb.__next_prio; - aiocbp->aiocb.__abs_prio = prio; - aiocbp->aiocb.__policy = policy; - aiocbp->aiocb.aio_lio_opcode = operation; - aiocbp->aiocb.__error_code = EINPROGRESS; - aiocbp->aiocb.__return_value = 0; - runp->aiocb.__next_prio = (struct aiocb *) aiocbp; - - result = 0; + to start another thread since this new thread would fight + with the running thread for the resources. But we also cannot + say that the thread processing this desriptor shall imeediately + after finishing the current job process this request if there + are other threads in the running queue which have a higher + priority. */ + + /* Simply enqueue it after the running one according to the + priority. */ + while (runp->next_prio != NULL + && runp->next_prio->aiocbp->aiocb.__abs_prio >= prio) + runp = runp->next_prio; + + newp->next_prio = runp->next_prio; + runp->next_prio = newp; + + running = queued; } else { - /* We create a new thread for this file descriptor. The + /* Enqueue this request for a new descriptor. 
*/ + if (last == NULL) + { + newp->last_fd = NULL; + newp->next_fd = requests; + if (requests != NULL) + requests->last_fd = newp; + requests = newp; + } + else + { + newp->next_fd = last->next_fd; + newp->last_fd = last; + last->next_fd = newp; + if (newp->next_fd != NULL) + newp->next_fd->last_fd = newp; + } + + newp->next_prio = NULL; + } + + if (running == no) + { + /* We try to create a new thread for this file descriptor. The function which gets called will handle all available requests for this descriptor and when all are processed it will - terminate. */ - pthread_t thid; - pthread_attr_t attr; - - /* First enqueue the request (the list is empty). */ - aiocbp->aiocb.__next_fd = NULL; - aiocbp->aiocb.__last_fd = NULL; - - aiocbp->aiocb.__next_prio = NULL; - aiocbp->aiocb.__abs_prio = prio; - aiocbp->aiocb.__policy = policy; - aiocbp->aiocb.aio_lio_opcode = operation; - aiocbp->aiocb.__error_code = EINPROGRESS; - aiocbp->aiocb.__return_value = 0; - - /* Make sure the thread is created detached. */ - pthread_attr_init (&attr); - pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED); - - /* Now try to start a thread. */ - if (pthread_create (&thid, &attr, handle_fildes_io, aiocbp) < 0) + terminate. + + If no new thread can be created or if the specified limit of + threads for AIO is reached we queue the request. */ + + /* See if we can create a thread. */ + if (nthreads < optim.aio_threads) { - result = -1; - aiocbp->aiocb.__error_code = errno; - aiocbp->aiocb.__return_value = -1; + pthread_t thid; + pthread_attr_t attr; + + /* Make sure the thread is created detached. */ + pthread_attr_init (&attr); + pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED); + + /* Now try to start a thread. */ + if (pthread_create (&thid, &attr, handle_fildes_io, newp) == 0) + { + /* We managed to enqueue the request. All errors which can + happen now can be recognized by calls to `aio_return' and + `aio_error'. 
*/ + running = allocated; + ++nthreads; + } + else if (nthreads == 0) + /* We cannot create a thread in the moment and there is + also no thread running. This is a problem. `errno' is + set to EAGAIN if this is only a temporary problem. */ + result = -1; + } + } + + /* Enqueue the request in the run queue if it is not yet running. */ + if (running < yes && result == 0) + { + if (runlist == NULL || runlist->aiocbp->aiocb.__abs_prio < prio) + { + newp->next_run = runlist; + runlist = newp; } else - /* We managed to enqueue the request. All errors which can - happen now can be recognized by calls to `aio_return' and - `aio_error'. */ - result = 0; + { + runp = runlist; + + while (runp->next_run != NULL + && runp->next_run->aiocbp->aiocb.__abs_prio >= prio) + runp = runp->next_run; + + newp->next_run = runp->next_run; + runp->next_run = newp; + } } - /* Release the semaphore. */ - if (require_lock) - sem_post (&__aio_requests_sema); + if (result == 0) + newp->running = running; + else + { + /* Something went wrong. */ + __aio_free_req (newp); + newp = NULL; + } - return result; + /* Release the mutex. */ + pthread_mutex_unlock (&__aio_requests_mutex); + + return newp; } @@ -167,140 +391,156 @@ handle_fildes_io (void *arg) { pthread_t self = pthread_self (); struct sched_param param; - aiocb_union *runp = (aiocb_union *) arg; + struct requestlist *runp = (struct requestlist *) arg; + aiocb_union *aiocbp = runp->aiocbp; int policy; - int fildes = runp->aiocb.aio_fildes; /* This is always the same. */ + int fildes = runp->aiocbp->aiocb.aio_fildes; pthread_getschedparam (self, &policy, &param); do { /* Change the priority to the requested value (if necessary). 
*/ - if (runp->aiocb.__abs_prio != param.sched_priority - || runp->aiocb.__policy != policy) + if (aiocbp->aiocb.__abs_prio != param.sched_priority + || aiocbp->aiocb.__policy != policy) { - param.sched_priority = runp->aiocb.__abs_prio; - policy = runp->aiocb.__policy; + param.sched_priority = aiocbp->aiocb.__abs_prio; + policy = aiocbp->aiocb.__policy; pthread_setschedparam (self, policy, &param); } /* Process request pointed to by RUNP. We must not be disturbed by signals. */ - if ((runp->aiocb.aio_lio_opcode & 127) == LIO_READ) + if ((aiocbp->aiocb.aio_lio_opcode & 127) == LIO_READ) { - if (runp->aiocb.aio_lio_opcode & 128) - runp->aiocb.__return_value = + if (aiocbp->aiocb.aio_lio_opcode & 128) + aiocbp->aiocb.__return_value = TEMP_FAILURE_RETRY (__pread64 (fildes, - (void *) runp->aiocb64.aio_buf, - runp->aiocb64.aio_nbytes, - runp->aiocb64.aio_offset)); + (void *) aiocbp->aiocb64.aio_buf, + aiocbp->aiocb64.aio_nbytes, + aiocbp->aiocb64.aio_offset)); else - runp->aiocb.__return_value = + aiocbp->aiocb.__return_value = TEMP_FAILURE_RETRY (__pread (fildes, - (void *) runp->aiocb.aio_buf, - runp->aiocb.aio_nbytes, - runp->aiocb.aio_offset)); + (void *) aiocbp->aiocb.aio_buf, + aiocbp->aiocb.aio_nbytes, + aiocbp->aiocb.aio_offset)); } - else if ((runp->aiocb.aio_lio_opcode & 127) == LIO_WRITE) + else if ((aiocbp->aiocb.aio_lio_opcode & 127) == LIO_WRITE) { - if (runp->aiocb.aio_lio_opcode & 128) - runp->aiocb.__return_value = + if (aiocbp->aiocb.aio_lio_opcode & 128) + aiocbp->aiocb.__return_value = TEMP_FAILURE_RETRY (__pwrite64 (fildes, - (const void *) runp->aiocb64.aio_buf, - runp->aiocb64.aio_nbytes, - runp->aiocb64.aio_offset)); + (const void *) aiocbp->aiocb64.aio_buf, + aiocbp->aiocb64.aio_nbytes, + aiocbp->aiocb64.aio_offset)); else - runp->aiocb.__return_value = + aiocbp->aiocb.__return_value = TEMP_FAILURE_RETRY (__pwrite (fildes, - (const void *) runp->aiocb.aio_buf, - runp->aiocb.aio_nbytes, - runp->aiocb.aio_offset)); + (const void *) 
aiocbp->aiocb.aio_buf, + aiocbp->aiocb.aio_nbytes, + aiocbp->aiocb.aio_offset)); } - else if (runp->aiocb.aio_lio_opcode == __LIO_DSYNC) - runp->aiocb.__return_value = TEMP_FAILURE_RETRY (fdatasync (fildes)); - else if (runp->aiocb.aio_lio_opcode == __LIO_SYNC) - runp->aiocb.__return_value = TEMP_FAILURE_RETRY (fsync (fildes)); + else if (aiocbp->aiocb.aio_lio_opcode == LIO_DSYNC) + aiocbp->aiocb.__return_value = TEMP_FAILURE_RETRY (fdatasync (fildes)); + else if (aiocbp->aiocb.aio_lio_opcode == LIO_SYNC) + aiocbp->aiocb.__return_value = TEMP_FAILURE_RETRY (fsync (fildes)); else { /* This is an invalid opcode. */ - runp->aiocb.__return_value = -1; + aiocbp->aiocb.__return_value = -1; __set_errno (EINVAL); } - if (runp->aiocb.__return_value == -1) - runp->aiocb.__error_code = errno; + /* Get the mutex. */ + pthread_mutex_lock (&__aio_requests_mutex); + + if (aiocbp->aiocb.__return_value == -1) + aiocbp->aiocb.__error_code = errno; else - runp->aiocb.__error_code = 0; + aiocbp->aiocb.__error_code = 0; /* Send the signal to notify about finished processing of the request. */ - if (runp->aiocb.aio_sigevent.sigev_notify == SIGEV_THREAD) + __aio_notify (runp); + + /* Now dequeue the current request. */ + if (runp->next_prio == NULL) { - /* We have to start a thread. */ - pthread_t tid; - pthread_attr_t attr, *pattr; + /* No outstanding request for this descriptor. Process the + runlist if necessary. */ + if (runp->next_fd != NULL) + runp->next_fd->last_fd = runp->last_fd; + if (runp->last_fd != NULL) + runp->last_fd->next_fd = runp->next_fd; + } + else + { + runp->next_prio->last_fd = runp->last_fd; + runp->next_prio->next_fd = runp->next_fd; + runp->next_prio->running = yes; + if (runp->next_fd != NULL) + runp->next_fd->last_fd = runp->next_prio; + if (runp->last_fd != NULL) + runp->last_fd->next_fd = runp->next_prio; + } + + /* Free the old element. 
*/ + __aio_free_req (runp); - pattr = (pthread_attr_t *) - runp->aiocb.aio_sigevent.sigev_notify_attributes; - if (pattr == NULL) + runp = freelist; + if (runp != NULL) + { + /* We must not run requests which are not marked `running'. */ + if (runp->running == yes) { - pthread_attr_init (&attr); - pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED); - pattr = &attr; + freelist = runp->next_run; + runp->running = allocated; } - - if (pthread_create (&tid, - (pthread_attr_t *) - runp->aiocb.aio_sigevent.sigev_notify_attributes, - (void *(*) (void *)) - runp->aiocb.aio_sigevent.sigev_notify_function, - runp->aiocb.aio_sigevent.sigev_value.sival_ptr) - < 0) + else { - /* XXX What shall we do if already an error is set by - read/write/fsync? */ - runp->aiocb.__error_code = errno; - runp->aiocb.__return_value = -1; + struct requestlist *old; + + do + { + old = runp; + runp = runp->next_run; + } + while (runp != NULL && runp->running != yes); + + if (runp != NULL) + old->next_run = runp->next_run; } } - else if (runp->aiocb.aio_sigevent.sigev_notify == SIGEV_SIGNAL) - /* We have to send a signal. */ - if (__aio_sigqueue (runp->aiocb.aio_sigevent.sigev_signo, - runp->aiocb.aio_sigevent.sigev_value) < 0) - { - /* XXX What shall we do if already an error is set by - read/write/fsync? */ - runp->aiocb.__error_code = errno; - runp->aiocb.__return_value = -1; - } - - /* Get the semaphore. */ - sem_wait (&__aio_requests_sema); - /* Now dequeue the current request. 
*/ - if (runp->aiocb.__next_prio == NULL) - { - if (runp->aiocb.__next_fd != NULL) - runp->aiocb.__next_fd->__last_fd = runp->aiocb.__last_fd; - if (runp->aiocb.__last_fd != NULL) - runp->aiocb.__last_fd->__next_fd = runp->aiocb.__next_fd; - runp = NULL; - } - else - { - runp->aiocb.__next_prio->__last_fd = runp->aiocb.__last_fd; - runp->aiocb.__next_prio->__next_fd = runp->aiocb.__next_fd; - if (runp->aiocb.__next_fd != NULL) - runp->aiocb.__next_fd->__last_fd = runp->aiocb.__next_prio; - if (runp->aiocb.__last_fd != NULL) - runp->aiocb.__last_fd->__next_fd = runp->aiocb.__next_prio; - runp = (aiocb_union *) runp->aiocb.__next_prio; - } + /* If no request to work on we will stop the thread. */ + if (runp == NULL) + --nthreads; - /* Release the semaphore. */ - sem_post (&__aio_requests_sema); + /* Release the mutex. */ + pthread_mutex_unlock (&__aio_requests_mutex); } while (runp != NULL); pthread_exit (NULL); } + + +/* Free allocated resources. */ +static void +__attribute__ ((unused)) +free_res (void) +{ + size_t row; + + /* The first block of rows as specified in OPTIM is allocated in + one chunk. */ + free (pool[0]); + + for (row = optim.aio_num / ENTRIES_PER_ROW; row < pool_tab_size; ++row) + free (pool[row]); + + free (pool); +} + +text_set_element (__libc_subfreeres, free_res); |