-rw-r--r--  malloc/Makefile          |   3
-rw-r--r--  malloc/malloc.c          | 260
-rw-r--r--  malloc/tst-memalign-2.c  | 155
3 files changed, 390 insertions, 28 deletions
diff --git a/malloc/Makefile b/malloc/Makefile
index 0717df626e..f49675845e 100644
--- a/malloc/Makefile
+++ b/malloc/Makefile
@@ -43,6 +43,7 @@ tests := mallocbug tst-malloc tst-valloc tst-calloc tst-obstack \
 	 tst-tcfree1 tst-tcfree2 tst-tcfree3 \
 	 tst-safe-linking \
 	 tst-mallocalign1 \
+	 tst-memalign-2

 tests-static := \
 	 tst-interpose-static-nothread \
@@ -70,7 +71,7 @@ test-srcs = tst-mtrace
 # with MALLOC_CHECK_=3 because they expect a specific failure.
 tests-exclude-malloc-check = tst-malloc-check tst-malloc-usable \
 	tst-mxfast tst-safe-linking \
-	tst-compathooks-off tst-compathooks-on
+	tst-compathooks-off tst-compathooks-on tst-memalign-2

 # Run all tests with MALLOC_CHECK_=3
 tests-malloc-check = $(filter-out $(tests-exclude-malloc-check) \
diff --git a/malloc/malloc.c b/malloc/malloc.c
index 05e65a2d54..0315ac5d16 100644
--- a/malloc/malloc.c
+++ b/malloc/malloc.c
@@ -3156,19 +3156,44 @@ tcache_put (mchunkptr chunk, size_t tc_idx)
 }

 /* Caller must ensure that we know tc_idx is valid and there's
-   available chunks to remove.  */
+   available chunks to remove.  Removes chunk from the middle of the
+   list.  */
 static __always_inline void *
-tcache_get (size_t tc_idx)
+tcache_get_n (size_t tc_idx, tcache_entry **ep)
 {
-  tcache_entry *e = tcache->entries[tc_idx];
+  tcache_entry *e;
+  if (ep == &(tcache->entries[tc_idx]))
+    e = *ep;
+  else
+    e = REVEAL_PTR (*ep);
+
   if (__glibc_unlikely (!aligned_OK (e)))
     malloc_printerr ("malloc(): unaligned tcache chunk detected");
-  tcache->entries[tc_idx] = REVEAL_PTR (e->next);
+
+  if (ep == &(tcache->entries[tc_idx]))
+    *ep = REVEAL_PTR (e->next);
+  else
+    *ep = PROTECT_PTR (ep, REVEAL_PTR (e->next));
+
   --(tcache->counts[tc_idx]);
   e->key = 0;
   return (void *) e;
 }

+/* Like the above, but removes from the head of the list.  */
+static __always_inline void *
+tcache_get (size_t tc_idx)
+{
+  return tcache_get_n (tc_idx, & tcache->entries[tc_idx]);
+}
+
+/* Iterates through the tcache linked list.  */
+static __always_inline tcache_entry *
+tcache_next (tcache_entry *e)
+{
+  return (tcache_entry *) REVEAL_PTR (e->next);
+}
+
 static void
 tcache_thread_shutdown (void)
 {
@@ -3277,7 +3302,7 @@ __libc_malloc (size_t bytes)

   DIAG_PUSH_NEEDS_COMMENT;
   if (tc_idx < mp_.tcache_bins
-      && tcache
+      && tcache != NULL
       && tcache->counts[tc_idx] > 0)
     {
       victim = tcache_get (tc_idx);
@@ -3536,6 +3561,38 @@ _mid_memalign (size_t alignment, size_t bytes, void *address)
       alignment = a;
     }

+#if USE_TCACHE
+  {
+    size_t tbytes;
+    tbytes = checked_request2size (bytes);
+    if (tbytes == 0)
+      {
+        __set_errno (ENOMEM);
+        return NULL;
+      }
+    size_t tc_idx = csize2tidx (tbytes);
+
+    if (tc_idx < mp_.tcache_bins
+        && tcache != NULL
+        && tcache->counts[tc_idx] > 0)
+      {
+        /* The tcache itself isn't encoded, but the chain is.  */
+        tcache_entry **tep = & tcache->entries[tc_idx];
+        tcache_entry *te = *tep;
+        while (te != NULL && !PTR_IS_ALIGNED (te, alignment))
+          {
+            tep = & (te->next);
+            te = tcache_next (te);
+          }
+        if (te != NULL)
+          {
+            void *victim = tcache_get_n (tc_idx, tep);
+            return tag_new_usable (victim);
+          }
+      }
+  }
+#endif
+
   if (SINGLE_THREAD_P)
     {
       p = _int_memalign (&main_arena, alignment, bytes);
@@ -3841,7 +3898,7 @@ _int_malloc (mstate av, size_t bytes)
           /* While we're here, if we see other chunks of the same size,
              stash them in the tcache.  */
           size_t tc_idx = csize2tidx (nb);
-          if (tcache && tc_idx < mp_.tcache_bins)
+          if (tcache != NULL && tc_idx < mp_.tcache_bins)
             {
               mchunkptr tc_victim;

@@ -3899,7 +3956,7 @@ _int_malloc (mstate av, size_t bytes)
           /* While we're here, if we see other chunks of the same size,
              stash them in the tcache.  */
           size_t tc_idx = csize2tidx (nb);
-          if (tcache && tc_idx < mp_.tcache_bins)
+          if (tcache != NULL && tc_idx < mp_.tcache_bins)
             {
               mchunkptr tc_victim;

@@ -3961,7 +4018,7 @@ _int_malloc (mstate av, size_t bytes)
 #if USE_TCACHE
   INTERNAL_SIZE_T tcache_nb = 0;
   size_t tc_idx = csize2tidx (nb);
-  if (tcache && tc_idx < mp_.tcache_bins)
+  if (tcache != NULL && tc_idx < mp_.tcache_bins)
     tcache_nb = nb;
   int return_cached = 0;

@@ -4041,7 +4098,7 @@ _int_malloc (mstate av, size_t bytes)
 #if USE_TCACHE
               /* Fill cache first, return to user only if cache fills.
                  We may return one of these chunks later.  */
-              if (tcache_nb
+              if (tcache_nb > 0
                   && tcache->counts[tc_idx] < mp_.tcache_count)
                 {
                   tcache_put (victim, tc_idx);
@@ -4915,6 +4972,43 @@ _int_realloc (mstate av, mchunkptr oldp, INTERNAL_SIZE_T oldsize,
    ------------------------------ memalign ------------------------------
  */

+/* Returns 0 if the chunk is not and does not contain the requested
+   aligned sub-chunk, else returns the amount of "waste" from
+   trimming.  BYTES is the *user* byte size, not the chunk byte
+   size.  */
+static size_t
+chunk_ok_for_memalign (mchunkptr p, size_t alignment, size_t bytes)
+{
+  void *m = chunk2mem (p);
+  INTERNAL_SIZE_T size = memsize (p);
+  void *aligned_m = m;
+
+  if (__glibc_unlikely (misaligned_chunk (p)))
+    malloc_printerr ("_int_memalign(): unaligned chunk detected");
+
+  aligned_m = PTR_ALIGN_UP (m, alignment);
+
+  INTERNAL_SIZE_T front_extra = (intptr_t) aligned_m - (intptr_t) m;
+
+  /* We can't trim off the front as it's too small.  */
+  if (front_extra > 0 && front_extra < MINSIZE)
+    return 0;
+
+  /* If it's a perfect fit, it's an exception to the return value rule
+     (we would return zero waste, which looks like "not usable"), so
+     handle it here by returning a small non-zero value instead.  */
+  if (size == bytes && front_extra == 0)
+    return 1;
+
+  /* If the block we need fits in the chunk, calculate total waste.  */
+  if (size > bytes + front_extra)
+    return size - bytes;
+
+  /* Can't use this chunk.  */
+  return 0;
+}
+
+/* BYTES is user requested bytes, not requested chunksize bytes.  */
 static void *
 _int_memalign (mstate av, size_t alignment, size_t bytes)
 {
@@ -4928,8 +5022,7 @@ _int_memalign (mstate av, size_t alignment, size_t bytes)
   mchunkptr remainder;            /* spare room at end to split off */
   unsigned long remainder_size;   /* its size */
   INTERNAL_SIZE_T size;
-
-
+  mchunkptr victim;

   nb = checked_request2size (bytes);
   if (nb == 0)
@@ -4938,29 +5031,142 @@ _int_memalign (mstate av, size_t alignment, size_t bytes)
       return NULL;
     }

-  /*
-     Strategy: find a spot within that chunk that meets the alignment
+  /* We can't check tcache here because we hold the arena lock, which
+     tcache doesn't expect.  We expect it has been checked
+     earlier.  */
+
+  /* Strategy: search the bins looking for an existing block that
+     meets our needs.  We scan a range of bins from "exact size" to
+     "just under 2x", spanning the small/large barrier if needed.  If
+     we don't find anything in those bins, the common malloc code will
+     scan starting at 2x.  */
+
+  /* This will be set if we found a candidate chunk.  */
+  victim = NULL;
+
+  /* Fast bins are singly-linked, hard to remove a chunk from the middle
+     and unlikely to meet our alignment requirements.  We have not done
+     any experimentation with searching for aligned fastbins.  */
+
+  int first_bin_index;
+  int first_largebin_index;
+  int last_bin_index;
+
+  if (in_smallbin_range (nb))
+    first_bin_index = smallbin_index (nb);
+  else
+    first_bin_index = largebin_index (nb);
+
+  if (in_smallbin_range (nb * 2))
+    last_bin_index = smallbin_index (nb * 2);
+  else
+    last_bin_index = largebin_index (nb * 2);
+
+  first_largebin_index = largebin_index (MIN_LARGE_SIZE);
+
+  int victim_index;                 /* its bin index */
+
+  for (victim_index = first_bin_index;
+       victim_index < last_bin_index;
+       victim_index ++)
+    {
+      victim = NULL;
+
+      if (victim_index < first_largebin_index)
+        {
+          /* Check small bins.  Small bin chunks are doubly-linked despite
+             being the same size.  */
+
+          mchunkptr fwd;                    /* misc temp for linking */
+          mchunkptr bck;                    /* misc temp for linking */
+
+          bck = bin_at (av, victim_index);
+          fwd = bck->fd;
+          while (fwd != bck)
+            {
+              if (chunk_ok_for_memalign (fwd, alignment, bytes) > 0)
+                {
+                  victim = fwd;
+
+                  /* Unlink it */
+                  victim->fd->bk = victim->bk;
+                  victim->bk->fd = victim->fd;
+                  break;
+                }
+
+              fwd = fwd->fd;
+            }
+        }
+      else
+        {
+          /* Check large bins.  */
+          mchunkptr fwd;                    /* misc temp for linking */
+          mchunkptr bck;                    /* misc temp for linking */
+          mchunkptr best = NULL;
+          size_t best_size = 0;
+
+          bck = bin_at (av, victim_index);
+          fwd = bck->fd;
+
+          while (fwd != bck)
+            {
+              int extra;
+
+              if (chunksize (fwd) < nb)
+                break;
+              extra = chunk_ok_for_memalign (fwd, alignment, bytes);
+              if (extra > 0
+                  && (extra <= best_size || best == NULL))
+                {
+                  best = fwd;
+                  best_size = extra;
+                }
+
+              fwd = fwd->fd;
+            }
+          victim = best;
+
+          if (victim != NULL)
+            {
+              unlink_chunk (av, victim);
+              break;
+            }
+        }
+
+      if (victim != NULL)
+        break;
+    }
+
+  /* Strategy: find a spot within that chunk that meets the alignment
      request, and then possibly free the leading and trailing space.
-   */
+     This strategy is incredibly costly and can lead to external
+     fragmentation if header and footer chunks are unused.  */

-  /* Call malloc with worst case padding to hit alignment. */
+  if (victim != NULL)
+    {
+      p = victim;
+      m = chunk2mem (p);
+      set_inuse (p);
+    }
+  else
+    {
+      /* Call malloc with worst case padding to hit alignment. */

-  m = (char *) (_int_malloc (av, nb + alignment + MINSIZE));
+      m = (char *) (_int_malloc (av, nb + alignment + MINSIZE));

-  if (m == 0)
-    return 0;           /* propagate failure */
+      if (m == 0)
+        return 0;           /* propagate failure */

-  p = mem2chunk (m);
+      p = mem2chunk (m);
+    }

   if ((((unsigned long) (m)) % alignment) != 0)   /* misaligned */
-
-    { /*
-         Find an aligned spot inside chunk.  Since we need to give back
-         leading space in a chunk of at least MINSIZE, if the first
-         calculation places us at a spot with less than MINSIZE leader,
-         we can move to the next aligned spot -- we've allocated enough
-         total room so that this is always possible.
-       */
+    {
+      /* Find an aligned spot inside chunk.  Since we need to give back
+         leading space in a chunk of at least MINSIZE, if the first
+         calculation places us at a spot with less than MINSIZE leader,
+         we can move to the next aligned spot -- we've allocated enough
+         total room so that this is always possible.  */
       brk = (char *) mem2chunk (((unsigned long) (m + alignment - 1)) &
                                 - ((signed long) alignment));

       if ((unsigned long) (brk - (char *) (p)) < MINSIZE)
diff --git a/malloc/tst-memalign-2.c b/malloc/tst-memalign-2.c
new file mode 100644
index 0000000000..4996578e9f
--- /dev/null
+++ b/malloc/tst-memalign-2.c
@@ -0,0 +1,155 @@
+/* Test for memalign chunk reuse.
+   Copyright (C) 2022 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <errno.h>
+#include <malloc.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <array_length.h>
+#include <libc-pointer-arith.h>
+#include <support/check.h>
+
+typedef struct TestCase {
+  size_t size;
+  size_t alignment;
+  void *ptr1;
+  void *ptr2;
+} TestCase;
+
+static TestCase tcache_allocs[] = {
+  { 24, 8, NULL, NULL },
+  { 24, 16, NULL, NULL },
+  { 128, 32, NULL, NULL }
+};
+#define TN array_length (tcache_allocs)
+
+static TestCase large_allocs[] = {
+  { 23450, 64, NULL, NULL },
+  { 23450, 64, NULL, NULL },
+  { 23550, 64, NULL, NULL },
+  { 23550, 64, NULL, NULL },
+  { 23650, 64, NULL, NULL },
+  { 23650, 64, NULL, NULL },
+  { 33650, 64, NULL, NULL },
+  { 33650, 64, NULL, NULL }
+};
+#define LN array_length (large_allocs)
+
+void *p;
+
+/* Sanity checks, ancillary to the actual test.  */
+#define CHECK(p,a) \
+  if (p == NULL || !PTR_IS_ALIGNED (p, a)) \
+    FAIL_EXIT1 ("NULL or misaligned memory detected.\n");
+
+static int
+do_test (void)
+{
+  int i, j;
+  int count;
+  void *ptr[10];
+  void *p;
+
+  /* TCache test.  */
+
+  for (i = 0; i < TN; ++ i)
+    {
+      tcache_allocs[i].ptr1 = memalign (tcache_allocs[i].alignment, tcache_allocs[i].size);
+      CHECK (tcache_allocs[i].ptr1, tcache_allocs[i].alignment);
+      free (tcache_allocs[i].ptr1);
+      /* This should return the same chunk as was just free'd.  */
+      tcache_allocs[i].ptr2 = memalign (tcache_allocs[i].alignment, tcache_allocs[i].size);
+      CHECK (tcache_allocs[i].ptr2, tcache_allocs[i].alignment);
+      free (tcache_allocs[i].ptr2);
+
+      TEST_VERIFY (tcache_allocs[i].ptr1 == tcache_allocs[i].ptr2);
+    }
+
+  /* Test for non-head tcache hits.  */
+  for (i = 0; i < array_length (ptr); ++ i)
+    {
+      if (i == 4)
+        {
+          ptr[i] = memalign (64, 256);
+          CHECK (ptr[i], 64);
+        }
+      else
+        {
+          ptr[i] = malloc (256);
+          CHECK (ptr[i], 4);
+        }
+    }
+  for (i = 0; i < array_length (ptr); ++ i)
+    free (ptr[i]);
+
+  p = memalign (64, 256);
+  CHECK (p, 64);
+
+  count = 0;
+  for (i = 0; i < 10; ++ i)
+    if (ptr[i] == p)
+      ++ count;
+  free (p);
+  TEST_VERIFY (count > 0);
+
+  /* Large bins test.  */
+
+  for (i = 0; i < LN; ++ i)
+    {
+      large_allocs[i].ptr1 = memalign (large_allocs[i].alignment, large_allocs[i].size);
+      CHECK (large_allocs[i].ptr1, large_allocs[i].alignment);
+      /* Keep chunks from combining by fragmenting the heap.  */
+      p = malloc (512);
+      CHECK (p, 4);
+    }
+
+  for (i = 0; i < LN; ++ i)
+    free (large_allocs[i].ptr1);
+
+  /* Force the unsorted bins to be scanned and moved to small/large
+     bins.  */
+  p = malloc (60000);
+
+  for (i = 0; i < LN; ++ i)
+    {
+      large_allocs[i].ptr2 = memalign (large_allocs[i].alignment, large_allocs[i].size);
+      CHECK (large_allocs[i].ptr2, large_allocs[i].alignment);
+    }
+
+  count = 0;
+  for (i = 0; i < LN; ++ i)
+    {
+      int ok = 0;
+      for (j = 0; j < LN; ++ j)
+        if (large_allocs[i].ptr1 == large_allocs[j].ptr2)
+          ok = 1;
+      if (ok == 1)
+        count ++;
+    }
+
+  /* The allocation algorithm is complicated outside of the memalign
+     logic, so just make sure it's working for most of the
+     allocations.  This avoids possible boundary conditions with
+     empty/full heaps.  */
+  TEST_VERIFY (count > LN / 2);
+
+  return 0;
+}
+
+#include <support/test-driver.c>
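Aside (not part of the patch above): the core of the new reuse logic is the "waste" computation in chunk_ok_for_memalign. Take the chunk's usable start, round it up to the requested alignment, reject the chunk if the leading gap is too small to split off or if the remainder cannot hold the request, and otherwise report how many bytes would be trimmed away. The standalone sketch below mirrors that arithmetic with plain integer addresses instead of glibc's internal chunk macros; the MINSIZE value and the ok_for_memalign helper name are illustrative assumptions, not glibc code.

/* Standalone sketch -- NOT glibc code.  Mirrors the "waste" check that
   chunk_ok_for_memalign performs, using plain addresses.  MINSIZE is an
   assumed minimum splittable block size for illustration only.  */
#include <stdint.h>
#include <stdio.h>

#define MINSIZE 32   /* assumed minimum splittable block size */

/* Return 0 if a block of SIZE usable bytes at ADDR cannot satisfy a
   request for BYTES bytes aligned to ALIGNMENT (a power of two);
   otherwise return the bytes of "waste" left after trimming.  A
   perfect fit returns 1 so it is not mistaken for "unusable".  */
static size_t
ok_for_memalign (uintptr_t addr, size_t size, size_t alignment, size_t bytes)
{
  uintptr_t aligned = (addr + alignment - 1) & ~(uintptr_t) (alignment - 1);
  size_t front_extra = aligned - addr;

  if (front_extra > 0 && front_extra < MINSIZE)
    return 0;                /* leading gap too small to split off */
  if (size == bytes && front_extra == 0)
    return 1;                /* perfect fit: report a token non-zero waste */
  if (size > bytes + front_extra)
    return size - bytes;     /* fits: waste is whatever gets trimmed */
  return 0;                  /* does not fit */
}

int
main (void)
{
  /* 160 usable bytes at 0x1008, asked for 64 bytes at 64-byte alignment:
     the leading gap is 56 bytes (>= MINSIZE), so the block is usable and
     96 bytes of waste are reported.  */
  printf ("%zu\n", ok_for_memalign (0x1008, 160, 64, 64));
  return 0;
}

A smaller non-zero result means a better candidate, which is why the large-bin scan in the patch keeps the chunk with the smallest non-zero return value.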