/* Copyright (C) 1991-2024 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. The GNU C Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with the GNU C Library; if not, see . */ /* If you consider tuning this algorithm, you should consult first: Engineering a sort function; Jon Bentley and M. Douglas McIlroy; Software - Practice and Experience; Vol. 23 (11), 1249-1265, 1993. */ #include #include #include #include #include #include /* Swap SIZE bytes between addresses A and B. These helpers are provided along the generic one as an optimization. */ enum swap_type_t { SWAP_WORDS_64, SWAP_WORDS_32, SWAP_VOID_ARG, SWAP_BYTES }; typedef uint32_t __attribute__ ((__may_alias__)) u32_alias_t; typedef uint64_t __attribute__ ((__may_alias__)) u64_alias_t; static inline void swap_words_64 (void * restrict a, void * restrict b, size_t n) { do { n -= 8; u64_alias_t t = *(u64_alias_t *)(a + n); *(u64_alias_t *)(a + n) = *(u64_alias_t *)(b + n); *(u64_alias_t *)(b + n) = t; } while (n); } static inline void swap_words_32 (void * restrict a, void * restrict b, size_t n) { do { n -= 4; u32_alias_t t = *(u32_alias_t *)(a + n); *(u32_alias_t *)(a + n) = *(u32_alias_t *)(b + n); *(u32_alias_t *)(b + n) = t; } while (n); } /* Replace the indirect call with a serie of if statements. It should help the branch predictor. */ static void do_swap (void * restrict a, void * restrict b, size_t size, enum swap_type_t swap_type) { if (swap_type == SWAP_WORDS_64) swap_words_64 (a, b, size); else if (swap_type == SWAP_WORDS_32) swap_words_32 (a, b, size); else __memswap (a, b, size); } /* Establish the heap condition at index K, that is, the key at K will not be less than either of its children, at 2 * K + 1 and 2 * K + 2 (if they exist). N is the last valid index. */ static inline void siftdown (void *base, size_t size, size_t k, size_t n, enum swap_type_t swap_type, __compar_d_fn_t cmp, void *arg) { /* There can only be a heap condition violation if there are children. */ while (2 * k + 1 <= n) { /* Left child. */ size_t j = 2 * k + 1; /* If the right child is larger, use it. */ if (j < n && cmp (base + (j * size), base + ((j + 1) * size), arg) < 0) j++; /* If k is already >= to its children, we are done. */ if (j == k || cmp (base + (k * size), base + (j * size), arg) >= 0) break; /* Heal the violation. */ do_swap (base + (size * j), base + (k * size), size, swap_type); /* Swapping with j may have introduced a violation at j. Fix it in the next loop iteration. */ k = j; } } /* Establish the heap condition for the indices 0 to N (inclusive). */ static inline void heapify (void *base, size_t size, size_t n, enum swap_type_t swap_type, __compar_d_fn_t cmp, void *arg) { /* If n is odd, k = n / 2 has a left child at n, so this is the largest index that can have a heap condition violation regarding its children. */ size_t k = n / 2; while (1) { siftdown (base, size, k, n, swap_type, cmp, arg); if (k-- == 0) break; } } static enum swap_type_t get_swap_type (void *const pbase, size_t size) { if ((size & (sizeof (uint32_t) - 1)) == 0 && ((uintptr_t) pbase) % __alignof__ (uint32_t) == 0) { if (size == sizeof (uint32_t)) return SWAP_WORDS_32; else if (size == sizeof (uint64_t) && ((uintptr_t) pbase) % __alignof__ (uint64_t) == 0) return SWAP_WORDS_64; } return SWAP_BYTES; } /* A non-recursive heapsort with worst-case performance of O(nlog n) and worst-case space complexity of O(1). It sorts the array starting at BASE with n + 1 elements of SIZE bytes. The SWAP_TYPE is the callback function used to swap elements, and CMP is the function used to compare elements. */ static void heapsort_r (void *base, size_t n, size_t size, __compar_d_fn_t cmp, void *arg) { if (n == 0) return; enum swap_type_t swap_type = get_swap_type (base, size); /* Build the binary heap, largest value at the base[0]. */ heapify (base, size, n, swap_type, cmp, arg); while (true) { /* Indices 0 .. n contain the binary heap. Extract the largest element put it into the final position in the array. */ do_swap (base, base + (n * size), size, swap_type); /* The heap is now one element shorter. */ n--; if (n == 0) break; /* By swapping in elements 0 and the previous value of n (now at n + 1), we likely introduced a heap condition violation. Fix it for the reduced heap. */ siftdown (base, size, 0, n, swap_type, cmp, arg); } } /* The maximum size in bytes required by mergesort that will be provided through a buffer allocated in the stack. */ #define QSORT_STACK_SIZE 1024 /* Elements larger than this value will be sorted through indirect sorting to minimize the need to memory swap calls. */ #define INDIRECT_SORT_SIZE_THRES 32 struct msort_param { size_t s; enum swap_type_t var; __compar_d_fn_t cmp; void *arg; char *t; }; static void msort_with_tmp (const struct msort_param *p, void *b, size_t n) { char *b1, *b2; size_t n1, n2; if (n <= 1) return; n1 = n / 2; n2 = n - n1; b1 = b; b2 = (char *) b + (n1 * p->s); msort_with_tmp (p, b1, n1); msort_with_tmp (p, b2, n2); char *tmp = p->t; const size_t s = p->s; __compar_d_fn_t cmp = p->cmp; void *arg = p->arg; switch (p->var) { case SWAP_WORDS_32: while (n1 > 0 && n2 > 0) { if (cmp (b1, b2, arg) <= 0) { *(u32_alias_t *) tmp = *(u32_alias_t *) b1; b1 += sizeof (u32_alias_t); --n1; } else { *(u32_alias_t *) tmp = *(u32_alias_t *) b2; b2 += sizeof (u32_alias_t); --n2; } tmp += sizeof (u32_alias_t); } break; case SWAP_WORDS_64: while (n1 > 0 && n2 > 0) { if (cmp (b1, b2, arg) <= 0) { *(u64_alias_t *) tmp = *(u64_alias_t *) b1; b1 += sizeof (u64_alias_t); --n1; } else { *(u64_alias_t *) tmp = *(u64_alias_t *) b2; b2 += sizeof (u64_alias_t); --n2; } tmp += sizeof (u64_alias_t); } break; case SWAP_VOID_ARG: while (n1 > 0 && n2 > 0) { if ((*cmp) (*(const void **) b1, *(const void **) b2, arg) <= 0) { *(void **) tmp = *(void **) b1; b1 += sizeof (void *); --n1; } else { *(void **) tmp = *(void **) b2; b2 += sizeof (void *); --n2; } tmp += sizeof (void *); } break; default: while (n1 > 0 && n2 > 0) { if (cmp (b1, b2, arg) <= 0) { tmp = (char *) __mempcpy (tmp, b1, s); b1 += s; --n1; } else { tmp = (char *) __mempcpy (tmp, b2, s); b2 += s; --n2; } } break; } if (n1 > 0) memcpy (tmp, b1, n1 * s); memcpy (b, p->t, (n - n2) * s); } static void __attribute_used__ indirect_msort_with_tmp (const struct msort_param *p, void *b, size_t n, size_t s) { /* Indirect sorting. */ char *ip = (char *) b; void **tp = (void **) (p->t + n * sizeof (void *)); void **t = tp; void *tmp_storage = (void *) (tp + n); while ((void *) t < tmp_storage) { *t++ = ip; ip += s; } msort_with_tmp (p, p->t + n * sizeof (void *), n); /* tp[0] .. tp[n - 1] is now sorted, copy around entries of the original array. Knuth vol. 3 (2nd ed.) exercise 5.2-10. */ char *kp; size_t i; for (i = 0, ip = (char *) b; i < n; i++, ip += s) if ((kp = tp[i]) != ip) { size_t j = i; char *jp = ip; memcpy (tmp_storage, ip, s); do { size_t k = (kp - (char *) b) / s; tp[j] = jp; memcpy (jp, kp, s); j = k; jp = kp; kp = tp[k]; } while (kp != ip); tp[j] = jp; memcpy (jp, tmp_storage, s); } } void __qsort_r (void *const pbase, size_t total_elems, size_t size, __compar_d_fn_t cmp, void *arg) { if (total_elems <= 1) return; /* Align to the maximum size used by the swap optimization. */ _Alignas (uint64_t) char tmp[QSORT_STACK_SIZE]; size_t total_size = total_elems * size; char *buf; if (size > INDIRECT_SORT_SIZE_THRES) total_size = 2 * total_elems * sizeof (void *) + size; if (total_size <= sizeof tmp) buf = tmp; else { int save = errno; buf = malloc (total_size); __set_errno (save); if (buf == NULL) { /* Fallback to heapsort in case of memory failure. */ heapsort_r (pbase, total_elems - 1, size, cmp, arg); return; } } if (size > INDIRECT_SORT_SIZE_THRES) { const struct msort_param msort_param = { .s = sizeof (void *), .cmp = cmp, .arg = arg, .var = SWAP_VOID_ARG, .t = buf, }; indirect_msort_with_tmp (&msort_param, pbase, total_elems, size); } else { const struct msort_param msort_param = { .s = size, .cmp = cmp, .arg = arg, .var = get_swap_type (pbase, size), .t = buf, }; msort_with_tmp (&msort_param, pbase, total_elems); } if (buf != tmp) free (buf); } libc_hidden_def (__qsort_r) weak_alias (__qsort_r, qsort_r) void qsort (void *b, size_t n, size_t s, __compar_fn_t cmp) { return __qsort_r (b, n, s, (__compar_d_fn_t) cmp, NULL); } libc_hidden_def (qsort)