about summary refs log tree commit diff
path: root/stdlib/qsort.c
diff options
context:
space:
mode:
authorFlorian Weimer <fweimer@redhat.com>2023-11-21 16:45:35 +0100
committerFlorian Weimer <fweimer@redhat.com>2023-11-21 16:46:02 +0100
commit55364e1f7dfab372f0710513c4d1c967c4965f71 (patch)
tree3cbcd4bb82bc1ab8db4de3c0a07c1466635b9fe7 /stdlib/qsort.c
parente4d8117b82065dc72e8df80097360e7c05a349b9 (diff)
downloadglibc-55364e1f7dfab372f0710513c4d1c967c4965f71.tar.gz
glibc-55364e1f7dfab372f0710513c4d1c967c4965f71.tar.xz
glibc-55364e1f7dfab372f0710513c4d1c967c4965f71.zip
stdlib: Handle various corner cases in the fallback heapsort for qsort
The previous implementation did not consistently apply the rule that
the child nodes of node K are at 2 * K + 1 and 2 * K + 2, or
that the parent node is at (K - 1) / 2.

Add an internal test that targets the heapsort implementation
directly.

Reported-by: Stepan Golosunov <stepan@golosunov.pp.ru>
Reviewed-by: Adhemerval Zanella  <adhemerval.zanella@linaro.org>
Diffstat (limited to 'stdlib/qsort.c')
-rw-r--r--stdlib/qsort.c55
1 files changed, 38 insertions, 17 deletions
diff --git a/stdlib/qsort.c b/stdlib/qsort.c
index 6d0c4447ec..a2f9e916ef 100644
--- a/stdlib/qsort.c
+++ b/stdlib/qsort.c
@@ -125,29 +125,44 @@ pop (stack_node *top, char **lo, char **hi, size_t *depth)
   return top;
 }
 
-/* NB: N is inclusive bound for BASE.  */
+/* Establish the heap condition at index K, that is, the key at K will
+   not be less than either of its children, at 2 * K + 1 and 2 * K + 2
+   (if they exist).  N is the last valid index. */
 static inline void
 siftdown (void *base, size_t size, size_t k, size_t n,
 	  enum swap_type_t swap_type, __compar_d_fn_t cmp, void *arg)
 {
-  while (k <= n / 2)
+  /* There can only be a heap condition violation if there are
+     children.  */
+  while (2 * k + 1 <= n)
     {
-      size_t j = 2 * k;
+      /* Left child.  */
+      size_t j = 2 * k + 1;
+      /* If the right child is larger, use it.  */
       if (j < n && cmp (base + (j * size), base + ((j + 1) * size), arg) < 0)
 	j++;
 
+      /* If k is already >= to its children, we are done.  */
       if (j == k || cmp (base + (k * size), base + (j * size), arg) >= 0)
 	break;
 
+      /* Heal the violation.  */
       do_swap (base + (size * j), base + (k * size), size, swap_type);
+
+      /* Swapping with j may have introduced a violation at j.  Fix
+	 it in the next loop iteration.  */
       k = j;
     }
 }
 
+/* Establish the heap condition for the indices 0 to N (inclusive).  */
 static inline void
 heapify (void *base, size_t size, size_t n, enum swap_type_t swap_type,
 	 __compar_d_fn_t cmp, void *arg)
 {
+  /* If n is odd, k = n / 2 has a left child at n, so this is the
+     largest index that can have a heap condition violation regarding
+     its children.  */
   size_t k = n / 2;
   while (1)
     {
@@ -157,32 +172,38 @@ heapify (void *base, size_t size, size_t n, enum swap_type_t swap_type,
     }
 }
 
-/* A non-recursive heapsort, used on introsort implementation as a fallback
-   routine with worst-case performance of O(nlog n) and worst-case space
-   complexity of O(1).  It sorts the array starting at BASE and ending at
-   END, with each element of SIZE bytes.  The SWAP_TYPE is the callback
-   function used to swap elements, and CMP is the function used to compare
-   elements.   */
+/* A non-recursive heapsort, used on introsort implementation as a
+   fallback routine with worst-case performance of O(nlog n) and
+   worst-case space complexity of O(1).  It sorts the array starting
+   at BASE and ending at END (inclusive), with each element of SIZE
+   bytes.  The SWAP_TYPE is the callback function used to swap
+   elements, and CMP is the function used to compare elements.  */
 static void
 heapsort_r (void *base, void *end, size_t size, enum swap_type_t swap_type,
 	    __compar_d_fn_t cmp, void *arg)
 {
-  const size_t count = ((uintptr_t) end - (uintptr_t) base) / size;
-
-  if (count < 2)
+  size_t n = ((uintptr_t) end - (uintptr_t) base) / size;
+  if (n <= 1)
+    /* Handled by insertion sort.  */
     return;
 
-  size_t n = count - 1;
-
   /* Build the binary heap, largest value at the base[0].  */
   heapify (base, size, n, swap_type, cmp, arg);
 
-  /* On each iteration base[0:n] is the binary heap, while base[n:count]
-     is sorted.  */
-  while (n > 0)
+  while (true)
     {
+      /* Indices 0 .. n contain the binary heap.  Extract the largest
+	 element put it into the final position in the array.  */
       do_swap (base, base + (n * size), size, swap_type);
+
+      /* The heap is now one element shorter.  */
       n--;
+      if (n == 0)
+	break;
+
+      /* By swapping in elements 0 and the previous value of n (now at
+	 n + 1), we likely introduced a heap condition violation.  Fix
+	 it for the reduced heap.  */
       siftdown (base, size, 0, n, swap_type, cmp, arg);
     }
 }