about summary refs log tree commit diff
diff options
context:
space:
mode:
authorUlrich Drepper <drepper@redhat.com>2007-10-05 06:50:35 +0000
committerUlrich Drepper <drepper@redhat.com>2007-10-05 06:50:35 +0000
commit375d942968a95ec6c324d89922ee7a574c40630f (patch)
treefd9e064e4b293e36080fee72ebc1b44f7b9934f6
parent174420d2bc39c611fbc09669490b96fc6cc81520 (diff)
downloadglibc-375d942968a95ec6c324d89922ee7a574c40630f.tar.gz
glibc-375d942968a95ec6c324d89922ee7a574c40630f.tar.xz
glibc-375d942968a95ec6c324d89922ee7a574c40630f.zip
2007-10-04 Jakub Jelinek
	* stdlib/msort.c: Include stdint.h.
	(struct msort_param): New type.
	(msort_with_tmp): Use struct msort_param pointer for unchanging
	parameters.  Add optimized handling for several common sizes
	and indirect sorting mode.
	(qsort): Adjust msort_with_tmp callers.  For big S use indirect
	sorting.
	Suggested by Belazougui Djamel .

	* stdlib/Makefile (tests): Add tst-qsort2.
	* stdlib/tst-qsort2.c: New test.
-rw-r--r--ChangeLog14
-rw-r--r--stdlib/Makefile2
-rw-r--r--stdlib/msort.c276
-rw-r--r--stdlib/tst-qsort2.c89
4 files changed, 309 insertions, 72 deletions
diff --git a/ChangeLog b/ChangeLog
index eed866ff58..ee214fc199 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,17 @@
+2007-10-04  Jakub Jelinek
+
+	* stdlib/msort.c: Include stdint.h.
+	(struct msort_param): New type.
+	(msort_with_tmp): Use struct msort_param pointer for unchanging
+	parameters.  Add optimized handling for several common sizes
+	and indirect sorting mode.
+	(qsort): Adjust msort_with_tmp callers.  For big S use indirect
+	sorting.
+	Suggested by Belazougui Djamel .
+
+	* stdlib/Makefile (tests): Add tst-qsort2.
+	* stdlib/tst-qsort2.c: New test.
+
 2007-10-04  Ulrich Drepper  <drepper@redhat.com>
 
 	* login/login_tty.c (login_tty): The Linux kernel can return EBUSY
diff --git a/stdlib/Makefile b/stdlib/Makefile
index 7390647d8e..e72ab5b99d 100644
--- a/stdlib/Makefile
+++ b/stdlib/Makefile
@@ -68,7 +68,7 @@ tests		:= tst-strtol tst-strtod testmb testrand testsort testdiv   \
 		   tst-limits tst-rand48 bug-strtod tst-setcontext	    \
 		   test-a64l tst-qsort tst-system testmb2 bug-strtod2	    \
 		   tst-atof1 tst-atof2 tst-strtod2 tst-strtod3 tst-rand48-2 \
-		   tst-makecontext tst-strtod4 tst-strtod5
+		   tst-makecontext tst-strtod4 tst-strtod5 tst-qsort2
 
 include ../Makeconfig
 
diff --git a/stdlib/msort.c b/stdlib/msort.c
index e69b4011c4..3961e9e981 100644
--- a/stdlib/msort.c
+++ b/stdlib/msort.c
@@ -1,6 +1,6 @@
 /* An alternative to qsort, with an identical interface.
    This file is part of the GNU C Library.
-   Copyright (C) 1992,95-97,99,2000,01,02,04 Free Software Foundation, Inc.
+   Copyright (C) 1992,95-97,99,2000,01,02,04,07 Free Software Foundation, Inc.
    Written by Mike Haertel, September 1988.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -19,20 +19,25 @@
    02111-1307 USA.  */
 
 #include <alloca.h>
+#include <stdint.h>
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
 #include <memcopy.h>
 #include <errno.h>
 
-static void msort_with_tmp (void *b, size_t n, size_t s,
-			    __compar_fn_t cmp, char *t);
+struct msort_param
+{
+  size_t s;
+  size_t var;
+  __compar_fn_t cmp;
+  char *t;
+};
+static void msort_with_tmp (const struct msort_param *p, void *b, size_t n);
 
 static void
-msort_with_tmp (void *b, size_t n, size_t s, __compar_fn_t cmp,
-		char *t)
+msort_with_tmp (const struct msort_param *p, void *b, size_t n)
 {
-  char *tmp;
   char *b1, *b2;
   size_t n1, n2;
 
@@ -42,65 +47,131 @@ msort_with_tmp (void *b, size_t n, size_t s, __compar_fn_t cmp,
   n1 = n / 2;
   n2 = n - n1;
   b1 = b;
-  b2 = (char *) b + (n1 * s);
+  b2 = (char *) b + (n1 * p->s);
 
-  msort_with_tmp (b1, n1, s, cmp, t);
-  msort_with_tmp (b2, n2, s, cmp, t);
+  msort_with_tmp (p, b1, n1);
+  msort_with_tmp (p, b2, n2);
 
-  tmp = t;
+  char *tmp = p->t;
+  const size_t s = p->s;
+  __compar_fn_t cmp = p->cmp;
+  switch (p->var)
+    {
+    case 0:
+      while (n1 > 0 && n2 > 0)
+	{
+	  if ((*cmp) (b1, b2) <= 0)
+	    {
+	      *(uint32_t *) tmp = *(uint32_t *) b1;
+	      b1 += sizeof (uint32_t);
+	      --n1;
+	    }
+	  else
+	    {
+	      *(uint32_t *) tmp = *(uint32_t *) b2;
+	      b2 += sizeof (uint32_t);
+	      --n2;
+	    }
+	  tmp += sizeof (uint32_t);
+	}
+      break;
+    case 1:
+      while (n1 > 0 && n2 > 0)
+	{
+	  if ((*cmp) (b1, b2) <= 0)
+	    {
+	      *(uint64_t *) tmp = *(uint64_t *) b1;
+	      b1 += sizeof (uint64_t);
+	      --n1;
+	    }
+	  else
+	    {
+	      *(uint64_t *) tmp = *(uint64_t *) b2;
+	      b2 += sizeof (uint64_t);
+	      --n2;
+	    }
+	  tmp += sizeof (uint64_t);
+	}
+      break;
+    case 2:
+      while (n1 > 0 && n2 > 0)
+	{
+	  unsigned long *tmpl = (unsigned long *) tmp;
+	  unsigned long *bl;
+
+	  tmp += s;
+	  if ((*cmp) (b1, b2) <= 0)
+	    {
+	      bl = (unsigned long *) b1;
+	      b1 += s;
+	      --n1;
+	    }
+	  else
+	    {
+	      bl = (unsigned long *) b2;
+	      b2 += s;
+	      --n2;
+	    }
+	  while (tmpl < (unsigned long *) tmp)
+	    *tmpl++ = *bl++;
+	}
+      break;
+    case 3:
+      while (n1 > 0 && n2 > 0)
+	{
+	  if ((*cmp) (*(const void **) b1, *(const void **) b2) <= 0)
+	    {
+	      *(void **) tmp = *(void **) b1;
+	      b1 += sizeof (void *);
+	      --n1;
+	    }
+	  else
+	    {
+	      *(void **) tmp = *(void **) b2;
+	      b2 += sizeof (void *);
+	      --n2;
+	    }
+	  tmp += sizeof (void *);
+	}
+      break;
+    default:
+      while (n1 > 0 && n2 > 0)
+	{
+	  if ((*cmp) (b1, b2) <= 0)
+	    {
+	      tmp = (char *) __mempcpy (tmp, b1, s);
+	      b1 += s;
+	      --n1;
+	    }
+	  else
+	    {
+	      tmp = (char *) __mempcpy (tmp, b2, s);
+	      b2 += s;
+	      --n2;
+	    }
+	}
+      break;
+    }
 
-  if (s == OPSIZ && (b1 - (char *) 0) % OPSIZ == 0)
-    /* We are operating on aligned words.  Use direct word stores.  */
-    while (n1 > 0 && n2 > 0)
-      {
-	if ((*cmp) (b1, b2) <= 0)
-	  {
-	    --n1;
-	    *((op_t *) tmp) = *((op_t *) b1);
-	    tmp += sizeof (op_t);
-	    b1 += sizeof (op_t);
-	  }
-	else
-	  {
-	    --n2;
-	    *((op_t *) tmp) = *((op_t *) b2);
-	    tmp += sizeof (op_t);
-	    b2 += sizeof (op_t);
-	  }
-      }
-  else
-    while (n1 > 0 && n2 > 0)
-      {
-	if ((*cmp) (b1, b2) <= 0)
-	  {
-	    tmp = (char *) __mempcpy (tmp, b1, s);
-	    b1 += s;
-	    --n1;
-	  }
-	else
-	  {
-	    tmp = (char *) __mempcpy (tmp, b2, s);
-	    b2 += s;
-	    --n2;
-	  }
-      }
   if (n1 > 0)
     memcpy (tmp, b1, n1 * s);
-  memcpy (b, t, (n - n2) * s);
+  memcpy (b, p->t, (n - n2) * s);
 }
 
 void
 qsort (void *b, size_t n, size_t s, __compar_fn_t cmp)
 {
-  const size_t size = n * s;
+  size_t size = n * s;
+  char *tmp = NULL;
+  struct msort_param p;
 
-  if (size < 1024)
-    {
-      void *buf = __alloca (size);
+  /* For large object sizes use indirect sorting.  */
+  if (s > 32)
+    size = 2 * n * sizeof (void *) + s;
 
-      /* The temporary array is small, so put it on the stack.  */
-      msort_with_tmp (b, n, s, cmp, buf);
-    }
+  if (size < 1024)
+    /* The temporary array is small, so put it on the stack.  */
+    p.t = __alloca (size);
   else
     {
       /* We should avoid allocating too much memory since this might
@@ -135,26 +206,89 @@ qsort (void *b, size_t n, size_t s, __compar_fn_t cmp)
 
       /* If the memory requirements are too high don't allocate memory.  */
       if (size / pagesize > (size_t) phys_pages)
-	_quicksort (b, n, s, cmp);
-      else
 	{
-	  /* It's somewhat large, so malloc it.  */
-	  int save = errno;
-	  char *tmp = malloc (size);
-	  if (tmp == NULL)
-	    {
-	      /* Couldn't get space, so use the slower algorithm
-		 that doesn't need a temporary array.  */
-	      __set_errno (save);
-	      _quicksort (b, n, s, cmp);
-	    }
-	  else
-	    {
-	      __set_errno (save);
-	      msort_with_tmp (b, n, s, cmp, tmp);
-	      free (tmp);
-	    }
+	  _quicksort (b, n, s, cmp);
+	  return;
+	}
+
+      /* It's somewhat large, so malloc it.  */
+      int save = errno;
+      tmp = malloc (size);
+      __set_errno (save);
+      if (tmp == NULL)
+	{
+	  /* Couldn't get space, so use the slower algorithm
+	     that doesn't need a temporary array.  */
+	  _quicksort (b, n, s, cmp);
+	  return;
+	}
+      p.t = tmp;
+    }
+
+  p.s = s;
+  p.cmp = cmp;
+  p.var = 4;
+
+  if (s > 32)
+    {
+      /* Indirect sorting.  */
+      char *ip = (char *) b;
+      void **tp = (void **) (p.t + n * sizeof (void *));
+      void **t = tp;
+      void *tmp_storage = (void *) (tp + n);
+
+      while ((void *) t < tmp_storage)
+	{
+	  *t++ = ip;
+	  ip += s;
+	}
+      p.s = sizeof (void *);
+      p.var = 3;
+      msort_with_tmp (&p, p.t + n * sizeof (void *), n);
+
+      /* tp[0] .. tp[n - 1] is now sorted, copy around entries of
+	 the original array.  Knuth vol. 3 (2nd ed.) exercise 5.2-10.  */
+      char *kp;
+      size_t i;
+      for (i = 0, ip = (char *) b; i < n; i++, ip += s)
+	if ((kp = tp[i]) != ip)
+	  {
+	    size_t j = i;
+	    char *jp = ip;
+	    memcpy (tmp_storage, ip, s);
+
+	    do
+	      {
+		size_t k = (kp - (char *) b) / s;
+		tp[j] = jp;
+		memcpy (jp, kp, s);
+		j = k;
+		jp = kp;
+		kp = tp[k];
+	      }
+	    while (kp != ip);
+
+	    tp[j] = jp;
+	    memcpy (jp, tmp_storage, s); 
+	  }
+    }
+  else
+    {
+      if ((s & (sizeof (uint32_t) - 1)) == 0
+	  && ((char *) b - (char *) 0) % __alignof__ (uint32_t) == 0)
+	{
+	  if (s == sizeof (uint32_t))
+	    p.var = 0;
+	  else if (s == sizeof (uint64_t)
+		   && ((char *) b - (char *) 0) % __alignof__ (uint64_t) == 0)
+	    p.var = 1;
+	  else if ((s & (sizeof (unsigned long) - 1)) == 0
+		   && ((char *) b - (char *) 0)
+		      % __alignof__ (unsigned long) == 0)
+	    p.var = 2;
 	}
+      msort_with_tmp (&p, b, n);
     }
+  free (tmp);
 }
 libc_hidden_def (qsort)
diff --git a/stdlib/tst-qsort2.c b/stdlib/tst-qsort2.c
new file mode 100644
index 0000000000..75d4a1732d
--- /dev/null
+++ b/stdlib/tst-qsort2.c
@@ -0,0 +1,89 @@
+#include <stdio.h>
+#include <stdlib.h>
+
+char *array;
+char *array_end;
+size_t member_size;
+
+int
+compare (const void *a1, const void *b1)
+{
+  const char *a = a1;
+  const char *b = b1;
+
+  if (! (array <= a && a < array_end
+	 && array <= b && b < array_end))
+    {
+      puts ("compare arguments not inside of the array");
+      exit (EXIT_FAILURE);
+    }
+  int ret = b[0] - a[0];
+  if (ret)
+    return ret;
+  if (member_size > 1)
+    return b[1] - a[1];
+  return 0;
+}
+
+int
+test (size_t nmemb, size_t size)
+{
+  array = malloc (nmemb * size);
+  if (array == NULL)
+    {
+      printf ("%zd x %zd: no memory", nmemb, size);
+      return 1;
+    }
+
+  array_end = array + nmemb * size;
+  member_size = size;
+
+  char *p;
+  size_t i;
+  size_t bias = random ();
+  for (i = 0, p = array; i < nmemb; i++, p += size)
+    {
+      p[0] = (char) (i + bias);
+      if (size > 1)
+	p[1] = (char) ((i + bias) >> 8);
+    }
+
+  qsort (array, nmemb, size, compare);
+
+  for (i = 0, p = array; i < nmemb - 1; i++, p += size)
+    {
+      if (p[0] < p[size]
+	  || (size > 1 && p[0] == p[size] && p[1] < p[size + 1]))
+	{
+	  printf ("%zd x %zd: failure at offset %zd\n", nmemb,
+		  size, i);
+	  free (array);
+	  return 1;
+	}
+    }
+
+  free (array);
+  return 0;
+}
+
+int
+main (int argc, char **argv)
+{
+  int ret = 0;
+  if (argc >= 2)
+    ret |= test (atoi (argv[1]), atoi (argv[2]));
+  else
+    {
+      ret |= test (10000, 1);
+      ret |= test (200000, 2);
+      ret |= test (2000000, 3);
+      ret |= test (2132310, 4);
+      ret |= test (1202730, 7);
+      ret |= test (1184710, 8);
+      ret |= test (272710, 12);
+      ret |= test (14170, 32);
+      ret |= test (4170, 320);
+    }
+
+  return ret;
+}