about summary refs log tree commit diff
diff options
context:
space:
mode:
authorSiddhesh Poyarekar <siddhesh@redhat.com>2013-04-16 17:37:24 +0530
committerSiddhesh Poyarekar <siddhesh@redhat.com>2013-06-11 15:08:13 +0530
commitc1f75dc386d533806d29b7e94118363a7b50eed8 (patch)
tree38d917f7a99bf569704a09e56a0dd5b7b4475be5
parent50fd745b4dec07e8e213cf2703b5cabcfa128225 (diff)
downloadglibc-c1f75dc386d533806d29b7e94118363a7b50eed8.tar.gz
glibc-c1f75dc386d533806d29b7e94118363a7b50eed8.tar.xz
glibc-c1f75dc386d533806d29b7e94118363a7b50eed8.zip
Begin porting string performance tests to benchtests
This is the initial support for string function performance tests,
along with copying tests for memcpy and memcpy-ifunc as proof of
concept.  The string function benchmarks perform operations at
different alignments and for different sizes and compare performance
between plain operations and the optimized string operations.  Due to
this their output is incompatible with the function benchmarks where
we're interested in fastest time, throughput, etc.

In the future, the correctness checks in the benchmark tests can be
removed.  The same goes for the performance measurements in the
string/test-* programs.
-rw-r--r--ChangeLog13
-rw-r--r--benchtests/Makefile27
-rw-r--r--benchtests/README15
-rw-r--r--benchtests/bench-memcpy-ifunc.c20
-rw-r--r--benchtests/bench-memcpy.c163
-rw-r--r--benchtests/bench-string.h212
6 files changed, 447 insertions, 3 deletions
diff --git a/ChangeLog b/ChangeLog
index 0afbce712c..737c746aa0 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,16 @@
+2013-06-11  Siddhesh Poyarekar  <siddhesh@redhat.com>
+
+	* benchtests/Makefile: Disable parallel execution of targets.
+	(string-bench): Add memcpy.
+	(benchset): New variable to store a list of benchmark sets.
+	(bench-func): Renamed from bench.
+	(bench-set): New target.
+	(bench): Depend on bench-func and bench-set.
+	* benchtests/README: Add section on benchmark sets.
+	* benchtests/bench-memcpy-ifunc.c: New file.
+	* benchtests/bench-memcpy.c: New file.
+	* benchtests/bench-string.h: New file.
+
 2013-06-11  Andreas Schwab  <schwab@suse.de>
 
 	[BZ #15577]
diff --git a/benchtests/Makefile b/benchtests/Makefile
index 680440f429..27d83f45a7 100644
--- a/benchtests/Makefile
+++ b/benchtests/Makefile
@@ -23,6 +23,13 @@ subdir := benchtests
 bench := acos acosh asin asinh atan atanh cos cosh exp log modf pow rint sin \
 	 sinh tan tanh
 
+# String function benchmarks.
+string-bench := memcpy
+string-bench-ifunc := $(addsuffix -ifunc, $(string-bench))
+string-bench-all := $(string-bench) $(string-bench-ifunc)
+
+benchset := $(string-bench-all)
+
 acos-ARGLIST = double
 acos-RET = double
 LDFLAGS-bench-acos = -lm
@@ -92,10 +99,15 @@ LDFLAGS-bench-tanh = -lm
 # Rules to build and execute the benchmarks.  Do not put any benchmark
 # parameters beyond this point.
 
+# We don't want the benchmark programs to run in parallel since that could
+# affect their performance.
+.NOTPARALLEL:
+
 include ../Makeconfig
 include ../Rules
 
 binaries-bench := $(addprefix $(objpfx)bench-,$(bench))
+binaries-benchset := $(addprefix $(objpfx)bench-,$(benchset))
 
 # The default duration: 10 seconds.
 ifndef BENCH_DURATION
@@ -112,7 +124,7 @@ endif
 
 # This makes sure CPPFLAGS-nonlib and CFLAGS-nonlib are passed
 # for all these modules.
-cpp-srcs-left := $(binaries-bench:=.c)
+cpp-srcs-left := $(binaries-benchset:=.c) $(binaries-bench:=.c)
 lib := nonlib
 include $(patsubst %,$(..)cppflags-iterator.mk,$(cpp-srcs-left))
 
@@ -124,8 +136,17 @@ run-bench = $(test-wrapper-env) \
 
 bench-clean:
 	rm -f $(binaries-bench) $(addsuffix .o,$(binaries-bench))
+	rm -f $(binaries-benchset) $(addsuffix .o,$(binaries-benchset))
+
+bench: bench-set bench-func
+
+bench-set: $(binaries-benchset)
+	for run in $^; do \
+	  echo "Running $${run}"; \
+	  $(run-bench) > $${run}.out; \
+	done
 
-bench: $(binaries-bench)
+bench-func: $(binaries-bench)
 	{ for run in $^; do \
 	  echo "Running $${run}" >&2; \
 	  $(run-bench); \
@@ -135,7 +156,7 @@ bench: $(binaries-bench)
 	fi; \
 	mv -f $(objpfx)bench.out-tmp $(objpfx)bench.out
 
-$(binaries-bench): %: %.o \
+$(binaries-bench) $(binaries-benchset): %: %.o \
   $(sort $(filter $(common-objpfx)lib%,$(link-libc))) \
   $(addprefix $(csu-objpfx),start.o) $(+preinit) $(+postinit)
 	$(+link)
diff --git a/benchtests/README b/benchtests/README
index 8135069fea..045b7a673d 100644
--- a/benchtests/README
+++ b/benchtests/README
@@ -72,3 +72,18 @@ the same file by using the `name' directive that looks something like this:
 
 See the pow-inputs file for an example of what such a partitioned input file
 would look like.
+
+Benchmark Sets:
+==============
+
+In addition to standard benchmarking of functions, one may also generate
+custom outputs for a set of functions.  This is currently used by string
+function benchmarks where the aim is to compare performance between
+implementations at various alignments and for various sizes.
+
+To add a benchset for `foo':
+
+- Add `foo' to the benchset variable.
+- Write your bench-foo.c that prints out the measurements to stdout.
+- On execution, a bench-foo.out is created in $(objpfx) with the contents of
+  stdout.
diff --git a/benchtests/bench-memcpy-ifunc.c b/benchtests/bench-memcpy-ifunc.c
new file mode 100644
index 0000000000..b5a89f723d
--- /dev/null
+++ b/benchtests/bench-memcpy-ifunc.c
@@ -0,0 +1,20 @@
+/* Measure IFUNC implementations of memcpy function.
+   Copyright (C) 2013 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define TEST_IFUNC 1
+#include "bench-memcpy.c"
diff --git a/benchtests/bench-memcpy.c b/benchtests/bench-memcpy.c
new file mode 100644
index 0000000000..1b126711f8
--- /dev/null
+++ b/benchtests/bench-memcpy.c
@@ -0,0 +1,163 @@
+/* Measure memcpy functions.
+   Copyright (C) 2013 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef MEMCPY_RESULT
+# define MEMCPY_RESULT(dst, len) dst
+# define MIN_PAGE_SIZE 131072
+# define TEST_MAIN
+# define TEST_NAME "memcpy"
+# include "bench-string.h"
+
+char *simple_memcpy (char *, const char *, size_t);
+char *builtin_memcpy (char *, const char *, size_t);
+
+IMPL (simple_memcpy, 0)
+IMPL (builtin_memcpy, 0)
+IMPL (memcpy, 1)
+
+/* Straightforward byte-by-byte copy of N bytes from SRC to DST, in
+   increasing address order.  Returns DST.  */
+char *
+simple_memcpy (char *dst, const char *src, size_t n)
+{
+  for (size_t pos = 0; pos < n; ++pos)
+    dst[pos] = src[pos];
+  return dst;
+}
+
+/* Copy N bytes from SRC to DST through the compiler's __builtin_memcpy
+   and return whatever it returns.  */
+char *
+builtin_memcpy (char *dst, const char *src, size_t n)
+{
+  return __builtin_memcpy (dst, src, n);
+}
+#endif
+
+typedef char *(*proto_t) (char *, const char *, size_t);
+
+/* Check and time one memcpy implementation.
+
+   Call IMPL to copy LEN bytes from SRC to DST.  If the returned pointer
+   differs from MEMCPY_RESULT (DST, LEN), or the first LEN bytes of DST
+   differ from SRC afterwards, report the failure with error (), set the
+   global RET to 1 and return without timing.  Otherwise, if
+   HP_TIMING_AVAIL, print a tab followed by the best of 32 timed calls.  */
+static void
+do_one_test (impl_t *impl, char *dst, const char *src,
+	     size_t len)
+{
+  /* Call once and keep the result so the diagnostic below reports the
+     value that was actually compared.  */
+  char *res = CALL (impl, dst, src, len);
+
+  if (res != MEMCPY_RESULT (dst, len))
+    {
+      error (0, 0, "Wrong result in function %s %p %p", impl->name,
+	     (void *) res, (void *) (MEMCPY_RESULT (dst, len)));
+      ret = 1;
+      return;
+    }
+
+  if (memcmp (dst, src, len) != 0)
+    {
+      /* DST and SRC are raw byte buffers with no guaranteed NUL
+	 terminator, so limit what is printed to LEN bytes.  */
+      error (0, 0, "Wrong result in function %s dst \"%.*s\" src \"%.*s\"",
+	     impl->name, (int) len, dst, (int) len, src);
+      ret = 1;
+      return;
+    }
+
+  if (HP_TIMING_AVAIL)
+    {
+      hp_timing_t start __attribute ((unused));
+      hp_timing_t stop __attribute ((unused));
+      hp_timing_t best_time = ~ (hp_timing_t) 0;
+      size_t i;
+
+      /* Keep the smallest of 32 measurements.  */
+      for (i = 0; i < 32; ++i)
+	{
+	  HP_TIMING_NOW (start);
+	  CALL (impl, dst, src, len);
+	  HP_TIMING_NOW (stop);
+	  HP_TIMING_BEST (best_time, start, stop);
+	}
+
+      /* The value is cast to size_t, which is unsigned: use %zu.  */
+      printf ("\t%zu", (size_t) best_time);
+    }
+}
+
+/* Run every implementation on one copy of LEN bytes.
+
+   ALIGN1 is the offset of the source within buf1 and ALIGN2 the offset of
+   the destination within buf2; both are reduced modulo 64.  The case is
+   silently skipped when either offset plus LEN reaches page_size.  The
+   source is filled with the byte sequence 1, 24, 47, ... (step 23,
+   truncated to char) before the implementations are run.  When
+   HP_TIMING_AVAIL, the timings are printed on one line that starts with
+   the length and the two alignments.  */
+static void
+do_test (size_t align1, size_t align2, size_t len)
+{
+  size_t i, j;
+  char *s1, *s2;
+
+  align1 &= 63;
+  if (align1 + len >= page_size)
+    return;
+
+  align2 &= 63;
+  if (align2 + len >= page_size)
+    return;
+
+  s1 = (char *) (buf1 + align1);
+  s2 = (char *) (buf2 + align2);
+
+  for (i = 0, j = 1; i < len; i++, j += 23)
+    s1[i] = j;
+
+  /* All three values are size_t, so they are printed with %zu.  */
+  if (HP_TIMING_AVAIL)
+    printf ("Length %4zu, alignment %2zu/%2zu:", len, align1, align2);
+
+  FOR_EACH_IMPL (impl, 0)
+    do_one_test (impl, s2, s1, len);
+
+  if (HP_TIMING_AVAIL)
+    putchar ('\n');
+}
+
+/* Run do_test for LEN bytes with each combination of source and
+   destination offset drawn from {0, ALIGN}, in the order (0, 0),
+   (ALIGN, 0), (0, ALIGN), (ALIGN, ALIGN).  */
+static void
+do_test_offsets (size_t align, size_t len)
+{
+  do_test (0, 0, len);
+  do_test (align, 0, len);
+  do_test (0, align, len);
+  do_test (align, align, len);
+}
+
+/* Benchmark driver.  Initializes the test buffers, prints a header row
+   naming every implementation, runs three families of lengths plus one
+   copy of getpagesize () bytes, and returns the global RET.  */
+int
+test_main (void)
+{
+  size_t step;
+
+  test_init ();
+
+  /* Header row: an empty 23-column label, then one tab-separated name
+     per implementation.  */
+  printf ("%23s", "");
+  FOR_EACH_IMPL (impl, 0)
+    printf ("\t%s", impl->name);
+  putchar ('\n');
+
+  /* Power-of-two lengths 1 << 0 through 1 << 17.  */
+  for (step = 0; step < 18; ++step)
+    do_test_offsets (step, (size_t) 1 << step);
+
+  /* Every length from 0 through 31.  */
+  for (step = 0; step < 32; ++step)
+    do_test_offsets (step, step);
+
+  /* Lengths 16 * STEP for each STEP in 3..31 that is not a power of
+     two.  */
+  for (step = 3; step < 32; ++step)
+    if ((step & (step - 1)) != 0)
+      do_test_offsets (step, 16 * step);
+
+  do_test (0, 0, getpagesize ());
+
+  return ret;
+}
+
+#include "../test-skeleton.c"
diff --git a/benchtests/bench-string.h b/benchtests/bench-string.h
new file mode 100644
index 0000000000..2fe8d9fae4
--- /dev/null
+++ b/benchtests/bench-string.h
@@ -0,0 +1,212 @@
+/* Measure string and memory functions.
+   Copyright (C) 2013 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sys/cdefs.h>
+
+/* Description of one implementation under test: NAME is the string
+   printed for it, FN is its entry point stored as a generic function
+   pointer (CALL casts it to proto_t before calling), and TEST is the
+   flag FOR_EACH_IMPL checks when its NOTALL argument is nonzero.  */
+typedef struct
+{
+  const char *name;
+  void (*fn) (void);
+  long test;
+} impl_t;
+/* First and one-past-last impl_t of the array that FOR_EACH_IMPL walks.
+   NOTE(review): these are presumably the linker-generated bounds of the
+   "impls" section that IMPL places its objects in -- confirm against the
+   toolchain documentation.  */
+extern impl_t __start_impls[], __stop_impls[];
+
+#define IMPL(name, test) \
+  impl_t tst_ ## name							\
+  __attribute__ ((section ("impls"), aligned (sizeof (void *))))	\
+       = { __STRING (name), (void (*) (void))name, test };
+
+#ifdef TEST_MAIN
+
+# ifndef _GNU_SOURCE
+#  define _GNU_SOURCE
+# endif
+
+# undef __USE_STRING_INLINES
+
+# include <stdio.h>
+# include <stdlib.h>
+# include <string.h>
+# include <sys/mman.h>
+# include <sys/param.h>
+# include <unistd.h>
+# include <fcntl.h>
+# include <error.h>
+# include <errno.h>
+# include <time.h>
+# include <ifunc-impl-list.h>
+# define GL(x) _##x
+# define GLRO(x) _##x
+# include <hp-timing.h>
+
+
+# define TEST_FUNCTION test_main ()
+# define TIMEOUT (4 * 60)
+# define OPT_ITERATIONS 10000
+# define OPT_RANDOM 10001
+# define OPT_SEED 10002
+
+unsigned char *buf1, *buf2;
+int ret, do_srandom;
+unsigned int seed;
+size_t page_size;
+
+hp_timing_t _dl_hp_timing_overhead;
+
+# ifndef ITERATIONS
+size_t iterations = 100000;
+#  define ITERATIONS_OPTIONS \
+     { "iterations", required_argument, NULL, OPT_ITERATIONS },
+#  define ITERATIONS_PROCESS \
+     case OPT_ITERATIONS:						      \
+       iterations = strtoul (optarg, NULL, 0);				      \
+       break;
+#  define ITERATIONS iterations
+# else
+#  define ITERATIONS_OPTIONS
+#  define ITERATIONS_PROCESS
+# endif
+
+# define CMDLINE_OPTIONS ITERATIONS_OPTIONS \
+    { "random", no_argument, NULL, OPT_RANDOM },			      \
+    { "seed", required_argument, NULL, OPT_SEED },
+# define CMDLINE_PROCESS ITERATIONS_PROCESS \
+    case OPT_RANDOM:							      \
+      {									      \
+	int fdr = open ("/dev/urandom", O_RDONLY);			      \
+									      \
+	if (fdr < 0 || read (fdr, &seed, sizeof(seed)) != sizeof (seed))      \
+	  seed = time (NULL);						      \
+	if (fdr >= 0)							      \
+	  close (fdr);							      \
+	do_srandom = 1;							      \
+	break;								      \
+      }									      \
+									      \
+    case OPT_SEED:							      \
+      seed = strtoul (optarg, NULL, 0);					      \
+      do_srandom = 1;							      \
+      break;
+
+# define CALL(impl, ...)	\
+    (* (proto_t) (impl)->fn) (__VA_ARGS__)
+
+# if defined TEST_IFUNC && defined TEST_NAME
+/* Increase size of FUNC_LIST if assert is triggered at run-time.  */
+static struct libc_ifunc_impl func_list[32];
+static int func_count;
+static int impl_count = -1;
+static impl_t *impl_array;
+
+#  define FOR_EACH_IMPL(impl, notall) \
+     impl_t *impl;							      \
+     int count;								      \
+     if (impl_count == -1)						      \
+       {								      \
+	 impl_count = 0;						      \
+	 if (func_count != 0)						      \
+	   {								      \
+	     int f;							      \
+	     impl_t *skip = NULL, *a;					      \
+	     for (impl = __start_impls; impl < __stop_impls; ++impl)	      \
+	       if (strcmp (impl->name, TEST_NAME) == 0)			      \
+		 skip = impl;						      \
+	       else							      \
+		 impl_count++;						      \
+	     a = impl_array = malloc ((impl_count + func_count) *	      \
+				   sizeof (impl_t));			      \
+	     for (impl = __start_impls; impl < __stop_impls; ++impl)	      \
+	       if (impl != skip)					      \
+		 *a++ = *impl;						      \
+	     for (f = 0; f < func_count; f++)				      \
+	       if (func_list[f].usable)					      \
+		 {							      \
+		   a->name = func_list[f].name;				      \
+		   a->fn = func_list[f].fn;				      \
+		   a->test = 1;						      \
+		   a++;							      \
+		 }							      \
+	     impl_count = a - impl_array;				      \
+	   }								      \
+	 else								      \
+	   {								      \
+	     impl_count = __stop_impls - __start_impls;			      \
+	     impl_array = __start_impls;				      \
+	   }								      \
+       }								      \
+     impl = impl_array;							      \
+     for (count = 0; count < impl_count; ++count, ++impl)		      \
+       if (!notall || impl->test)
+# else /* ! (defined TEST_IFUNC && defined TEST_NAME) */
+#  define FOR_EACH_IMPL(impl, notall) \
+     for (impl_t *impl = __start_impls; impl < __stop_impls; ++impl)	      \
+       if (!notall || impl->test)
+# endif /* ! (defined TEST_IFUNC && defined TEST_NAME) */
+
+# define HP_TIMING_BEST(best_time, start, end)	\
+    do									      \
+      {									      \
+	hp_timing_t tmptime;						      \
+	HP_TIMING_DIFF (tmptime, start + _dl_hp_timing_overhead, end);	      \
+	if (best_time > tmptime)					      \
+	  best_time = tmptime;						      \
+      }									      \
+    while (0)
+
+# ifndef BUF1PAGES
+#  define BUF1PAGES 1
+# endif
+
+/* Map USABLE readable and writable bytes followed by page_size further
+   bytes that are then made inaccessible with PROT_NONE.  Returns the start
+   of the mapping; exits through error () if mmap or mprotect fails.  */
+static unsigned char *
+map_guarded (size_t usable)
+{
+  unsigned char *area = mmap (0, usable + page_size, PROT_READ | PROT_WRITE,
+			      MAP_PRIVATE | MAP_ANON, -1, 0);
+
+  if (area == MAP_FAILED)
+    error (EXIT_FAILURE, errno, "mmap failed");
+  if (mprotect (area + usable, page_size, PROT_NONE))
+    error (EXIT_FAILURE, errno, "mprotect failed");
+  return area;
+}
+
+/* Set up the global benchmark state: collect the IFUNC implementation
+   list when TEST_IFUNC and TEST_NAME are defined, choose page_size (twice
+   getpagesize (), raised to MIN_PAGE_SIZE if that is defined and larger),
+   map buf1 and buf2 with a trailing inaccessible region each, initialize
+   the timing overhead, seed random () if requested, and fill buf1 with
+   0xa5 and buf2 with 0x5a.  */
+static void
+test_init (void)
+{
+# if defined TEST_IFUNC && defined TEST_NAME
+  func_count = __libc_ifunc_impl_list (TEST_NAME, func_list,
+				       (sizeof func_list
+					/ sizeof func_list[0]));
+# endif
+
+  page_size = 2 * getpagesize ();
+# ifdef MIN_PAGE_SIZE
+  if (page_size < MIN_PAGE_SIZE)
+    page_size = MIN_PAGE_SIZE;
+# endif
+
+  /* buf1 offers BUF1PAGES * page_size usable bytes, buf2 page_size.  */
+  buf1 = map_guarded (BUF1PAGES * page_size);
+  buf2 = map_guarded (page_size);
+
+  HP_TIMING_DIFF_INIT ();
+  if (do_srandom)
+    {
+      printf ("Setting seed to 0x%x\n", seed);
+      srandom (seed);
+    }
+
+  memset (buf1, 0xa5, BUF1PAGES * page_size);
+  memset (buf2, 0x5a, page_size);
+}
+
+#endif /* TEST_MAIN */