diff options
author | Siddhesh Poyarekar <siddhesh@sourceware.org> | 2017-10-05 21:49:37 +0530 |
---|---|---|
committer | Siddhesh Poyarekar <siddhesh@sourceware.org> | 2017-10-05 22:20:23 +0530 |
commit | 9ec87fd2b11ffce5c27c8eacb9deaa20e2373ff5 (patch) | |
tree | 5b1ea6ee495759a1935f054af2f62b14d9dc0ecb | |
parent | 1e9522c61c7a544d59db32cb7fbbd42e6793d848 (diff) | |
download | glibc-9ec87fd2b11ffce5c27c8eacb9deaa20e2373ff5.tar.gz glibc-9ec87fd2b11ffce5c27c8eacb9deaa20e2373ff5.tar.xz glibc-9ec87fd2b11ffce5c27c8eacb9deaa20e2373ff5.zip |
benchtests: Memory walking benchmark for memcpy
This benchmark is an attempt to eliminate cache effects from string benchmarks. The benchmark walks both ways through a large memory area and copies different sizes of memory and alignments one at a time instead of looping around in the same memory area. This is a good metric to have alongside the other memcpy benchmarks, especially for larger sizes where the likelihood of the call being done only once is pretty high.

* benchtests/bench-memcpy-walk.c: New file.
* benchtests/Makefile (string-benchset): Add it.
-rw-r--r-- | ChangeLog | 5 | ||||
-rw-r--r-- | benchtests/Makefile | 3 | ||||
-rw-r--r-- | benchtests/bench-memcpy-walk.c | 127 |
3 files changed, 134 insertions, 1 deletions
```diff
diff --git a/ChangeLog b/ChangeLog
index 7ddff74272..a86faeb792 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+2017-10-05  Siddhesh Poyarekar  <siddhesh@sourceware.org>
+
+	* benchtests/bench-memcpy-walk.c: New file.
+	* benchtests/Makefile (string-benchset): Add it.
+
 2017-10-05  Florian Weimer  <fweimer@redhat.com>
 
 	nscd: Eliminate compilation time dependency in the build output.
diff --git a/benchtests/Makefile b/benchtests/Makefile
index 3acc39cafa..d086cc671f 100644
--- a/benchtests/Makefile
+++ b/benchtests/Makefile
@@ -37,7 +37,8 @@ string-benchset := bcopy bzero memccpy memchr memcmp memcpy memmem memmove \
 		   strcat strchr strchrnul strcmp strcpy strcspn strlen \
 		   strncasecmp strncat strncmp strncpy strnlen strpbrk strrchr \
 		   strspn strstr strcpy_chk stpcpy_chk memrchr strsep strtok \
-		   strcoll memcpy-large memcpy-random memmove-large memset-large
+		   strcoll memcpy-large memcpy-random memmove-large memset-large \
+		   memcpy-walk
 
 # Build and run locale-dependent benchmarks only if we're building natively.
 ifeq (no,$(cross-compiling))
diff --git a/benchtests/bench-memcpy-walk.c b/benchtests/bench-memcpy-walk.c
new file mode 100644
index 0000000000..69d467d3fe
--- /dev/null
+++ b/benchtests/bench-memcpy-walk.c
@@ -0,0 +1,127 @@
+/* Measure memcpy function combined throughput for different alignments.
+   Copyright (C) 2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* This microbenchmark measures the throughput of memcpy for various sizes from
+   1 byte to 32MiB, doubling every iteration and then misaligning by 0-15
+   bytes.  The copies are done from source to destination and then back and the
+   source walks forward across the array and the destination walks backward by
+   one byte each, thus measuring misaligned accesses as well.  The idea is to
+   avoid caching effects by copying a different string and far enough from each
+   other, walking in different directions so that we can measure prefetcher
+   efficiency (software or hardware) more closely than with a loop copying the
+   same data over and over, which eventually only gives us L1 cache
+   performance.  */
+
+#ifndef MEMCPY_RESULT
+# define MEMCPY_RESULT(dst, len) dst
+# define START_SIZE 1
+# define MIN_PAGE_SIZE (getpagesize () + 32 * 1024 * 1024)
+# define TEST_MAIN
+# define TEST_NAME "memcpy"
+# define TIMEOUT (20 * 60)
+# include "bench-string.h"
+
+IMPL (memcpy, 1)
+#endif
+
+#include "json-lib.h"
+
+typedef char *(*proto_t) (char *, const char *, size_t);
+
+static void
+do_one_test (json_ctx_t *json_ctx, impl_t *impl, char *dst, char *src,
+	     size_t len)
+{
+  size_t i, iters = MIN_PAGE_SIZE / len;
+  timing_t start, stop, cur;
+
+  char *dst_end = dst + MIN_PAGE_SIZE - len;
+  char *src_end = src + MIN_PAGE_SIZE - len;
+
+  TIMING_NOW (start);
+  /* Copy the entire buffer back and forth, LEN at a time.  */
+  for (i = 0; i < iters && dst_end >= dst && src <= src_end; src++, dst_end--)
+    {
+      CALL (impl, dst_end, src, len);
+      CALL (impl, src, dst_end, len);
+      i += 2;
+    }
+  TIMING_NOW (stop);
+
+  TIMING_DIFF (cur, start, stop);
+
+  /* Get time taken per function call.  */
+  json_element_double (json_ctx, (double) cur * len / i);
+}
+
+static void
+do_test (json_ctx_t *json_ctx, size_t len)
+{
+  json_element_object_begin (json_ctx);
+  json_attr_uint (json_ctx, "length", (double) len);
+  json_array_begin (json_ctx, "timings");
+
+  FOR_EACH_IMPL (impl, 0)
+    do_one_test (json_ctx, impl, (char *) buf2, (char *) buf1, len);
+
+  json_array_end (json_ctx);
+  json_element_object_end (json_ctx);
+}
+
+int
+test_main (void)
+{
+  json_ctx_t json_ctx;
+  size_t i;
+
+  test_init ();
+
+  json_init (&json_ctx, 0, stdout);
+
+  json_document_begin (&json_ctx);
+  json_attr_string (&json_ctx, "timing_type", TIMING_TYPE);
+
+  json_attr_object_begin (&json_ctx, "functions");
+  json_attr_object_begin (&json_ctx, "memcpy");
+  json_attr_string (&json_ctx, "bench-variant", "walk");
+
+  json_array_begin (&json_ctx, "ifuncs");
+  FOR_EACH_IMPL (impl, 0)
+    json_element_string (&json_ctx, impl->name);
+  json_array_end (&json_ctx);
+
+  json_array_begin (&json_ctx, "results");
+  for (i = START_SIZE; i <= MIN_PAGE_SIZE; i <<= 1)
+    {
+      /* Test length alignments from 0-16 bytes.  */
+      for (int j = 0; j < 8; j++)
+	{
+	  do_test (&json_ctx, i + j);
+	  do_test (&json_ctx, i + 16 - j);
+	}
+    }
+
+  json_array_end (&json_ctx);
+  json_attr_object_end (&json_ctx);
+  json_attr_object_end (&json_ctx);
+  json_document_end (&json_ctx);
+
+  return ret;
+}
+
+#include <support/test-driver.c>
```