diff options
author | Wilco Dijkstra <Wilco.Dijkstra@arm.com> | 2019-01-29 17:43:45 +0000 |
---|---|---|
committer | Adhemerval Zanella <adhemerval.zanella@linaro.org> | 2019-03-22 17:30:44 -0300 |
commit | 7621e38bf3c58b2d0359545f1f2898017fd89d05 (patch) | |
tree | fbf712328d2d62a509d7a048e2fb22a5bf0f4b55 | |
parent | 1e372ded4f83362509c8672ff501cba871bb1edc (diff) | |
download | glibc-7621e38bf3c58b2d0359545f1f2898017fd89d05.tar.gz glibc-7621e38bf3c58b2d0359545f1f2898017fd89d05.tar.xz glibc-7621e38bf3c58b2d0359545f1f2898017fd89d05.zip |
Add generic hp-timing support
Add missing generic hp_timing support. It uses clock_gettime (CLOCK_MONOTONIC), which has an unspecified starting time and nanosecond accuracy, and should be faster on architectures that implement the symbol as a vDSO. Checked on aarch64-linux-gnu, x86_64-linux-gnu, and i686-linux-gnu. I also checked the builds for all affected ABIs. * benchtests/Makefile (USE_CLOCK_GETTIME) Remove. * benchtests/README: Update description. * benchtests/bench-timing.h: Default to hp-timing. * sysdeps/generic/hp-timing.h (HP_TIMING_DIFF, HP_TIMING_ACCUM_NT, HP_TIMING_PRINT): Remove. (HP_TIMING_NOW): Add generic implementation. (hp_timing_t): Change to uint64_t.
-rw-r--r-- | ChangeLog | 8 | ||||
-rw-r--r-- | benchtests/Makefile | 6 | ||||
-rw-r--r-- | benchtests/README | 7 | ||||
-rw-r--r-- | benchtests/bench-timing.h | 44 | ||||
-rw-r--r-- | sysdeps/generic/hp-timing.h | 25 |
5 files changed, 33 insertions, 57 deletions
diff --git a/ChangeLog b/ChangeLog index 316cbdf65f..45917ec56a 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,13 @@ 2019-03-22 Adhemerval Zanella <adhemerval.zanella@linaro.org> + * benchtests/Makefile (USE_CLOCK_GETTIME) Remove. + * benchtests/README: Update description. + * benchtests/bench-timing.h: Default to hp-timing. + * sysdeps/generic/hp-timing.h (HP_TIMING_DIFF, HP_TIMING_ACCUM_NT, + HP_TIMING_PRINT): Remove. + (HP_TIMING_NOW): Add generic implementation. + (hp_timing_t): Change to uint64_t. + * benchtests/bench-timing.h: Replace HP_TIMING_AVAIL with HP_TIMING_INLINE. * nptl/descr.h: Likewise. diff --git a/benchtests/Makefile b/benchtests/Makefile index d00993eca4..cdc89488d6 100644 --- a/benchtests/Makefile +++ b/benchtests/Makefile @@ -127,17 +127,11 @@ endif CPPFLAGS-nonlib += -DDURATION=$(BENCH_DURATION) -D_ISOMAC -# Use clock_gettime to measure performance of functions. The default is to use -# HP_TIMING if it is available. -ifdef USE_CLOCK_GETTIME -CPPFLAGS-nonlib += -DUSE_CLOCK_GETTIME -else # On x86 processors, use RDTSCP, instead of RDTSC, to measure performance # of functions. All x86 processors since 2010 support RDTSCP instruction. ifdef USE_RDTSCP CPPFLAGS-nonlib += -DUSE_RDTSCP endif -endif DETAILED_OPT := diff --git a/benchtests/README b/benchtests/README index aaf0b659e2..c4f03fd872 100644 --- a/benchtests/README +++ b/benchtests/README @@ -27,12 +27,7 @@ BENCH_DURATION. The benchmark suite does function call measurements using architecture-specific high precision timing instructions whenever available. When such support is -not available, it uses clock_gettime (CLOCK_PROCESS_CPUTIME_ID). One can force -the benchmark to use clock_gettime by invoking make as follows: - - $ make USE_CLOCK_GETTIME=1 bench - -Again, one must run `make bench-clean' before changing the measurement method. +not available, it uses clock_gettime (CLOCK_MONOTONIC). On x86 processors, RDTSCP instruction provides more precise timing data than RDTSC instruction. 
All x86 processors since 2010 support RDTSCP diff --git a/benchtests/bench-timing.h b/benchtests/bench-timing.h index 93fe379f99..e213dec3fd 100644 --- a/benchtests/bench-timing.h +++ b/benchtests/bench-timing.h @@ -18,49 +18,21 @@ #undef attribute_hidden #define attribute_hidden +#define __clock_gettime clock_gettime #include <hp-timing.h> #include <stdint.h> -#if HP_TIMING_INLINE && !defined USE_CLOCK_GETTIME -# define GL(x) _##x -# define GLRO(x) _##x +#define GL(x) _##x +#define GLRO(x) _##x typedef hp_timing_t timing_t; -# define TIMING_TYPE "hp_timing" +#define TIMING_TYPE "hp_timing" -# define TIMING_INIT(res) ({ (res) = 1; }) +#define TIMING_INIT(res) ({ (res) = 1; }) -# define TIMING_NOW(var) HP_TIMING_NOW (var) -# define TIMING_DIFF(diff, start, end) HP_TIMING_DIFF ((diff), (start), (end)) -# define TIMING_ACCUM(sum, diff) HP_TIMING_ACCUM_NT ((sum), (diff)) - -#else - -#include <time.h> -typedef uint64_t timing_t; - -# define TIMING_TYPE "clock_gettime" - -/* Measure the resolution of the clock so we can scale the number of - benchmark iterations by this value. 
*/ -# define TIMING_INIT(res) \ -({ \ - struct timespec start; \ - clock_getres (CLOCK_PROCESS_CPUTIME_ID, &start); \ - (res) = start.tv_nsec; \ -}) - -# define TIMING_NOW(var) \ -({ \ - struct timespec tv; \ - clock_gettime (CLOCK_PROCESS_CPUTIME_ID, &tv); \ - (var) = (uint64_t) (tv.tv_nsec + (uint64_t) 1000000000 * tv.tv_sec); \ -}) - -# define TIMING_DIFF(diff, start, end) (diff) = (end) - (start) -# define TIMING_ACCUM(sum, diff) (sum) += (diff) - -#endif +#define TIMING_NOW(var) HP_TIMING_NOW (var) +#define TIMING_DIFF(diff, start, end) HP_TIMING_DIFF ((diff), (start), (end)) +#define TIMING_ACCUM(sum, diff) HP_TIMING_ACCUM_NT ((sum), (diff)) #define TIMING_PRINT_MEAN(d_total_s, d_iters) \ printf ("\t%g", (d_total_s) / (d_iters)) diff --git a/sysdeps/generic/hp-timing.h b/sysdeps/generic/hp-timing.h index 278998d2c2..2528279558 100644 --- a/sysdeps/generic/hp-timing.h +++ b/sysdeps/generic/hp-timing.h @@ -20,16 +20,23 @@ #ifndef _HP_TIMING_H #define _HP_TIMING_H 1 -/* There are no generic definitions for the times. We could write something - using the `gettimeofday' system call where available but the overhead of - the system call might be too high. */ +#include <time.h> +#include <stdint.h> +#include <hp-timing-common.h> -/* Provide dummy definitions. */ +/* It should not be used for ld.so. */ #define HP_TIMING_INLINE (0) -typedef int hp_timing_t; -#define HP_TIMING_NOW(var) -#define HP_TIMING_DIFF(Diff, Start, End) -#define HP_TIMING_ACCUM_NT(Sum, Diff) -#define HP_TIMING_PRINT(Buf, Len, Val) + +typedef uint64_t hp_timing_t; + +/* The clock_gettime (CLOCK_MONOTONIC) has unspecified starting time, + nano-second accuracy, and for some architectues is implemented as + vDSO symbol. */ +#define HP_TIMING_NOW(var) \ +({ \ + struct timespec tv; \ + __clock_gettime (CLOCK_MONOTONIC, &tv); \ + (var) = (tv.tv_nsec + UINT64_C(1000000000) * tv.tv_sec); \ +}) #endif /* hp-timing.h */ |