diff options
Diffstat (limited to 'benchtests')
-rw-r--r-- | benchtests/Makefile | 28 | ||||
-rw-r--r-- | benchtests/bench-modf.c | 1 | ||||
-rw-r--r-- | benchtests/bench-skeleton.c | 38 |
3 files changed, 38 insertions, 29 deletions
diff --git a/benchtests/Makefile b/benchtests/Makefile index 387fde441d..9d25d6933a 100644 --- a/benchtests/Makefile +++ b/benchtests/Makefile @@ -23,12 +23,6 @@ # - Append the function name to the bench variable -# - Define foo-ITER with the number of iterations you want to run. Keep it -# high enough that the overhead of clock_gettime is only a small fraction of -# the total run time of the test. A good idea would be to keep the run time -# of each test at around 10 seconds for x86_64. That is just a guideline, -# since some scenarios may require higher run times. - # - Define foo-ARGLIST as a colon separated list of types of the input # arguments. Use `void` if function does not take any inputs. Put in quotes # if the input argument is a pointer, e.g.: @@ -49,83 +43,70 @@ bench := exp pow rint sin cos tan atan modf \ slowexp slowpow slowsin slowcos slowtan slowatan # exp function fast path: sysdeps/ieee754/dbl-64/e_exp.c -exp-ITER = 5e8 exp-ARGLIST = double exp-RET = double LDFLAGS-bench-exp = -lm # pow function fast path: sysdeps/ieee754/dbl-64/e_pow.c -pow-ITER = 2e8 pow-ARGLIST = double:double pow-RET = double LDFLAGS-bench-pow = -lm -rint-ITER = 250000000 rint-ARGLIST = double rint-RET = double LDFLAGS-bench-rint = -lm # exp function slowest path: sysdeps/ieee754/dbl-64/mpexp.c -slowexp-ITER = 3e5 slowexp-ARGLIST = double slowexp-RET = double slowexp-INCLUDE = slowexp.c LDFLAGS-bench-slowexp = -lm # sin function fast path: sysdeps/ieee754/dbl-64/s_sin.c -sin-ITER = 3e9 sin-ARGLIST = double sin-RET = double LDFLAGS-bench-sin = -lm # cos function fast path: sysdeps/ieee754/dbl-64/s_sin.c -cos-ITER = 3e9 cos-ARGLIST = double cos-RET = double LDFLAGS-bench-cos = -lm # tan function fast path: sysdeps/ieee754/dbl-64/s_tan.c -tan-ITER = 3e9 tan-ARGLIST = double tan-RET = double LDFLAGS-bench-tan = -lm # atan function fast path: sysdeps/ieee754/dbl-64/s_atan.c -atan-ITER = 6e9 atan-ARGLIST = double atan-RET = double LDFLAGS-bench-atan = -lm # pow function slowest 
path: sysdeps/ieee754/dbl-64/slowpow.c -slowpow-ITER = 1e5 slowpow-ARGLIST = double:double slowpow-RET = double slowpow-INCLUDE = slowpow.c LDFLAGS-bench-slowpow = -lm # sin function slowest path: sysdeps/ieee754/dbl-64/sincos32.c -slowsin-ITER = 3e7 slowsin-ARGLIST = double slowsin-RET = double slowsin-INCLUDE = slowsin.c LDFLAGS-bench-slowsin = -lm # cos function slowest path: sysdeps/ieee754/dbl-64/sincos32.c -slowcos-ITER = 3e7 slowcos-ARGLIST = double slowcos-RET = double slowcos-INCLUDE = slowcos.c LDFLAGS-bench-slowcos = -lm # tan function slowest path: sysdeps/ieee754/dbl-64/mptan.c -slowtan-ITER = 3e7 slowtan-ARGLIST = double slowtan-RET = double slowtan-INCLUDE = slowtan.c LDFLAGS-bench-slowtan = -lm # atan function slowest path: sysdeps/ieee754/dbl-64/mpatan.c -slowatan-ITER = 3e8 slowatan-ARGLIST = double slowatan-RET = double slowatan-INCLUDE = slowatan.c @@ -141,6 +122,13 @@ include ../Rules binaries-bench := $(addprefix $(objpfx)bench-,$(bench)) +# The default duration: 10 seconds. +ifndef BENCH_DURATION +BENCH_DURATION := 10 +endif + +CPPFLAGS-nonlib = -DDURATION=$(BENCH_DURATION) + # This makes sure CPPFLAGS-nonlib and CFLAGS-nonlib are passed # for all these modules. 
cpp-srcs-left := $(binaries-bench:=.c) @@ -176,5 +164,5 @@ $(objpfx)bench-%.c: %-inputs $(bench-deps) cat $($*-INCLUDE); \ fi; \ $(..)scripts/bench.pl $(patsubst %-inputs,%,$<) \ - $($*-ITER) $($*-ARGLIST) $($*-RET); } > $@-tmp + $($*-ARGLIST) $($*-RET); } > $@-tmp mv -f $@-tmp $@ diff --git a/benchtests/bench-modf.c b/benchtests/bench-modf.c index 975a29f199..90a5255291 100644 --- a/benchtests/bench-modf.c +++ b/benchtests/bench-modf.c @@ -33,7 +33,6 @@ struct args static volatile double ret = 0.0; #define BENCH_FUNC(j) ({double iptr; ret = CALL_BENCH_FUNC (j, iptr);}) -#define ITER 250000000 #define FUNCNAME "modf" #include "bench-skeleton.c" diff --git a/benchtests/bench-skeleton.c b/benchtests/bench-skeleton.c index 13f986d817..bbd151b0ec 100644 --- a/benchtests/bench-skeleton.c +++ b/benchtests/bench-skeleton.c @@ -22,13 +22,18 @@ #include <time.h> #include <inttypes.h> +#define TIMESPEC_AFTER(a, b) \ + (((a).tv_sec == (b).tv_sec) ? \ + ((a).tv_nsec > (b).tv_nsec) : \ + ((a).tv_sec > (b).tv_sec)) int main (int argc, char **argv) { - unsigned long i, j, k; + unsigned long i, k; uint64_t total = 0, max = 0, min = 0x7fffffffffffffff; - struct timespec start, end; + struct timespec start, end, runtime; + memset (&runtime, 0, sizeof (runtime)); memset (&start, 0, sizeof (start)); memset (&end, 0, sizeof (end)); @@ -39,11 +44,15 @@ main (int argc, char **argv) Measurements close to the minimum clock resolution won't make much sense, but it's better than having nothing at all. */ unsigned long iters = 1000 * start.tv_nsec; - unsigned long total_iters = ITER / iters; - for (i = 0; i < NUM_SAMPLES; i++) + /* Run for approximately DURATION seconds. 
*/ + clock_gettime (CLOCK_MONOTONIC_RAW, &runtime); + runtime.tv_sec += DURATION; + + double d_total_i = 0; + while (1) { - for (j = 0; j < total_iters; j ++) + for (i = 0; i < NUM_SAMPLES; i++) { clock_gettime (CLOCK_PROCESS_CPUTIME_ID, &start); for (k = 0; k < iters; k++) @@ -61,12 +70,25 @@ main (int argc, char **argv) min = cur; total += cur; + + d_total_i += iters; } + + struct timespec curtime; + + memset (&curtime, 0, sizeof (curtime)); + clock_gettime (CLOCK_MONOTONIC_RAW, &curtime); + if (TIMESPEC_AFTER (curtime, runtime)) + goto done; } - double d_total_s = total * 1e-9; - double d_iters = iters; - double d_total_i = (double)ITER * NUM_SAMPLES; + double d_total_s; + double d_iters; + + done: + d_total_s = total * 1e-9; + d_iters = iters; + printf (FUNCNAME ": ITERS:%g: TOTAL:%gs, MAX:%gns, MIN:%gns, %g iter/s\n", d_total_i, d_total_s, max / d_iters, min / d_iters, d_total_i / d_total_s); |