Add math benchmark latency test

This patch further improves math function benchmarking by adding a latency
test in addition to throughput.  This enables more accurate comparisons of the
math functions.  The latency test works by creating a dependency on the
previous iteration:

    func_res = F (func_res * zero + input[i])

The multiply by zero avoids changing the input while keeping each call
dependent on the previous result.

It reports reciprocal throughput and latency in nanoseconds (depending on the
timing header used) and max/min throughput in iterations per second:

    "workload-spec2006.wrf": {
      "reciprocal-throughput": 100,
      "latency": 200,
      "max-throughput": 1.0e+07,
      "min-throughput": 5.0e+06
    }

	* benchtests/bench-skeleton.c (main): Add support for latency benchmarking.
	* benchtests/scripts/bench.py: Add support for latency benchmarking.
author: Wilco Dijkstra <wdijkstr@arm.com> 2017-08-17 16:27:20 +0100
committer: Wilco Dijkstra <wdijkstr@arm.com> 2017-08-17 16:27:20 +0100
commit: d4505b895f2aa577a6b974ab4bd0a1fd0db60ceb (patch)
tree: 5ae35fc88eda5f2ad45b63f4617f4a87d470d885 /benchtests/bench-skeleton.c
parent: 34d6a3cbf2be45aa039a7eb9f0084a4b710437b8 (diff)
download: glibc-d4505b895f2aa577a6b974ab4bd0a1fd0db60ceb.tar.gz
glibc-d4505b895f2aa577a6b974ab4bd0a1fd0db60ceb.tar.xz
glibc-d4505b895f2aa577a6b974ab4bd0a1fd0db60ceb.zip
1 files changed, 23 insertions, 4 deletions
diff --git a/benchtests/bench-skeleton.c b/benchtests/bench-skeleton.c
index 3c6dad7055..955b2e1d21 100644
--- a/benchtests/bench-skeleton.c
+++ b/benchtests/bench-skeleton.c
@@ -71,8 +71,10 @@ main (int argc, char **argv)
       bool is_bench = strncmp (VARIANT (v), "workload-", 9) == 0;
       double d_total_i = 0;
       timing_t total = 0, max = 0, min = 0x7fffffffffffffff;
+      timing_t throughput = 0, latency = 0;
       int64_t c = 0;
       uint64_t cur;
+      BENCH_VARS;
       while (1)
 	{
 	  if (is_bench)
@@ -86,7 +88,16 @@ main (int argc, char **argv)
 		  BENCH_FUNC (v, i);
 	      TIMING_NOW (end);
 	      TIMING_DIFF (cur, start, end);
-	      TIMING_ACCUM (total, cur);
+	      TIMING_ACCUM (throughput, cur);
+
+	      TIMING_NOW (start);
+	      for (k = 0; k < iters; k++)
+		for (i = 0; i < NUM_SAMPLES (v); i++)
+		  BENCH_FUNC_LAT (v, i);
+	      TIMING_NOW (end);
+	      TIMING_DIFF (cur, start, end);
+	      TIMING_ACCUM (latency, cur);
+
 	      d_total_i += iters * NUM_SAMPLES (v);
 	    }
 	  else
@@ -131,12 +142,20 @@ main (int argc, char **argv)
       /* Begin variant.  */
       json_attr_object_begin (&json_ctx, VARIANT (v));
 
-      json_attr_double (&json_ctx, "duration", d_total_s);
-      json_attr_double (&json_ctx, "iterations", d_total_i);
       if (is_bench)
-	json_attr_double (&json_ctx, "throughput", d_total_s / d_total_i);
+	{
+	  json_attr_double (&json_ctx, "reciprocal-throughput",
+			    throughput / d_total_i);
+	  json_attr_double (&json_ctx, "latency", latency / d_total_i);
+	  json_attr_double (&json_ctx, "max-throughput",
+			    d_total_i / throughput * 1000000000.0);
+	  json_attr_double (&json_ctx, "min-throughput",
+			    d_total_i / latency * 1000000000.0);
+	}
       else
 	{
+	  json_attr_double (&json_ctx, "duration", d_total_s);
+	  json_attr_double (&json_ctx, "iterations", d_total_i);
 	  json_attr_double (&json_ctx, "max", max / d_iters);
 	  json_attr_double (&json_ctx, "min", min / d_iters);
 	  json_attr_double (&json_ctx, "mean", d_total_s / d_total_i);
author	Wilco Dijkstra <wdijkstr@arm.com>	2017-08-17 16:27:20 +0100
committer	Wilco Dijkstra <wdijkstr@arm.com>	2017-08-17 16:27:20 +0100
commit	d4505b895f2aa577a6b974ab4bd0a1fd0db60ceb (patch)
tree	5ae35fc88eda5f2ad45b63f4617f4a87d470d885 /benchtests/bench-skeleton.c
parent	34d6a3cbf2be45aa039a7eb9f0084a4b710437b8 (diff)
download	glibc-d4505b895f2aa577a6b974ab4bd0a1fd0db60ceb.tar.gz glibc-d4505b895f2aa577a6b974ab4bd0a1fd0db60ceb.tar.xz glibc-d4505b895f2aa577a6b974ab4bd0a1fd0db60ceb.zip