diff options
Diffstat (limited to 'benchtests/scripts/compare_bench.py')
-rwxr-xr-x | benchtests/scripts/compare_bench.py | 184 |
1 files changed, 184 insertions, 0 deletions
#!/usr/bin/python
# Copyright (C) 2015 Free Software Foundation, Inc.
# This file is part of the GNU C Library.
#
# The GNU C Library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# The GNU C Library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with the GNU C Library; if not, see
# <http://www.gnu.org/licenses/>.
"""Compare two benchmark results

Given two benchmark result files and a threshold, this script compares the
benchmark results and flags differences in performance beyond a given
threshold.
"""
import sys
import os
import pylab
import import_bench as bench


def _percent_change(before, after):
    """Return the absolute change from before to after, as a percentage.

    The change is expressed relative to BEFORE.  A zero baseline has no
    meaningful percentage, so it is reported as 0 when nothing changed and as
    infinity otherwise; that way a change away from zero always crosses any
    finite threshold instead of raising ZeroDivisionError.

    Args:
        before: The baseline value
        after: The new value
    Returns:
        The percentage difference as a float.
    """
    if before == 0:
        if after == 0:
            return 0.0
        return float('inf')

    # Multiply by a float so that integer inputs are not floor-divided when
    # the script is run under Python 2.
    return abs(after - before) * 100.0 / before


def do_compare(func, var, tl1, tl2, par, threshold):
    """Compare one of the aggregate measurements

    Helper function to compare one of the aggregate measurements of a function
    variant.  A line is printed only when the difference is strictly greater
    than the threshold; '+++' marks a value that went down and '---' marks
    one that did not.

    Args:
        func: Function name
        var: Function variant name
        tl1: The first timings list
        tl2: The second timings list
        par: The aggregate to measure
        threshold: The threshold for differences, beyond which the script should
        print a warning.
    """
    before = tl1[par]
    after = tl2[par]
    d = _percent_change(before, after)
    if d > threshold:
        if before > after:
            ind = '+++'
        else:
            ind = '---'
        print('%s %s(%s)[%s]: (%.2lf%%) from %g to %g' %
              (ind, func, var, par, d, before, after))


def compare_runs(pts1, pts2, threshold):
    """Compare two benchmark runs

    For every function variant the 'min' and 'mean' aggregates are compared.
    If both runs also carry a 'timings' list for the variant, those lists are
    compared element by element.

    Args:
        pts1: Timing data from first machine
        pts2: Timing data from second machine
        threshold: The threshold (in percent) for differences, beyond which
        a difference is printed.
    """

    # XXX We assume that the two benchmarks have identical functions and
    # variants.  We cannot compare two benchmarks that may have different
    # functions or variants.  Maybe that is something for the future.
    for func, variants in pts1['functions'].items():
        for var, tl1 in variants.items():
            tl2 = pts2['functions'][func][var]

            # Compare the consolidated numbers.  The 'max' aggregate is not
            # compared.
            do_compare(func, var, tl1, tl2, 'min', threshold)
            do_compare(func, var, tl1, tl2, 'mean', threshold)

            # Skip over to the next variant or function if there is no detailed
            # timing info for the function variant.  This must not leave the
            # function, otherwise every remaining variant would go unchecked.
            if 'timings' not in tl1 or 'timings' not in tl2:
                continue

            timings1 = tl1['timings']
            timings2 = tl2['timings']

            # If two lists do not have the same length then it is likely that
            # the performance characteristics of the function have changed.
            # XXX: It is also likely that there was some measurement that
            # strayed outside the usual range.  Such outliers should not
            # happen on an idle machine with identical hardware and
            # configuration, but ideal environments are hard to come by.
            if len(timings1) != len(timings2):
                print('* %s(%s): Timing characteristics changed' %
                      (func, var))
                print('\tBefore: [%s]' %
                      ', '.join([str(x) for x in timings1]))
                print('\tAfter: [%s]' %
                      ', '.join([str(x) for x in timings2]))
                continue

            # Print the numbers whose differences cross the threshold we have
            # set.
            for t1, t2 in zip(timings1, timings2):
                d = _percent_change(t1, t2)
                if d > threshold:
                    if t2 > t1:
                        ind = '-'
                    else:
                        ind = '+'

                    print("%s %s(%s): (%.2lf%%) from %g to %g" %
                          (ind, func, var, d, t1, t2))


def _scatter_timings(timings, color):
    """Add one run's timings to the current pylab figure.

    The points are plotted against their index in the list.  The value passed
    as the third argument to pylab.scatter shrinks as the number of points
    grows.

    Args:
        timings: Non-empty list of timing values
        color: Colour to draw the points in
    """
    length = len(timings)
    X = [float(x) for x in range(length)]
    lines = pylab.scatter(X, timings, 1.5 + 100.0 / length)
    pylab.setp(lines, 'color', color)


def plot_graphs(bench1, bench2):
    """Plot graphs for functions

    Make scatter plots for the functions and their variants.  Each plot is
    written to '<func>-<var>.png', or '<func>.png' when the variant name is
    empty.  The first run is drawn in red and the second in green.

    Args:
        bench1: Set of points from the first machine
        bench2: Set of points from the second machine.
    """
    for func, variants in bench1['functions'].items():
        for var, tl1 in variants.items():
            tl2 = bench2['functions'][func][var]

            # No point trying to print a graph if there are no detailed
            # timings.  Both runs are checked because both are plotted, and an
            # empty list is skipped because the marker size divides by its
            # length.
            if not tl1.get('timings') or not tl2.get('timings'):
                print('Skipping graph for %s(%s)' % (func, var))
                continue

            pylab.clf()
            pylab.ylabel('Time (cycles)')

            # First set of points
            _scatter_timings(tl1['timings'], 'r')

            # Second set of points
            _scatter_timings(tl2['timings'], 'g')

            if var:
                filename = "%s-%s.png" % (func, var)
            else:
                filename = "%s.png" % func
            print('Writing out %s' % filename)
            pylab.savefig(filename)


def main(args):
    """Program Entry Point

    Take two benchmark output files and compare their timings.

    Args:
        args: Command line arguments without the program name: the schema
        file, the two benchmark output files and, optionally, the threshold
        in percent (10 when omitted).
    """
    if len(args) > 4 or len(args) < 3:
        print('Usage: %s <schema> <file1> <file2> [threshold in %%]' % sys.argv[0])
        sys.exit(os.EX_USAGE)

    bench1 = bench.parse_bench(args[1], args[0])
    bench2 = bench.parse_bench(args[2], args[0])
    if len(args) == 4:
        threshold = float(args[3])
    else:
        threshold = 10.0

    if bench1['timing_type'] != bench2['timing_type']:
        print('Cannot compare benchmark outputs: timing types are different')
        return

    # Graphs are drawn before the timings are compressed.
    plot_graphs(bench1, bench2)

    bench.compress_timings(bench1)
    bench.compress_timings(bench2)

    compare_runs(bench1, bench2, threshold)


if __name__ == '__main__':
    main(sys.argv[1:])