diff options
Diffstat (limited to 'benchtests/scripts/import_bench.py')
-rw-r--r-- | benchtests/scripts/import_bench.py | 96 |
1 files changed, 96 insertions, 0 deletions
diff --git a/benchtests/scripts/import_bench.py b/benchtests/scripts/import_bench.py index 81248c2adf..d37ff62383 100644 --- a/benchtests/scripts/import_bench.py +++ b/benchtests/scripts/import_bench.py @@ -25,6 +25,102 @@ except ImportError: raise +def mean(lst): + """Compute and return mean of numbers in a list + + The numpy average function has horrible performance, so implement our + own mean function. + + Args: + lst: The list of numbers to average. + Return: + The mean of members in the list. + """ + return sum(lst) / len(lst) + + +def split_list(bench, func, var): + """ Split the list into a smaller set of more distinct points + + Group together points such that the difference between the smallest + point and the mean is less than 1/3rd of the mean. This means that + the mean is at most 1.5x the smallest member of that group. + + mean - xmin < mean / 3 + i.e. 2 * mean / 3 < xmin + i.e. mean < 3 * xmin / 2 + + For an evenly distributed group, the largest member will be less than + twice the smallest member of the group. + Derivation: + + An evenly distributed series would be xmin, xmin + d, xmin + 2d... + + mean = (2 * n * xmin + n * (n - 1) * d) / 2 * n + and max element is xmin + (n - 1) * d + + Now, mean < 3 * xmin / 2 + + 3 * xmin > 2 * mean + 3 * xmin > (2 * n * xmin + n * (n - 1) * d) / n + 3 * n * xmin > 2 * n * xmin + n * (n - 1) * d + n * xmin > n * (n - 1) * d + xmin > (n - 1) * d + 2 * xmin > xmin + (n-1) * d + 2 * xmin > xmax + + Hence, proved. + + Similarly, it is trivial to prove that for a similar aggregation by using + the maximum element, the maximum element in the group must be at most 4/3 + times the mean. + + Args: + bench: The benchmark object + func: The function name + var: The function variant name + """ + means = [] + lst = bench['functions'][func][var]['timings'] + last = len(lst) - 1 + while lst: + for i in range(last + 1): + avg = mean(lst[i:]) + if avg > 0.75 * lst[last]: + means.insert(0, avg) + lst = lst[:i] + last = i - 1 + break + bench['functions'][func][var]['timings'] = means + + +def do_for_all_timings(bench, callback): + """Call a function for all timing objects for each function and its + variants. + + Args: + bench: The benchmark object + callback: The callback function + """ + for func in bench['functions'].keys(): + for k in bench['functions'][func].keys(): + if 'timings' not in bench['functions'][func][k].keys(): + continue + + callback(bench, func, k) + + +def compress_timings(points): + """Club points with close enough values into a single mean value + + See split_list for details on how the clubbing is done. + + Args: + points: The set of points. + """ + do_for_all_timings(points, split_list) + + def parse_bench(filename, schema_filename): """Parse the input file |