#!/usr/bin/python3
# Copyright (C) 2021-2023 Free Software Foundation, Inc.
# This file is part of the GNU C Library.
#
# The GNU C Library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# The GNU C Library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with the GNU C Library; if not, see
# <https://www.gnu.org/licenses/>.

"""Benchmark program generator script

This script takes a function name as input and generates a program using
a libmvec input file located in the sysdeps/x86_64/fpu directory.  The
name of the input file should be of the form libmvec-foo-inputs where
'foo' is the name of the function.
"""

from __future__ import print_function
import sys
import os
import itertools
import re

# Macro definitions for functions that take no arguments.  For functions
# that take arguments, the STRUCT_TEMPLATE, ARGS_TEMPLATE and
# VARIANTS_TEMPLATE are used instead.
DEFINES_TEMPLATE = '''
#define CALL_BENCH_FUNC(v, i) %(func)s ()
#define NUM_VARIANTS (1)
#define NUM_SAMPLES(v) (1)
#define VARIANT(v) FUNCNAME "()"
'''

# Call macro for vector functions that return a value: the result is
# stored in a local of the vector type and yielded from a GNU statement
# expression.
BENCH_VEC_TEMPLATE = '''
#define CALL_BENCH_FUNC(v, i) (__extension__ ({ \\
  %(defs)s mx0 = %(func)s (%(func_args)s); \\
  mx0; }))
'''

# Call macro for scalar functions and for vector functions without a
# return value.
BENCH_SCALAR_TEMPLATE = '''
#define CALL_BENCH_FUNC(v, i) %(func)s (%(func_args)s)
'''

# Structures to store arguments for the function call.  A function may
# have its inputs partitioned to represent distinct performance
# characteristics or distinct flavors of the function.  Each such
# variant is represented by the _VARIANT structure.  The ARGS structure
# represents a single set of arguments.
STRUCT_TEMPLATE = '''struct args
{
%(args)s
  double timing;
};

struct _variants
{
  const char *name;
  int count;
  struct args *in;
};
'''

# The actual input arguments.
ARGS_TEMPLATE = '''struct args in%(argnum)d[%(num_args)d] = {
%(args)s
};
'''

# The actual variants, along with macros defined to access the variants.
VARIANTS_TEMPLATE = '''struct _variants variants[%(num_variants)d] = {
%(variants)s
};

#define NUM_VARIANTS %(num_variants)d
#define NUM_SAMPLES(i) (variants[i].count)
#define VARIANT(i) (variants[i].name)
'''

# Epilogue for the generated source file.
# NOTE(review): the include target was missing from the text this file
# was reconstructed from; bench-libmvec-skeleton.c is assumed to be the
# intended skeleton -- confirm against benchtests/.
EPILOGUE = '''
#define BENCH_FUNC(i, j) ({%(getret)s CALL_BENCH_FUNC (i, j);})
#define FUNCNAME "%(func)s"
#include <bench-libmvec-skeleton.c>'''


def gen_source(func_types, directives, all_vals):
    """Generate source for the function

    Generate the C source for the function from the values and
    directives and print it to standard output.

    Args:
        func_types: The benchmark name split on '-'.  Element 1 is the
            element type ('float' selects single precision), element 2
            is 'vlenN' with N the vector length, element 3 is 'avx2' for
            the AVX2 flavor and the last element is the function name.
        directives: A dictionary of directives applicable to this function
        all_vals: A dictionary input values
    """
    # The includes go in first.
    for header in directives['includes']:
        print('#include <%s>' % header)

    for header in directives['include-sources']:
        print('#include "%s"' % header)

    # Vector register width and vector-ABI ISA letter, both keyed by the
    # number of double-sized lanes.
    argtype_vtable = {2: '128', 4: '256', 8: '512'}
    prefix_vtable = {2: 'b', 4: 'c', 8: 'e'}

    # Get all the function properties
    float_flag = func_types[1] == 'float'
    avx_flag = func_types[3] == 'avx2'
    vlen = func_types[2][4:]
    funcname_stride = int(vlen)
    funcname_origin = func_types[-1]
    if float_flag:
        # Drop the trailing 'f'; it is appended again after the prefix.
        funcname_origin = funcname_origin[:-1]

    if funcname_stride == 1:
        # Prepare for scalar functions file generation
        funcname_prefix = ''
        funcname_prefix_1 = ''
        funcname_argtype = 'float' if float_flag else 'double'
        print('#define FUNCTYPE %s' % funcname_argtype)
    else:
        # Prepare for libmvec functions file generation
        funcname_prefix_1 = len(directives['args']) * 'v' + '_'
        # A float vector holds twice as many lanes as a double vector of
        # the same width.  Integer division keeps the table keys ints.
        aligned_stride = funcname_stride
        if float_flag:
            aligned_stride //= 2
        if aligned_stride not in argtype_vtable:
            die('Unsupported vector length: %s' % func_types[2])
        use_avx2 = avx_flag and aligned_stride == 4
        isa = 'd' if use_avx2 else prefix_vtable[aligned_stride]
        funcname_prefix = '_ZGV' + isa + 'N' + vlen
        funcname_argtype = '__m' + argtype_vtable[aligned_stride]
        if not float_flag:
            funcname_argtype += 'd'

        # Include x86intrin.h for vector functions
        print('#include <x86intrin.h>')
        if use_avx2:
            # For bench-float-vlen8-avx2* and bench-double-vlen4-avx2*
            print('#define REQUIRE_AVX2')
        elif aligned_stride == 8:
            # For bench-float-vlen16* and bench-double-vlen8*
            print('#define REQUIRE_AVX512F')
        elif aligned_stride == 4:
            # For bench-float-vlen8* and bench-double-vlen4* without avx2
            print('#define REQUIRE_AVX')

    print('#define STRIDE %d ' % funcname_stride)

    funcname = funcname_prefix + funcname_prefix_1 + funcname_origin
    if float_flag:
        funcname += 'f'

    funcname_rettype = funcname_argtype
    if directives['ret'] == '':
        funcname_rettype = 'void'

    # One parameter type per argument; arguments written as <type *> are
    # output parameters and are passed by pointer.
    funcname_inputtype = []
    for arg in directives['args']:
        argtype = funcname_argtype
        if arg.startswith('<') and arg.endswith('>'):
            if '*' not in arg:
                die('Output argument must be a pointer type')
            argtype += ' *'
        funcname_inputtype.append(argtype)

    if funcname_stride != 1:
        # The vector entry points are declared here; joining the list
        # handles any number of arguments.
        print('extern %s %s (%s);' % (funcname_rettype, funcname,
                                      ', '.join(funcname_inputtype)))

    # Print macros.  This branches out to a separate routine if
    # the function takes arguments.
    if not directives['args']:
        print(DEFINES_TEMPLATE % {'func': funcname})
        outargs = []
    else:
        outargs = _print_arg_data(funcname, float_flag, funcname_argtype,
                                  funcname_stride, directives, all_vals)

    # Print the output variable definitions if necessary.
    for out in outargs:
        print(out)

    # If we have a return value from the function, make sure it is
    # assigned to prevent the compiler from optimizing out the
    # call.  funcname_argtype is always set above, so it is the type of
    # the return variable.
    getret = ''
    if directives['ret']:
        print('static %s volatile ret;' % funcname_argtype)
        getret = 'ret ='

    # Test initialization.
    if directives['init']:
        print('#define BENCH_INIT %s' % directives['init'])

    print(EPILOGUE % {'getret': getret, 'func': funcname})


def _print_arg_data(func, float_flag, funcname_argtype, funcname_stride,
                    directives, all_vals):
    """Print argument data

    This is a helper function for gen_source that prints structure and
    values for arguments and their variants and returns output arguments
    if any are found.

    Args:
        func: Function name
        float_flag: True if function is float type
        funcname_argtype: Type for vector variants
        funcname_stride: Vector Length
        directives: A dictionary of directives applicable to this function
        all_vals: A dictionary input values

    Returns:
        Returns a list of definitions for function arguments that act as
        output parameters.
    """
    # First, all of the definitions.  We process writing of
    # CALL_BENCH_FUNC, struct args and also the output arguments
    # together in a single traversal of the arguments list.
    func_args = []      # Call arguments for vector functions.
    _func_args = []     # Call arguments for scalar functions.
    arg_struct = []
    outargs = []

    # Conversion function for each type
    vtable = {
        '__m128d': '_mm_loadu_pd',
        '__m256d': '_mm256_loadu_pd',
        '__m512d': '_mm512_loadu_pd',
        '__m128': '_mm_loadu_ps',
        '__m256': '_mm256_loadu_ps',
        '__m512': '_mm512_loadu_ps',
        'double': '',
        'float': ''
    }
    load = vtable[funcname_argtype]

    # For double max_vlen=8, for float max_vlen=16.
    max_vlen = 16 if float_flag else 8

    for i, arg in enumerate(directives['args']):
        if arg.startswith('<') and arg.endswith('>'):
            outargs.append('static %s out%d __attribute__((used));'
                           % (funcname_argtype, i))
            func_args.append('&out%d' % i)
            _func_args.append('&out%d' % i)
        else:
            arg_struct.append('  %s arg%d[STRIDE];' % (arg, i))
            func_args.append('%s (variants[v].in[i].arg%d)' % (load, i))
            _func_args.append('variants[v].in[i].arg%d[0]' % i)

    if funcname_stride == 1:
        print(BENCH_SCALAR_TEMPLATE % {'func': func,
                                       'func_args': ', '.join(_func_args)})
    elif directives['ret'] == '':
        print(BENCH_SCALAR_TEMPLATE % {'func': func,
                                       'func_args': ', '.join(func_args)})
    else:
        print(BENCH_VEC_TEMPLATE % {'func': func,
                                    'func_args': ', '.join(func_args),
                                    'defs': funcname_argtype})
    print(STRUCT_TEMPLATE % {'args': '\n'.join(arg_struct)})

    # Now print the values.
    variants = []
    for i, (k, _vals) in enumerate(all_vals.items()):
        vals = []       # Initializers accepted for this variant.
        pending = []    # Initializers waiting for a full max_vlen block.
        columns = []    # Values of the group being built, per argument.
        lanes = 0       # Input lines in the group being built.
        j = 0           # Input lines in the max_vlen block being built.
        for _v in _vals:
            nums = [num.strip() for num in _v.split(',')]
            # One column per comma-separated field, however many there are.
            while len(columns) < len(nums):
                columns.append([])
            for column, num in zip(columns, nums):
                column.append(num)
            lanes += 1
            j += 1
            if lanes == funcname_stride:
                # A full vector of inputs: one braced list per argument.
                pending.append(','.join('{' + ','.join(column) + '}'
                                        for column in columns[:len(nums)]))
                columns = []
                lanes = 0
            # Make sure amount of test data is multiple of max_vlen
            # to keep data size same for all vector length.
            if j == max_vlen:
                vals.extend(pending)
                pending = []
                j = 0

        out = ['  {%s, 0},' % v for v in vals]

        # Members for the variants structure list that we will
        # print later.
        variants.append('  {"%s", %d, in%d},' % (k, len(vals), i))
        print(ARGS_TEMPLATE % {'argnum': i, 'num_args': len(vals),
                               'args': '\n'.join(out)})

    # Print the variants and the last set of macros.
    print(VARIANTS_TEMPLATE % {'num_variants': len(all_vals),
                               'variants': '\n'.join(variants)})
    return outargs


def _process_directive(d_name, d_val, func_args):
    """Process a directive.

    Evaluate the directive name and value passed and return the
    processed value.  This is a helper function for parse_file.

    Args:
        d_name: Name of the directive
        d_val: The string value to process
        func_args: The argument type taken from the benchmark name; the
            first entry of an 'args' directive must be equal to it.

    Returns:
        The processed value, which may be the string as it is or an object
        that describes the directive.
    """
    # Process the directive values if necessary.  name and ret don't
    # need any processing.
    if d_name.startswith('include'):
        return d_val.split(',')

    if d_name == 'args':
        d_val = d_val.split(':')
        # Check if args type match
        if d_val[0] != func_args:
            die("Args mismatch, should be %s, but get %s"
                % (d_val[0], func_args))

    # Return the values.
    return d_val


def parse_file(func_types):
    """Parse an input file

    Given the split benchmark name, open and parse the input file for
    the function and get the necessary parameters for the generated code
    and the list of inputs.

    Args:
        func_types: The benchmark name split on '-'.  The last element
            is the function name used to locate the input file and
            element 1 is the type checked against the 'args' directive.

    Returns:
        A tuple of two elements, one a dictionary of directives and the
        other a dictionary of all input values.
    """
    all_vals = {}
    # Valid directives.
    directives = {
        'name': '',
        'args': [],
        'includes': [],
        'include-sources': [],
        'ret': '',
        'init': ''
    }

    func = func_types[-1]
    try:
        with open('../sysdeps/x86_64/fpu/libmvec-%s-inputs' % func) as f:
            for line in f:
                # Look for directives and parse it if found.
                if line.startswith('##'):
                    try:
                        # A directive without ':' fails to unpack with
                        # ValueError.
                        d_name, d_val = line[2:].split(':', 1)
                        d_name = d_name.strip()
                        d_val = d_val.strip()
                        directives[d_name] = _process_directive(
                            d_name, d_val, func_types[1])
                    except (IndexError, KeyError, ValueError):
                        die('Invalid directive: %s' % line[2:])
                    continue

                # Skip blank lines and comments.
                line = line.split('#', 1)[0].rstrip()
                if not line:
                    continue

                # Otherwise, we're an input.  Add to the appropriate
                # input set.
                cur_name = directives['name']
                all_vals.setdefault(cur_name, []).append(line)
    except IOError as ex:
        die("Failed to open input file (%s): %s" %
            (ex.filename, ex.strerror))

    return directives, all_vals


def die(msg):
    """Exit with an error

    Prints an error message to the standard error stream and exits with
    a non-zero status.

    Args:
        msg: The error message to print to standard error
    """
    print('%s\n' % msg, file=sys.stderr)
    sys.exit(os.EX_DATAERR)


def main(args):
    """Main function

    Use the first command line argument as function name and parse its
    input file to generate C source that calls the function repeatedly
    for the input.

    Args:
        args: The command line arguments with the program name dropped

    Returns:
        os.EX_USAGE on error and os.EX_OK on success.
    """
    if len(args) != 1:
        # NOTE(review): the '<function>' placeholder was missing from the
        # text this file was reconstructed from -- confirm the wording.
        print('Usage: %s <function>' % sys.argv[0])
        return os.EX_USAGE

    func_types = args[0].split('-')
    directives, all_vals = parse_file(func_types)
    gen_source(func_types, directives, all_vals)
    return os.EX_OK


if __name__ == '__main__':
    sys.exit(main(sys.argv[1:]))