summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--sysdeps/x86_64/fpu/Makeconfig35
-rw-r--r--sysdeps/x86_64/fpu/Makefile40
-rw-r--r--sysdeps/x86_64/fpu/bench-libmvec-skeleton.c103
-rwxr-xr-xsysdeps/x86_64/fpu/scripts/bench_libmvec.py464
4 files changed, 642 insertions, 0 deletions
diff --git a/sysdeps/x86_64/fpu/Makeconfig b/sysdeps/x86_64/fpu/Makeconfig
index 24aaee1a43..503e9b5ffa 100644
--- a/sysdeps/x86_64/fpu/Makeconfig
+++ b/sysdeps/x86_64/fpu/Makeconfig
@@ -29,6 +29,23 @@ libmvec-funcs = \
   sin \
   sincos \
 
+# Define libmvec function for benchtests directory.
+libmvec-bench-funcs = \
+
+bench-libmvec-double = \
+  $(addprefix double-vlen1-, $(libmvec-bench-funcs)) \
+  $(addprefix double-vlen2-, $(libmvec-bench-funcs)) \
+  $(addprefix double-vlen4-, $(libmvec-bench-funcs)) \
+  $(addprefix double-vlen4-avx2-, $(libmvec-bench-funcs)) \
+  $(addprefix double-vlen8-, $(libmvec-bench-funcs)) \
+
+bench-libmvec-float = \
+  $(addsuffix f, $(addprefix float-vlen1-, $(libmvec-bench-funcs))) \
+  $(addsuffix f, $(addprefix float-vlen4-, $(libmvec-bench-funcs))) \
+  $(addsuffix f, $(addprefix float-vlen8-, $(libmvec-bench-funcs))) \
+  $(addsuffix f, $(addprefix float-vlen8-avx2-, $(libmvec-bench-funcs))) \
+  $(addsuffix f, $(addprefix float-vlen16-, $(libmvec-bench-funcs))) \
+
 # The base libmvec ABI tests.
 libmvec-abi-func-tests = \
   $(addprefix test-double-libmvec-,$(libmvec-funcs)) \
@@ -83,5 +100,23 @@ $(common-objpfx)libmvec.mk: $(common-objpfx)config.make
 	   echo "  \$$(float-vlen16-arch-ext-cflags)"; \
 	   echo; \
 	 done; \
+	 echo "endif"; \
+	 echo "ifeq (\$$(subdir),benchtests)"; \
+	 for t in $(libmvec-bench-funcs); do \
+	   echo "CFLAGS-bench-double-vlen4-$$t.c = \\"; \
+	   echo "  \$$(double-vlen4-arch-ext-cflags)"; \
+	   echo "CFLAGS-bench-double-vlen4-avx2-$$t.c = \\"; \
+	   echo "  \$$(double-vlen4-arch-ext2-cflags)"; \
+	   echo "CFLAGS-bench-double-vlen8-$$t.c = \\"; \
+	   echo "  \$$(double-vlen8-arch-ext-cflags)"; \
+	   echo; \
+	   echo "CFLAGS-bench-float-vlen8-$${t}f.c = \\"; \
+	   echo "  \$$(float-vlen8-arch-ext-cflags)"; \
+	   echo "CFLAGS-bench-float-vlen8-avx2-$${t}f.c = \\"; \
+	   echo "  \$$(float-vlen8-arch-ext2-cflags)"; \
+	   echo "CFLAGS-bench-float-vlen16-$${t}f.c = \\"; \
+	   echo "  \$$(float-vlen16-arch-ext-cflags)"; \
+	   echo; \
+	 done; \
 	 echo "endif") > $@T
 	mv -f $@T $@
diff --git a/sysdeps/x86_64/fpu/Makefile b/sysdeps/x86_64/fpu/Makefile
index d172ae815d..9fb587cf8f 100644
--- a/sysdeps/x86_64/fpu/Makefile
+++ b/sysdeps/x86_64/fpu/Makefile
@@ -72,3 +72,43 @@ ifeq ($(subdir)$(config-cflags-mprefer-vector-width),mathyes)
 # performance of sin and cos by more than 40% on Skylake.
 CFLAGS-branred.c = -mprefer-vector-width=128
 endif
+
+ifeq ($(subdir),benchtests)
+double-vlen4-arch-ext-cflags = -mavx
+double-vlen4-arch-ext2-cflags = -mavx2
+double-vlen8-arch-ext-cflags = -mavx512f
+
+float-vlen8-arch-ext-cflags = -mavx
+float-vlen8-arch-ext2-cflags = -mavx2
+float-vlen16-arch-ext-cflags = -mavx512f
+
+bench-libmvec := $(bench-libmvec-double) $(bench-libmvec-float)
+
+ifeq (${BENCHSET},)
+bench += $(bench-libmvec)
+endif
+
+ifeq (${STATIC-BENCHTESTS},yes)
+libmvec-benchtests = $(common-objpfx)mathvec/libmvec.a $(common-objpfx)math/libm.a
+else
+libmvec-benchtests = $(libmvec) $(libm)
+endif
+
+$(addprefix $(objpfx)bench-,$(bench-libmvec-double)): $(libmvec-benchtests)
+$(addprefix $(objpfx)bench-,$(bench-libmvec-float)): $(libmvec-benchtests)
+bench-libmvec-deps = $(..)sysdeps/x86_64/fpu/bench-libmvec-skeleton.c bench-timing.h Makefile
+
+$(objpfx)bench-float-%.c: $(bench-libmvec-deps)
+	{ if [ -n "$($*-INCLUDE)" ]; then \
+	  cat $($*-INCLUDE); \
+	fi; \
+	$(PYTHON) $(..)sysdeps/x86_64/fpu/scripts/bench_libmvec.py $(basename $(@F)); } > $@-tmp
+	mv -f $@-tmp $@
+
+$(objpfx)bench-double-%.c: $(bench-libmvec-deps)
+	{ if [ -n "$($*-INCLUDE)" ]; then \
+	  cat $($*-INCLUDE); \
+	fi; \
+	$(PYTHON) $(..)sysdeps/x86_64/fpu/scripts/bench_libmvec.py $(basename $(@F)); } > $@-tmp
+	mv -f $@-tmp $@
+endif
diff --git a/sysdeps/x86_64/fpu/bench-libmvec-skeleton.c b/sysdeps/x86_64/fpu/bench-libmvec-skeleton.c
new file mode 100644
index 0000000000..ee025594a5
--- /dev/null
+++ b/sysdeps/x86_64/fpu/bench-libmvec-skeleton.c
@@ -0,0 +1,103 @@
+/* Skeleton for libmvec benchmark programs.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <string.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <time.h>
+#include <inttypes.h>
+#include <bench-timing.h>
+#include <json-lib.h>
+#include <bench-util.h>
+#include <math-tests-arch.h>
+
+#include <bench-util.c>
+#define D_ITERS 10000
+
+int
+main (int argc, char **argv)
+{
+  unsigned long i, k;
+  timing_t start, end;
+  json_ctx_t json_ctx;
+
+#if defined REQUIRE_AVX
+  if (!CPU_FEATURE_ACTIVE (AVX))
+    {
+      printf ("AVX not supported.\n");
+      return 0;
+    }
+#elif defined REQUIRE_AVX2
+  if (!CPU_FEATURE_ACTIVE (AVX2))
+    {
+      printf ("AVX2 not supported.\n");
+      return 0;
+    }
+#elif defined REQUIRE_AVX512F
+  if (!CPU_FEATURE_ACTIVE (AVX512F))
+    {
+      printf ("AVX512F not supported.\n");
+      return 0;
+    }
+#endif
+
+  bench_start ();
+
+#ifdef BENCH_INIT
+  BENCH_INIT ();
+#endif
+
+  json_init (&json_ctx, 2, stdout);
+
+  /* Begin function.  */
+  json_attr_object_begin (&json_ctx, FUNCNAME);
+
+  for (int v = 0; v < NUM_VARIANTS; v++)
+    {
+      double d_total_time = 0;
+      timing_t cur;
+      for (k = 0; k < D_ITERS; k++)
+	{
+	  TIMING_NOW (start);
+	  for (i = 0; i < NUM_SAMPLES (v); i++)
+	    BENCH_FUNC (v, i);
+	  TIMING_NOW (end);
+
+	  TIMING_DIFF (cur, start, end);
+
+	  TIMING_ACCUM (d_total_time, cur);
+	}
+      double d_total_data_set = D_ITERS * NUM_SAMPLES (v) * STRIDE;
+
+      /* Begin variant.  */
+      json_attr_object_begin (&json_ctx, VARIANT (v));
+
+      json_attr_double (&json_ctx, "duration", d_total_time);
+      json_attr_double (&json_ctx, "iterations", d_total_data_set);
+      json_attr_double (&json_ctx, "mean", d_total_time / d_total_data_set);
+
+      /* End variant.  */
+      json_attr_object_end (&json_ctx);
+    }
+
+  /* End function.  */
+  json_attr_object_end (&json_ctx);
+
+  return 0;
+}
diff --git a/sysdeps/x86_64/fpu/scripts/bench_libmvec.py b/sysdeps/x86_64/fpu/scripts/bench_libmvec.py
new file mode 100755
index 0000000000..762865de8f
--- /dev/null
+++ b/sysdeps/x86_64/fpu/scripts/bench_libmvec.py
@@ -0,0 +1,464 @@
+#!/usr/bin/python3
+# Copyright (C) 2021 Free Software Foundation, Inc.
+# This file is part of the GNU C Library.
+#
+# The GNU C Library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# The GNU C Library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with the GNU C Library; if not, see
+# <https://www.gnu.org/licenses/>.
+
+"""Benchmark program generator script
+
+This script takes a function name as input and generates a program using
+an libmvec input file located in the sysdeps/x86_64/fpu directory.  The
+name of the input file should be of the form libmvec-foo-inputs where
+'foo' is the name of the function.
+"""
+
+from __future__ import print_function
+import sys
+import os
+import itertools
+import re
+
+# Macro definitions for functions that take no arguments.  For functions
+# that take arguments, the STRUCT_TEMPLATE, ARGS_TEMPLATE and
+# VARIANTS_TEMPLATE are used instead.
+DEFINES_TEMPLATE = '''
+#define CALL_BENCH_FUNC(v, i) %(func)s ()
+#define NUM_VARIANTS (1)
+#define NUM_SAMPLES(v) (1)
+#define VARIANT(v) FUNCNAME "()"
+'''
+
+# Structures to store arguments for the function call.  A function may
+# have its inputs partitioned to represent distinct performance
+# characteristics or distinct flavors of the function.  Each such
+# variant is represented by the _VARIANT structure.  The ARGS structure
+# represents a single set of arguments.
+BENCH_VEC_TEMPLATE = '''
+#define CALL_BENCH_FUNC(v, i) (__extension__ ({ \\
+  %(defs)s mx0 = %(func)s (%(func_args)s); \\
+  mx0; }))
+'''
+
+BENCH_SCALAR_TEMPLATE = '''
+#define CALL_BENCH_FUNC(v, i) %(func)s (%(func_args)s)
+'''
+
+STRUCT_TEMPLATE = '''struct args
+{
+%(args)s
+  double timing;
+};
+
+struct _variants
+{
+  const char *name;
+  int count;
+  struct args *in;
+};
+'''
+
+# The actual input arguments.
+ARGS_TEMPLATE = '''struct args in%(argnum)d[%(num_args)d] = {
+%(args)s
+};
+'''
+
+# The actual variants, along with macros defined to access the variants.
+VARIANTS_TEMPLATE = '''struct _variants variants[%(num_variants)d] = {
+%(variants)s
+};
+
+#define NUM_VARIANTS %(num_variants)d
+#define NUM_SAMPLES(i) (variants[i].count)
+#define VARIANT(i) (variants[i].name)
+'''
+
+# Epilogue for the generated source file.
+EPILOGUE = '''
+#define BENCH_FUNC(i, j) ({%(getret)s CALL_BENCH_FUNC (i, j);})
+#define FUNCNAME "%(func)s"
+#include <bench-libmvec-skeleton.c>'''
+
+
+def gen_source(func_types, directives, all_vals):
+  """Generate source for the function
+
+  Generate the C source for the function from the values and
+  directives.
+
+  Args:
+    func: The function name
+    directives: A dictionary of directives applicable to this function
+    all_vals: A dictionary input values
+  """
+  # The includes go in first.
+  for header in directives['includes']:
+    print('#include <%s>' % header)
+
+  for header in directives['include-sources']:
+    print('#include "%s"' % header)
+
+  argtype_vtable = {
+    2: '128',
+    4: '256',
+    8: '512'
+  }
+  prefix_vtable = {
+    2: 'b',
+    4: 'c',
+    8: 'e'
+  }
+
+  # Get all the function properties
+  funcname_argtype = ''
+  float_flag = False
+  if func_types[1] == 'float':
+    float_flag = True
+  avx_flag = False
+  if func_types[3] == 'avx2':
+    avx_flag = True
+  funcname_stride = int(func_types[2][4:])
+  funcname_origin = func_types[-1]
+  if float_flag:
+    funcname_origin = funcname_origin[:-1]
+
+  if funcname_stride == 1:
+    # Prepare for scalar functions file generation
+    funcname_prefix = ''
+    funcname_prefix_1 = ''
+    funcname_argtype = 'double'
+    if float_flag:
+      funcname_argtype = 'float'
+  else:
+    # Prepare for libmvec functions file generation
+    funcname_prefix_1 = len(directives['args']) * 'v' + '_'
+    aligned_stride = funcname_stride
+    if float_flag:
+      aligned_stride /= 2
+    funcname_prefix = '_ZGV'
+    if (avx_flag and (aligned_stride == 4)):
+      funcname_prefix += 'd'
+    else:
+      funcname_prefix += prefix_vtable[aligned_stride]
+    funcname_prefix = funcname_prefix + 'N' + func_types[2][4:]
+    funcname_argtype = '__m' + argtype_vtable[aligned_stride]
+    if not float_flag:
+      funcname_argtype += 'd'
+
+  # Include x86intrin.h for vector functions
+  if not funcname_stride == 1:
+    print('#include <x86intrin.h>')
+    if (avx_flag and (aligned_stride == 4)):
+      # For bench-float-vlen8-avx2* and bench-double-vlen4-avx2*
+      print('#define REQUIRE_AVX2')
+    elif aligned_stride == 8:
+      # For bench-float-vlen16* and bench-double-vlen8*
+      print('#define REQUIRE_AVX512F')
+    elif aligned_stride == 4:
+      # For bench-float-vlen8* and bench-double-vlen4* without avx2
+      print('#define REQUIRE_AVX')
+  else:
+    print('#define FUNCTYPE %s' % funcname_argtype)
+
+  print('#define STRIDE %d ' % funcname_stride)
+
+  funcname = funcname_prefix + funcname_prefix_1 + funcname_origin
+  if float_flag:
+    funcname += 'f'
+
+  funcname_rettype = funcname_argtype
+  if directives['ret'] == '':
+    funcname_rettype = 'void'
+
+  funcname_inputtype = []
+  for arg, i in zip(directives['args'], itertools.count()):
+    funcname_inputtype.append(funcname_argtype)
+    if arg[0] == '<' and arg[-1] == '>':
+      pos = arg.rfind('*')
+      if pos == -1:
+        die('Output argument must be a pointer type')
+      funcname_inputtype[i] += ' *'
+
+  if not funcname_stride == 1:
+    if len(directives['args']) == 2:
+      print('extern %s %s (%s, %s);' % (funcname_rettype, funcname, funcname_inputtype[0], funcname_inputtype[1]))
+    elif len(directives['args']) == 3:
+      print('extern %s %s (%s, %s, %s);' % (funcname_rettype, funcname, funcname_inputtype[0], funcname_inputtype[1], funcname_inputtype[2]))
+    else:
+      print('extern %s %s (%s);' % (funcname_rettype, funcname, funcname_inputtype[0]))
+
+  # Print macros.  This branches out to a separate routine if
+  # the function takes arguments.
+  if not directives['args']:
+    print(DEFINES_TEMPLATE % {'funcname': funcname})
+    outargs = []
+  else:
+    outargs = _print_arg_data(funcname, float_flag, funcname_argtype, funcname_stride, directives, all_vals)
+
+  # Print the output variable definitions if necessary.
+  for out in outargs:
+    print(out)
+
+  # If we have a return value from the function, make sure it is
+  # assigned to prevent the compiler from optimizing out the
+  # call.
+  getret = ''
+
+  if directives['ret']:
+    if funcname_argtype != '':
+      print('static %s volatile ret;' % funcname_argtype)
+      getret = 'ret ='
+    else:
+      print('static %s volatile ret;' % directives['ret'])
+      getret = 'ret ='
+
+  # Test initialization.
+  if directives['init']:
+    print('#define BENCH_INIT %s' % directives['init'])
+
+  print(EPILOGUE % {'getret': getret, 'func': funcname})
+
+
+def _print_arg_data(func, float_flag, funcname_argtype, funcname_stride, directives, all_vals):
+  """Print argument data
+
+  This is a helper function for gen_source that prints structure and
+  values for arguments and their variants and returns output arguments
+  if any are found.
+
+  Args:
+    func: Function name
+    float_flag: True if function is float type
+    funcname_argtype: Type for vector variants
+    funcname_stride: Vector Length
+    directives: A dictionary of directives applicable to this function
+    all_vals: A dictionary input values
+
+  Returns:
+    Returns a list of definitions for function arguments that act as
+    output parameters.
+  """
+  # First, all of the definitions.  We process writing of
+  # CALL_BENCH_FUNC, struct args and also the output arguments
+  # together in a single traversal of the arguments list.
+  func_args = []
+  _func_args = []
+  arg_struct = []
+  outargs = []
+  # Conversion function for each type
+  vtable = {
+    '__m128d': '_mm_loadu_pd',
+    '__m256d': '_mm256_loadu_pd',
+    '__m512d': '_mm512_loadu_pd',
+    '__m128': '_mm_loadu_ps',
+    '__m256': '_mm256_loadu_ps',
+    '__m512': '_mm512_loadu_ps',
+    'double': '',
+    'float': ''
+  }
+
+  # For double max_vlen=8, for float max_vlen=16.
+  if float_flag == True:
+    max_vlen = 16
+  else:
+    max_vlen = 8
+
+  for arg, i in zip(directives['args'], itertools.count()):
+    if arg[0] == '<' and arg[-1] == '>':
+      outargs.append('static %s out%d __attribute__((used));' % (funcname_argtype, i))
+      func_args.append('&out%d' % i)
+      _func_args.append('&out%d' % i)
+    else:
+      arg_struct.append('  %s arg%d[STRIDE];' % (arg, i))
+      func_args.append('%s (variants[v].in[i].arg%d)' %
+                       (vtable[funcname_argtype], i))
+      _func_args.append('variants[v].in[i].arg%d[0]' % i)
+
+  if funcname_stride == 1:
+    print(BENCH_SCALAR_TEMPLATE % {'func': func,
+                                   'func_args': ', '.join(_func_args)})
+  elif directives['ret'] == '':
+    print(BENCH_SCALAR_TEMPLATE % {'func': func,
+                                   'func_args': ', '.join(func_args)})
+  else:
+    print(BENCH_VEC_TEMPLATE % {'func': func, 'func_args': ', '.join(func_args),
+                                'defs': funcname_argtype})
+  print(STRUCT_TEMPLATE % {'args': '\n'.join(arg_struct)})
+
+  # Now print the values.
+  variants = []
+  for (k, _vals), i in zip(all_vals.items(), itertools.count()):
+    vals = []
+    temp_vals = []
+    j = 0
+    temp_j = 0
+    result_v = ['', '', '']
+    for _v in _vals:
+      nums = _v.split(',')
+      for l in range(0, len(nums)):
+        result_v[l] = result_v[l] + nums[l].strip() + ','
+      j += 1
+      temp_j += 1
+
+      if temp_j == funcname_stride:
+        final_result = ''
+        for l in range(0, len(nums)):
+          final_result = final_result + '{' + result_v[l][:-1] + '},'
+        temp_vals.append(final_result[:-1])
+        temp_j = 0
+        result_v = ['', '', '']
+
+      # Make sure amount of test data is multiple of max_vlen
+      # to keep data size same for all vector length.
+      if j == max_vlen:
+        vals.extend(temp_vals)
+        temp_vals = []
+        j = 0
+
+    out = ['  {%s, 0},' % v for v in vals]
+
+    # Members for the variants structure list that we will
+    # print later.
+    variants.append('  {"%s", %d, in%d},' % (k, len(vals), i))
+    print(ARGS_TEMPLATE % {'argnum': i, 'num_args': len(vals),
+                           'args': '\n'.join(out)})
+
+  # Print the variants and the last set of macros.
+  print(VARIANTS_TEMPLATE % {'num_variants': len(all_vals),
+                             'variants': '\n'.join(variants)})
+  return outargs
+
+
+def _process_directive(d_name, d_val, func_args):
+  """Process a directive.
+
+  Evaluate the directive name and value passed and return the
+  processed value. This is a helper function for parse_file.
+
+  Args:
+    d_name: Name of the directive
+    d_val: The string value to process
+
+  Returns:
+    The processed value, which may be the string as it is or an object
+    that describes the directive.
+  """
+  # Process the directive values if necessary.  name and ret don't
+  # need any processing.
+  if d_name.startswith('include'):
+    d_val = d_val.split(',')
+  elif d_name == 'args':
+    d_val = d_val.split(':')
+    # Check if args type match
+    if not d_val[0] == func_args:
+      die("Args mismatch, should be %s, but get %s" % (d_val[0], func_args))
+
+  # Return the values.
+  return d_val
+
+
+def parse_file(func_types):
+  """Parse an input file
+
+  Given a function name, open and parse an input file for the function
+  and get the necessary parameters for the generated code and the list
+  of inputs.
+
+  Args:
+    func: The function name
+
+  Returns:
+    A tuple of two elements, one a dictionary of directives and the
+    other a dictionary of all input values.
+  """
+  all_vals = {}
+  # Valid directives.
+  directives = {
+    'name': '',
+    'args': [],
+    'includes': [],
+    'include-sources': [],
+    'ret': '',
+    'init': ''
+  }
+
+  func = func_types[-1]
+  try:
+    with open('../sysdeps/x86_64/fpu/libmvec-%s-inputs' % func) as f:
+      for line in f:
+        # Look for directives and parse it if found.
+        if line.startswith('##'):
+          try:
+            d_name, d_val = line[2:].split(':', 1)
+            d_name = d_name.strip()
+            d_val = d_val.strip()
+            directives[d_name] = _process_directive(d_name, d_val, func_types[1])
+          except (IndexError, KeyError):
+            die('Invalid directive: %s' % line[2:])
+
+        # Skip blank lines and comments.
+        line = line.split('#', 1)[0].rstrip()
+        if not line:
+          continue
+
+        # Otherwise, we're an input.  Add to the appropriate
+        # input set.
+        cur_name = directives['name']
+        all_vals.setdefault(cur_name, [])
+        all_vals[cur_name].append(line)
+  except IOError as ex:
+    die("Failed to open input file (%s): %s" % (ex.filename, ex.strerror))
+
+  return directives, all_vals
+
+
+def die(msg):
+  """Exit with an error
+
+  Prints an error message to the standard error stream and exits with
+  a non-zero status.
+
+  Args:
+    msg: The error message to print to standard error
+  """
+  print('%s\n' % msg, file=sys.stderr)
+  sys.exit(os.EX_DATAERR)
+
+
+def main(args):
+  """Main function
+
+  Use the first command line argument as function name and parse its
+  input file to generate C source that calls the function repeatedly
+  for the input.
+
+  Args:
+    args: The command line arguments with the program name dropped
+
+  Returns:
+    os.EX_USAGE on error and os.EX_OK on success.
+  """
+  if len(args) != 1:
+    print('Usage: %s <function>' % sys.argv[0])
+    return os.EX_USAGE
+
+  func_types = args[0].split('-')
+  directives, all_vals = parse_file(func_types)
+  gen_source(func_types, directives, all_vals)
+  return os.EX_OK
+
+
+if __name__ == '__main__':
+  sys.exit(main(sys.argv[1:]))