powerpc64le: ifunc select *f128 routines in multiarch mode

Programmatically generate simple wrappers for interesting libm *f128 objects. Selected functions are transcendental functions or those with trivial compiler builtins. This can result in a 2-3x speedup (e.g. logf128 and expf128). A second set of implementation files is generated which include the first implementation encountered along the search path. This usually works, except when a wrapper is overridden and makefile search order slightly diverges from include order. Likewise, wrapper object files are created for each generated file. These hold the ifunc selection routines which export ABI. Next, several shared headers are intercepted to control renaming. ASM function redirects are used first, and sometimes macro renames if the former is impractical. Notably, if the requested machine supports hardware IEEE128 (i.e. POWER9 and newer) this ifunc machinery is disabled. Likewise existing ifunc support for float128 is consolidated into this (e.g. sqrtf128 and fmaf128). Reviewed-by: Tulio Magno Quites Machado Filho <tuliom@linux.ibm.com>
author: Paul E. Murphy <murphyp@linux.vnet.ibm.com> 2020-04-07 16:20:55 -0500
committer: Paul E. Murphy <murphyp@linux.ibm.com> 2020-11-30 09:56:14 -0600
commit: 33fc34521de970153344cfe1bfa9ce6da7a6efea (patch)
tree: e327104762da05a3354a89a35122fe26bdb8c439 /sysdeps/powerpc/powerpc64/le/fpu/multiarch/Makefile
parent: cc5d5852c65eddf92368b0845e1374bd443316e7 (diff)
download: glibc-33fc34521de970153344cfe1bfa9ce6da7a6efea.tar.gz
glibc-33fc34521de970153344cfe1bfa9ce6da7a6efea.tar.xz
glibc-33fc34521de970153344cfe1bfa9ce6da7a6efea.zip
1 files changed, 198 insertions, 6 deletions
diff --git a/sysdeps/powerpc/powerpc64/le/fpu/multiarch/Makefile b/sysdeps/powerpc/powerpc64/le/fpu/multiarch/Makefile
index 8747b02127..767805b510 100644
--- a/sysdeps/powerpc/powerpc64/le/fpu/multiarch/Makefile
+++ b/sysdeps/powerpc/powerpc64/le/fpu/multiarch/Makefile
@@ -1,10 +1,202 @@
 ifeq ($(subdir),math)
-libm-sysdep_routines += s_fmaf128-ppc64 s_fmaf128-power9 \
-			w_sqrtf128-power9 w_sqrtf128-ppc64le
 
-CFLAGS-s_fmaf128-ppc64.c += $(type-float128-CFLAGS) $(no-gnu-attribute-CFLAGS)
-CFLAGS-s_fmaf128-power9.c += $(type-float128-CFLAGS) -mcpu=power9 $(no-gnu-attribute-CFLAGS)
+#
+# _Float128 ifunc selection is only wanted when the baseline cpu is
+# older than power9; record that case in do_f128_multiarch.
+ifneq ($(libc-submachine-power9),yes)
+do_f128_multiarch = yes
+endif
+
+#
+# This is an ugly, but contained, mechanism to provide hardware optimized
+# _Float128 and ldouble == ieee128 optimized routines for P9 and beyond
+# hardware.  At a very high level, we rely on ASM renames, and rarely
+# macro renames to build two sets of _Float128 ABI, one with _power8 (the
+# baseline powerpc64le cpu) and _power9 (the first powerpc64le cpu to introduce
+# hardware support for _Float128).
+#
+# At a high level, we compile 3 files for each object file.
+#   1.  The baseline soft-float128, unsuffixed objects $(object).$(sfx)
+#       The symbols contained in these files are suffixed by _power8.
+#   2.  The hard-float128, power9, suffixed objects $(object)-power9.$(sfx).
+#       The symbols contained in these files are suffixed by _power9.
+#   3.  The IFUNC wrapper object to export ABI, $(object)-ifunc.$(sfx)
+#       This glues the above together and implements the ABI.
+#
+# 2 & 3 are automatically generated by Makefile rule.  Placing the exported
+# ABI into a separate file allows reuse of existing aliasing macros
+# with minimal hassle.
+#
+#
+# If the float128 ABI is expanded, and new ifunc wrappers are desired,
+# the following lists how to map new symbols from the shared headers into
+# their local overrides here:
+#
+#   float128_private.h
+#
+#     is used to rename the ldouble == ieee128 object files.  This takes
+#     it a step further and redirects symbols to a local name.  This supports
+#     nearly all files in sysdeps/ieee754/float128, but not all _Float128
+#     objects.  However, this is only meant to be used internally to support
+#     compilation of ldbl-128 into float128.
+#
+#   math-type-macros-float128.h
+#
+#     renames symbols which are generated via shared templates in math/.
+#
+#   math_private.h
+#
+#     provides internal declarations for common macros and functions which
+#     are called from within libm.  Note, float128_private.h duplicates
+#     some of these declarations as these headers are generally not included
+#     in the same translation unit.
+#
+# The above is supported by several header files as described below:
+#
+#   float128-ifunc.h
+#
+#     provides support for generating the IFUNC objects in part 3 above.
+#     This header is only included with wrapper functions.
+#
+#   float128-ifunc-macros.h
+#
+#     disables all first-order float128 aliasing macros used in libm,
+#     and libm wrappers around libc-symbols.h.
+#
+#   float128-ifunc-redirect-macros.h
+#
+#     provides macros which implement the appending of the suffix to
+#     symbols which have been selected.
+#
+#   float128-ifunc-redirects.h
+#
+#     provides ASM redirects for symbols which are redirected in the
+#     private copy of math.h used by glibc, but not declared by math_private.h
+#
+#   float128-ifunc-redirects-mp.h
+#
+#     provides ASM redirects which are used by math_private.h (the -mp suffix)
+#     and the interposer float128_private.h discussed above.
+#
+# Notably, this enforces a slightly different mechanism for machine specific
+# overrides.  Optimizations for all targets must all be reachable from the same
+# file.  See the history of fmaf128 or sqrtf128 to understand how this looks
+# in practice.
+#
+ifeq ($(do_f128_multiarch),yes)
+
+# Routines for which a -power9 implementation file and an -ifunc
+# wrapper file are generated by the gen-float128-ifuncs.stmp rule.
+# List each routine exactly once: the names are appended to
+# libm-routines and every entry triggers file generation.
+gen-libm-f128-ifunc-routines = \
+	e_acosf128 e_acoshf128 e_asinf128 e_atan2f128 e_atanhf128 e_coshf128 \
+	e_expf128 e_fmodf128 e_hypotf128 e_j0f128 e_j1f128 e_jnf128 \
+	e_lgammaf128_r e_logf128 e_log10f128 e_powf128 e_remainderf128 \
+	e_sinhf128 e_sqrtf128 e_gammaf128_r e_ilogbf128 k_tanf128 s_asinhf128 \
+	s_atanf128 s_cbrtf128 s_ceilf128 s_cosf128 s_erff128 s_expm1f128 \
+	s_fabsf128 s_floorf128 s_log1pf128 s_logbf128 \
+	s_rintf128 s_scalblnf128 s_sinf128 s_tanf128 \
+	s_tanhf128 s_truncf128 s_remquof128 e_log2f128 \
+	s_roundf128 s_nearbyintf128 s_sincosf128 s_fmaf128 s_lrintf128 \
+	s_llrintf128 s_lroundf128 s_llroundf128 e_exp10f128 \
+	m_modff128 m_scalbnf128 m_frexpf128 m_ldexpf128 x2y2m1f128 \
+	gamma_productf128 lgamma_negf128 lgamma_productf128 s_roundevenf128 \
+	cargf128 conjf128 cimagf128 crealf128 cabsf128 e_scalbf128 s_cacosf128 \
+	s_cacoshf128 s_ccosf128 s_ccoshf128 s_casinf128 s_csinf128 \
+	s_casinhf128 k_casinhf128 s_csinhf128 \
+	s_catanhf128 s_catanf128 s_ctanf128 s_ctanhf128 s_cexpf128 s_clogf128 \
+	s_cprojf128 s_csqrtf128 s_cpowf128 s_clog10f128 s_fdimf128 \
+	s_fmaxf128 s_fminf128 w_ilogbf128 w_llogbf128 \
+	w_log1pf128 w_scalblnf128 w_acosf128 \
+	w_acoshf128 w_asinf128 w_atan2f128 w_atanhf128 w_coshf128 w_exp10f128 \
+	w_exp2f128 w_fmodf128 w_hypotf128 w_j0f128 w_j1f128 w_jnf128 \
+	w_logf128 w_log10f128 w_log2f128 w_powf128 w_remainderf128 \
+	w_scalbf128 w_sinhf128 w_sqrtf128 w_tgammaf128 w_lgammaf128 \
+	w_lgammaf128_r w_expf128 e_exp2f128 \
+	k_sinf128 k_cosf128 k_sincosf128 e_rem_pio2f128
+
+
+# Names derived from the routine list: one -power9 implementation and
+# one -ifunc wrapper per routine, plus the union of both sets.
+f128-march-routines-p9 = $(patsubst %,%-power9,$(gen-libm-f128-ifunc-routines))
+f128-march-routines-ifunc = $(patsubst %,%-ifunc,$(gen-libm-f128-ifunc-routines))
+f128-march-routines = $(f128-march-routines-p9) $(f128-march-routines-ifunc)
+# CPU suffixes for which an implementation file is generated per routine.
+f128-march-cpus = power9
+
+# Add the derived names to the libm routine list and to `generated'.
+# NOTE(review): the entries carry no file extension -- confirm that is
+# what `generated' expects.
+libm-routines += $(f128-march-routines)
+generated += $(f128-march-routines)
+
+# Per-file flags for float128-ifunc.c; both referenced variables are
+# defined outside this file.
+CFLAGS-float128-ifunc.c += $(type-float128-CFLAGS) $(no-gnu-attribute-CFLAGS)
+
+# Copy special CFLAGS for some functions
+# (the generated m_modff128-power9.c is built with -fsignaling-nans;
+# presumably this mirrors the flag of the baseline object -- confirm).
+CFLAGS-m_modff128-power9.c += -fsignaling-nans
+
+# Generate ifunc wrapper files and target specific wrappers around
+# each routine above.  Note, m_%.c files are fixed up to include
+# s_%.c files.  This is an artifact of the makefile rules which allow
+# some files to be compiled for libc and libm.
+#
+# For every routine R in gen-libm-f128-ifunc-routines the recipe writes:
+#   $(objpfx)R-<cpu>.c  for each <cpu> in f128-march-cpus; it contains
+#                       only an #include of R.c (of s_X.c when R is m_X).
+#   $(objpfx)R-ifunc.c  which includes float128-ifunc.h, gives
+#                       DECL_ALIAS<p>_<name><r> and GEN_COMPAT<p>_<name><r>
+#                       default definitions when they are not already
+#                       defined, and invokes both on <name>.
+# where
+#   <p>    is "_" plus the text before the first '_' of R when that text
+#          is a single character (m is mapped to s), otherwise empty;
+#   <name> is R without a leading "?_", a trailing "_r" and "f128";
+#   <r>    is "_r" when R ends in _r, otherwise empty.
+# The default DECL_ALIAS<p>_<name><r> forwards to DECL_ALIAS<p>, or to
+# DECL_ALIAS_R<p> for the _r variants; the default GEN_COMPAT is empty.
+#
+# The stamp file is the rule's target.  `set -e' makes the recipe stop
+# at the first failing command so the stamp is never written after a
+# generated file could not be created.  The operands of `[' are quoted
+# so an empty expansion cannot turn into a test syntax error.
+$(objpfx)gen-float128-ifuncs.stmp: Makefile
+	$(make-target-directory)
+	set -e;                                          \
+	for gcall in $(gen-libm-f128-ifunc-routines); do \
+	  ifile="$${gcall}";                             \
+	  if [ "$${gcall##m_}" != "$${gcall}" ]; then    \
+	    ifile="s_$${gcall##m_}";                     \
+	  fi;                                            \
+	  for cpu in $(f128-march-cpus); do              \
+	    file=$(objpfx)$${gcall}-$${cpu}.c;           \
+	    {                                            \
+	      echo "#include <$${ifile}.c>";             \
+	    } > $${file};                                \
+	  done;                                          \
+	  name="$${gcall##?_}";                          \
+	  pfx="$${gcall%%_*}";                           \
+	  R="";                                          \
+	  r="";                                          \
+	  if [ "$${gcall##m_}" != "$${gcall}" ]; then    \
+	    pfx="s";                                     \
+	  fi;                                            \
+	  if [ "$${#pfx}" != 1 ]; then                   \
+	    pfx="";                                      \
+	  else                                           \
+	    pfx="_$${pfx}";                              \
+	  fi;                                            \
+	  if [ "$${name%%_r}" != "$${name}" ]; then      \
+	    R="_R";                                      \
+	    r="_r";                                      \
+	    name="$${name%%_r}";                         \
+	  fi;                                            \
+	  name="$${name%%f128}";                         \
+	  decl="DECL_ALIAS$${pfx}_$${name}$${r}";        \
+	  compat="GEN_COMPAT$${pfx}_$${name}$${r}";      \
+	  declc="DECL_ALIAS$${R}$${pfx}";                \
+	  {                                              \
+	    echo "#include <float128-ifunc.h>";          \
+	    echo "#ifndef $${decl}";                     \
+	    echo "# define $${decl}(f) $${declc} (f)";   \
+	    echo "#endif";                               \
+	    echo "#ifndef $${compat}";                   \
+	    echo "# define $${compat}(f)";               \
+	    echo "#endif";                               \
+	    echo "$${decl} ($${name});";                 \
+	    echo "$${compat} ($${name});";               \
+	  } > $(objpfx)$${gcall}-ifunc.c;                \
+	done;                                            \
+	echo > $(@)
+
+# Every generated .c file depends on the stamp whose recipe writes it.
+$(addprefix $(objpfx),$(addsuffix .c,$(f128-march-routines))): $(objpfx)gen-float128-ifuncs.stmp
+
+# Flags added to the power9 objects: enable the ifunc renaming and
+# apply the same float128 / no-gnu-attribute flags used for
+# float128-ifunc.c.  The variable is spelt no-gnu-attribute-CFLAGS
+# (singular) everywhere else in this file.
+enable-f128-ifunc-CFLAGS = -D_F128_ENABLE_IFUNC $(no-gnu-attribute-CFLAGS) $(type-float128-CFLAGS)
+
+# Enable IFUNC on baseline (power8) implementations
+# NOTE(review): this first include precedes the definition below, so it
+# runs with whatever o-iterator-doit / object-suffixes-left an earlier
+# user left behind; it looks redundant -- confirm before removing.
+include $(o-iterator)
+# With $(o) bound to an object suffix, add -D_F128_ENABLE_IFUNC to the
+# target-specific sysdep-CFLAGS of every unsuffixed routine object.
+define o-iterator-doit
+$(foreach f,$(gen-libm-f128-ifunc-routines),$(objpfx)$(f)$(o)): sysdep-CFLAGS += -D_F128_ENABLE_IFUNC
+endef
+# Presumably the o-iterator makefile (defined outside this file) expands
+# o-iterator-doit once per suffix in object-suffixes-left -- confirm.
+object-suffixes-left := $(all-object-suffixes)
+include $(o-iterator)
+
+# Likewise, but for power9.
+# NOTE(review): as above, this include precedes the redefinition of
+# o-iterator-doit and looks redundant -- confirm before removing.
+include $(o-iterator)
+# With $(o) bound to an object suffix, add the ifunc flags and
+# -mcpu=power9 to the target-specific sysdep-CFLAGS of every -power9
+# object.  The doubled `$$' leaves a literal reference to
+# enable-f128-ifunc-CFLAGS in the text after one round of expansion.
+define o-iterator-doit
+$(foreach f,$(f128-march-routines-p9),$(objpfx)$(f)$(o)): sysdep-CFLAGS += $$(enable-f128-ifunc-CFLAGS) -mcpu=power9
+endef
+# Reset the suffix list and run the iterator (defined outside this file).
+object-suffixes-left := $(all-object-suffixes)
+include $(o-iterator)
 
-CFLAGS-w_sqrtf128-ppc64le.c += $(type-float128-CFLAGS) $(no-gnu-attribute-CFLAGS)
-CFLAGS-w_sqrtf128-power9.c += $(type-float128-CFLAGS) -mcpu=power9 $(no-gnu-attribute-CFLAGS)
+endif # do_f128_multiarch
 endif
author	Paul E. Murphy <murphyp@linux.vnet.ibm.com>	2020-04-07 16:20:55 -0500
committer	Paul E. Murphy <murphyp@linux.ibm.com>	2020-11-30 09:56:14 -0600
commit	33fc34521de970153344cfe1bfa9ce6da7a6efea (patch)
tree	e327104762da05a3354a89a35122fe26bdb8c439 /sysdeps/powerpc/powerpc64/le/fpu/multiarch/Makefile
parent	cc5d5852c65eddf92368b0845e1374bd443316e7 (diff)
download	glibc-33fc34521de970153344cfe1bfa9ce6da7a6efea.tar.gz glibc-33fc34521de970153344cfe1bfa9ce6da7a6efea.tar.xz glibc-33fc34521de970153344cfe1bfa9ce6da7a6efea.zip