From 80792b61024d810c3d36e9a1dd8a166249d8b6e8 Mon Sep 17 00:00:00 2001 From: Szabolcs Nagy Date: Fri, 28 Jun 2019 11:29:19 +0100 Subject: Aarch64: Add simd exp/expf ABI symbols The implementation is in assembly and just calls the scalar math code. This ensures that old compiler without vector call abi support can build libmvec. The abi is supported since GCC 9.1, the specification is https://developer.arm.com/tools-and-software/server-and-hpc/arm-architecture-tools/arm-compiler-for-hpc/vector-function-abi Vector functions require a STO_AARCH64_VARIANT_PCS marking in the dynamic symbol table for lazy bound calls to work. This will be missing in libmvec, which works because the marking only affects the behaviour if there are calls to the symbols in the binary. Testing requires vector call abi support, which is detected. Header declarations are not added yet, so the symbols will not be used by the compiler: they are just added so the abi is in place which enables backporting later. Currently we cannot add correct declarations that only declare the specific symbols we provide: the OpenMP pragma mechanism would declare both AdvSIMD and SVE variants. 2019-07-15 Steve Ellcey Szabolcs Nagy * sysdeps/aarch64/configure.ac (build_mathvec): Enable. (test-mathvec): Enable if ABI is supported. * sysdeps/aarch64/configure: Regenerate. * sysdeps/aarch64/fpu/Makefile (libmvec-support): Add libmvec_double_vlen2_exp, libmvec_float_vlen4_expf to list. (libmvec_nonshared.a): Use make-dummy-lib. (libmvec-tests): Add double-vlen2, float-vlen4 to list. (double-vlen2-funcs): Add new vector function name. (float-vlen4-funcs): Add new vector function name. * sysdeps/aarch64/fpu/Versions: New file. * sysdeps/aarch64/fpu/libmvec_double_vlen2.h: New file. * sysdeps/aarch64/fpu/libmvec_double_vlen2_exp.S: New file. * sysdeps/aarch64/fpu/libmvec_float_vlen4.h: New file. * sysdeps/aarch64/fpu/libmvec_float_vlen4_expf.S: New file. * sysdeps/aarch64/fpu/test-double-vlen2-wrappers.c: New file. * sysdeps/aarch64/fpu/test-float-vlen4-wrappers.c: New file. * sysdeps/aarch64/libm-test-ulps (exp_vlen2): New entry. (exp_vlen4): Likewise. * sysdeps/unix/sysv/linux/aarch64/libmvec.abilist: New file. --- sysdeps/aarch64/configure | 31 +++++++++++ sysdeps/aarch64/configure.ac | 24 +++++++++ sysdeps/aarch64/fpu/Makefile | 19 +++++++ sysdeps/aarch64/fpu/Versions | 5 ++ sysdeps/aarch64/fpu/libmvec_double_vlen2.h | 59 +++++++++++++++++++++ sysdeps/aarch64/fpu/libmvec_double_vlen2_exp.S | 21 ++++++++ sysdeps/aarch64/fpu/libmvec_float_vlen4.h | 65 ++++++++++++++++++++++++ sysdeps/aarch64/fpu/libmvec_float_vlen4_expf.S | 21 ++++++++ sysdeps/aarch64/fpu/test-double-vlen2-wrappers.c | 28 ++++++++++ sysdeps/aarch64/fpu/test-float-vlen4-wrappers.c | 28 ++++++++++ sysdeps/aarch64/libm-test-ulps | 6 +++ sysdeps/unix/sysv/linux/aarch64/libmvec.abilist | 2 + 12 files changed, 309 insertions(+) create mode 100644 sysdeps/aarch64/fpu/Versions create mode 100644 sysdeps/aarch64/fpu/libmvec_double_vlen2.h create mode 100644 sysdeps/aarch64/fpu/libmvec_double_vlen2_exp.S create mode 100644 sysdeps/aarch64/fpu/libmvec_float_vlen4.h create mode 100644 sysdeps/aarch64/fpu/libmvec_float_vlen4_expf.S create mode 100644 sysdeps/aarch64/fpu/test-double-vlen2-wrappers.c create mode 100644 sysdeps/aarch64/fpu/test-float-vlen4-wrappers.c create mode 100644 sysdeps/unix/sysv/linux/aarch64/libmvec.abilist diff --git a/sysdeps/aarch64/configure b/sysdeps/aarch64/configure index 5bd355a691..df15cdb02a 100644 --- a/sysdeps/aarch64/configure +++ b/sysdeps/aarch64/configure @@ -172,3 +172,34 @@ else config_vars="$config_vars default-abi = lp64" fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for pcs attribute support" >&5 +$as_echo_n "checking for pcs attribute support... " >&6; } +if ${libc_cv_gcc_pcs_attribute+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat > conftest.c <&5 \ + 2>&5 ; then + libc_cv_gcc_pcs_attribute=yes +fi +rm -f conftest* +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_gcc_pcs_attribute" >&5 +$as_echo "$libc_cv_gcc_pcs_attribute" >&6; } + +# Enable libmvec by default. +if test x"$build_mathvec" = xnotset; then + build_mathvec=yes +fi + +# Only test libmvec if the compiler supports aarch64_vector_pcs. +if test x"$build_mathvec" = xyes; then + if test $libc_cv_gcc_pcs_attribute = yes; then + config_vars="$config_vars +test-mathvec = yes" + fi +fi diff --git a/sysdeps/aarch64/configure.ac b/sysdeps/aarch64/configure.ac index 7851dd4dac..eab411cad4 100644 --- a/sysdeps/aarch64/configure.ac +++ b/sysdeps/aarch64/configure.ac @@ -20,3 +20,27 @@ if test $libc_cv_aarch64_be = yes; then else LIBC_CONFIG_VAR([default-abi], [lp64]) fi + +AC_CACHE_CHECK([for pcs attribute support], + libc_cv_gcc_pcs_attribute, [dnl +cat > conftest.c <&AS_MESSAGE_LOG_FD \ + 2>&AS_MESSAGE_LOG_FD ; then + libc_cv_gcc_pcs_attribute=yes +fi +rm -f conftest*]) + +# Enable libmvec by default. +if test x"$build_mathvec" = xnotset; then + build_mathvec=yes +fi + +# Only test libmvec if the compiler supports aarch64_vector_pcs. +if test x"$build_mathvec" = xyes; then + if test $libc_cv_gcc_pcs_attribute = yes; then + LIBC_CONFIG_VAR([test-mathvec], [yes]) + fi +fi diff --git a/sysdeps/aarch64/fpu/Makefile b/sysdeps/aarch64/fpu/Makefile index 4a182bd6d6..2841c03c14 100644 --- a/sysdeps/aarch64/fpu/Makefile +++ b/sysdeps/aarch64/fpu/Makefile @@ -12,3 +12,22 @@ CFLAGS-s_fmaxf.c += -ffinite-math-only CFLAGS-s_fmin.c += -ffinite-math-only CFLAGS-s_fminf.c += -ffinite-math-only endif + +ifeq ($(subdir),mathvec) +libmvec-support += \ + libmvec_double_vlen2_exp \ + libmvec_float_vlen4_expf \ + +install-lib += libmvec_nonshared.a +$(objpfx)libmvec_nonshared.a: $(dep-dummy-lib); $(make-dummy-lib) +endif + +ifeq ($(subdir),math) +ifeq ($(build-mathvec),yes) +double-vlen2-funcs = exp +float-vlen4-funcs = exp +ifeq ($(test-mathvec),yes) +libmvec-tests += double-vlen2 float-vlen4 +endif +endif +endif diff --git a/sysdeps/aarch64/fpu/Versions b/sysdeps/aarch64/fpu/Versions new file mode 100644 index 0000000000..da36f3c495 --- /dev/null +++ b/sysdeps/aarch64/fpu/Versions @@ -0,0 +1,5 @@ +libmvec { + GLIBC_2.30 { + _ZGVnN2v_exp; _ZGVnN4v_expf; + } +} diff --git a/sysdeps/aarch64/fpu/libmvec_double_vlen2.h b/sysdeps/aarch64/fpu/libmvec_double_vlen2.h new file mode 100644 index 0000000000..383980d6ef --- /dev/null +++ b/sysdeps/aarch64/fpu/libmvec_double_vlen2.h @@ -0,0 +1,59 @@ +/* Double-precision 2 element vector function template. + Copyright (C) 2019 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include + +ENTRY (VECTOR_FUNCTION) + stp x29, x30, [sp, -288]! + cfi_adjust_cfa_offset (288) + cfi_rel_offset (x29, 0) + cfi_rel_offset (x30, 8) + mov x29, sp + stp q8, q9, [sp, 16] + stp q10, q11, [sp, 48] + stp q12, q13, [sp, 80] + stp q14, q15, [sp, 112] + stp q16, q17, [sp, 144] + stp q18, q19, [sp, 176] + stp q20, q21, [sp, 208] + stp q22, q23, [sp, 240] + + // Use per lane load/store to avoid endianness issues. + str q0, [sp, 272] + ldr d0, [sp, 272] + bl SCALAR_FUNCTION + str d0, [sp, 272] + ldr d0, [sp, 280] + bl SCALAR_FUNCTION + str d0, [sp, 280] + ldr q0, [sp, 272] + + ldp q8, q9, [sp, 16] + ldp q10, q11, [sp, 48] + ldp q12, q13, [sp, 80] + ldp q14, q15, [sp, 112] + ldp q16, q17, [sp, 144] + ldp q18, q19, [sp, 176] + ldp q20, q21, [sp, 208] + ldp q22, q23, [sp, 240] + ldp x29, x30, [sp], 288 + cfi_adjust_cfa_offset (288) + cfi_restore (x29) + cfi_restore (x30) + ret +END (VECTOR_FUNCTION) diff --git a/sysdeps/aarch64/fpu/libmvec_double_vlen2_exp.S b/sysdeps/aarch64/fpu/libmvec_double_vlen2_exp.S new file mode 100644 index 0000000000..644405cc4f --- /dev/null +++ b/sysdeps/aarch64/fpu/libmvec_double_vlen2_exp.S @@ -0,0 +1,21 @@ +/* Double-precision 2 element vector e^x function. + Copyright (C) 2019 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#define SCALAR_FUNCTION exp +#define VECTOR_FUNCTION _ZGVnN2v_exp +#include "libmvec_double_vlen2.h" diff --git a/sysdeps/aarch64/fpu/libmvec_float_vlen4.h b/sysdeps/aarch64/fpu/libmvec_float_vlen4.h new file mode 100644 index 0000000000..2450309c13 --- /dev/null +++ b/sysdeps/aarch64/fpu/libmvec_float_vlen4.h @@ -0,0 +1,65 @@ +/* Single-precision 4 element vector function template. + Copyright (C) 2019 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include + +ENTRY (VECTOR_FUNCTION) + stp x29, x30, [sp, -288]! + cfi_adjust_cfa_offset (288) + cfi_rel_offset (x29, 0) + cfi_rel_offset (x30, 8) + mov x29, sp + stp q8, q9, [sp, 16] + stp q10, q11, [sp, 48] + stp q12, q13, [sp, 80] + stp q14, q15, [sp, 112] + stp q16, q17, [sp, 144] + stp q18, q19, [sp, 176] + stp q20, q21, [sp, 208] + stp q22, q23, [sp, 240] + + // Use per lane load/store to avoid endianness issues. + str q0, [sp, 272] + ldr s0, [sp, 272] + bl SCALAR_FUNCTION + str s0, [sp, 272] + ldr s0, [sp, 276] + bl SCALAR_FUNCTION + str s0, [sp, 276] + ldr s0, [sp, 280] + bl SCALAR_FUNCTION + str s0, [sp, 280] + ldr s0, [sp, 284] + bl SCALAR_FUNCTION + str s0, [sp, 284] + ldr q0, [sp, 272] + + ldp q8, q9, [sp, 16] + ldp q10, q11, [sp, 48] + ldp q12, q13, [sp, 80] + ldp q14, q15, [sp, 112] + ldp q16, q17, [sp, 144] + ldp q18, q19, [sp, 176] + ldp q20, q21, [sp, 208] + ldp q22, q23, [sp, 240] + ldp x29, x30, [sp], 288 + cfi_adjust_cfa_offset (288) + cfi_restore (x29) + cfi_restore (x30) + ret +END (VECTOR_FUNCTION) diff --git a/sysdeps/aarch64/fpu/libmvec_float_vlen4_expf.S b/sysdeps/aarch64/fpu/libmvec_float_vlen4_expf.S new file mode 100644 index 0000000000..ab76ea0c77 --- /dev/null +++ b/sysdeps/aarch64/fpu/libmvec_float_vlen4_expf.S @@ -0,0 +1,21 @@ +/* Single-precision 4 element vector e^x function. + Copyright (C) 2019 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#define SCALAR_FUNCTION expf +#define VECTOR_FUNCTION _ZGVnN4v_expf +#include "libmvec_float_vlen4.h" diff --git a/sysdeps/aarch64/fpu/test-double-vlen2-wrappers.c b/sysdeps/aarch64/fpu/test-double-vlen2-wrappers.c new file mode 100644 index 0000000000..6c6c44d6b5 --- /dev/null +++ b/sysdeps/aarch64/fpu/test-double-vlen2-wrappers.c @@ -0,0 +1,28 @@ +/* Wrapper part of tests for aarch64 double vector math functions. + Copyright (C) 2019 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include "test-double-vlen2.h" + +#define VEC_TYPE float64x2_t + +/* Hack: VECTOR_WRAPPER declares the vector function without the pcs attribute, + placing it here happens to work, should be fixed in test-math-vector.h. */ +__attribute__ ((aarch64_vector_pcs)) + +VECTOR_WRAPPER (WRAPPER_NAME (exp), _ZGVnN2v_exp) diff --git a/sysdeps/aarch64/fpu/test-float-vlen4-wrappers.c b/sysdeps/aarch64/fpu/test-float-vlen4-wrappers.c new file mode 100644 index 0000000000..5117633f1f --- /dev/null +++ b/sysdeps/aarch64/fpu/test-float-vlen4-wrappers.c @@ -0,0 +1,28 @@ +/* Wrapper part of tests for float aarch64 vector math functions. + Copyright (C) 2019 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include "test-float-vlen4.h" + +#define VEC_TYPE float32x4_t + +/* Hack: VECTOR_WRAPPER declares the vector function without the pcs attribute, + placing it here happens to work, should be fixed in test-math-vector.h. */ +__attribute__ ((aarch64_vector_pcs)) + +VECTOR_WRAPPER (WRAPPER_NAME (expf), _ZGVnN4v_expf) diff --git a/sysdeps/aarch64/libm-test-ulps b/sysdeps/aarch64/libm-test-ulps index 585e5bbce7..1ed4af9e55 100644 --- a/sysdeps/aarch64/libm-test-ulps +++ b/sysdeps/aarch64/libm-test-ulps @@ -1601,6 +1601,12 @@ float: 1 idouble: 1 ifloat: 1 +Function: "exp_vlen2": +double: 1 + +Function: "exp_vlen4": +float: 1 + Function: "expm1": double: 1 float: 1 diff --git a/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist b/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist new file mode 100644 index 0000000000..9e178253f7 --- /dev/null +++ b/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist @@ -0,0 +1,2 @@ +GLIBC_2.30 _ZGVnN2v_exp F +GLIBC_2.30 _ZGVnN4v_expf F -- cgit 1.4.1