From c9a8c526acd185176e486bee4624039740f8c435 Mon Sep 17 00:00:00 2001
From: Andrew Senkevich <andrew.senkevich@intel.com>
Date: Thu, 18 Jun 2015 17:55:55 +0300
Subject: Vector sincos for x86_64 and tests.

Here is the implementation of vectorized sincos, containing SSE, AVX,
AVX2 and AVX512 versions, according to the Vector ABI
<https://groups.google.com/forum/#!topic/x86-64-abi/LmppCfN1rZ4>.

    * NEWS: Mention addition of x86_64 vector sincos.
    * bits/libm-simd-decl-stubs.h: Added stubs for sincos.
    * math/math.h (__MATHDECL_VEC): New macro.
    * math/bits/mathcalls.h: Added sincos declaration with __MATHDECL_VEC.
    * math/gen-libm-have-vector-test.sh: Added generation of sincos wrapper
    declaration under condition.
    * math/test-vec-loop.h (TEST_VEC_LOOP): Refactored.
    * math/test-double-vlen2.h: Added wrapper for sincos tests, reflected
    TEST_VEC_LOOP change.
    * math/test-double-vlen4.h: Likewise.
    * math/test-double-vlen8.h: Likewise.
    * math/test-float-vlen16.h: Reflected TEST_VEC_LOOP change.
    * math/test-float-vlen4.h: Likewise.
    * math/test-float-vlen8.h: Likewise.
    * sysdeps/unix/sysv/linux/x86_64/libmvec.abilist: New symbols added.
    * sysdeps/x86/fpu/bits/math-vector.h: Added sincos SIMD declaration.
    * sysdeps/x86_64/fpu/Makefile (libmvec-support): Added new files.
    * sysdeps/x86_64/fpu/Versions: New versions added.
    * sysdeps/x86_64/fpu/libm-test-ulps: Regenerated.
    * sysdeps/x86_64/fpu/multiarch/Makefile (libmvec-sysdep_routines):
    Added build of SSE, AVX2 and AVX512 IFUNC versions.
    * sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core.S: New file.
    * sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core_sse4.S: New file.
    * sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core.S: New file.
    * sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core_avx2.S: New file.
    * sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core.S: New file.
    * sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core_avx512.S: New file.
    * sysdeps/x86_64/fpu/svml_d_sincos2_core.S: New file.
    * sysdeps/x86_64/fpu/svml_d_sincos4_core.S: New file.
    * sysdeps/x86_64/fpu/svml_d_sincos4_core_avx.S: New file.
    * sysdeps/x86_64/fpu/svml_d_sincos8_core.S: New file.
    * sysdeps/x86_64/fpu/svml_d_sincos_data.S: New file.
    * sysdeps/x86_64/fpu/svml_d_sincos_data.h: New file.
    * sysdeps/x86_64/fpu/svml_d_wrapper_impl.h: Added wrappers for sincos.
    * sysdeps/x86_64/fpu/test-double-vlen2-wrappers.c: Vector sincos tests.
    * sysdeps/x86_64/fpu/test-double-vlen2.c: Likewise.
    * sysdeps/x86_64/fpu/test-double-vlen4-avx2-wrappers.c: Likewise.
    * sysdeps/x86_64/fpu/test-double-vlen4-avx2.c: Likewise.
    * sysdeps/x86_64/fpu/test-double-vlen4-wrappers.c: Likewise.
    * sysdeps/x86_64/fpu/test-double-vlen4.c: Likewise.
    * sysdeps/x86_64/fpu/test-double-vlen8-wrappers.c: Likewise.
    * sysdeps/x86_64/fpu/test-double-vlen8.c: Likewise.
---
 math/bits/mathcalls.h             |  4 ++--
 math/gen-libm-have-vector-test.sh |  8 +++++++-
 math/math.h                       |  4 ++++
 math/test-double-vlen2.h          | 23 +++++++++++++++++++++--
 math/test-double-vlen4.h          | 23 +++++++++++++++++++++--
 math/test-double-vlen8.h          | 23 +++++++++++++++++++++--
 math/test-float-vlen16.h          |  6 ++++--
 math/test-float-vlen4.h           |  6 ++++--
 math/test-float-vlen8.h           |  6 ++++--
 math/test-vec-loop.h              |  8 ++++----
 10 files changed, 92 insertions(+), 19 deletions(-)

(limited to 'math')

diff --git a/math/bits/mathcalls.h b/math/bits/mathcalls.h
index f297aa7906..a881b7d69c 100644
--- a/math/bits/mathcalls.h
+++ b/math/bits/mathcalls.h
@@ -78,8 +78,8 @@ _Mdouble_END_NAMESPACE
 
 #ifdef __USE_GNU
 /* Cosine and sine of X.  */
-__MATHDECL (void,sincos,,
-	    (_Mdouble_ __x, _Mdouble_ *__sinx, _Mdouble_ *__cosx));
+__MATHDECL_VEC (void,sincos,,
+		(_Mdouble_ __x, _Mdouble_ *__sinx, _Mdouble_ *__cosx));
 #endif
 
 #if defined __USE_XOPEN_EXTENDED || defined __USE_ISOC99
diff --git a/math/gen-libm-have-vector-test.sh b/math/gen-libm-have-vector-test.sh
index 8c36078662..529ea0c1ac 100644
--- a/math/gen-libm-have-vector-test.sh
+++ b/math/gen-libm-have-vector-test.sh
@@ -33,7 +33,7 @@ print_defs()
   echo
 }
 
-for func in $(cat libm-test.inc | grep ALL_RM_TEST | grep -v define | grep -v RUN_TEST_LOOP_ff_f | sed -r "s/.*\(//; s/,.*//" ); do
+for func in $(cat libm-test.inc | grep ALL_RM_TEST | grep -v define | grep -v RUN_TEST_LOOP_ff_f | grep -v RUN_TEST_LOOP_fFF_11 | sed -r "s/.*\(//; s/,.*//" ); do
   print_defs ${func}
   print_defs ${func}f
   print_defs ${func}l
@@ -45,6 +45,12 @@ for func in $(cat libm-test.inc | grep ALL_RM_TEST | grep RUN_TEST_LOOP_ff_f | s
   print_defs ${func}l "_ff"
 done
 
+for func in $(cat libm-test.inc | grep ALL_RM_TEST | grep RUN_TEST_LOOP_fFF_11 | sed -r "s/.*\(//; s/,.*//" ); do
+  print_defs ${func} "_fFF"
+  print_defs ${func}f "_fFF"
+  print_defs ${func}l "_fFF"
+done
+
 # When all functions will use ALL_RM_TEST instead of using START directly,
 # this code can be removed.
 for func in $(grep 'START.*;$' libm-test.inc | sed -r "s/.*\(//; s/,.*//"); do
diff --git a/math/math.h b/math/math.h
index 7e959fca9f..c5115d7982 100644
--- a/math/math.h
+++ b/math/math.h
@@ -58,6 +58,10 @@ __BEGIN_DECLS
   __SIMD_DECL (__MATH_PRECNAME (function, suffix)) \
   __MATHCALL (function, suffix, args)
 
+#define __MATHDECL_VEC(type, function,suffix, args) \
+  __SIMD_DECL (__MATH_PRECNAME (function, suffix)) \
+  __MATHDECL(type, function,suffix, args)
+
 #define __MATHCALL(function,suffix, args)	\
   __MATHDECL (_Mdouble_,function,suffix, args)
 #define __MATHDECL(type, function,suffix, args) \
diff --git a/math/test-double-vlen2.h b/math/test-double-vlen2.h
index 2e8415b16a..3f117e510f 100644
--- a/math/test-double-vlen2.h
+++ b/math/test-double-vlen2.h
@@ -44,6 +44,7 @@
 
 #define WRAPPER_DECL(function) extern FLOAT function (FLOAT);
 #define WRAPPER_DECL_ff(function) extern FLOAT function (FLOAT, FLOAT);
+#define WRAPPER_DECL_fFF(function) extern void function (FLOAT, FLOAT *, FLOAT *);
 
 // Wrapper from scalar to vector function with vector length 2.
 #define VECTOR_WRAPPER(scalar_func, vector_func) \
@@ -54,7 +55,8 @@ FLOAT scalar_func (FLOAT x)			\
   VEC_TYPE mx;					\
   INIT_VEC_LOOP (mx, x, 2);			\
   VEC_TYPE mr = vector_func (mx);		\
-  TEST_VEC_LOOP (2);				\
+  TEST_VEC_LOOP (mr, 2);			\
+  return ((FLOAT) mr[0]);			\
 }
 
 // Wrapper from scalar 2 argument function to vector one.
@@ -67,5 +69,22 @@ FLOAT scalar_func (FLOAT x, FLOAT y)		\
   INIT_VEC_LOOP (mx, x, 2);			\
   INIT_VEC_LOOP (my, y, 2);			\
   VEC_TYPE mr = vector_func (mx, my);		\
-  TEST_VEC_LOOP (2);				\
+  TEST_VEC_LOOP (mr, 2);			\
+  return ((FLOAT) mr[0]);			\
+}
+
+// Wrapper from scalar 3 argument function to vector one.
+#define VECTOR_WRAPPER_fFF(scalar_func, vector_func) 	\
+extern void vector_func (VEC_TYPE, VEC_TYPE *, VEC_TYPE *);	\
+void scalar_func (FLOAT x, FLOAT * r, FLOAT * r1)		\
+{						\
+  int i;					\
+  VEC_TYPE mx, mr, mr1;				\
+  INIT_VEC_LOOP (mx, x, 2);			\
+  vector_func (mx, &mr, &mr1);			\
+  TEST_VEC_LOOP (mr, 2);			\
+  TEST_VEC_LOOP (mr1, 2);			\
+  *r = (FLOAT) mr[0];				\
+  *r1 = (FLOAT) mr1[0];				\
+  return;					\
 }
diff --git a/math/test-double-vlen4.h b/math/test-double-vlen4.h
index 4fca9d80b0..ffaba6916e 100644
--- a/math/test-double-vlen4.h
+++ b/math/test-double-vlen4.h
@@ -44,6 +44,7 @@
 
 #define WRAPPER_DECL(function) extern FLOAT function (FLOAT);
 #define WRAPPER_DECL_ff(function) extern FLOAT function (FLOAT, FLOAT);
+#define WRAPPER_DECL_fFF(function) extern void function (FLOAT, FLOAT *, FLOAT *);
 
 // Wrapper from scalar to vector function with vector length 4.
 #define VECTOR_WRAPPER(scalar_func, vector_func) \
@@ -54,7 +55,8 @@ FLOAT scalar_func (FLOAT x)			\
   VEC_TYPE mx;					\
   INIT_VEC_LOOP (mx, x, 4);			\
   VEC_TYPE mr = vector_func (mx);		\
-  TEST_VEC_LOOP (4);				\
+  TEST_VEC_LOOP (mr, 4);			\
+  return ((FLOAT) mr[0]);			\
 }
 
 // Wrapper from scalar 2 argument function to vector one.
@@ -67,5 +69,22 @@ FLOAT scalar_func (FLOAT x, FLOAT y)		\
   INIT_VEC_LOOP (mx, x, 4);			\
   INIT_VEC_LOOP (my, y, 4);			\
   VEC_TYPE mr = vector_func (mx, my);		\
-  TEST_VEC_LOOP (4);				\
+  TEST_VEC_LOOP (mr, 4);			\
+  return ((FLOAT) mr[0]);			\
+}
+
+// Wrapper from scalar 3 argument function to vector one.
+#define VECTOR_WRAPPER_fFF(scalar_func, vector_func) 	\
+extern void vector_func (VEC_TYPE, VEC_TYPE *, VEC_TYPE *);	\
+void scalar_func (FLOAT x, FLOAT * r, FLOAT * r1)		\
+{						\
+  int i;					\
+  VEC_TYPE mx, mr, mr1;				\
+  INIT_VEC_LOOP (mx, x, 4);			\
+  vector_func (mx, &mr, &mr1);			\
+  TEST_VEC_LOOP (mr, 4);			\
+  TEST_VEC_LOOP (mr1, 4);			\
+  *r = (FLOAT) mr[0];				\
+  *r1 = (FLOAT) mr1[0];				\
+  return;					\
 }
diff --git a/math/test-double-vlen8.h b/math/test-double-vlen8.h
index 6780e81513..bdfccbb4ff 100644
--- a/math/test-double-vlen8.h
+++ b/math/test-double-vlen8.h
@@ -44,6 +44,7 @@
 
 #define WRAPPER_DECL(function) extern FLOAT function (FLOAT);
 #define WRAPPER_DECL_ff(function) extern FLOAT function (FLOAT, FLOAT);
+#define WRAPPER_DECL_fFF(function) extern void function (FLOAT, FLOAT *, FLOAT *);
 
 // Wrapper from scalar to vector function with vector length 8.
 #define VECTOR_WRAPPER(scalar_func, vector_func) \
@@ -54,7 +55,8 @@ FLOAT scalar_func (FLOAT x)			\
   VEC_TYPE mx;					\
   INIT_VEC_LOOP (mx, x, 8);			\
   VEC_TYPE mr = vector_func (mx);		\
-  TEST_VEC_LOOP (8);				\
+  TEST_VEC_LOOP (mr, 8);			\
+  return ((FLOAT) mr[0]);                       \
 }
 
 // Wrapper from scalar 2 argument function to vector one.
@@ -67,5 +69,22 @@ FLOAT scalar_func (FLOAT x, FLOAT y)		\
   INIT_VEC_LOOP (mx, x, 8);			\
   INIT_VEC_LOOP (my, y, 8);			\
   VEC_TYPE mr = vector_func (mx, my);		\
-  TEST_VEC_LOOP (8);				\
+  TEST_VEC_LOOP (mr, 8);			\
+  return ((FLOAT) mr[0]);                       \
+}
+
+// Wrapper from scalar 3 argument function to vector one.
+#define VECTOR_WRAPPER_fFF(scalar_func, vector_func) 	\
+extern void vector_func (VEC_TYPE, VEC_TYPE *, VEC_TYPE *);	\
+void scalar_func (FLOAT x, FLOAT * r, FLOAT * r1)		\
+{						\
+  int i;					\
+  VEC_TYPE mx, mr, mr1;				\
+  INIT_VEC_LOOP (mx, x, 8);			\
+  vector_func (mx, &mr, &mr1);			\
+  TEST_VEC_LOOP (mr, 8);			\
+  TEST_VEC_LOOP (mr1, 8);			\
+  *r = (FLOAT) mr[0];				\
+  *r1 = (FLOAT) mr1[0];				\
+  return;					\
 }
diff --git a/math/test-float-vlen16.h b/math/test-float-vlen16.h
index 008e15ea13..802ae7bda2 100644
--- a/math/test-float-vlen16.h
+++ b/math/test-float-vlen16.h
@@ -54,7 +54,8 @@ FLOAT scalar_func (FLOAT x)			\
   VEC_TYPE mx;					\
   INIT_VEC_LOOP (mx, x, 16);			\
   VEC_TYPE mr = vector_func (mx);		\
-  TEST_VEC_LOOP (16);				\
+  TEST_VEC_LOOP (mr, 16);			\
+  return ((FLOAT) mr[0]);			\
 }
 
 // Wrapper from scalar 2 argument function to vector one.
@@ -67,5 +68,6 @@ FLOAT scalar_func (FLOAT x, FLOAT y)		\
   INIT_VEC_LOOP (mx, x, 16);			\
   INIT_VEC_LOOP (my, y, 16);			\
   VEC_TYPE mr = vector_func (mx, my);		\
-  TEST_VEC_LOOP (16);				\
+  TEST_VEC_LOOP (mr, 16);			\
+  return ((FLOAT) mr[0]);			\
 }
diff --git a/math/test-float-vlen4.h b/math/test-float-vlen4.h
index eaf4b4b13e..f5e530b756 100644
--- a/math/test-float-vlen4.h
+++ b/math/test-float-vlen4.h
@@ -54,7 +54,8 @@ FLOAT scalar_func (FLOAT x)			\
   VEC_TYPE mx;					\
   INIT_VEC_LOOP (mx, x, 4);			\
   VEC_TYPE mr = vector_func (mx);		\
-  TEST_VEC_LOOP (4);				\
+  TEST_VEC_LOOP (mr, 4);			\
+  return ((FLOAT) mr[0]);			\
 }
 
 // Wrapper from scalar 2 argument function to vector one.
@@ -67,5 +68,6 @@ FLOAT scalar_func (FLOAT x, FLOAT y)		\
   INIT_VEC_LOOP (mx, x, 4);			\
   INIT_VEC_LOOP (my, y, 4);			\
   VEC_TYPE mr = vector_func (mx, my);		\
-  TEST_VEC_LOOP (4);				\
+  TEST_VEC_LOOP (mr, 4);			\
+  return ((FLOAT) mr[0]);			\
 }
diff --git a/math/test-float-vlen8.h b/math/test-float-vlen8.h
index 1a2eb52318..697849f070 100644
--- a/math/test-float-vlen8.h
+++ b/math/test-float-vlen8.h
@@ -54,7 +54,8 @@ FLOAT scalar_func (FLOAT x)			\
   VEC_TYPE mx;					\
   INIT_VEC_LOOP (mx, x, 8);			\
   VEC_TYPE mr = vector_func (mx);		\
-  TEST_VEC_LOOP (8);				\
+  TEST_VEC_LOOP (mr, 8);			\
+  return ((FLOAT) mr[0]);			\
 }
 
 // Wrapper from scalar 2 argument function to vector one.
@@ -67,5 +68,6 @@ FLOAT scalar_func (FLOAT x, FLOAT y)		\
   INIT_VEC_LOOP (mx, x, 8);			\
   INIT_VEC_LOOP (my, y, 8);			\
   VEC_TYPE mr = vector_func (mx, my);		\
-  TEST_VEC_LOOP (8);				\
+  TEST_VEC_LOOP (mr, 8);			\
+  return ((FLOAT) mr[0]);			\
 }
diff --git a/math/test-vec-loop.h b/math/test-vec-loop.h
index 1a76c3ed58..66d7692c22 100644
--- a/math/test-vec-loop.h
+++ b/math/test-vec-loop.h
@@ -17,17 +17,17 @@
    <http://www.gnu.org/licenses/>.  */
 
 /* This macro is used in VECTOR_WRAPPER macros for vector tests.  */
-#define TEST_VEC_LOOP(len) 					\
+#define TEST_VEC_LOOP(vec, len) 				\
   do								\
     {								\
       for (i = 1; i < len; i++)					\
         {							\
-          if ((FLOAT) mr[0] != (FLOAT) mr[i])			\
+          if ((FLOAT) vec[0] != (FLOAT) vec[i])			\
             {							\
-              return ((FLOAT) mr[0] + 0.1);			\
+              vec[0] = (FLOAT) vec[0] + 0.1;			\
+	      break;						\
             }							\
         }							\
-      return ((FLOAT) mr[0]);					\
     }								\
   while (0)
 
-- 
cgit 1.4.1