From 5648ca0d46d1e9509b445f029b9bb36dd4886007 Mon Sep 17 00:00:00 2001
From: Bert Tenjy <bert.tenjy@gmail.com>
Date: Sat, 6 Apr 2019 21:11:52 +0000
Subject: PPC64: Add libmvec SIMD double-precision sincos function [BZ #24207]

Implements double-precision vector sincos function. The polynomial approxima-
ting algorithm is adapted for PPC64 from x86_64 [commit #c9a8c526ac].

The patch has been tested on PPC64/POWER8 Little Endian and Big Endian.
Testing uses the framework created for libmvec on x86_64 which runs tests on
issuing 'make check'. Tests of the new vector sincos function all pass.

Reviewed-by: Tulio Magno Quites Machado Filho <tuliom@linux.ibm.com>
---
 NEWS                                               |   1 +
 sysdeps/powerpc/bits/math-vector.h                 |   2 +
 sysdeps/powerpc/fpu/libm-test-ulps                 |   3 +
 sysdeps/powerpc/powerpc64/fpu/Versions             |   1 +
 sysdeps/powerpc/powerpc64/fpu/multiarch/Makefile   |   6 +-
 .../fpu/multiarch/test-double-vlen2-wrappers.c     |   2 +
 .../powerpc64/fpu/multiarch/vec_d_sincos2_vsx.c    |  33 ++++
 .../powerpc64/fpu/multiarch/vec_d_sincos2_vsx.h    | 169 +++++++++++++++++++++
 .../sysv/linux/powerpc/powerpc64/libmvec.abilist   |   1 +
 9 files changed, 216 insertions(+), 2 deletions(-)
 create mode 100644 sysdeps/powerpc/powerpc64/fpu/multiarch/vec_d_sincos2_vsx.c
 create mode 100644 sysdeps/powerpc/powerpc64/fpu/multiarch/vec_d_sincos2_vsx.h

diff --git a/NEWS b/NEWS
index 4a7e434330..08a11d4fee 100644
--- a/NEWS
+++ b/NEWS
@@ -264,6 +264,7 @@ Major new features:
   - single-precision cosine: cosf
   - double-precision sine: sin
   - single-precision sine: sinf
+  - double-precision sincos: sincos
 
   GCC support for auto-vectorization of functions on PPC64 is not yet
   available. Until that is done, the new vector math functions are
diff --git a/sysdeps/powerpc/bits/math-vector.h b/sysdeps/powerpc/bits/math-vector.h
index 78d9db64bf..52b4b09024 100644
--- a/sysdeps/powerpc/bits/math-vector.h
+++ b/sysdeps/powerpc/bits/math-vector.h
@@ -36,6 +36,8 @@
 # ifdef __DECL_SIMD_PPC64
 #  undef __DECL_SIMD_cos
 #  define __DECL_SIMD_cos __DECL_SIMD_PPC64
+#  undef __DECL_SIMD_sincos
+#  define __DECL_SIMD_sincos __DECL_SIMD_PPC64
 
 # endif
 #endif
diff --git a/sysdeps/powerpc/fpu/libm-test-ulps b/sysdeps/powerpc/fpu/libm-test-ulps
index 32a7a8483c..14c82e4995 100644
--- a/sysdeps/powerpc/fpu/libm-test-ulps
+++ b/sysdeps/powerpc/fpu/libm-test-ulps
@@ -2616,6 +2616,9 @@ ifloat128: 3
 ildouble: 7
 ldouble: 7
 
+Function: "sincos_vlen2":
+double: 2
+
 Function: "sinh":
 double: 2
 float: 2
diff --git a/sysdeps/powerpc/powerpc64/fpu/Versions b/sysdeps/powerpc/powerpc64/fpu/Versions
index f7c8fd886b..8ab0f00f18 100644
--- a/sysdeps/powerpc/powerpc64/fpu/Versions
+++ b/sysdeps/powerpc/powerpc64/fpu/Versions
@@ -1,5 +1,6 @@
 libmvec {
   GLIBC_2.30 {
     _ZGVbN2v_cos; _ZGVbN4v_cosf; _ZGVbN2v_sin; _ZGVbN4v_sinf;
+    _ZGVbN2vvv_sincos;
   }
 }
diff --git a/sysdeps/powerpc/powerpc64/fpu/multiarch/Makefile b/sysdeps/powerpc/powerpc64/fpu/multiarch/Makefile
index b821641bb9..454631b738 100644
--- a/sysdeps/powerpc/powerpc64/fpu/multiarch/Makefile
+++ b/sysdeps/powerpc/powerpc64/fpu/multiarch/Makefile
@@ -1,10 +1,12 @@
 ifeq ($(subdir),mathvec)
 libmvec-sysdep_routines += vec_d_cos2_vsx vec_s_cosf4_vsx \
-			   vec_d_sin2_vsx vec_s_sinf4_vsx
+			   vec_d_sin2_vsx vec_s_sinf4_vsx \
+			   vec_d_sincos2_vsx
 CFLAGS-vec_d_cos2_vsx.c += -mabi=altivec -maltivec -mvsx
 CFLAGS-vec_s_cosf4_vsx.c += -mabi=altivec -maltivec -mvsx
 CFLAGS-vec_d_sin2_vsx.c += -mabi=altivec -maltivec -mvsx
 CFLAGS-vec_s_sinf4_vsx.c += -mabi=altivec -maltivec -mvsx
+CFLAGS-vec_d_sincos2_vsx.c += -mabi=altivec -maltivec -mvsx
 endif
 
 # Variables for libmvec tests.
@@ -12,7 +14,7 @@ ifeq ($(subdir),math)
 ifeq ($(build-mathvec),yes)
 libmvec-tests += double-vlen2 float-vlen4
 
-double-vlen2-funcs = cos sin
+double-vlen2-funcs = cos sin sincos
 float-vlen4-funcs = cos sin
 
 double-vlen2-arch-ext-cflags = -mabi=altivec -maltivec -mvsx -DREQUIRE_VSX
diff --git a/sysdeps/powerpc/powerpc64/fpu/multiarch/test-double-vlen2-wrappers.c b/sysdeps/powerpc/powerpc64/fpu/multiarch/test-double-vlen2-wrappers.c
index 10a1ec281b..7082a3500e 100644
--- a/sysdeps/powerpc/powerpc64/fpu/multiarch/test-double-vlen2-wrappers.c
+++ b/sysdeps/powerpc/powerpc64/fpu/multiarch/test-double-vlen2-wrappers.c
@@ -23,3 +23,5 @@
 
 VECTOR_WRAPPER (WRAPPER_NAME (cos), _ZGVbN2v_cos)
 VECTOR_WRAPPER (WRAPPER_NAME (sin), _ZGVbN2v_sin)
+
+VECTOR_WRAPPER_fFF (WRAPPER_NAME (sincos), _ZGVbN2vvv_sincos)
diff --git a/sysdeps/powerpc/powerpc64/fpu/multiarch/vec_d_sincos2_vsx.c b/sysdeps/powerpc/powerpc64/fpu/multiarch/vec_d_sincos2_vsx.c
new file mode 100644
index 0000000000..0dbde4bbd3
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/fpu/multiarch/vec_d_sincos2_vsx.c
@@ -0,0 +1,33 @@
+/* Function sincos vectorized with VSX.
+   Copyright (C) 2019 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <math.h>
+#include "vec_d_trig_data.h"
+#include "vec_d_sincos2_vsx.h"
+
+void
+_ZGVbN2vvv_sincos (vector double x, vector double * sines_x, vector double * cosines_x)
+{
+
+  /* Call vector sine evaluator.  */
+  *sines_x = __d_sin_poly_eval(x);
+
+  /* Call vector cosine evaluator.  */
+  *cosines_x = __d_cos_poly_eval(x);
+
+} /* Function _ZGVbN2_vvv_sincos.  */
diff --git a/sysdeps/powerpc/powerpc64/fpu/multiarch/vec_d_sincos2_vsx.h b/sysdeps/powerpc/powerpc64/fpu/multiarch/vec_d_sincos2_vsx.h
new file mode 100644
index 0000000000..1ff04528d2
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/fpu/multiarch/vec_d_sincos2_vsx.h
@@ -0,0 +1,169 @@
+/* Definitions to simplify code by allowing reuse of sine and cosine
+   function implementations.
+   Copyright (C) 2019 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <math.h>
+#include "vec_d_trig_data.h"
+
+static inline vector double
+__d_cos_poly_eval (vector double x)
+{
+
+  /*
+   ARGUMENT RANGE REDUCTION:
+   Add Pi/2 to argument: X' = X+Pi/2.  */
+  vector double x_prime = (vector double) __d_half_pi + x;
+
+  /* Get absolute argument value: X' = |X'|.  */
+  vector double abs_x_prime = vec_abs (x_prime);
+
+  /* Y = X'*InvPi + RS : right shifter add.  */
+  vector double y = (x_prime * __d_inv_pi) + __d_rshifter;
+
+  /* Check for large arguments path.  */
+  vector bool long long large_in = vec_cmpgt (abs_x_prime, __d_rangeval);
+
+  /* N = Y - RS : right shifter sub.  */
+  vector double n = y - __d_rshifter;
+
+  /* SignRes = Y<<63 : shift LSB to MSB place for result sign.  */
+  vector double sign_res = (vector double) vec_sl ((vector long long) y,
+						   (vector unsigned long long)
+						   vec_splats (63));
+
+  /* N = N - 0.5.  */
+  n = n - __d_one_half;
+
+  /* R = X - N*Pi1.  */
+  vector double r = x - (n * __d_pi1_fma);
+
+  /* R = R - N*Pi2.  */
+  r = r - (n * __d_pi2_fma);
+
+  /* R = R - N*Pi3.  */
+  r = r - (n * __d_pi3_fma);
+
+  /* R2 = R*R.  */
+  vector double r2 = r * r;
+
+  /* Poly = C3+R2*(C4+R2*(C5+R2*(C6+R2*C7))).  */
+  vector double poly = r2 * __d_coeff7 + __d_coeff6;
+  poly = poly * r2 + __d_coeff5;
+  poly = poly * r2 + __d_coeff4;
+  poly = poly * r2 + __d_coeff3;
+
+  /* Poly = R+R*(R2*(C1+R2*(C2+R2*Poly))).  */
+  poly = poly * r2 + __d_coeff2;
+  poly = poly * r2 + __d_coeff1;
+  poly = poly * r2 * r + r;
+
+  /*
+     RECONSTRUCTION:
+     Final sign setting: Res = Poly^SignRes.  */
+  vector double out
+    = (vector double) ((vector long long) poly ^ (vector long long) sign_res);
+
+  if (large_in[0] != 0)
+    out[0] = cos (x[0]);
+
+  if (large_in[1] != 0)
+    out[1] = cos (x[1]);
+
+  return out;
+
+} /* Function __d_cos_poly_eval.  */
+
+static inline vector double
+__d_sin_poly_eval (vector double x)
+{
+  /* ALGORITHM DESCRIPTION:
+
+      ( low accuracy ( < 4ulp ) or enhanced performance
+      ( half of correct mantissa ) implementation )
+
+     Argument representation:
+     arg = N*Pi + R
+
+     Result calculation:
+     sin(arg) = sin(N*Pi + R) = (-1)^N * sin(R)
+     sin(R) is approximated by corresponding polynomial.  */
+
+  /* ARGUMENT RANGE REDUCTION: X' = |X|.  */
+  vector double abs_x_prime = vec_abs (x);
+
+  /* Y = X'*InvPi + RS : right shifter add.  */
+  vector double y = (abs_x_prime * __d_inv_pi) + __d_rshifter;
+
+  /* N = Y - RS : right shifter sub.  */
+  vector double n = y - __d_rshifter;
+
+  /* SignRes = Y<<63 : shift LSB to MSB place for result sign.  */
+  vector double sign_res = (vector double) vec_sl ((vector long long) y,
+                                                   (vector unsigned long long)
+                                                   vec_splats (63));
+
+  /* Check for large arguments path.  */
+  vector bool long long large_in = vec_cmpgt (abs_x_prime, __d_rangeval);
+
+  /* R = X' - N*Pi1.  */
+  vector double r = abs_x_prime - (n * __d_pi1_fma);
+
+  /* R = R - N*Pi2.  */
+  r = r - (n * __d_pi2_fma);
+
+  /* R = R - N*Pi3.  */
+  r = r - (n * __d_pi3_fma);
+
+  /* POLYNOMIAL APPROXIMATION: R2 = R*R.  */
+  vector double r2 = r * r;
+
+  /* R = R^SignRes : update sign of reduced argument.  */
+  vector double r_sign
+      = (vector double) ((vector long long) r ^ (vector long long) sign_res);
+
+  /* Poly = C3+R2*(C4+R2*(C5+R2*(C6+R2*C7))).  */
+  vector double poly = r2 * __d_coeff7_sin + __d_coeff6_sin;
+  poly = poly * r2 + __d_coeff5_sin;
+  poly = poly * r2 + __d_coeff4_sin;
+  poly = poly * r2 + __d_coeff3_sin;
+
+  /* Poly = R2*(C1+R2*(C2+R2*Poly)).  */
+  poly = poly * r2 + __d_coeff2_sin;
+  poly = poly * r2 + __d_coeff1_sin;
+  poly = poly * r2;
+
+  /* Poly = Poly*R + R.  */
+  poly = poly * r_sign + r_sign;
+
+  /* SignX: -ve sign bit of X.  */
+  vector double neg_sign
+      = (vector double) vec_andc ((vector bool long long) x, __d_abs_mask);
+
+  /* RECONSTRUCTION: Final sign setting: Res = Poly^SignX.  */
+  vector double out
+      = (vector double) ((vector long long) poly ^ (vector long long) neg_sign);
+
+  if (large_in[0] != 0)
+    out[0] = sin (x[0]);
+
+  if (large_in[1] != 0)
+    out[1] = sin (x[1]);
+
+  return out;
+
+} /* Function __d_sin_poly_eval.  */
diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/libmvec.abilist b/sysdeps/unix/sysv/linux/powerpc/powerpc64/libmvec.abilist
index 48a742c3ef..0337d3adb5 100644
--- a/sysdeps/unix/sysv/linux/powerpc/powerpc64/libmvec.abilist
+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/libmvec.abilist
@@ -1,4 +1,5 @@
 GLIBC_2.30 _ZGVbN2v_cos F
 GLIBC_2.30 _ZGVbN2v_sin F
+GLIBC_2.30 _ZGVbN2vvv_sincos F
 GLIBC_2.30 _ZGVbN4v_cosf F
 GLIBC_2.30 _ZGVbN4v_sinf F
-- 
cgit 1.4.1