about summary refs log tree commit diff
path: root/sysdeps/x86/fpu
diff options
context:
space:
mode:
author Joseph Myers <joseph@codesourcery.com> 2017-06-26 22:01:27 +0000
committer Joseph Myers <joseph@codesourcery.com> 2017-06-26 22:02:24 +0000
commit c86ed71d633c22d6f638576f7660c52a5f783d66 (patch)
tree 610b82948e239aed3d612a99c01318dedf497422 /sysdeps/x86/fpu
parent 3f823e87ccbf3723eb4eeb63b0619f1a0ceb174e (diff)
download glibc-c86ed71d633c22d6f638576f7660c52a5f783d66.tar.gz
glibc-c86ed71d633c22d6f638576f7660c52a5f783d66.tar.xz
glibc-c86ed71d633c22d6f638576f7660c52a5f783d66.zip
Add float128 support for x86_64, x86.
This patch enables float128 support for x86_64 and x86.  All GCC
versions that can build glibc provide the required support, but since
GCC 6 and before don't provide __builtin_nanq / __builtin_nansq, sNaN
tests and some tests of NaN payloads need to be disabled with such
compilers (this does not affect the generated glibc binaries at all,
just the tests).  bits/floatn.h declares float128 support to be
available for GCC versions that provide the required libgcc support
(4.3 for x86_64, 4.4 for i386 GNU/Linux, 4.5 for i386 GNU/Hurd);
compilation-only support was present some time before then, but not
really useful without the libgcc functions.

fenv_private.h needed updating to avoid trying to put _Float128 values
in registers.  I make no assertion of optimality of the
math_opt_barrier / math_force_eval definitions for this case; they are
simply intended to be sufficient to work correctly.

Tested for x86_64 and x86, with GCC 7 and GCC 6.  (Testing for x32 was
compilation tests only with build-many-glibcs.py to verify the ABI
baseline updates.  I have not done any testing for Hurd, although the
float128 support is enabled there as for GNU/Linux.)

	* sysdeps/i386/Implies: Add ieee754/float128.
	* sysdeps/x86_64/Implies: Likewise.
	* sysdeps/x86/bits/floatn.h: New file.
	* sysdeps/x86/float128-abi.h: Likewise.
	* manual/math.texi (Mathematics): Document support for _Float128
	on x86_64 and x86.
	* sysdeps/i386/fpu/fenv_private.h: Include <bits/floatn.h>.
	(math_opt_barrier): Do not put _Float128 values in floating-point
	registers.
	(math_force_eval): Likewise.
	[__x86_64__] (SET_RESTORE_ROUNDF128): New macro.
	* sysdeps/x86/fpu/Makefile [$(subdir) = math] (CPPFLAGS): Append
	to Makefile variable.
	* sysdeps/x86/fpu/e_sqrtf128.c: New file.
	* sysdeps/x86/fpu/sfp-machine.h: Likewise.  Based on libgcc.
	* sysdeps/x86/math-tests.h: New file.
	* math/libm-test-support.h (XFAIL_FLOAT128_PAYLOAD): New macro.
	* math/libm-test-getpayload.inc (getpayload_test_data): Use
	XFAIL_FLOAT128_PAYLOAD.
	* math/libm-test-setpayload.inc (setpayload_test_data): Likewise.
	* math/libm-test-totalorder.inc (totalorder_test_data): Likewise.
	* math/libm-test-totalordermag.inc (totalordermag_test_data):
	Likewise.
	* sysdeps/unix/sysv/linux/i386/libc.abilist: Update.
	* sysdeps/unix/sysv/linux/i386/libm.abilist: Likewise.
	* sysdeps/unix/sysv/linux/x86_64/64/libc.abilist: Likewise.
	* sysdeps/unix/sysv/linux/x86_64/64/libm.abilist: Likewise.
	* sysdeps/unix/sysv/linux/x86_64/x32/libc.abilist: Likewise.
	* sysdeps/unix/sysv/linux/x86_64/x32/libm.abilist: Likewise.
	* sysdeps/i386/fpu/libm-test-ulps: Likewise.
	* sysdeps/i386/i686/fpu/multiarch/libm-test-ulps: Likewise.
	* sysdeps/x86_64/fpu/libm-test-ulps: Likewise.
Diffstat (limited to 'sysdeps/x86/fpu')
-rw-r--r-- sysdeps/x86/fpu/Makefile 3
-rw-r--r-- sysdeps/x86/fpu/e_sqrtf128.c 47
-rw-r--r-- sysdeps/x86/fpu/sfp-machine.h 209
3 files changed, 259 insertions, 0 deletions
diff --git a/sysdeps/x86/fpu/Makefile b/sysdeps/x86/fpu/Makefile
index a8047a4504..600e42c3db 100644
--- a/sysdeps/x86/fpu/Makefile
+++ b/sysdeps/x86/fpu/Makefile
@@ -1,4 +1,7 @@
 ifeq ($(subdir),math)
+# sqrtf128 requires soft-fp.
+CPPFLAGS += -I../soft-fp
+
 libm-support += powl_helper
 tests += test-fenv-sse test-fenv-clear-sse test-fenv-x87 test-fenv-sse-2 \
 	 test-flt-eval-method-387 test-flt-eval-method-sse
diff --git a/sysdeps/x86/fpu/e_sqrtf128.c b/sysdeps/x86/fpu/e_sqrtf128.c
new file mode 100644
index 0000000000..f6a62a83de
--- /dev/null
+++ b/sysdeps/x86/fpu/e_sqrtf128.c
@@ -0,0 +1,47 @@
+/* soft-fp sqrt for _Float128
+   Return sqrt(a)
+   Copyright (C) 2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   In addition to the permissions in the GNU Lesser General Public
+   License, the Free Software Foundation gives you unlimited
+   permission to link the compiled version of this file into
+   combinations with other programs, and to distribute those
+   combinations without any restriction coming from the use of this
+   file.  (The Lesser General Public License restrictions do apply in
+   other respects; for example, they cover modification of the file,
+   and distribution when not linked into a combine executable.)
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <soft-fp.h>
+#include <quad.h>
+
+__float128
+__ieee754_sqrtf128 (__float128 a)
+{ /* Return sqrt (a), evaluated through the soft-fp macros.  */
+  FP_DECL_EX;		/* Presumably declares _fex and _fcw (soft-fp.h, not shown here).  */
+  FP_DECL_Q (A);	/* Working variable that A is unpacked into below.  */
+  FP_DECL_Q (R);	/* Working variable receiving the square root.  */
+  __float128 r;
+
+  FP_INIT_ROUNDMODE;	/* Store MXCSR (x86_64) or the x87 control word into _fcw.  */
+  FP_UNPACK_Q (A, a);
+  FP_SQRT_Q (R, A);
+  FP_PACK_Q (r, R);
+  FP_HANDLE_EXCEPTIONS;	/* Calls __sfp_handle_exceptions (_fex) when _fex is nonzero.  */
+  return r;
+}
+strong_alias (__ieee754_sqrtf128, __sqrtf128_finite)
diff --git a/sysdeps/x86/fpu/sfp-machine.h b/sysdeps/x86/fpu/sfp-machine.h
new file mode 100644
index 0000000000..df8906acb4
--- /dev/null
+++ b/sysdeps/x86/fpu/sfp-machine.h
@@ -0,0 +1,209 @@
+/* Configure soft-fp for building sqrtf128.  Based on sfp-machine.h in
+   libgcc, with soft-float and other irrelevant parts removed.  */
+
+/* The type of the result of a floating point comparison.  This must
+   match `__libgcc_cmp_return__' in GCC for the target.  */
+typedef int __gcc_CMPtype __attribute__ ((mode (__libgcc_cmp_return__)));
+#define CMPtype __gcc_CMPtype
+
+#ifdef __x86_64__
+# define _FP_W_TYPE_SIZE	64
+# define _FP_W_TYPE		unsigned long long
+# define _FP_WS_TYPE		signed long long
+# define _FP_I_TYPE		long long
+
+typedef int TItype __attribute__ ((mode (TI)));
+typedef unsigned int UTItype __attribute__ ((mode (TI)));
+
+# define TI_BITS (__CHAR_BIT__ * (int)sizeof(TItype))
+
+# define _FP_MUL_MEAT_Q(R,X,Y)				\
+  _FP_MUL_MEAT_2_wide(_FP_WFRACBITS_Q,R,X,Y,umul_ppmm)
+
+# define _FP_DIV_MEAT_Q(R,X,Y)   _FP_DIV_MEAT_2_udiv(Q,R,X,Y)
+
+# define _FP_NANFRAC_S		_FP_QNANBIT_S
+# define _FP_NANFRAC_D		_FP_QNANBIT_D
+# define _FP_NANFRAC_E		_FP_QNANBIT_E, 0
+# define _FP_NANFRAC_Q		_FP_QNANBIT_Q, 0
+
+# define FP_EX_SHIFT 7
+
+# define _FP_DECL_EX \
+  unsigned int _fcw __attribute__ ((unused)) = FP_RND_NEAREST;
+
+# define FP_RND_NEAREST		0
+# define FP_RND_ZERO		0x6000
+# define FP_RND_PINF		0x4000
+# define FP_RND_MINF		0x2000
+
+# define FP_RND_MASK		0x6000
+
+# define FP_INIT_ROUNDMODE					\
+  do {								\
+    __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (_fcw));	\
+  } while (0)
+#else
+# define _FP_W_TYPE_SIZE	32
+# define _FP_W_TYPE		unsigned int
+# define _FP_WS_TYPE		signed int
+# define _FP_I_TYPE		int
+
+# define __FP_FRAC_ADD_4(r3,r2,r1,r0,x3,x2,x1,x0,y3,y2,y1,y0)	\
+  __asm__ ("add{l} {%11,%3|%3,%11}\n\t"				\
+	   "adc{l} {%9,%2|%2,%9}\n\t"				\
+	   "adc{l} {%7,%1|%1,%7}\n\t"				\
+	   "adc{l} {%5,%0|%0,%5}"				\
+	   : "=r" ((USItype) (r3)),				\
+	     "=&r" ((USItype) (r2)),				\
+	     "=&r" ((USItype) (r1)),				\
+	     "=&r" ((USItype) (r0))				\
+	   : "%0" ((USItype) (x3)),				\
+	     "g" ((USItype) (y3)),				\
+	     "%1" ((USItype) (x2)),				\
+	     "g" ((USItype) (y2)),				\
+	     "%2" ((USItype) (x1)),				\
+	     "g" ((USItype) (y1)),				\
+	     "%3" ((USItype) (x0)),				\
+	     "g" ((USItype) (y0)))
+# define __FP_FRAC_ADD_3(r2,r1,r0,x2,x1,x0,y2,y1,y0)		\
+  __asm__ ("add{l} {%8,%2|%2,%8}\n\t"				\
+	   "adc{l} {%6,%1|%1,%6}\n\t"				\
+	   "adc{l} {%4,%0|%0,%4}"				\
+	   : "=r" ((USItype) (r2)),				\
+	     "=&r" ((USItype) (r1)),				\
+	     "=&r" ((USItype) (r0))				\
+	   : "%0" ((USItype) (x2)),				\
+	     "g" ((USItype) (y2)),				\
+	     "%1" ((USItype) (x1)),				\
+	     "g" ((USItype) (y1)),				\
+	     "%2" ((USItype) (x0)),				\
+	     "g" ((USItype) (y0)))
+# define __FP_FRAC_SUB_4(r3,r2,r1,r0,x3,x2,x1,x0,y3,y2,y1,y0)	\
+  __asm__ ("sub{l} {%11,%3|%3,%11}\n\t"				\
+	   "sbb{l} {%9,%2|%2,%9}\n\t"				\
+	   "sbb{l} {%7,%1|%1,%7}\n\t"				\
+	   "sbb{l} {%5,%0|%0,%5}"				\
+	   : "=r" ((USItype) (r3)),				\
+	     "=&r" ((USItype) (r2)),				\
+	     "=&r" ((USItype) (r1)),				\
+	     "=&r" ((USItype) (r0))				\
+	   : "0" ((USItype) (x3)),				\
+	     "g" ((USItype) (y3)),				\
+	     "1" ((USItype) (x2)),				\
+	     "g" ((USItype) (y2)),				\
+	     "2" ((USItype) (x1)),				\
+	     "g" ((USItype) (y1)),				\
+	     "3" ((USItype) (x0)),				\
+	     "g" ((USItype) (y0)))
+# define __FP_FRAC_SUB_3(r2,r1,r0,x2,x1,x0,y2,y1,y0)		\
+  __asm__ ("sub{l} {%8,%2|%2,%8}\n\t"				\
+	   "sbb{l} {%6,%1|%1,%6}\n\t"				\
+	   "sbb{l} {%4,%0|%0,%4}"				\
+	   : "=r" ((USItype) (r2)),				\
+	     "=&r" ((USItype) (r1)),				\
+	     "=&r" ((USItype) (r0))				\
+	   : "0" ((USItype) (x2)),				\
+	     "g" ((USItype) (y2)),				\
+	     "1" ((USItype) (x1)),				\
+	     "g" ((USItype) (y1)),				\
+	     "2" ((USItype) (x0)),				\
+	     "g" ((USItype) (y0)))
+# define __FP_FRAC_ADDI_4(x3,x2,x1,x0,i)			\
+  __asm__ ("add{l} {%4,%3|%3,%4}\n\t"				\
+	   "adc{l} {$0,%2|%2,0}\n\t"				\
+	   "adc{l} {$0,%1|%1,0}\n\t"				\
+	   "adc{l} {$0,%0|%0,0}"				\
+	   : "+r" ((USItype) (x3)),				\
+	     "+&r" ((USItype) (x2)),				\
+	     "+&r" ((USItype) (x1)),				\
+	     "+&r" ((USItype) (x0))				\
+	   : "g" ((USItype) (i)))
+
+
+# define _FP_MUL_MEAT_S(R,X,Y)				\
+  _FP_MUL_MEAT_1_wide(_FP_WFRACBITS_S,R,X,Y,umul_ppmm)
+# define _FP_MUL_MEAT_D(R,X,Y)				\
+  _FP_MUL_MEAT_2_wide(_FP_WFRACBITS_D,R,X,Y,umul_ppmm)
+# define _FP_MUL_MEAT_Q(R,X,Y)				\
+  _FP_MUL_MEAT_4_wide(_FP_WFRACBITS_Q,R,X,Y,umul_ppmm)
+
+# define _FP_DIV_MEAT_S(R,X,Y)   _FP_DIV_MEAT_1_loop(S,R,X,Y)
+# define _FP_DIV_MEAT_D(R,X,Y)   _FP_DIV_MEAT_2_udiv(D,R,X,Y)
+# define _FP_DIV_MEAT_Q(R,X,Y)   _FP_DIV_MEAT_4_udiv(Q,R,X,Y)
+
+# define _FP_NANFRAC_S		_FP_QNANBIT_S
+# define _FP_NANFRAC_D		_FP_QNANBIT_D, 0
+/* Even though XFmode is 12 bytes, we have to pad it to 16 bytes,
+   since soft-fp emulation is done in 16 bytes.  */
+# define _FP_NANFRAC_E		_FP_QNANBIT_E, 0, 0, 0
+# define _FP_NANFRAC_Q		_FP_QNANBIT_Q, 0, 0, 0
+
+# define FP_EX_SHIFT 0
+
+# define _FP_DECL_EX \
+  unsigned short _fcw __attribute__ ((unused)) = FP_RND_NEAREST;
+
+# define FP_RND_NEAREST		0
+# define FP_RND_ZERO		0xc00
+# define FP_RND_PINF		0x800
+# define FP_RND_MINF		0x400
+
+# define FP_RND_MASK		0xc00
+
+# define FP_INIT_ROUNDMODE				\
+  do {							\
+    __asm__ __volatile__ ("fnstcw\t%0" : "=m" (_fcw));	\
+  } while (0)
+#endif
+
+#define _FP_KEEPNANFRACP	1
+#define _FP_QNANNEGATEDP 0
+
+#define _FP_NANSIGN_S		1
+#define _FP_NANSIGN_D		1
+#define _FP_NANSIGN_E		1
+#define _FP_NANSIGN_Q		1
+
+/* Here is something Intel misdesigned: the specs don't define
+   the case where we have two NaNs with same mantissas, but
+   different sign. Different operations pick up different NaNs.  */
+#define _FP_CHOOSENAN(fs, wc, R, X, Y, OP)			\
+  do {								\
+    if (_FP_FRAC_GT_##wc(X, Y)					\
+	|| (_FP_FRAC_EQ_##wc(X,Y) && (OP == '+' || OP == '*')))	\
+      {								\
+	R##_s = X##_s;						\
+	_FP_FRAC_COPY_##wc(R,X);				\
+      }								\
+    else							\
+      {								\
+	R##_s = Y##_s;						\
+	_FP_FRAC_COPY_##wc(R,Y);				\
+      }								\
+    R##_c = FP_CLS_NAN;						\
+  } while (0)
+
+#define FP_EX_INVALID		0x01
+#define FP_EX_DENORM		0x02
+#define FP_EX_DIVZERO		0x04
+#define FP_EX_OVERFLOW		0x08
+#define FP_EX_UNDERFLOW		0x10
+#define FP_EX_INEXACT		0x20
+#define FP_EX_ALL \
+	(FP_EX_INVALID | FP_EX_DENORM | FP_EX_DIVZERO | FP_EX_OVERFLOW \
+	 | FP_EX_UNDERFLOW | FP_EX_INEXACT)
+
+void __sfp_handle_exceptions (int);
+
+#define FP_HANDLE_EXCEPTIONS			\
+  do { /* Call the handler only when _fex is nonzero.  */	\
+    if (__builtin_expect (_fex, 0))		\
+      __sfp_handle_exceptions (_fex);		\
+  } while (0)
+
+#define FP_TRAPPING_EXCEPTIONS ((~_fcw >> FP_EX_SHIFT) & FP_EX_ALL)
+
+#define FP_ROUNDMODE		(_fcw & FP_RND_MASK)
+
+#define _FP_TININESS_AFTER_ROUNDING 1