Fix x86 SSE cosf, sinf issues

* sysdeps/i386/i686/fpu/multiarch/s_sinf-sse2.S: Fix unwind info when PIC is defined. Fix description of special cases. * sysdeps/i386/i686/fpu/multiarch/s_cosf-sse2.S: Likewise. * sysdeps/x86_64/fpu/s_sinf.S: Fix description of special cases, fix DP_HI_MASK entry. * sysdeps/x86_64/fpu/s_cosf.S: Likewise.
author: Liubov Dmitrieva <liubov.dmitrieva@gmail.com> 2012-09-10 11:44:49 +0200
committer: Andreas Jaeger <aj@suse.de> 2012-09-10 11:44:49 +0200
commit: 80ccd52c95bda018899d83f21c797dd0fd028512 (patch)
tree: 428a02774c0b6a050ce30b929e14c6250bd1173c /sysdeps
parent: 3d9b46b3500566163815747173002d3d0bbb9b2f (diff)
download: glibc-80ccd52c95bda018899d83f21c797dd0fd028512.tar.gz
glibc-80ccd52c95bda018899d83f21c797dd0fd028512.tar.xz
glibc-80ccd52c95bda018899d83f21c797dd0fd028512.zip
4 files changed, 56 insertions, 84 deletions
diff --git a/sysdeps/i386/i686/fpu/multiarch/s_cosf-sse2.S b/sysdeps/i386/i686/fpu/multiarch/s_cosf-sse2.S
index 2b5a2a5ae2..405c6ea4c8 100644
--- a/sysdeps/i386/i686/fpu/multiarch/s_cosf-sse2.S
+++ b/sysdeps/i386/i686/fpu/multiarch/s_cosf-sse2.S
@@ -50,25 +50,29 @@
  *  9) if x is NaN, return x-x.
  *
  * Special cases:
- *  cos(+-0)==+-0 not raising inexact/underflow,
- *  cos(subnormal) raises inexact/underflow
- *  cos(min_normalized) raises inexact/underflow
- *  cos(normalized) raises inexact
- *  cos(Inf) = NaN, raises invalid, sets errno to EDOM
- *  cos(NaN) = NaN
+ *  cos(+-0) = 1 not raising inexact,
+ *  cos(subnormal) raises inexact,
+ *  cos(min_normalized) raises inexact,
+ *  cos(normalized) raises inexact,
+ *  cos(Inf) = NaN, raises invalid, sets errno to EDOM,
+ *  cos(NaN) = NaN.
  */
 
 #ifdef	PIC
 # define MO1(symbol)			L(symbol)##@GOTOFF(%ebx)
 # define MO2(symbol,reg2,_scale)	L(symbol)##@GOTOFF(%ebx,reg2,_scale)
-# define SAVE_BX			pushl	%ebx
-# define RESTORE_BX			popl	%ebx
+# define CFI_PUSH(REG)	cfi_adjust_cfa_offset(4); cfi_rel_offset(REG,0)
+# define CFI_POP(REG)	cfi_adjust_cfa_offset(-4); cfi_restore(REG)
+# define PUSH(REG)			pushl REG; CFI_PUSH(REG)
+# define POP(REG)			popl REG; CFI_POP(REG)
+# define ENTRANCE			PUSH(%ebx); LOAD_PIC_REG(bx)
+# define RETURN				POP(%ebx); ret; CFI_PUSH(%ebx)
 # define ARG_X				8(%esp)
 #else
 # define MO1(symbol)			L(symbol)
 # define MO2(symbol,reg2,_scale)	L(symbol)(,reg2,_scale)
-# define SAVE_BX
-# define RESTORE_BX
+# define ENTRANCE
+# define RETURN				ret
 # define ARG_X				4(%esp)
 #endif
 
@@ -76,11 +80,7 @@
 ENTRY(__cosf_sse2)
 	/* Input: single precision x on stack at address ARG_X */
 
-#ifdef	PIC
-	SAVE_BX
-	LOAD_PIC_REG(bx)
-#endif
-
+	ENTRANCE
 	movl	ARG_X, %eax		/* Bits of x */
 	cvtss2sd ARG_X, %xmm0		/* DP x */
 	andl	$0x7fffffff, %eax	/* |x| */
@@ -143,8 +143,7 @@ L(reconstruction):
 	fldl	0(%esp)			/* ...to FPU.  */
 	/* Return back 4 bytes of stack frame */
 	lea	8(%esp), %esp
-	RESTORE_BX
-	ret
+	RETURN
 
 	.p2align	4
 L(sin_poly):
@@ -183,9 +182,7 @@ L(sin_poly):
 	fldl	0(%esp)			/* ...to FPU.  */
 	/* Return back 4 bytes of stack frame */
 	lea	8(%esp), %esp
-	RESTORE_BX
-	ret
-
+	RETURN
 
 	.p2align	4
 L(large_args):
@@ -275,7 +272,6 @@ L(very_large_skip2):
 
 	jmp	L(reconstruction)	/* end of very_large_args peth */
 
-
 	.p2align	4
 L(arg_less_pio4):
 	/* Here if |x|<Pi/4 */
@@ -307,8 +303,7 @@ L(epilogue):
 	flds	0(%esp)			/* ...to FPU.  */
 	/* Return back 4 bytes of stack frame */
 	lea	4(%esp), %esp
-	RESTORE_BX
-	ret
+	RETURN
 
 	.p2align	4
 L(arg_less_2pn5):
@@ -353,7 +348,6 @@ L(skip_errno_setting):
 	jmp	L(epilogue)
 END(__cosf_sse2)
 
-
 	.section .rodata, "a"
 	.p2align 3
 L(PIO4J): /* Table of j*Pi/4, for j=0,1,..,10 */
@@ -540,8 +534,8 @@ L(DP_ABS_MASK): /* Mask for getting DP absolute value */
 	.p2align 3
 L(DP_HI_MASK): /* Mask for getting high 21 bits of DP value */
 	.long	0x00000000,0xffffffff
-	.type L(DP_ABS_MASK), @object
-	ASM_SIZE_DIRECTIVE(L(DP_ABS_MASK))
+	.type L(DP_HI_MASK), @object
+	ASM_SIZE_DIRECTIVE(L(DP_HI_MASK))
 
 	.p2align 4
 L(SP_ABS_MASK): /* Mask for getting SP absolute value */
diff --git a/sysdeps/i386/i686/fpu/multiarch/s_sinf-sse2.S b/sysdeps/i386/i686/fpu/multiarch/s_sinf-sse2.S
index cda175094a..49d59b5972 100644
--- a/sysdeps/i386/i686/fpu/multiarch/s_sinf-sse2.S
+++ b/sysdeps/i386/i686/fpu/multiarch/s_sinf-sse2.S
@@ -50,25 +50,29 @@
  *  9) if x is NaN, return x-x.
  *
  * Special cases:
- *  sin(+-0)==+-0 not raising inexact/underflow,
- *  sin(subnormal) raises inexact/underflow
- *  sin(min_normalized) raises inexact/underflow
- *  sin(normalized) raises inexact
- *  sin(Inf) = NaN, raises invalid, sets errno to EDOM
- *  sin(NaN) = NaN
+ *  sin(+-0) = +-0 not raising inexact/underflow,
+ *  sin(subnormal) raises inexact/underflow,
+ *  sin(min_normalized) raises inexact/underflow,
+ *  sin(normalized) raises inexact,
+ *  sin(Inf) = NaN, raises invalid, sets errno to EDOM,
+ *  sin(NaN) = NaN.
  */
 
 #ifdef	PIC
 # define MO1(symbol)			L(symbol)##@GOTOFF(%ebx)
 # define MO2(symbol,reg2,_scale)	L(symbol)##@GOTOFF(%ebx,reg2,_scale)
-# define SAVE_BX			pushl	%ebx
-# define RESTORE_BX			popl	%ebx
+# define CFI_PUSH(REG)	cfi_adjust_cfa_offset(4); cfi_rel_offset(REG,0)
+# define CFI_POP(REG)	cfi_adjust_cfa_offset(-4); cfi_restore(REG)
+# define PUSH(REG)			pushl REG; CFI_PUSH(REG)
+# define POP(REG)			popl REG; CFI_POP(REG)
+# define ENTRANCE			PUSH(%ebx); LOAD_PIC_REG(bx)
+# define RETURN				POP(%ebx); ret; CFI_PUSH(%ebx)
 # define ARG_X				8(%esp)
 #else
 # define MO1(symbol)			L(symbol)
 # define MO2(symbol,reg2,_scale)	L(symbol)(,reg2,_scale)
-# define SAVE_BX
-# define RESTORE_BX
+# define ENTRANCE
+# define RETURN				ret
 # define ARG_X				4(%esp)
 #endif
 
@@ -76,11 +80,7 @@
 ENTRY(__sinf_sse2)
 	/* Input: single precision x on stack at address ARG_X */
 
-#ifdef	PIC
-	SAVE_BX
-	LOAD_PIC_REG(bx)
-#endif
-
+	ENTRANCE
 	movl	ARG_X, %eax		/* Bits of x */
 	cvtss2sd ARG_X, %xmm0		/* DP x */
 	andl	$0x7fffffff, %eax	/* |x| */
@@ -145,8 +145,7 @@ L(reconstruction):
 	fldl	0(%esp)			/* ...to FPU.  */
 	/* Return back 4 bytes of stack frame */
 	lea	8(%esp), %esp
-	RESTORE_BX
-	ret
+	RETURN
 
 	.p2align	4
 L(sin_poly):
@@ -186,9 +185,7 @@ L(sin_poly):
 	fldl	0(%esp)			/* ...to FPU.  */
 	/* Return back 4 bytes of stack frame */
 	lea	8(%esp), %esp
-	RESTORE_BX
-	ret
-
+	RETURN
 
 	.p2align	4
 L(large_args):
@@ -281,10 +278,6 @@ L(very_large_skip2):
 
 	jmp	L(reconstruction)	/* end of very_large_args peth */
 
-
-
-
-
 	.p2align	4
 L(arg_less_pio4):
 	/* Here if |x|<Pi/4 */
@@ -320,8 +313,7 @@ L(epilogue):
 	flds	0(%esp)			/* ...to FPU.  */
 	/* Return back 4 bytes of stack frame */
 	lea	4(%esp), %esp
-	RESTORE_BX
-	ret
+	RETURN
 
 	.p2align	4
 L(arg_less_2pn5):
@@ -376,7 +368,6 @@ L(skip_errno_setting):
 	jmp	L(epilogue)
 END(__sinf_sse2)
 
-
 	.section .rodata, "a"
 	.p2align 3
 L(PIO4J): /* Table of j*Pi/4, for j=0,1,..,10 */
@@ -569,7 +560,7 @@ L(DP_ABS_MASK): /* Mask for getting DP absolute value */
 	.p2align 3
 L(DP_HI_MASK): /* Mask for getting high 21 bits of DP value */
 	.long	0x00000000,0xffffffff
-	.type L(DP_ABS_MASK), @object
-	ASM_SIZE_DIRECTIVE(L(DP_ABS_MASK))
+	.type L(DP_HI_MASK), @object
+	ASM_SIZE_DIRECTIVE(L(DP_HI_MASK))
 
 weak_alias (__sinf, sinf)
diff --git a/sysdeps/x86_64/fpu/s_cosf.S b/sysdeps/x86_64/fpu/s_cosf.S
index 7eeefe8f30..dc8c76a131 100644
--- a/sysdeps/x86_64/fpu/s_cosf.S
+++ b/sysdeps/x86_64/fpu/s_cosf.S
@@ -50,12 +50,12 @@
  *  9) if x is NaN, return x-x.
  *
  * Special cases:
- *  cos(+-0)==+-0 not raising inexact/underflow,
- *  cos(subnormal) raises inexact/underflow
- *  cos(min_normalized) raises inexact/underflow
- *  cos(normalized) raises inexact
- *  cos(Inf) = NaN, raises invalid, sets errno to EDOM
- *  cos(NaN) = NaN
+ *  cos(+-0) = 1 not raising inexact,
+ *  cos(subnormal) raises inexact,
+ *  cos(min_normalized) raises inexact,
+ *  cos(normalized) raises inexact,
+ *  cos(Inf) = NaN, raises invalid, sets errno to EDOM,
+ *  cos(NaN) = NaN.
  */
 
 	.text
@@ -163,10 +163,6 @@ L(sin_poly):
 	cvtsd2ss %xmm3, %xmm0		/* SP result */
 	ret
 
-
-
-
-
 	.p2align	4
 L(large_args):
 	/* Here if |x|>=9*Pi/4 */
@@ -257,7 +253,6 @@ L(very_large_skip2):
 
 	jmp	L(reconstruction)	/* end of very_large_args peth */
 
-
 	.p2align	4
 L(arg_less_pio4):
 	/* Here if |x|<Pi/4 */
@@ -317,7 +312,6 @@ L(arg_inf_or_nan):
 
 	/* Here if x is Inf. Set errno to EDOM.  */
 	call	JUMPTARGET(__errno_location)
-	lea	(%rax), %rax
 	movl	$EDOM, (%rax)
 
 	.p2align	4
@@ -328,8 +322,6 @@ L(skip_errno_setting):
 	ret
 END(__cosf)
 
-
-
 	.section .rodata, "a"
 	.p2align 3
 L(PIO4J): /* Table of j*Pi/4, for j=0,1,..,10 */
@@ -516,8 +508,8 @@ L(DP_ABS_MASK): /* Mask for getting DP absolute value */
 	.p2align 3
 L(DP_HI_MASK): /* Mask for getting high 21 bits of DP value */
 	.long	0x00000000,0xffffffff
-	.type L(DP_ABS_MASK), @object
-	ASM_SIZE_DIRECTIVE(L(DP_ABS_MASK))
+	.type L(DP_HI_MASK), @object
+	ASM_SIZE_DIRECTIVE(L(DP_HI_MASK))
 
 	.p2align 4
 L(SP_ABS_MASK): /* Mask for getting SP absolute value */
diff --git a/sysdeps/x86_64/fpu/s_sinf.S b/sysdeps/x86_64/fpu/s_sinf.S
index 295ba3df85..9a6c87ff79 100644
--- a/sysdeps/x86_64/fpu/s_sinf.S
+++ b/sysdeps/x86_64/fpu/s_sinf.S
@@ -50,12 +50,12 @@
  *  9) if x is NaN, return x-x.
  *
  * Special cases:
- *  sin(+-0)==+-0 not raising inexact/underflow,
- *  sin(subnormal) raises inexact/underflow
- *  sin(min_normalized) raises inexact/underflow
- *  sin(normalized) raises inexact
- *  sin(Inf) = NaN, raises invalid, sets errno to EDOM
- *  sin(NaN) = NaN
+ *  sin(+-0) = +-0 not raising inexact/underflow,
+ *  sin(subnormal) raises inexact/underflow,
+ *  sin(min_normalized) raises inexact/underflow,
+ *  sin(normalized) raises inexact,
+ *  sin(Inf) = NaN, raises invalid, sets errno to EDOM,
+ *  sin(NaN) = NaN.
  */
 
 	.text
@@ -168,7 +168,6 @@ L(sin_poly):
 	cvtsd2ss %xmm3, %xmm0 		/* SP result */
 	ret
 
-
 	.p2align	4
 L(large_args):
 	/* Here if |x|>=9*Pi/4 */
@@ -262,7 +261,6 @@ L(very_large_skip2):
 
 	jmp	L(reconstruction)	/* end of very_large_args peth */
 
-
 	.p2align	4
 L(arg_less_pio4):
 	/* Here if |x|<Pi/4 */
@@ -340,7 +338,6 @@ L(arg_inf_or_nan):
 
 	/* Here if x is Inf. Set errno to EDOM.  */
 	call	JUMPTARGET(__errno_location)
-	lea	(%rax), %rax
 	movl	$EDOM, (%rax)
 
 	.p2align	4
@@ -351,8 +348,6 @@ L(skip_errno_setting):
 	ret
 END(__sinf)
 
-
-
 	.section .rodata, "a"
 	.p2align 3
 L(PIO4J): /* Table of j*Pi/4, for j=0,1,..,10 */
@@ -545,8 +540,8 @@ L(DP_ABS_MASK): /* Mask for getting DP absolute value */
 	.p2align 3
 L(DP_HI_MASK): /* Mask for getting high 21 bits of DP value */
 	.long	0x00000000,0xffffffff
-	.type L(DP_ABS_MASK),@object
-	ASM_SIZE_DIRECTIVE(L(DP_ABS_MASK))
+	.type L(DP_HI_MASK),@object
+	ASM_SIZE_DIRECTIVE(L(DP_HI_MASK))
 
 	.p2align 4
 L(SP_ABS_MASK): /* Mask for getting SP absolute value */
author	Liubov Dmitrieva <liubov.dmitrieva@gmail.com>	2012-09-10 11:44:49 +0200
committer	Andreas Jaeger <aj@suse.de>	2012-09-10 11:44:49 +0200
commit	80ccd52c95bda018899d83f21c797dd0fd028512 (patch)
tree	428a02774c0b6a050ce30b929e14c6250bd1173c /sysdeps
parent	3d9b46b3500566163815747173002d3d0bbb9b2f (diff)
download	glibc-80ccd52c95bda018899d83f21c797dd0fd028512.tar.gz glibc-80ccd52c95bda018899d83f21c797dd0fd028512.tar.xz glibc-80ccd52c95bda018899d83f21c797dd0fd028512.zip