diff options
author | Liubov Dmitrieva <liubov.dmitrieva@gmail.com> | 2012-09-10 11:44:49 +0200 |
---|---|---|
committer | Andreas Jaeger <aj@suse.de> | 2012-09-10 11:44:49 +0200 |
commit | 80ccd52c95bda018899d83f21c797dd0fd028512 (patch) | |
tree | 428a02774c0b6a050ce30b929e14c6250bd1173c /sysdeps | |
parent | 3d9b46b3500566163815747173002d3d0bbb9b2f (diff) | |
download | glibc-80ccd52c95bda018899d83f21c797dd0fd028512.tar.gz glibc-80ccd52c95bda018899d83f21c797dd0fd028512.tar.xz glibc-80ccd52c95bda018899d83f21c797dd0fd028512.zip |
Fix x86 SSE cosf, sinf issues
* sysdeps/i386/i686/fpu/multiarch/s_sinf-sse2.S: Fix unwind info if defined PIC. Fix special cases description.
* sysdeps/i386/i686/fpu/multiarch/s_cosf-sse2.S: Likewise.
* sysdeps/x86_64/fpu/s_sinf.S: Fix special cases description, fix DP_HI_MASK entry.
* sysdeps/x86_64/fpu/s_cosf.S: Likewise.
Diffstat (limited to 'sysdeps')
-rw-r--r-- | sysdeps/i386/i686/fpu/multiarch/s_cosf-sse2.S | 46 | ||||
-rw-r--r-- | sysdeps/i386/i686/fpu/multiarch/s_sinf-sse2.S | 49 | ||||
-rw-r--r-- | sysdeps/x86_64/fpu/s_cosf.S | 24 | ||||
-rw-r--r-- | sysdeps/x86_64/fpu/s_sinf.S | 21 |
4 files changed, 56 insertions, 84 deletions
diff --git a/sysdeps/i386/i686/fpu/multiarch/s_cosf-sse2.S b/sysdeps/i386/i686/fpu/multiarch/s_cosf-sse2.S index 2b5a2a5ae2..405c6ea4c8 100644 --- a/sysdeps/i386/i686/fpu/multiarch/s_cosf-sse2.S +++ b/sysdeps/i386/i686/fpu/multiarch/s_cosf-sse2.S @@ -50,25 +50,29 @@ * 9) if x is NaN, return x-x. * * Special cases: - * cos(+-0)==+-0 not raising inexact/underflow, - * cos(subnormal) raises inexact/underflow - * cos(min_normalized) raises inexact/underflow - * cos(normalized) raises inexact - * cos(Inf) = NaN, raises invalid, sets errno to EDOM - * cos(NaN) = NaN + * cos(+-0) = 1 not raising inexact, + * cos(subnormal) raises inexact, + * cos(min_normalized) raises inexact, + * cos(normalized) raises inexact, + * cos(Inf) = NaN, raises invalid, sets errno to EDOM, + * cos(NaN) = NaN. */ #ifdef PIC # define MO1(symbol) L(symbol)##@GOTOFF(%ebx) # define MO2(symbol,reg2,_scale) L(symbol)##@GOTOFF(%ebx,reg2,_scale) -# define SAVE_BX pushl %ebx -# define RESTORE_BX popl %ebx +# define CFI_PUSH(REG) cfi_adjust_cfa_offset(4); cfi_rel_offset(REG,0) +# define CFI_POP(REG) cfi_adjust_cfa_offset(-4); cfi_restore(REG) +# define PUSH(REG) pushl REG; CFI_PUSH(REG) +# define POP(REG) popl REG; CFI_POP(REG) +# define ENTRANCE PUSH(%ebx); LOAD_PIC_REG(bx) +# define RETURN POP(%ebx); ret; CFI_PUSH(%ebx) # define ARG_X 8(%esp) #else # define MO1(symbol) L(symbol) # define MO2(symbol,reg2,_scale) L(symbol)(,reg2,_scale) -# define SAVE_BX -# define RESTORE_BX +# define ENTRANCE +# define RETURN ret # define ARG_X 4(%esp) #endif @@ -76,11 +80,7 @@ ENTRY(__cosf_sse2) /* Input: single precision x on stack at address ARG_X */ -#ifdef PIC - SAVE_BX - LOAD_PIC_REG(bx) -#endif - + ENTRANCE movl ARG_X, %eax /* Bits of x */ cvtss2sd ARG_X, %xmm0 /* DP x */ andl $0x7fffffff, %eax /* |x| */ @@ -143,8 +143,7 @@ L(reconstruction): fldl 0(%esp) /* ...to FPU. 
*/ /* Return back 4 bytes of stack frame */ lea 8(%esp), %esp - RESTORE_BX - ret + RETURN .p2align 4 L(sin_poly): @@ -183,9 +182,7 @@ L(sin_poly): fldl 0(%esp) /* ...to FPU. */ /* Return back 4 bytes of stack frame */ lea 8(%esp), %esp - RESTORE_BX - ret - + RETURN .p2align 4 L(large_args): @@ -275,7 +272,6 @@ L(very_large_skip2): jmp L(reconstruction) /* end of very_large_args peth */ - .p2align 4 L(arg_less_pio4): /* Here if |x|<Pi/4 */ @@ -307,8 +303,7 @@ L(epilogue): flds 0(%esp) /* ...to FPU. */ /* Return back 4 bytes of stack frame */ lea 4(%esp), %esp - RESTORE_BX - ret + RETURN .p2align 4 L(arg_less_2pn5): @@ -353,7 +348,6 @@ L(skip_errno_setting): jmp L(epilogue) END(__cosf_sse2) - .section .rodata, "a" .p2align 3 L(PIO4J): /* Table of j*Pi/4, for j=0,1,..,10 */ @@ -540,8 +534,8 @@ L(DP_ABS_MASK): /* Mask for getting DP absolute value */ .p2align 3 L(DP_HI_MASK): /* Mask for getting high 21 bits of DP value */ .long 0x00000000,0xffffffff - .type L(DP_ABS_MASK), @object - ASM_SIZE_DIRECTIVE(L(DP_ABS_MASK)) + .type L(DP_HI_MASK), @object + ASM_SIZE_DIRECTIVE(L(DP_HI_MASK)) .p2align 4 L(SP_ABS_MASK): /* Mask for getting SP absolute value */ diff --git a/sysdeps/i386/i686/fpu/multiarch/s_sinf-sse2.S b/sysdeps/i386/i686/fpu/multiarch/s_sinf-sse2.S index cda175094a..49d59b5972 100644 --- a/sysdeps/i386/i686/fpu/multiarch/s_sinf-sse2.S +++ b/sysdeps/i386/i686/fpu/multiarch/s_sinf-sse2.S @@ -50,25 +50,29 @@ * 9) if x is NaN, return x-x. * * Special cases: - * sin(+-0)==+-0 not raising inexact/underflow, - * sin(subnormal) raises inexact/underflow - * sin(min_normalized) raises inexact/underflow - * sin(normalized) raises inexact - * sin(Inf) = NaN, raises invalid, sets errno to EDOM - * sin(NaN) = NaN + * sin(+-0) = +-0 not raising inexact/underflow, + * sin(subnormal) raises inexact/underflow, + * sin(min_normalized) raises inexact/underflow, + * sin(normalized) raises inexact, + * sin(Inf) = NaN, raises invalid, sets errno to EDOM, + * sin(NaN) = NaN. 
*/ #ifdef PIC # define MO1(symbol) L(symbol)##@GOTOFF(%ebx) # define MO2(symbol,reg2,_scale) L(symbol)##@GOTOFF(%ebx,reg2,_scale) -# define SAVE_BX pushl %ebx -# define RESTORE_BX popl %ebx +# define CFI_PUSH(REG) cfi_adjust_cfa_offset(4); cfi_rel_offset(REG,0) +# define CFI_POP(REG) cfi_adjust_cfa_offset(-4); cfi_restore(REG) +# define PUSH(REG) pushl REG; CFI_PUSH(REG) +# define POP(REG) popl REG; CFI_POP(REG) +# define ENTRANCE PUSH(%ebx); LOAD_PIC_REG(bx) +# define RETURN POP(%ebx); ret; CFI_PUSH(%ebx) # define ARG_X 8(%esp) #else # define MO1(symbol) L(symbol) # define MO2(symbol,reg2,_scale) L(symbol)(,reg2,_scale) -# define SAVE_BX -# define RESTORE_BX +# define ENTRANCE +# define RETURN ret # define ARG_X 4(%esp) #endif @@ -76,11 +80,7 @@ ENTRY(__sinf_sse2) /* Input: single precision x on stack at address ARG_X */ -#ifdef PIC - SAVE_BX - LOAD_PIC_REG(bx) -#endif - + ENTRANCE movl ARG_X, %eax /* Bits of x */ cvtss2sd ARG_X, %xmm0 /* DP x */ andl $0x7fffffff, %eax /* |x| */ @@ -145,8 +145,7 @@ L(reconstruction): fldl 0(%esp) /* ...to FPU. */ /* Return back 4 bytes of stack frame */ lea 8(%esp), %esp - RESTORE_BX - ret + RETURN .p2align 4 L(sin_poly): @@ -186,9 +185,7 @@ L(sin_poly): fldl 0(%esp) /* ...to FPU. */ /* Return back 4 bytes of stack frame */ lea 8(%esp), %esp - RESTORE_BX - ret - + RETURN .p2align 4 L(large_args): @@ -281,10 +278,6 @@ L(very_large_skip2): jmp L(reconstruction) /* end of very_large_args peth */ - - - - .p2align 4 L(arg_less_pio4): /* Here if |x|<Pi/4 */ @@ -320,8 +313,7 @@ L(epilogue): flds 0(%esp) /* ...to FPU. 
*/ /* Return back 4 bytes of stack frame */ lea 4(%esp), %esp - RESTORE_BX - ret + RETURN .p2align 4 L(arg_less_2pn5): @@ -376,7 +368,6 @@ L(skip_errno_setting): jmp L(epilogue) END(__sinf_sse2) - .section .rodata, "a" .p2align 3 L(PIO4J): /* Table of j*Pi/4, for j=0,1,..,10 */ @@ -569,7 +560,7 @@ L(DP_ABS_MASK): /* Mask for getting DP absolute value */ .p2align 3 L(DP_HI_MASK): /* Mask for getting high 21 bits of DP value */ .long 0x00000000,0xffffffff - .type L(DP_ABS_MASK), @object - ASM_SIZE_DIRECTIVE(L(DP_ABS_MASK)) + .type L(DP_HI_MASK), @object + ASM_SIZE_DIRECTIVE(L(DP_HI_MASK)) weak_alias (__sinf, sinf) diff --git a/sysdeps/x86_64/fpu/s_cosf.S b/sysdeps/x86_64/fpu/s_cosf.S index 7eeefe8f30..dc8c76a131 100644 --- a/sysdeps/x86_64/fpu/s_cosf.S +++ b/sysdeps/x86_64/fpu/s_cosf.S @@ -50,12 +50,12 @@ * 9) if x is NaN, return x-x. * * Special cases: - * cos(+-0)==+-0 not raising inexact/underflow, - * cos(subnormal) raises inexact/underflow - * cos(min_normalized) raises inexact/underflow - * cos(normalized) raises inexact - * cos(Inf) = NaN, raises invalid, sets errno to EDOM - * cos(NaN) = NaN + * cos(+-0) = 1 not raising inexact, + * cos(subnormal) raises inexact, + * cos(min_normalized) raises inexact, + * cos(normalized) raises inexact, + * cos(Inf) = NaN, raises invalid, sets errno to EDOM, + * cos(NaN) = NaN. */ .text @@ -163,10 +163,6 @@ L(sin_poly): cvtsd2ss %xmm3, %xmm0 /* SP result */ ret - - - - .p2align 4 L(large_args): /* Here if |x|>=9*Pi/4 */ @@ -257,7 +253,6 @@ L(very_large_skip2): jmp L(reconstruction) /* end of very_large_args peth */ - .p2align 4 L(arg_less_pio4): /* Here if |x|<Pi/4 */ @@ -317,7 +312,6 @@ L(arg_inf_or_nan): /* Here if x is Inf. Set errno to EDOM. 
*/ call JUMPTARGET(__errno_location) - lea (%rax), %rax movl $EDOM, (%rax) .p2align 4 @@ -328,8 +322,6 @@ L(skip_errno_setting): ret END(__cosf) - - .section .rodata, "a" .p2align 3 L(PIO4J): /* Table of j*Pi/4, for j=0,1,..,10 */ @@ -516,8 +508,8 @@ L(DP_ABS_MASK): /* Mask for getting DP absolute value */ .p2align 3 L(DP_HI_MASK): /* Mask for getting high 21 bits of DP value */ .long 0x00000000,0xffffffff - .type L(DP_ABS_MASK), @object - ASM_SIZE_DIRECTIVE(L(DP_ABS_MASK)) + .type L(DP_HI_MASK), @object + ASM_SIZE_DIRECTIVE(L(DP_HI_MASK)) .p2align 4 L(SP_ABS_MASK): /* Mask for getting SP absolute value */ diff --git a/sysdeps/x86_64/fpu/s_sinf.S b/sysdeps/x86_64/fpu/s_sinf.S index 295ba3df85..9a6c87ff79 100644 --- a/sysdeps/x86_64/fpu/s_sinf.S +++ b/sysdeps/x86_64/fpu/s_sinf.S @@ -50,12 +50,12 @@ * 9) if x is NaN, return x-x. * * Special cases: - * sin(+-0)==+-0 not raising inexact/underflow, - * sin(subnormal) raises inexact/underflow - * sin(min_normalized) raises inexact/underflow - * sin(normalized) raises inexact - * sin(Inf) = NaN, raises invalid, sets errno to EDOM - * sin(NaN) = NaN + * sin(+-0) = +-0 not raising inexact/underflow, + * sin(subnormal) raises inexact/underflow, + * sin(min_normalized) raises inexact/underflow, + * sin(normalized) raises inexact, + * sin(Inf) = NaN, raises invalid, sets errno to EDOM, + * sin(NaN) = NaN. */ .text @@ -168,7 +168,6 @@ L(sin_poly): cvtsd2ss %xmm3, %xmm0 /* SP result */ ret - .p2align 4 L(large_args): /* Here if |x|>=9*Pi/4 */ @@ -262,7 +261,6 @@ L(very_large_skip2): jmp L(reconstruction) /* end of very_large_args peth */ - .p2align 4 L(arg_less_pio4): /* Here if |x|<Pi/4 */ @@ -340,7 +338,6 @@ L(arg_inf_or_nan): /* Here if x is Inf. Set errno to EDOM. 
*/ call JUMPTARGET(__errno_location) - lea (%rax), %rax movl $EDOM, (%rax) .p2align 4 @@ -351,8 +348,6 @@ L(skip_errno_setting): ret END(__sinf) - - .section .rodata, "a" .p2align 3 L(PIO4J): /* Table of j*Pi/4, for j=0,1,..,10 */ @@ -545,8 +540,8 @@ L(DP_ABS_MASK): /* Mask for getting DP absolute value */ .p2align 3 L(DP_HI_MASK): /* Mask for getting high 21 bits of DP value */ .long 0x00000000,0xffffffff - .type L(DP_ABS_MASK),@object - ASM_SIZE_DIRECTIVE(L(DP_ABS_MASK)) + .type L(DP_HI_MASK),@object + ASM_SIZE_DIRECTIVE(L(DP_HI_MASK)) .p2align 4 L(SP_ABS_MASK): /* Mask for getting SP absolute value */ |