diff options
author | Ulrich Drepper <drepper@redhat.com> | 2010-02-15 13:04:54 -0800 |
---|---|---|
committer | Ulrich Drepper <drepper@redhat.com> | 2010-02-15 13:04:54 -0800 |
commit | 6bb74d9f86e543c418f94a7732e8ee47c9e8225f (patch) | |
tree | edadff811d4545e48713f224f8fd172f078a576e | |
parent | 904057bc17fb3e3127a35ebf35fcac8d5bc8269b (diff) | |
download | glibc-6bb74d9f86e543c418f94a7732e8ee47c9e8225f.tar.gz glibc-6bb74d9f86e543c418f94a7732e8ee47c9e8225f.tar.xz glibc-6bb74d9f86e543c418f94a7732e8ee47c9e8225f.zip |
Fix up new x86 string functions.
-rw-r--r-- | ChangeLog | 10 | ||||
-rw-r--r-- | sysdeps/i386/i686/multiarch/memcmp-sse4.S | 36 | ||||
-rw-r--r-- | sysdeps/i386/i686/multiarch/memcmp-ssse3.S | 75 | ||||
-rw-r--r-- | sysdeps/i386/i686/multiarch/strcmp-sse4.S | 14 | ||||
-rw-r--r-- | sysdeps/i386/i686/multiarch/strcmp-ssse3.S | 64 |
5 files changed, 161 insertions, 38 deletions
diff --git a/ChangeLog b/ChangeLog index 6ef8d4c9dc..1595a0a55e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,13 @@ +2010-02-15 Ulrich Drepper <drepper@redhat.com> + + * sysdeps/i386/i686/multiarch/memcmp-sse4.S: Fix unwind info. + * sysdeps/i386/i686/multiarch/memcmp-ssse3.S: Likewise. + * sysdeps/i386/i686/multiarch/strcmp-sse4.S: Likewise. + * sysdeps/i386/i686/multiarch/strcmp-ssse3.S: Likewise. + + * sysdeps/i386/i686/multiarch/strcmp-sse4.S: Don't fall through to + undefined code. + 2010-02-12 H.J. Lu <hongjiu.lu@intel.com> * sysdeps/i386/i686/multiarch/Makefile (sysdep_routines): Add diff --git a/sysdeps/i386/i686/multiarch/memcmp-sse4.S b/sysdeps/i386/i686/multiarch/memcmp-sse4.S index 06437e484c..71c4e1c337 100644 --- a/sysdeps/i386/i686/multiarch/memcmp-sse4.S +++ b/sysdeps/i386/i686/multiarch/memcmp-sse4.S @@ -105,43 +105,43 @@ L(less8bytes): mov 1(%eax), %bl cmpb 1(%edx), %bl jne L(nonzero) - - cmp $2, %ecx + + cmp $2, %ecx jz L(0bytes) mov 2(%eax), %bl cmpb 2(%edx), %bl jne L(nonzero) - - cmp $3, %ecx + + cmp $3, %ecx jz L(0bytes) - + mov 3(%eax), %bl cmpb 3(%edx), %bl jne L(nonzero) - - cmp $4, %ecx + + cmp $4, %ecx jz L(0bytes) - + mov 4(%eax), %bl cmpb 4(%edx), %bl jne L(nonzero) - cmp $5, %ecx + cmp $5, %ecx jz L(0bytes) - + mov 5(%eax), %bl cmpb 5(%edx), %bl jne L(nonzero) - cmp $6, %ecx + cmp $6, %ecx jz L(0bytes) - + mov 6(%eax), %bl cmpb 6(%edx), %bl je L(0bytes) L(nonzero): - POP (%ebx) + POP (%ebx) mov $1, %eax ja L(above) neg %eax @@ -151,11 +151,11 @@ L(above): ALIGN (4) L(0bytes): - POP (%ebx) + POP (%ebx) xor %eax, %eax ret CFI_PUSH (%ebx) - + ALIGN (4) L(less1bytes): jb L(0bytesend) @@ -609,7 +609,7 @@ L(26bytes): mov -6(%edx), %ebx cmp %ebx, %ecx jne L(find_diff) - + movzwl -2(%eax), %ecx movzwl -2(%edx), %ebx cmp %bl, %cl @@ -873,7 +873,7 @@ L(32bytes): L(less16bytes): add %ebx, %eax add %ebx, %edx - + mov (%eax), %ecx mov (%edx), %ebx cmp %ebx, %ecx @@ -908,7 +908,7 @@ L(find_diff): jne L(end) cmp %bx, %cx L(end): - POP (%ebx) + POP (%ebx) mov $1, %eax ja L(bigger) neg %eax diff --git a/sysdeps/i386/i686/multiarch/memcmp-ssse3.S b/sysdeps/i386/i686/multiarch/memcmp-ssse3.S index bfcf660729..869f37a912 100644 --- a/sysdeps/i386/i686/multiarch/memcmp-ssse3.S +++ b/sysdeps/i386/i686/multiarch/memcmp-ssse3.S @@ -43,8 +43,7 @@ #define BLK2 BLK1+4 #define LEN BLK2+4 #define RETURN_END POP (%edi); POP (%esi); POP (%ebx); ret -#define RETURN RETURN_END; CFI_PUSH (%ebx); CFI_PUSH (%edi); \ - CFI_PUSH (%esi) +#define RETURN RETURN_END; cfi_restore_state; cfi_remember_state .section .text.ssse3,"ax",@progbits ENTRY (MEMCMP) @@ -76,12 +75,13 @@ L(1bytesend): L(zero): mov $0, %eax ret - + ALIGN (4) L(48bytesormore): PUSH (%ebx) PUSH (%esi) PUSH (%edi) + cfi_remember_state movdqu (%eax), %xmm3 movdqu (%edx), %xmm0 movl %eax, %edi @@ -155,7 +155,7 @@ L(shr_0): add $32, %esi sub $0xffff, %edx jnz L(exit) - + lea (%ecx, %edi,1), %eax lea (%ecx, %esi,1), %edx @@ -163,6 +163,8 @@ L(shr_0): POP (%esi) jmp L(less48bytes) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shr_0_gobble): lea -48(%ecx), %ecx @@ -207,6 +209,8 @@ L(shr_0_gobble_loop_next): POP (%esi) jmp L(less48bytes) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shr_1): cmp $80, %ecx @@ -235,6 +239,8 @@ L(shr_1): POP (%esi) jmp L(less48bytes) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shr_1_gobble): sub $32, %ecx @@ -286,6 +292,8 @@ L(shr_1_gobble_next): jmp L(less48bytes) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shr_2): cmp $80, %ecx @@ -314,6 +322,8 @@ L(shr_2): POP (%esi) jmp L(less48bytes) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shr_2_gobble): sub $32, %ecx @@ -364,6 +374,8 @@ L(shr_2_gobble_next): POP (%esi) jmp L(less48bytes) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shr_3): cmp $80, %ecx @@ -392,6 +404,8 @@ L(shr_3): POP (%esi) jmp L(less48bytes) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shr_3_gobble): sub $32, %ecx @@ -442,6 +456,8 @@ L(shr_3_gobble_next): POP (%esi) jmp L(less48bytes) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shr_4): cmp $80, %ecx @@ -470,6 +486,8 @@ L(shr_4): POP (%esi) jmp L(less48bytes) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shr_4_gobble): sub $32, %ecx @@ -520,6 +538,8 @@ L(shr_4_gobble_next): POP (%esi) jmp L(less48bytes) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shr_5): cmp $80, %ecx @@ -548,6 +568,8 @@ L(shr_5): POP (%esi) jmp L(less48bytes) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shr_5_gobble): sub $32, %ecx @@ -598,6 +620,8 @@ L(shr_5_gobble_next): POP (%esi) jmp L(less48bytes) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shr_6): cmp $80, %ecx @@ -626,6 +650,8 @@ L(shr_6): POP (%esi) jmp L(less48bytes) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shr_6_gobble): sub $32, %ecx @@ -676,6 +702,8 @@ L(shr_6_gobble_next): POP (%esi) jmp L(less48bytes) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shr_7): cmp $80, %ecx @@ -704,6 +732,8 @@ L(shr_7): POP (%esi) jmp L(less48bytes) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shr_7_gobble): sub $32, %ecx @@ -754,6 +784,8 @@ L(shr_7_gobble_next): POP (%esi) jmp L(less48bytes) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shr_8): cmp $80, %ecx @@ -782,6 +814,8 @@ L(shr_8): POP (%esi) jmp L(less48bytes) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shr_8_gobble): sub $32, %ecx @@ -832,6 +866,8 @@ L(shr_8_gobble_next): POP (%esi) jmp L(less48bytes) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shr_9): cmp $80, %ecx @@ -860,6 +896,8 @@ L(shr_9): POP (%esi) jmp L(less48bytes) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shr_9_gobble): sub $32, %ecx @@ -910,6 +948,8 @@ L(shr_9_gobble_next): POP (%esi) jmp L(less48bytes) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shr_10): cmp $80, %ecx @@ -938,6 +978,8 @@ L(shr_10): POP (%esi) jmp L(less48bytes) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shr_10_gobble): sub $32, %ecx @@ -988,6 +1030,8 @@ L(shr_10_gobble_next): POP (%esi) jmp L(less48bytes) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shr_11): cmp $80, %ecx @@ -1016,6 +1060,8 @@ L(shr_11): POP (%esi) jmp L(less48bytes) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shr_11_gobble): sub $32, %ecx @@ -1066,6 +1112,8 @@ L(shr_11_gobble_next): POP (%esi) jmp L(less48bytes) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shr_12): cmp $80, %ecx @@ -1094,6 +1142,8 @@ L(shr_12): POP (%esi) jmp L(less48bytes) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shr_12_gobble): sub $32, %ecx @@ -1144,6 +1194,8 @@ L(shr_12_gobble_next): POP (%esi) jmp L(less48bytes) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shr_13): cmp $80, %ecx @@ -1172,6 +1224,8 @@ L(shr_13): POP (%esi) jmp L(less48bytes) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shr_13_gobble): sub $32, %ecx @@ -1222,6 +1276,8 @@ L(shr_13_gobble_next): POP (%esi) jmp L(less48bytes) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shr_14): cmp $80, %ecx @@ -1250,6 +1306,8 @@ L(shr_14): POP (%esi) jmp L(less48bytes) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shr_14_gobble): sub $32, %ecx @@ -1300,6 +1358,8 @@ L(shr_14_gobble_next): POP (%esi) jmp L(less48bytes) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shr_15): cmp $80, %ecx @@ -1328,6 +1388,8 @@ L(shr_15): POP (%esi) jmp L(less48bytes) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shr_15_gobble): sub $32, %ecx @@ -1378,6 +1440,8 @@ L(shr_15_gobble_next): POP (%esi) jmp L(less48bytes) + cfi_restore_state + cfi_remember_state ALIGN (4) L(exit): pmovmskb %xmm1, %ebx @@ -1497,8 +1561,9 @@ L(Byte31): movzbl -9(%edi), %eax movzbl -9(%esi), %edx sub %edx, %eax - RETURN + RETURN_END + CFI_PUSH (%ebx) ALIGN (4) L(more8bytes): cmp $16, %ecx diff --git a/sysdeps/i386/i686/multiarch/strcmp-sse4.S b/sysdeps/i386/i686/multiarch/strcmp-sse4.S index 977647203f..4b47851ed4 100644 --- a/sysdeps/i386/i686/multiarch/strcmp-sse4.S +++ b/sysdeps/i386/i686/multiarch/strcmp-sse4.S @@ -176,6 +176,7 @@ L(first4bytes): PUSH (%ebx) PUSH (%edi) PUSH (%esi) + cfi_remember_state mov %edx, %edi mov %eax, %esi xorl %eax, %eax @@ -241,6 +242,7 @@ L(ret): #endif ret + cfi_restore_state #ifdef USE_AS_STRNCMP L(more16byteseq): POP (%esi) @@ -253,6 +255,10 @@ L(eq): POP (%ebp) #endif ret + +#ifdef USE_AS_STRNCMP + CFI_PUSH (%ebp) +#endif L(neq): mov $1, %eax ja L(neq_bigger) @@ -263,6 +269,9 @@ L(neq_bigger): #endif ret .p2align 4 +#ifdef USE_AS_STRNCMP + CFI_PUSH (%ebp) +#endif L(less16bytes): add $0xfefefeff, %ecx jnc L(less4bytes) @@ -370,8 +379,13 @@ L(more4bytes): movzbl 7(%eax), %ecx cmpb %cl, 7(%edx) jne L(neq) +#if 0 + // XXX bug in original code. It had a fallthru without any code cmpl $0, %ecx je L(eq) +#else + jmp L(eq) +#endif END (STRCMP) diff --git a/sysdeps/i386/i686/multiarch/strcmp-ssse3.S b/sysdeps/i386/i686/multiarch/strcmp-ssse3.S index 14caae29a1..338b00339d 100644 --- a/sysdeps/i386/i686/multiarch/strcmp-ssse3.S +++ b/sysdeps/i386/i686/multiarch/strcmp-ssse3.S @@ -160,6 +160,9 @@ L(crosspage): PUSH (%ebx) PUSH (%edi) PUSH (%esi) +#ifdef USE_AS_STRNCMP + cfi_remember_state +#endif movl %edx, %edi movl %eax, %ecx @@ -254,7 +257,7 @@ L(loop_ashr_0): /* * The following cases will be handled by ashr_1 - * ecx(offset of esi) eax(offset of edi) relative offset corresponding case + * ecx(offset of esi) eax(offset of edi) relative offset corresponding case * n(15) n -15 0(15 +(n-15) - n) ashr_1 */ .p2align 4 @@ -360,7 +363,7 @@ L(ashr_1_exittail): /* * The following cases will be handled by ashr_2 - * ecx(offset of esi) eax(offset of edi) relative offset corresponding case + * ecx(offset of esi) eax(offset of edi) relative offset corresponding case * n(14~15) n -14 1(15 +(n-14) - n) ashr_2 */ .p2align 4 @@ -467,7 +470,7 @@ L(ashr_2_exittail): /* * The following cases will be handled by ashr_3 - * ecx(offset of esi) eax(offset of edi) relative offset corresponding case + * ecx(offset of esi) eax(offset of edi) relative offset corresponding case * n(13~15) n -13 2(15 +(n-13) - n) ashr_3 */ .p2align 4 @@ -573,7 +576,7 @@ L(ashr_3_exittail): /* * The following cases will be handled by ashr_4 - * ecx(offset of esi) eax(offset of edi) relative offset corresponding case + * ecx(offset of esi) eax(offset of edi) relative offset corresponding case * n(12~15) n -12 3(15 +(n-12) - n) ashr_4 */ .p2align 4 @@ -682,7 +685,7 @@ L(ashr_4_exittail): /* * The following cases will be handled by ashr_5 - * ecx(offset of esi) eax(offset of edi) relative offset corresponding case + * ecx(offset of esi) eax(offset of edi) relative offset corresponding case * n(11~15) n -11 4(15 +(n-11) - n) ashr_5 */ .p2align 4 @@ -788,7 +791,7 @@ L(ashr_5_exittail): /* * The following cases will be handled by ashr_6 - * ecx(offset of esi) eax(offset of edi) relative offset corresponding case + * ecx(offset of esi) eax(offset of edi) relative offset corresponding case * n(10~15) n -10 5(15 +(n-10) - n) ashr_6 */ @@ -896,7 +899,7 @@ L(ashr_6_exittail): /* * The following cases will be handled by ashr_7 - * ecx(offset of esi) eax(offset of edi) relative offset corresponding case + * ecx(offset of esi) eax(offset of edi) relative offset corresponding case * n(9~15) n - 9 6(15 +(n-9) - n) ashr_7 */ @@ -1006,7 +1009,7 @@ L(ashr_7_exittail): /* * The following cases will be handled by ashr_8 - * ecx(offset of esi) eax(offset of edi) relative offset corresponding case + * ecx(offset of esi) eax(offset of edi) relative offset corresponding case * n(8~15) n - 8 7(15 +(n-8) - n) ashr_8 */ .p2align 4 @@ -1113,7 +1116,7 @@ L(ashr_8_exittail): /* * The following cases will be handled by ashr_9 - * ecx(offset of esi) eax(offset of edi) relative offset corresponding case + * ecx(offset of esi) eax(offset of edi) relative offset corresponding case * n(7~15) n - 7 8(15 +(n-7) - n) ashr_9 */ .p2align 4 @@ -1219,7 +1222,7 @@ L(ashr_9_exittail): /* * The following cases will be handled by ashr_10 - * ecx(offset of esi) eax(offset of edi) relative offset corresponding case + * ecx(offset of esi) eax(offset of edi) relative offset corresponding case * n(6~15) n - 6 9(15 +(n-6) - n) ashr_10 */ .p2align 4 @@ -1325,7 +1328,7 @@ L(ashr_10_exittail): /* * The following cases will be handled by ashr_11 - * ecx(offset of esi) eax(offset of edi) relative offset corresponding case + * ecx(offset of esi) eax(offset of edi) relative offset corresponding case * n(5~15) n - 5 10(15 +(n-5) - n) ashr_11 */ .p2align 4 @@ -1431,7 +1434,7 @@ L(ashr_11_exittail): /* * The following cases will be handled by ashr_12 - * ecx(offset of esi) eax(offset of edi) relative offset corresponding case + * ecx(offset of esi) eax(offset of edi) relative offset corresponding case * n(4~15) n - 4 11(15 +(n-4) - n) ashr_12 */ .p2align 4 @@ -1537,7 +1540,7 @@ L(ashr_12_exittail): /* * The following cases will be handled by ashr_13 - * ecx(offset of esi) eax(offset of edi) relative offset corresponding case + * ecx(offset of esi) eax(offset of edi) relative offset corresponding case * n(3~15) n - 3 12(15 +(n-3) - n) ashr_13 */ .p2align 4 @@ -1643,7 +1646,7 @@ L(ashr_13_exittail): /* * The following cases will be handled by ashr_14 - * ecx(offset of esi) eax(offset of edi) relative offset corresponding case + * ecx(offset of esi) eax(offset of edi) relative offset corresponding case * n(2~15) n - 2 13(15 +(n-2) - n) ashr_14 */ .p2align 4 @@ -1749,7 +1752,7 @@ L(ashr_14_exittail): /* * The following cases will be handled by ashr_14 - * ecx(offset of esi) eax(offset of edi) relative offset corresponding case + * ecx(offset of esi) eax(offset of edi) relative offset corresponding case * n(1~15) n - 1 14(15 +(n-1) - n) ashr_15 */ @@ -1916,6 +1919,9 @@ L(less16bytes): ret .p2align 4 +#ifdef USE_AS_STRNCMP + CFI_PUSH (%ebp) +#endif L(Byte0): #ifdef USE_AS_STRNCMP cmp $0, %ebp @@ -1931,6 +1937,9 @@ L(Byte0): ret .p2align 4 +#ifdef USE_AS_STRNCMP + CFI_PUSH (%ebp) +#endif L(Byte1): #ifdef USE_AS_STRNCMP cmp $1, %ebp @@ -1946,6 +1955,9 @@ L(Byte1): ret .p2align 4 +#ifdef USE_AS_STRNCMP + CFI_PUSH (%ebp) +#endif L(Byte2): #ifdef USE_AS_STRNCMP cmp $2, %ebp @@ -1961,6 +1973,9 @@ L(Byte2): ret .p2align 4 +#ifdef USE_AS_STRNCMP + CFI_PUSH (%ebp) +#endif L(Byte3): #ifdef USE_AS_STRNCMP cmp $3, %ebp @@ -1976,6 +1991,9 @@ L(Byte3): ret .p2align 4 +#ifdef USE_AS_STRNCMP + CFI_PUSH (%ebp) +#endif L(Byte4): #ifdef USE_AS_STRNCMP cmp $4, %ebp @@ -1989,7 +2007,11 @@ L(Byte4): POP (%ebp) #endif ret + .p2align 4 +#ifdef USE_AS_STRNCMP + CFI_PUSH (%ebp) +#endif L(Byte5): #ifdef USE_AS_STRNCMP cmp $5, %ebp @@ -2005,6 +2027,9 @@ L(Byte5): ret .p2align 4 +#ifdef USE_AS_STRNCMP + CFI_PUSH (%ebp) +#endif L(Byte6): #ifdef USE_AS_STRNCMP cmp $6, %ebp @@ -2020,6 +2045,9 @@ L(Byte6): ret .p2align 4 +#ifdef USE_AS_STRNCMP + CFI_PUSH (%ebp) +#endif L(2next_8_bytes): add $8, %eax add $8, %edx @@ -2063,6 +2091,9 @@ L(2next_8_bytes): #endif ret +#ifdef USE_AS_STRNCMP + CFI_PUSH (%ebp) +#endif L(neq): mov $1, %eax ja L(neq_bigger) @@ -2074,6 +2105,7 @@ L(neq_bigger): ret #ifdef USE_AS_STRNCMP + cfi_remember_state L(more8byteseq): POP (%esi) POP (%edi) @@ -2087,7 +2119,9 @@ L(eq): #endif xorl %eax, %eax ret + #ifdef USE_AS_STRNCMP + CFI_PUSH (%ebp) L(less16bytes_sncmp): test %ebp, %ebp jz L(eq) |