about summary refs log tree commit diff
path: root/sysdeps/i386/i686/multiarch/wcscmp-sse2.S
diff options
context:
space:
mode:
author Liubov Dmitrieva <liubov.dmitrieva@gmail.com> 2011-10-23 13:34:15 -0400
committer Ulrich Drepper <drepper@gmail.com> 2011-10-23 13:34:15 -0400
commit 95584d3b3309ff4da4cc458254df383f5cff044b (patch)
tree 2b8c9a0284e2b33c15b1d7cc48ef6e72d1a9207e /sysdeps/i386/i686/multiarch/wcscmp-sse2.S
parent 774a2669af652979ed965d7225502ba473b2da73 (diff)
download glibc-95584d3b3309ff4da4cc458254df383f5cff044b.tar.gz
glibc-95584d3b3309ff4da4cc458254df383f5cff044b.tar.xz
glibc-95584d3b3309ff4da4cc458254df383f5cff044b.zip
Fix signedness in wcscmp comparison
Diffstat (limited to 'sysdeps/i386/i686/multiarch/wcscmp-sse2.S')
-rw-r--r-- sysdeps/i386/i686/multiarch/wcscmp-sse2.S 158
1 files changed, 87 insertions, 71 deletions
diff --git a/sysdeps/i386/i686/multiarch/wcscmp-sse2.S b/sysdeps/i386/i686/multiarch/wcscmp-sse2.S
index 404a9a4d4c..cca0d8340b 100644
--- a/sysdeps/i386/i686/multiarch/wcscmp-sse2.S
+++ b/sysdeps/i386/i686/multiarch/wcscmp-sse2.S
@@ -21,7 +21,6 @@
 #ifndef NOT_IN_libc
 
 # include <sysdep.h>
-# include "asm-syntax.h"
 
 # define CFI_PUSH(REG)	\
 	cfi_adjust_cfa_offset (4);	\
@@ -34,18 +33,16 @@
 # define PUSH(REG) pushl REG; CFI_PUSH (REG)
 # define POP(REG) popl REG; CFI_POP (REG)
 
-# ifndef STRCMP
-# define STRCMP __wcscmp_sse2
-# endif
-
 # define ENTRANCE PUSH(%esi); PUSH(%edi)
 # define RETURN  POP(%edi); POP(%esi); ret; CFI_PUSH(%esi); CFI_PUSH(%edi);
 # define PARMS  4
 # define STR1  PARMS
 # define STR2  STR1+4
 
+/* Note: wcscmp uses signed comparison, not unsigned as in strcmp function. */
+
 	.text
-ENTRY (STRCMP)
+ENTRY (__wcscmp_sse2)
 /*
 	* This implementation uses SSE to compare up to 16 bytes at a time.
 */
@@ -131,7 +128,7 @@ L(continue_48_48):
 	jne	L(nequal)
 	test	%ecx, %ecx
 	jz	L(equal)
-
+	
 	movdqu	16(%edi), %xmm1
 	movdqu	16(%esi), %xmm2
 	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
@@ -264,21 +261,21 @@ L(continue_00_48):
 	test	%ecx, %ecx
 	jnz	L(less4_double_words1)
 
-	sub	(%esi), %eax
-	jnz	L(return)
-
+	cmp	(%esi), %eax
+	jne	L(nequal)
+	
 	mov	4(%edi), %eax
-	sub	4(%esi), %eax
-	jnz	L(return)
+	cmp	4(%esi), %eax
+	jne	L(nequal)
 
 	mov	8(%edi), %eax
-	sub	8(%esi), %eax
-	jnz	L(return)
+	cmp	8(%esi), %eax
+	jne	L(nequal)
 
 	mov	12(%edi), %eax
-	sub	12(%esi), %eax
-	jnz	L(return)
-
+	cmp	12(%esi), %eax
+	jne	L(nequal)
+	
 	movdqu	16(%esi), %xmm2
 	pcmpeqd	%xmm2, %xmm0		/* Any null double_word? */
 	pcmpeqd	16(%edi), %xmm2		/* compare first 4 double_words for equality */
@@ -381,7 +378,7 @@ L(continue_32_48):
 	movdqu	48(%esi), %xmm2
 	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
 	pcmpeqd	%xmm2, %xmm1		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
+	psubb	%xmm0, %xmm1		/* packed sub of comparison results */
 	pmovmskb %xmm1, %edx
 	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
 	jnz	L(less4_double_words_48)
@@ -585,21 +582,21 @@ L(continue_48_00):
 	test	%ecx, %ecx
 	jnz	L(less4_double_words1)
 
-	sub	(%esi), %eax
-	jnz	L(return)
-
+	cmp	(%esi), %eax
+	jne	L(nequal)
+	
 	mov	4(%edi), %eax
-	sub	4(%esi), %eax
-	jnz	L(return)
+	cmp	4(%esi), %eax
+	jne	L(nequal)
 
 	mov	8(%edi), %eax
-	sub	8(%esi), %eax
-	jnz	L(return)
+	cmp	8(%esi), %eax
+	jne	L(nequal)
 
 	mov	12(%edi), %eax
-	sub	12(%esi), %eax
-	jnz	L(return)
-
+	cmp	12(%esi), %eax
+	jne	L(nequal)
+	
 	movdqu	16(%edi), %xmm1
 	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
 	pcmpeqd	16(%esi), %xmm1		/* compare first 4 double_words for equality */
@@ -839,142 +836,161 @@ L(less4_double_words1):
 	test	%ecx, %ecx
 	jz	L(equal)
 
-	mov	12(%esi), %edx
-	mov	12(%edi), %eax
-	sub	%edx, %eax
+	mov	12(%esi), %ecx
+	cmp	%ecx, 12(%edi)
+	jne	L(nequal)
+	xor	%eax, %eax
 	RETURN
 
 	.p2align 4
 L(less4_double_words):
+	xor	%eax, %eax
 	test	%dl, %dl
 	jz	L(next_two_double_words)
 	and	$15, %dl
 	jz	L(second_double_word)
-	mov	(%edi), %eax
-	sub	(%esi), %eax
+	mov	(%esi), %ecx
+	cmp	%ecx, (%edi)
+	jne	L(nequal)
 	RETURN
 
 	.p2align 4
 L(second_double_word):
-	mov	4(%edi), %eax
-	sub	4(%esi), %eax
+	mov	4(%esi), %ecx
+	cmp	%ecx, 4(%edi)
+	jne	L(nequal)
 	RETURN
 
 	.p2align 4
 L(next_two_double_words):
 	and	$15, %dh
 	jz	L(fourth_double_word)
-	mov	8(%edi), %eax
-	sub	8(%esi), %eax
+	mov	8(%esi), %ecx
+	cmp	%ecx, 8(%edi)
+	jne	L(nequal)
 	RETURN
 
 	.p2align 4
 L(fourth_double_word):
-	mov	12(%edi), %eax
-	sub	12(%esi), %eax
+	mov	12(%esi), %ecx
+	cmp	%ecx, 12(%edi)
+	jne	L(nequal)
 	RETURN
 
 	.p2align 4
 L(less4_double_words_16):
+	xor	%eax, %eax
 	test	%dl, %dl
 	jz	L(next_two_double_words_16)
 	and	$15, %dl
 	jz	L(second_double_word_16)
-	mov	16(%edi), %eax
-	sub	16(%esi), %eax
+	mov	16(%esi), %ecx
+	cmp	%ecx, 16(%edi)
+	jne	L(nequal)
 	RETURN
 
 	.p2align 4
 L(second_double_word_16):
-	mov	20(%edi), %eax
-	sub	20(%esi), %eax
+	mov	20(%esi), %ecx
+	cmp	%ecx, 20(%edi)
+	jne	L(nequal)
 	RETURN
 
 	.p2align 4
 L(next_two_double_words_16):
 	and	$15, %dh
 	jz	L(fourth_double_word_16)
-	mov	24(%edi), %eax
-	sub	24(%esi), %eax
+	mov	24(%esi), %ecx
+	cmp	%ecx, 24(%edi)
+	jne	L(nequal)
 	RETURN
 
 	.p2align 4
 L(fourth_double_word_16):
-	mov	28(%edi), %eax
-	sub	28(%esi), %eax
+	mov	28(%esi), %ecx
+	cmp	%ecx, 28(%edi)
+	jne	L(nequal)
 	RETURN
 
 	.p2align 4
 L(less4_double_words_32):
+	xor	%eax, %eax
 	test	%dl, %dl
 	jz	L(next_two_double_words_32)
 	and	$15, %dl
 	jz	L(second_double_word_32)
-	mov	32(%edi), %eax
-	sub	32(%esi), %eax
+	mov	32(%esi), %ecx
+	cmp	%ecx, 32(%edi)
+	jne	L(nequal)
 	RETURN
 
 	.p2align 4
 L(second_double_word_32):
-	mov	36(%edi), %eax
-	sub	36(%esi), %eax
+	mov	36(%esi), %ecx
+	cmp	%ecx, 36(%edi)
+	jne	L(nequal)
 	RETURN
 
 	.p2align 4
 L(next_two_double_words_32):
 	and	$15, %dh
 	jz	L(fourth_double_word_32)
-	mov	40(%edi), %eax
-	sub	40(%esi), %eax
+	mov	40(%esi), %ecx
+	cmp	%ecx, 40(%edi)
+	jne	L(nequal)
 	RETURN
 
 	.p2align 4
 L(fourth_double_word_32):
-	mov	44(%edi), %eax
-	sub	44(%esi), %eax
+	mov	44(%esi), %ecx
+	cmp	%ecx, 44(%edi)
+	jne	L(nequal)
 	RETURN
 
 	.p2align 4
 L(less4_double_words_48):
+	xor	%eax, %eax
 	test	%dl, %dl
 	jz	L(next_two_double_words_48)
 	and	$15, %dl
 	jz	L(second_double_word_48)
-	mov	48(%edi), %eax
-	sub	48(%esi), %eax
+	mov	48(%esi), %ecx
+	cmp	%ecx, 48(%edi)
+	jne	L(nequal)
 	RETURN
 
 	.p2align 4
 L(second_double_word_48):
-	mov	52(%edi), %eax
-	sub	52(%esi), %eax
+	mov	52(%esi), %ecx
+	cmp	%ecx, 52(%edi)
+	jne	L(nequal)
 	RETURN
 
 	.p2align 4
 L(next_two_double_words_48):
 	and	$15, %dh
 	jz	L(fourth_double_word_48)
-	mov	56(%edi), %eax
-	sub	56(%esi), %eax
+	mov	56(%esi), %ecx
+	cmp	%ecx, 56(%edi)
+	jne	L(nequal)
 	RETURN
 
 	.p2align 4
 L(fourth_double_word_48):
-	mov	60(%edi), %eax
-	sub	60(%esi), %eax
-	RETURN
-
-	.p2align 4
-L(return):
+	mov	60(%esi), %ecx
+	cmp	%ecx, 60(%edi)
+	jne	L(nequal)
 	RETURN
 
 	.p2align 4
 L(nequal):
 	mov	$1, %eax
-	ja	L(nequal_bigger)
+	jg	L(return)
 	neg	%eax
+	RETURN
 
-L(nequal_bigger):
+	.p2align 4
+L(return):
 	RETURN
 
 	.p2align 4
@@ -988,7 +1004,7 @@ L(equal):
 	.p2align 4
 L(neq):
 	mov	$1, %eax
-	ja	L(neq_bigger)
+	jg	L(neq_bigger)
 	neg	%eax
 
 L(neq_bigger):
@@ -999,5 +1015,5 @@ L(eq):
 	xorl	%eax, %eax
 	ret
 
-END (STRCMP)
+END (__wcscmp_sse2)
 #endif