about summary refs log tree commit diff
path: root/sysdeps/i386/i686/multiarch/strcmp-sse4.S
diff options
context:
space:
mode:
Diffstat (limited to 'sysdeps/i386/i686/multiarch/strcmp-sse4.S')
-rw-r--r--sysdeps/i386/i686/multiarch/strcmp-sse4.S534
1 files changed, 461 insertions, 73 deletions
diff --git a/sysdeps/i386/i686/multiarch/strcmp-sse4.S b/sysdeps/i386/i686/multiarch/strcmp-sse4.S
index 0de0a113c0..1df63e3156 100644
--- a/sysdeps/i386/i686/multiarch/strcmp-sse4.S
+++ b/sysdeps/i386/i686/multiarch/strcmp-sse4.S
@@ -1,5 +1,5 @@
 /* strcmp with SSE4.2
-   Copyright (C) 2010 Free Software Foundation, Inc.
+   Copyright (C) 2010, 2011 Free Software Foundation, Inc.
    Contributed by Intel Corporation.
    This file is part of the GNU C Library.
 
@@ -34,33 +34,156 @@
 #define PUSH(REG)	pushl REG; CFI_PUSH (REG)
 #define POP(REG)	popl REG; CFI_POP (REG)
 
-#ifndef USE_AS_STRNCMP
+#ifdef USE_AS_STRNCMP
 # ifndef STRCMP
-#  define STRCMP	__strcmp_sse4_2
+#  define STRCMP	__strncmp_sse4_2
 # endif
-# define STR1		4
+# define STR1		8
 # define STR2		STR1+4
-# define RETURN		ret; .p2align 4
-#else
+# define CNT		STR2+4
+# define RETURN		POP (REM); ret; .p2align 4; CFI_PUSH (REM)
+# define REM		%ebp
+#elif defined USE_AS_STRCASECMP_L
+# include "locale-defines.h"
 # ifndef STRCMP
-#  define STRCMP	__strncmp_sse4_2
+#  define STRCMP	__strcasecmp_l_sse4_2
 # endif
-# define STR1		8
+# define STR1		12
+# define STR2		STR1+4
+# define LOCALE		12	/* Loaded before the adjustement.  */
+# ifdef PIC
+#  define RETURN	POP (%edi); POP (%ebx); ret; \
+			.p2align 4; CFI_PUSH (%ebx); CFI_PUSH (%edi)
+# else
+#  define RETURN	POP (%edi); ret; .p2align 4; CFI_PUSH (%edi)
+# endif
+# define NONASCII	__strcasecmp_nonascii
+#elif defined USE_AS_STRNCASECMP_L
+# include "locale-defines.h"
+# ifndef STRCMP
+#  define STRCMP	__strncasecmp_l_sse4_2
+# endif
+# define STR1		16
 # define STR2		STR1+4
 # define CNT		STR2+4
-# define RETURN		POP (%ebp); ret; .p2align 4; CFI_PUSH (%ebp)
+# define LOCALE		16	/* Loaded before the adjustement.  */
+# ifdef PIC
+#  define RETURN	POP (%edi); POP (REM); POP (%ebx); ret; \
+			.p2align 4; \
+			CFI_PUSH (%ebx); CFI_PUSH (REM); CFI_PUSH (%edi)
+# else
+#  define RETURN	POP (%edi); POP (REM); ret; \
+			.p2align 4; CFI_PUSH (REM); CFI_PUSH (%edi)
+# endif
+# define REM		%ebp
+# define NONASCII	__strncasecmp_nonascii
+#else
+# ifndef STRCMP
+#  define STRCMP	__strcmp_sse4_2
+# endif
+# define STR1		4
+# define STR2		STR1+4
+# define RETURN		ret; .p2align 4
 #endif
 
 	.section .text.sse4.2,"ax",@progbits
-ENTRY (STRCMP)
-#ifdef USE_AS_STRNCMP
-	PUSH	(%ebp)
+
+#ifdef USE_AS_STRCASECMP_L
+ENTRY (__strcasecmp_sse4_2)
+# ifdef PIC
+	PUSH	(%ebx)
+	call	__i686.get_pc_thunk.bx
+	addl	$_GLOBAL_OFFSET_TABLE_, %ebx
+	movl	__libc_tsd_LOCALE@GOTNTPOFF(%ebx), %eax
+# else
+	movl	__libc_tsd_LOCALE@NTPOFF, %eax
+# endif
+	movl	%gs:(%eax), %eax
+# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0
+	movl	LOCALE_T___LOCALES+LC_CTYPE*4(%eax), %eax
+# else
+	movl	(%eax), %eax
+# endif
+	testl	$1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%eax)
+	jne	__strcasecmp_nonascii
+	jmp	L(ascii)
+END (__strcasecmp_sse4_2)
+#endif
+
+#ifdef USE_AS_STRNCASECMP_L
+ENTRY (__strncasecmp_sse4_2)
+# ifdef PIC
+	PUSH	(%ebx)
+	call	__i686.get_pc_thunk.bx
+	addl	$_GLOBAL_OFFSET_TABLE_, %ebx
+	movl	__libc_tsd_LOCALE@GOTNTPOFF(%ebx), %eax
+# else
+	movl	__libc_tsd_LOCALE@NTPOFF, %eax
+# endif
+	movl	%gs:(%eax), %eax
+# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0
+	movl	LOCALE_T___LOCALES+LC_CTYPE*4(%eax), %eax
+# else
+	movl	(%eax), %eax
+# endif
+	testl	$1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%eax)
+	jne	__strncasecmp_nonascii
+	jmp	L(ascii)
+END (__strncasecmp_sse4_2)
+#endif
+
+	ENTRY (STRCMP)
+#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
+	movl	LOCALE(%esp), %eax
+# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0
+	movl	LOCALE_T___LOCALES+LC_CTYPE*4(%eax), %eax
+# else
+	movl	(%eax), %eax
+# endif
+	testl	$1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%eax)
+	jne	NONASCII
+
+# ifdef PIC
+	PUSH	(%ebx)
+	call	__i686.get_pc_thunk.bx
+	addl	$_GLOBAL_OFFSET_TABLE_, %ebx
+# endif
+L(ascii):
+	.section .rodata.cst16,"aM",@progbits,16
+	.align 16
+.Lbelowupper:
+	.quad	0x4040404040404040
+	.quad	0x4040404040404040
+.Ltopupper:
+	.quad	0x5b5b5b5b5b5b5b5b
+	.quad	0x5b5b5b5b5b5b5b5b
+.Ltouppermask:
+	.quad	0x2020202020202020
+	.quad	0x2020202020202020
+	.previous
+
+# ifdef PIC
+#  define UCLOW_reg .Lbelowupper@GOTOFF(%ebx)
+#  define UCHIGH_reg .Ltopupper@GOTOFF(%ebx)
+#  define LCQWORD_reg .Ltouppermask@GOTOFF(%ebx)
+# else
+#  define UCLOW_reg .Lbelowupper
+#  define UCHIGH_reg .Ltopupper
+#  define LCQWORD_reg .Ltouppermask
+# endif
+#endif
+
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+	PUSH	(REM)
+#endif
+#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
+	PUSH	(%edi)
 #endif
 	mov	STR1(%esp), %edx
 	mov	STR2(%esp), %eax
-#ifdef USE_AS_STRNCMP
-	movl	CNT(%esp), %ebp
-	test	%ebp, %ebp
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+	movl	CNT(%esp), REM
+	test	REM, REM
 	je	L(eq)
 #endif
 	mov	%dx, %cx
@@ -72,10 +195,40 @@ ENTRY (STRCMP)
 	and	$0xfff, %ecx
 	cmp	$0xff0, %ecx
 	ja	L(first4bytes)
+#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
+# define TOLOWER(reg1, reg2) \
+	movdqa	reg1, %xmm3;						      \
+	movdqa	UCHIGH_reg, %xmm4;					      \
+	movdqa	reg2, %xmm5;						      \
+	movdqa	UCHIGH_reg, %xmm6;					      \
+	pcmpgtb	UCLOW_reg, %xmm3;					      \
+	pcmpgtb	reg1, %xmm4;						      \
+	pcmpgtb	UCLOW_reg, %xmm5;					      \
+	pcmpgtb	reg2, %xmm6;						      \
+	pand	%xmm4, %xmm3;						      \
+	pand	%xmm6, %xmm5;						      \
+	pand	LCQWORD_reg, %xmm3;					      \
+	pand	LCQWORD_reg, %xmm5;					      \
+	por	%xmm3, reg1;						      \
+	por	%xmm5, reg2
+
+	movdqu	(%eax), %xmm1
+	TOLOWER (%xmm2, %xmm1)
+	movd	%xmm2, %ecx
+	movd	%xmm1, %edi
+	movdqa	%xmm2, %xmm3
+	movdqa	%xmm1, %xmm4
+	cmpl	%edi, %ecx
+#else
+# define TOLOWER(reg1, reg)
+
 	movd	%xmm2, %ecx
 	cmp	(%eax), %ecx
+#endif
 	jne	L(less4bytes)
+#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
 	movdqu	(%eax), %xmm1
+#endif
 	pxor	%xmm2, %xmm1
 	pxor	%xmm0, %xmm0
 	ptest	%xmm1, %xmm0
@@ -84,113 +237,210 @@ ENTRY (STRCMP)
 	ptest	%xmm2, %xmm0
 	jnc	L(less16bytes)
 
-#ifdef USE_AS_STRNCMP
-	sub	$16, %ebp
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+	sub	$16, REM
 	jbe	L(eq)
 #endif
 	add	$16, %edx
 	add	$16, %eax
 L(first4bytes):
 	movzbl	(%eax), %ecx
+#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
+	movzbl	(%edx), %edi
+# ifdef PIC
+	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
+	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi
+# else
+	movl	_nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
+	movl	_nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi
+# endif
+	cmpl	%ecx, %edi
+#else
 	cmpb	%cl, (%edx)
+#endif
 	jne	L(neq)
 	cmpl	$0, %ecx
 	je	L(eq)
 
-#ifdef USE_AS_STRNCMP
-	cmp	$1, %ebp
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+	cmp	$1, REM
 	je	L(eq)
 #endif
 
 	movzbl	1(%eax), %ecx
+#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
+	movzbl	1(%edx), %edi
+# ifdef PIC
+	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
+	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi
+# else
+	movl	_nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
+	movl	_nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi
+# endif
+	cmpl	%ecx, %edi
+#else
 	cmpb	%cl, 1(%edx)
+#endif
 	jne	L(neq)
 	cmpl	$0, %ecx
 	je	L(eq)
 
-#ifdef USE_AS_STRNCMP
-	cmp	$2, %ebp
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+	cmp	$2, REM
 	je	L(eq)
 #endif
 	movzbl	2(%eax), %ecx
+#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
+	movzbl	2(%edx), %edi
+# ifdef PIC
+	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
+	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi
+# else
+	movl	_nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
+	movl	_nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi
+# endif
+	cmpl	%ecx, %edi
+#else
 	cmpb	%cl, 2(%edx)
+#endif
 	jne	L(neq)
 	cmpl	$0, %ecx
 	je	L(eq)
 
-#ifdef USE_AS_STRNCMP
-	cmp	$3, %ebp
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+	cmp	$3, REM
 	je	L(eq)
 #endif
 	movzbl	3(%eax), %ecx
+#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
+	movzbl	3(%edx), %edi
+# ifdef PIC
+	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
+	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi
+# else
+	movl	_nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
+	movl	_nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi
+# endif
+	cmpl	%ecx, %edi
+#else
 	cmpb	%cl, 3(%edx)
+#endif
 	jne	L(neq)
 	cmpl	$0, %ecx
 	je	L(eq)
 
-#ifdef USE_AS_STRNCMP
-	cmp	$4, %ebp
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+	cmp	$4, REM
 	je	L(eq)
 #endif
 	movzbl	4(%eax), %ecx
+#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
+	movzbl	4(%edx), %edi
+# ifdef PIC
+	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
+	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi
+# else
+	movl	_nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
+	movl	_nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi
+# endif
+	cmpl	%ecx, %edi
+#else
 	cmpb	%cl, 4(%edx)
+#endif
 	jne	L(neq)
 	cmpl	$0, %ecx
 	je	L(eq)
 
-#ifdef USE_AS_STRNCMP
-	cmp	$5, %ebp
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+	cmp	$5, REM
 	je	L(eq)
 #endif
 	movzbl	5(%eax), %ecx
+#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
+	movzbl	5(%edx), %edi
+# ifdef PIC
+	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
+	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi
+# else
+	movl	_nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
+	movl	_nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi
+# endif
+	cmpl	%ecx, %edi
+#else
 	cmpb	%cl, 5(%edx)
+#endif
 	jne	L(neq)
 	cmpl	$0, %ecx
 	je	L(eq)
 
-#ifdef USE_AS_STRNCMP
-	cmp	$6, %ebp
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+	cmp	$6, REM
 	je	L(eq)
 #endif
 	movzbl	6(%eax), %ecx
+#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
+	movzbl	6(%edx), %edi
+# ifdef PIC
+	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
+	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi
+# else
+	movl	_nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
+	movl	_nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi
+# endif
+	cmpl	%ecx, %edi
+#else
 	cmpb	%cl, 6(%edx)
+#endif
 	jne	L(neq)
 	cmpl	$0, %ecx
 	je	L(eq)
 
-#ifdef USE_AS_STRNCMP
-	cmp	$7, %ebp
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+	cmp	$7, REM
 	je	L(eq)
 #endif
 	movzbl	7(%eax), %ecx
+#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
+	movzbl	7(%edx), %edi
+# ifdef PIC
+	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
+	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi
+# else
+	movl	_nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
+	movl	_nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi
+# endif
+	cmpl	%ecx, %edi
+#else
 	cmpb	%cl, 7(%edx)
+#endif
 	jne	L(neq)
 	cmpl	$0, %ecx
 	je	L(eq)
 
-#ifdef USE_AS_STRNCMP
-	sub	$8, %ebp
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+	sub	$8, REM
 	je	L(eq)
 #endif
 	add	$8, %eax
 	add	$8, %edx
 
-	PUSH	(%ebx)
+#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
 	PUSH	(%edi)
+#endif
 	PUSH	(%esi)
-#ifdef USE_AS_STRNCMP
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
 	cfi_remember_state
 #endif
 	mov	%edx, %edi
 	mov	%eax, %esi
 	xorl	%eax, %eax
 L(check_offset):
-	movl	%edi, %ebx
+	movl	%edi, %edx
 	movl	%esi, %ecx
-	andl	$0xfff, %ebx
+	andl	$0xfff, %edx
 	andl	$0xfff, %ecx
-	cmpl	%ebx, %ecx
-	cmovl	%ebx, %ecx
+	cmpl	%edx, %ecx
+	cmovl	%edx, %ecx
 	lea	-0xff0(%ecx), %edx
 	sub	%edx, %edi
 	sub	%edx, %esi
@@ -199,11 +449,12 @@ L(check_offset):
 L(loop):
 	movdqu	(%esi,%edx), %xmm2
 	movdqu	(%edi,%edx), %xmm1
+	TOLOWER (%xmm2, %xmm1)
 	pcmpistri	$0x1a, %xmm2, %xmm1
 	jbe	L(end)
 
-#ifdef USE_AS_STRNCMP
-	sub	$16, %ebp
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+	sub	$16, REM
 	jbe	L(more16byteseq)
 #endif
 
@@ -211,13 +462,22 @@ L(loop):
 	jle	L(loop)
 L(crosspage):
 	movzbl	(%edi,%edx), %eax
-	movzbl	(%esi,%edx), %ebx
-	subl	%ebx, %eax
+	movzbl	(%esi,%edx), %ecx
+#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
+# ifdef PIC
+	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax
+	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
+# else
+	movl	_nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax
+	movl	_nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
+# endif
+#endif
+	subl	%ecx, %eax
 	jne	L(ret)
-	testl	%ebx, %ebx
+	testl	%ecx, %ecx
 	je	L(ret)
-#ifdef USE_AS_STRNCMP
-	sub	$1, %ebp
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+	sub	$1, REM
 	jbe	L(more16byteseq)
 #endif
 	inc	%edx
@@ -230,30 +490,44 @@ L(crosspage):
 	.p2align 4
 L(end):
 	jnc	L(ret)
-#ifdef USE_AS_STRNCMP
-	sub	%ecx, %ebp
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+	sub	%ecx, REM
 	jbe	L(more16byteseq)
 #endif
-	lea	(%ecx,%edx), %ebx
-	movzbl	(%edi,%ebx), %eax
-	movzbl	(%esi,%ebx), %ecx
+	lea	(%ecx,%edx), %ecx
+	movzbl	(%edi,%ecx), %eax
+	movzbl	(%esi,%ecx), %ecx
+#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
+# ifdef PIC
+	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax
+	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
+# else
+	movl	_nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax
+	movl	_nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
+# endif
+#endif
 	subl	%ecx, %eax
 L(ret):
 	POP	(%esi)
 	POP	(%edi)
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+	POP	(REM)
+#endif
+#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
+# ifdef PIC
 	POP	(%ebx)
-#ifdef USE_AS_STRNCMP
-	POP	(%ebp)
+# endif
 #endif
 	ret
 
 	.p2align 4
-#ifdef USE_AS_STRNCMP
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
 	cfi_restore_state
 L(more16byteseq):
 	POP	(%esi)
+# ifdef USE_AS_STRNCMP
 	POP	(%edi)
-	POP	(%ebx)
+# endif
 #endif
 L(eq):
 	xorl	%eax, %eax
@@ -269,27 +543,45 @@ L(neq_bigger):
 L(less16bytes):
 	add	$0xfefefeff, %ecx
 	jnc	L(less4bytes)
+#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
+	movd	%xmm3, %edi
+	xor	%edi, %ecx
+#else
 	xor	(%edx), %ecx
+#endif
 	or	$0xfefefeff, %ecx
 	add	$1, %ecx
 	jnz	L(less4bytes)
 
-#ifdef USE_AS_STRNCMP
-	cmp	$4, %ebp
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+	cmp	$4, REM
 	jbe	L(eq)
 #endif
+#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
+	psrldq	$4, %xmm3
+	psrldq	$4, %xmm4
+	movd	%xmm3, %ecx
+	movd	%xmm4, %edi
+	cmp	%edi, %ecx
+	mov	%ecx, %edi
+#else
 	mov	4(%edx), %ecx
 	cmp	4(%eax), %ecx
+#endif
 	jne	L(more4bytes)
 	add	$0xfefefeff, %ecx
 	jnc	L(more4bytes)
+#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
+	xor	%edi, %ecx
+#else
 	xor	4(%edx), %ecx
+#endif
 	or	$0xfefefeff, %ecx
 	add	$1, %ecx
 	jnz	L(more4bytes)
 
-#ifdef USE_AS_STRNCMP
-	sub	$8, %ebp
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+	sub	$8, REM
 	jbe	L(eq)
 #endif
 
@@ -298,80 +590,176 @@ L(less16bytes):
 L(less4bytes):
 
 	movzbl	(%eax), %ecx
+#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
+	movzbl	(%edx), %edi
+# ifdef PIC
+	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
+	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi
+# else
+	movl	_nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
+	movl	_nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi
+# endif
+	cmpl	%ecx, %edi
+#else
 	cmpb	%cl, (%edx)
+#endif
 	jne	L(neq)
 	cmpl	$0, %ecx
 	je	L(eq)
 
-#ifdef USE_AS_STRNCMP
-	cmp	$1, %ebp
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+	cmp	$1, REM
 	je	L(eq)
 #endif
 	movzbl	1(%eax), %ecx
+#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
+	movzbl	1(%edx), %edi
+# ifdef PIC
+	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
+	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi
+# else
+	movl	_nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
+	movl	_nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi
+# endif
+	cmpl	%ecx, %edi
+#else
 	cmpb	%cl, 1(%edx)
+#endif
 	jne	L(neq)
 	cmpl	$0, %ecx
 	je	L(eq)
 
-#ifdef USE_AS_STRNCMP
-	cmp	$2, %ebp
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+	cmp	$2, REM
 	je	L(eq)
 #endif
 
 	movzbl	2(%eax), %ecx
+#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
+	movzbl	2(%edx), %edi
+# ifdef PIC
+	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
+	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi
+# else
+	movl	_nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
+	movl	_nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi
+# endif
+	cmpl	%ecx, %edi
+#else
 	cmpb	%cl, 2(%edx)
+#endif
 	jne	L(neq)
 	cmpl	$0, %ecx
 	je	L(eq)
 
-#ifdef USE_AS_STRNCMP
-	cmp	$3, %ebp
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+	cmp	$3, REM
 	je	L(eq)
 #endif
 	movzbl	3(%eax), %ecx
+#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
+	movzbl	3(%edx), %edi
+# ifdef PIC
+	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
+	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi
+# else
+	movl	_nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
+	movl	_nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi
+# endif
+	cmpl	%ecx, %edi
+#else
 	cmpb	%cl, 3(%edx)
+#endif
 	jne	L(neq)
 	cmpl	$0, %ecx
 	je	L(eq)
 
 L(more4bytes):
-#ifdef USE_AS_STRNCMP
-	cmp	$4, %ebp
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+	cmp	$4, REM
 	je	L(eq)
 #endif
 	movzbl	4(%eax), %ecx
+#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
+	movzbl	4(%edx), %edi
+# ifdef PIC
+	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
+	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi
+# else
+	movl	_nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
+	movl	_nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi
+# endif
+	cmpl	%ecx, %edi
+#else
 	cmpb	%cl, 4(%edx)
+#endif
 	jne	L(neq)
 	cmpl	$0, %ecx
 	je	L(eq)
 
 
-#ifdef USE_AS_STRNCMP
-	cmp	$5, %ebp
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+	cmp	$5, REM
 	je	L(eq)
 #endif
 	movzbl	5(%eax), %ecx
+#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
+	movzbl	5(%edx), %edi
+# ifdef PIC
+	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
+	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi
+# else
+	movl	_nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
+	movl	_nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi
+# endif
+	cmpl	%ecx, %edi
+#else
 	cmpb	%cl, 5(%edx)
+#endif
 	jne	L(neq)
 	cmpl	$0, %ecx
 	je	L(eq)
 
-#ifdef USE_AS_STRNCMP
-	cmp	$6, %ebp
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+	cmp	$6, REM
 	je	L(eq)
 #endif
 	movzbl	6(%eax), %ecx
+#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
+	movzbl	6(%edx), %edi
+# ifdef PIC
+	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
+	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi
+# else
+	movl	_nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
+	movl	_nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi
+# endif
+	cmpl	%ecx, %edi
+#else
 	cmpb	%cl, 6(%edx)
+#endif
 	jne	L(neq)
 	cmpl	$0, %ecx
 	je	L(eq)
 
-#ifdef USE_AS_STRNCMP
-	cmp	$7, %ebp
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+	cmp	$7, REM
 	je	L(eq)
 #endif
 	movzbl	7(%eax), %ecx
+#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
+	movzbl	7(%edx), %edi
+# ifdef PIC
+	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
+	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi
+# else
+	movl	_nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
+	movl	_nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi
+# endif
+	cmpl	%ecx, %edi
+#else
 	cmpb	%cl, 7(%edx)
+#endif
 	jne	L(neq)
 	jmp	L(eq)