From 310872dd13d57f1aaf0f84078214e523b6cb9ead Mon Sep 17 00:00:00 2001 From: "H.J. Lu" Date: Wed, 29 Jul 2015 04:49:38 -0700 Subject: Use SSE optimized strcmp in x86-64 ld.so Since ld.so preserves vector registers now, we can use the SSE optimized strcmp in x86-64 ld.so. * sysdeps/x86_64/strcmp.S: Remove "#if !IS_IN (libc)". --- sysdeps/x86_64/strcmp.S | 469 ++++++++++++++++++++++-------------------------- 1 file changed, 216 insertions(+), 253 deletions(-) diff --git a/sysdeps/x86_64/strcmp.S b/sysdeps/x86_64/strcmp.S index 1329649d3a..1624b5de92 100644 --- a/sysdeps/x86_64/strcmp.S +++ b/sysdeps/x86_64/strcmp.S @@ -29,13 +29,6 @@ #endif #ifdef USE_AS_STRNCMP -/* The simplified code below is not set up to handle strncmp() so far. - Should this become necessary it has to be implemented. For now - just report the problem. */ -# if !IS_IN (libc) -# error "strncmp not implemented so far" -# endif - /* Since the counter, %r11, is unsigned, we branch to strcmp_exitz if the new counter > the old one or is 0. */ # define UPDATE_STRNCMP_COUNTER \ @@ -50,20 +43,10 @@ #elif defined USE_AS_STRCASECMP_L # include "locale-defines.h" -/* No support for strcasecmp outside libc so far since it is not needed. */ -# if !IS_IN (libc) -# error "strcasecmp_l not implemented so far" -# endif - # define UPDATE_STRNCMP_COUNTER #elif defined USE_AS_STRNCASECMP_L # include "locale-defines.h" -/* No support for strncasecmp outside libc so far since it is not needed. */ -# if !IS_IN (libc) -# error "strncasecmp_l not implemented so far" -# endif - # define UPDATE_STRNCMP_COUNTER \ /* calculate left number to compare */ \ lea -16(%rcx, %r11), %r9; \ @@ -126,63 +109,44 @@ libc_hidden_def (__strncasecmp) #endif ENTRY (STRCMP) -#if !IS_IN (libc) -/* Simple version since we can't use SSE registers in ld.so. 
*/ -L(oop): movb (%rdi), %al - cmpb (%rsi), %al - jne L(neq) - incq %rdi - incq %rsi - testb %al, %al - jnz L(oop) - - xorl %eax, %eax - ret - -L(neq): movl $1, %eax - movl $-1, %ecx - cmovbl %ecx, %eax - ret -END (STRCMP) -#else /* !IS_IN (libc) */ -# ifdef USE_AS_STRCASECMP_L +#ifdef USE_AS_STRCASECMP_L /* We have to fall back on the C implementation for locales with encodings not matching ASCII for single bytes. */ -# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0 +# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0 mov LOCALE_T___LOCALES+LC_CTYPE*LP_SIZE(%rdx), %RAX_LP -# else +# else mov (%rdx), %RAX_LP -# endif +# endif testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%rax) jne __strcasecmp_l_nonascii -# elif defined USE_AS_STRNCASECMP_L +#elif defined USE_AS_STRNCASECMP_L /* We have to fall back on the C implementation for locales with encodings not matching ASCII for single bytes. */ -# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0 +# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0 mov LOCALE_T___LOCALES+LC_CTYPE*LP_SIZE(%rcx), %RAX_LP -# else +# else mov (%rcx), %RAX_LP -# endif +# endif testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%rax) jne __strncasecmp_l_nonascii -# endif +#endif /* * This implementation uses SSE to compare up to 16 bytes at a time. */ -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L test %rdx, %rdx je LABEL(strcmp_exitz) cmp $1, %rdx je LABEL(Byte0) mov %rdx, %r11 -# endif +#endif mov %esi, %ecx mov %edi, %eax /* Use 64bit AND here to avoid long NOP padding. 
*/ and $0x3f, %rcx /* rsi alignment in cache line */ and $0x3f, %rax /* rdi alignment in cache line */ -# if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L .section .rodata.cst16,"aM",@progbits,16 .align 16 .Lbelowupper: @@ -196,12 +160,12 @@ END (STRCMP) .quad 0x2020202020202020 .previous movdqa .Lbelowupper(%rip), %xmm5 -# define UCLOW_reg %xmm5 +# define UCLOW_reg %xmm5 movdqa .Ltopupper(%rip), %xmm6 -# define UCHIGH_reg %xmm6 +# define UCHIGH_reg %xmm6 movdqa .Ltouppermask(%rip), %xmm7 -# define LCQWORD_reg %xmm7 -# endif +# define LCQWORD_reg %xmm7 +#endif cmp $0x30, %ecx ja LABEL(crosscache) /* rsi: 16-byte load will cross cache line */ cmp $0x30, %eax @@ -210,8 +174,8 @@ END (STRCMP) movlpd (%rsi), %xmm2 movhpd 8(%rdi), %xmm1 movhpd 8(%rsi), %xmm2 -# if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L -# define TOLOWER(reg1, reg2) \ +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L +# define TOLOWER(reg1, reg2) \ movdqa reg1, %xmm8; \ movdqa UCHIGH_reg, %xmm9; \ movdqa reg2, %xmm10; \ @@ -227,9 +191,9 @@ END (STRCMP) por %xmm8, reg1; \ por %xmm10, reg2 TOLOWER (%xmm1, %xmm2) -# else -# define TOLOWER(reg1, reg2) -# endif +#else +# define TOLOWER(reg1, reg2) +#endif pxor %xmm0, %xmm0 /* clear %xmm0 for null char checks */ pcmpeqb %xmm1, %xmm0 /* Any null chars? 
*/ pcmpeqb %xmm2, %xmm1 /* compare first 16 bytes for equality */ @@ -237,10 +201,10 @@ END (STRCMP) pmovmskb %xmm1, %edx sub $0xffff, %edx /* if first 16 bytes are same, edx == 0xffff */ jnz LABEL(less16bytes) /* If not, find different value or null char */ -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) /* finish comparision */ -# endif +#endif add $16, %rsi /* prepare to search next 16 bytes */ add $16, %rdi /* prepare to search next 16 bytes */ @@ -282,13 +246,13 @@ LABEL(ashr_0): movdqa (%rsi), %xmm1 pxor %xmm0, %xmm0 /* clear %xmm0 for null char check */ pcmpeqb %xmm1, %xmm0 /* Any null chars? */ -# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L +#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L pcmpeqb (%rdi), %xmm1 /* compare 16 bytes for equality */ -# else +#else movdqa (%rdi), %xmm2 TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm2, %xmm1 /* compare 16 bytes for equality */ -# endif +#endif psubb %xmm0, %xmm1 /* packed sub of comparison results*/ pmovmskb %xmm1, %r9d shr %cl, %edx /* adjust 0xffff for offset */ @@ -321,10 +285,10 @@ LABEL(loop_ashr_0): sub $0xffff, %edx jnz LABEL(exit) /* mismatch or null char seen */ -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) -# endif +#endif add $16, %rcx movdqa (%rsi, %rcx), %xmm1 movdqa (%rdi, %rcx), %xmm2 @@ -336,10 +300,10 @@ LABEL(loop_ashr_0): pmovmskb %xmm1, %edx sub $0xffff, %edx jnz LABEL(exit) -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) -# endif +#endif add $16, %rcx jmp LABEL(loop_ashr_0) @@ -388,13 +352,13 @@ LABEL(gobble_ashr_1): movdqa (%rdi, %rcx), %xmm2 movdqa %xmm2, %xmm4 /* store for next cycle */ -# ifndef USE_SSSE3 +#ifndef USE_SSSE3 psrldq 
$1, %xmm3 pslldq $15, %xmm2 por %xmm3, %xmm2 /* merge into one 16byte value */ -# else +#else palignr $1, %xmm3, %xmm2 /* merge into one 16byte value */ -# endif +#endif TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm0 @@ -404,10 +368,10 @@ LABEL(gobble_ashr_1): sub $0xffff, %edx jnz LABEL(exit) -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) -# endif +#endif add $16, %rcx movdqa %xmm4, %xmm3 @@ -418,13 +382,13 @@ LABEL(gobble_ashr_1): movdqa (%rdi, %rcx), %xmm2 movdqa %xmm2, %xmm4 /* store for next cycle */ -# ifndef USE_SSSE3 +#ifndef USE_SSSE3 psrldq $1, %xmm3 pslldq $15, %xmm2 por %xmm3, %xmm2 /* merge into one 16byte value */ -# else +#else palignr $1, %xmm3, %xmm2 /* merge into one 16byte value */ -# endif +#endif TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm0 @@ -434,10 +398,10 @@ LABEL(gobble_ashr_1): sub $0xffff, %edx jnz LABEL(exit) -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) -# endif +#endif add $16, %rcx movdqa %xmm4, %xmm3 jmp LABEL(loop_ashr_1) @@ -453,10 +417,10 @@ LABEL(nibble_ashr_1): test $0xfffe, %edx jnz LABEL(ashr_1_exittail) /* find null char*/ -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L cmp $15, %r11 jbe LABEL(ashr_1_exittail) -# endif +#endif pxor %xmm0, %xmm0 sub $0x1000, %r10 /* substract 4K from %r10 */ @@ -518,13 +482,13 @@ LABEL(gobble_ashr_2): movdqa (%rdi, %rcx), %xmm2 movdqa %xmm2, %xmm4 -# ifndef USE_SSSE3 +#ifndef USE_SSSE3 psrldq $2, %xmm3 pslldq $14, %xmm2 por %xmm3, %xmm2 /* merge into one 16byte value */ -# else +#else palignr $2, %xmm3, %xmm2 /* merge into one 16byte value */ -# endif +#endif TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm0 @@ -534,10 +498,10 @@ LABEL(gobble_ashr_2): sub $0xffff, %edx jnz LABEL(exit) -# if defined 
USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) -# endif +#endif add $16, %rcx movdqa %xmm4, %xmm3 @@ -549,13 +513,13 @@ LABEL(gobble_ashr_2): movdqa (%rdi, %rcx), %xmm2 movdqa %xmm2, %xmm4 -# ifndef USE_SSSE3 +#ifndef USE_SSSE3 psrldq $2, %xmm3 pslldq $14, %xmm2 por %xmm3, %xmm2 /* merge into one 16byte value */ -# else +#else palignr $2, %xmm3, %xmm2 /* merge into one 16byte value */ -# endif +#endif TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm0 @@ -565,10 +529,10 @@ LABEL(gobble_ashr_2): sub $0xffff, %edx jnz LABEL(exit) -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) -# endif +#endif add $16, %rcx movdqa %xmm4, %xmm3 @@ -581,10 +545,10 @@ LABEL(nibble_ashr_2): test $0xfffc, %edx jnz LABEL(ashr_2_exittail) -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L cmp $14, %r11 jbe LABEL(ashr_2_exittail) -# endif +#endif pxor %xmm0, %xmm0 sub $0x1000, %r10 @@ -643,13 +607,13 @@ LABEL(gobble_ashr_3): movdqa (%rdi, %rcx), %xmm2 movdqa %xmm2, %xmm4 -# ifndef USE_SSSE3 +#ifndef USE_SSSE3 psrldq $3, %xmm3 pslldq $13, %xmm2 por %xmm3, %xmm2 /* merge into one 16byte value */ -# else +#else palignr $3, %xmm3, %xmm2 /* merge into one 16byte value */ -# endif +#endif TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm0 @@ -659,10 +623,10 @@ LABEL(gobble_ashr_3): sub $0xffff, %edx jnz LABEL(exit) -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) -# endif +#endif add $16, %rcx movdqa %xmm4, %xmm3 @@ -674,13 +638,13 @@ LABEL(gobble_ashr_3): movdqa (%rdi, %rcx), %xmm2 movdqa %xmm2, %xmm4 -# ifndef USE_SSSE3 +#ifndef USE_SSSE3 psrldq $3, %xmm3 pslldq $13, %xmm2 por %xmm3, %xmm2 /* merge into one 16byte value */ -# 
else +#else palignr $3, %xmm3, %xmm2 /* merge into one 16byte value */ -# endif +#endif TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm0 @@ -690,10 +654,10 @@ LABEL(gobble_ashr_3): sub $0xffff, %edx jnz LABEL(exit) -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) -# endif +#endif add $16, %rcx movdqa %xmm4, %xmm3 @@ -706,10 +670,10 @@ LABEL(nibble_ashr_3): test $0xfff8, %edx jnz LABEL(ashr_3_exittail) -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L cmp $13, %r11 jbe LABEL(ashr_3_exittail) -# endif +#endif pxor %xmm0, %xmm0 sub $0x1000, %r10 @@ -768,13 +732,13 @@ LABEL(gobble_ashr_4): movdqa (%rdi, %rcx), %xmm2 movdqa %xmm2, %xmm4 -# ifndef USE_SSSE3 +#ifndef USE_SSSE3 psrldq $4, %xmm3 pslldq $12, %xmm2 por %xmm3, %xmm2 /* merge into one 16byte value */ -# else +#else palignr $4, %xmm3, %xmm2 /* merge into one 16byte value */ -# endif +#endif TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm0 @@ -784,10 +748,10 @@ LABEL(gobble_ashr_4): sub $0xffff, %edx jnz LABEL(exit) -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) -# endif +#endif add $16, %rcx movdqa %xmm4, %xmm3 @@ -799,13 +763,13 @@ LABEL(gobble_ashr_4): movdqa (%rdi, %rcx), %xmm2 movdqa %xmm2, %xmm4 -# ifndef USE_SSSE3 +#ifndef USE_SSSE3 psrldq $4, %xmm3 pslldq $12, %xmm2 por %xmm3, %xmm2 /* merge into one 16byte value */ -# else +#else palignr $4, %xmm3, %xmm2 /* merge into one 16byte value */ -# endif +#endif TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm0 @@ -815,10 +779,10 @@ LABEL(gobble_ashr_4): sub $0xffff, %edx jnz LABEL(exit) -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) -# endif +#endif add $16, %rcx movdqa %xmm4, 
%xmm3 @@ -831,10 +795,10 @@ LABEL(nibble_ashr_4): test $0xfff0, %edx jnz LABEL(ashr_4_exittail) -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L cmp $12, %r11 jbe LABEL(ashr_4_exittail) -# endif +#endif pxor %xmm0, %xmm0 sub $0x1000, %r10 @@ -893,13 +857,13 @@ LABEL(gobble_ashr_5): movdqa (%rdi, %rcx), %xmm2 movdqa %xmm2, %xmm4 -# ifndef USE_SSSE3 +#ifndef USE_SSSE3 psrldq $5, %xmm3 pslldq $11, %xmm2 por %xmm3, %xmm2 /* merge into one 16byte value */ -# else +#else palignr $5, %xmm3, %xmm2 /* merge into one 16byte value */ -# endif +#endif TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm0 @@ -909,10 +873,10 @@ LABEL(gobble_ashr_5): sub $0xffff, %edx jnz LABEL(exit) -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) -# endif +#endif add $16, %rcx movdqa %xmm4, %xmm3 @@ -924,13 +888,13 @@ LABEL(gobble_ashr_5): movdqa (%rdi, %rcx), %xmm2 movdqa %xmm2, %xmm4 -# ifndef USE_SSSE3 +#ifndef USE_SSSE3 psrldq $5, %xmm3 pslldq $11, %xmm2 por %xmm3, %xmm2 /* merge into one 16byte value */ -# else +#else palignr $5, %xmm3, %xmm2 /* merge into one 16byte value */ -# endif +#endif TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm0 @@ -940,10 +904,10 @@ LABEL(gobble_ashr_5): sub $0xffff, %edx jnz LABEL(exit) -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) -# endif +#endif add $16, %rcx movdqa %xmm4, %xmm3 @@ -956,10 +920,10 @@ LABEL(nibble_ashr_5): test $0xffe0, %edx jnz LABEL(ashr_5_exittail) -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L cmp $11, %r11 jbe LABEL(ashr_5_exittail) -# endif +#endif pxor %xmm0, %xmm0 sub $0x1000, %r10 @@ -1018,13 +982,13 @@ LABEL(gobble_ashr_6): movdqa (%rdi, %rcx), %xmm2 movdqa %xmm2, %xmm4 -# ifndef 
USE_SSSE3 +#ifndef USE_SSSE3 psrldq $6, %xmm3 pslldq $10, %xmm2 por %xmm3, %xmm2 /* merge into one 16byte value */ -# else +#else palignr $6, %xmm3, %xmm2 /* merge into one 16byte value */ -# endif +#endif TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm0 @@ -1034,10 +998,10 @@ LABEL(gobble_ashr_6): sub $0xffff, %edx jnz LABEL(exit) -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) -# endif +#endif add $16, %rcx movdqa %xmm4, %xmm3 @@ -1049,13 +1013,13 @@ LABEL(gobble_ashr_6): movdqa (%rdi, %rcx), %xmm2 movdqa %xmm2, %xmm4 -# ifndef USE_SSSE3 +#ifndef USE_SSSE3 psrldq $6, %xmm3 pslldq $10, %xmm2 por %xmm3, %xmm2 /* merge into one 16byte value */ -# else +#else palignr $6, %xmm3, %xmm2 /* merge into one 16byte value */ -# endif +#endif TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm0 @@ -1065,10 +1029,10 @@ LABEL(gobble_ashr_6): sub $0xffff, %edx jnz LABEL(exit) -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) -# endif +#endif add $16, %rcx movdqa %xmm4, %xmm3 @@ -1081,10 +1045,10 @@ LABEL(nibble_ashr_6): test $0xffc0, %edx jnz LABEL(ashr_6_exittail) -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L cmp $10, %r11 jbe LABEL(ashr_6_exittail) -# endif +#endif pxor %xmm0, %xmm0 sub $0x1000, %r10 @@ -1143,13 +1107,13 @@ LABEL(gobble_ashr_7): movdqa (%rdi, %rcx), %xmm2 movdqa %xmm2, %xmm4 -# ifndef USE_SSSE3 +#ifndef USE_SSSE3 psrldq $7, %xmm3 pslldq $9, %xmm2 por %xmm3, %xmm2 /* merge into one 16byte value */ -# else +#else palignr $7, %xmm3, %xmm2 /* merge into one 16byte value */ -# endif +#endif TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm0 @@ -1159,10 +1123,10 @@ LABEL(gobble_ashr_7): sub $0xffff, %edx jnz LABEL(exit) -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined 
USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) -# endif +#endif add $16, %rcx movdqa %xmm4, %xmm3 @@ -1174,13 +1138,13 @@ LABEL(gobble_ashr_7): movdqa (%rdi, %rcx), %xmm2 movdqa %xmm2, %xmm4 -# ifndef USE_SSSE3 +#ifndef USE_SSSE3 psrldq $7, %xmm3 pslldq $9, %xmm2 por %xmm3, %xmm2 /* merge into one 16byte value */ -# else +#else palignr $7, %xmm3, %xmm2 /* merge into one 16byte value */ -# endif +#endif TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm0 @@ -1190,10 +1154,10 @@ LABEL(gobble_ashr_7): sub $0xffff, %edx jnz LABEL(exit) -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) -# endif +#endif add $16, %rcx movdqa %xmm4, %xmm3 @@ -1206,10 +1170,10 @@ LABEL(nibble_ashr_7): test $0xff80, %edx jnz LABEL(ashr_7_exittail) -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L cmp $9, %r11 jbe LABEL(ashr_7_exittail) -# endif +#endif pxor %xmm0, %xmm0 sub $0x1000, %r10 @@ -1268,13 +1232,13 @@ LABEL(gobble_ashr_8): movdqa (%rdi, %rcx), %xmm2 movdqa %xmm2, %xmm4 -# ifndef USE_SSSE3 +#ifndef USE_SSSE3 psrldq $8, %xmm3 pslldq $8, %xmm2 por %xmm3, %xmm2 /* merge into one 16byte value */ -# else +#else palignr $8, %xmm3, %xmm2 /* merge into one 16byte value */ -# endif +#endif TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm0 @@ -1284,10 +1248,10 @@ LABEL(gobble_ashr_8): sub $0xffff, %edx jnz LABEL(exit) -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) -# endif +#endif add $16, %rcx movdqa %xmm4, %xmm3 @@ -1299,13 +1263,13 @@ LABEL(gobble_ashr_8): movdqa (%rdi, %rcx), %xmm2 movdqa %xmm2, %xmm4 -# ifndef USE_SSSE3 +#ifndef USE_SSSE3 psrldq $8, %xmm3 pslldq $8, %xmm2 por %xmm3, %xmm2 /* merge into one 16byte value */ -# else +#else palignr $8, %xmm3, %xmm2 /* merge into 
one 16byte value */ -# endif +#endif TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm0 @@ -1315,10 +1279,10 @@ LABEL(gobble_ashr_8): sub $0xffff, %edx jnz LABEL(exit) -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) -# endif +#endif add $16, %rcx movdqa %xmm4, %xmm3 @@ -1331,10 +1295,10 @@ LABEL(nibble_ashr_8): test $0xff00, %edx jnz LABEL(ashr_8_exittail) -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L cmp $8, %r11 jbe LABEL(ashr_8_exittail) -# endif +#endif pxor %xmm0, %xmm0 sub $0x1000, %r10 @@ -1393,13 +1357,13 @@ LABEL(gobble_ashr_9): movdqa (%rdi, %rcx), %xmm2 movdqa %xmm2, %xmm4 -# ifndef USE_SSSE3 +#ifndef USE_SSSE3 psrldq $9, %xmm3 pslldq $7, %xmm2 por %xmm3, %xmm2 /* merge into one 16byte value */ -# else +#else palignr $9, %xmm3, %xmm2 /* merge into one 16byte value */ -# endif +#endif TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm0 @@ -1409,10 +1373,10 @@ LABEL(gobble_ashr_9): sub $0xffff, %edx jnz LABEL(exit) -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) -# endif +#endif add $16, %rcx movdqa %xmm4, %xmm3 @@ -1424,13 +1388,13 @@ LABEL(gobble_ashr_9): movdqa (%rdi, %rcx), %xmm2 movdqa %xmm2, %xmm4 -# ifndef USE_SSSE3 +#ifndef USE_SSSE3 psrldq $9, %xmm3 pslldq $7, %xmm2 por %xmm3, %xmm2 /* merge into one 16byte value */ -# else +#else palignr $9, %xmm3, %xmm2 /* merge into one 16byte value */ -# endif +#endif TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm0 @@ -1440,10 +1404,10 @@ LABEL(gobble_ashr_9): sub $0xffff, %edx jnz LABEL(exit) -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) -# endif +#endif add $16, %rcx movdqa %xmm4, %xmm3 /* store for next cycle */ @@ -1456,10 
+1420,10 @@ LABEL(nibble_ashr_9): test $0xfe00, %edx jnz LABEL(ashr_9_exittail) -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L cmp $7, %r11 jbe LABEL(ashr_9_exittail) -# endif +#endif pxor %xmm0, %xmm0 sub $0x1000, %r10 @@ -1518,13 +1482,13 @@ LABEL(gobble_ashr_10): movdqa (%rdi, %rcx), %xmm2 movdqa %xmm2, %xmm4 -# ifndef USE_SSSE3 +#ifndef USE_SSSE3 psrldq $10, %xmm3 pslldq $6, %xmm2 por %xmm3, %xmm2 /* merge into one 16byte value */ -# else +#else palignr $10, %xmm3, %xmm2 /* merge into one 16byte value */ -# endif +#endif TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm0 @@ -1534,10 +1498,10 @@ LABEL(gobble_ashr_10): sub $0xffff, %edx jnz LABEL(exit) -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) -# endif +#endif add $16, %rcx movdqa %xmm4, %xmm3 @@ -1549,13 +1513,13 @@ LABEL(gobble_ashr_10): movdqa (%rdi, %rcx), %xmm2 movdqa %xmm2, %xmm4 -# ifndef USE_SSSE3 +#ifndef USE_SSSE3 psrldq $10, %xmm3 pslldq $6, %xmm2 por %xmm3, %xmm2 /* merge into one 16byte value */ -# else +#else palignr $10, %xmm3, %xmm2 /* merge into one 16byte value */ -# endif +#endif TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm0 @@ -1565,10 +1529,10 @@ LABEL(gobble_ashr_10): sub $0xffff, %edx jnz LABEL(exit) -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) -# endif +#endif add $16, %rcx movdqa %xmm4, %xmm3 @@ -1581,10 +1545,10 @@ LABEL(nibble_ashr_10): test $0xfc00, %edx jnz LABEL(ashr_10_exittail) -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L cmp $6, %r11 jbe LABEL(ashr_10_exittail) -# endif +#endif pxor %xmm0, %xmm0 sub $0x1000, %r10 @@ -1643,13 +1607,13 @@ LABEL(gobble_ashr_11): movdqa (%rdi, %rcx), %xmm2 movdqa %xmm2, %xmm4 -# ifndef 
USE_SSSE3 +#ifndef USE_SSSE3 psrldq $11, %xmm3 pslldq $5, %xmm2 por %xmm3, %xmm2 /* merge into one 16byte value */ -# else +#else palignr $11, %xmm3, %xmm2 /* merge into one 16byte value */ -# endif +#endif TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm0 @@ -1659,10 +1623,10 @@ LABEL(gobble_ashr_11): sub $0xffff, %edx jnz LABEL(exit) -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) -# endif +#endif add $16, %rcx movdqa %xmm4, %xmm3 @@ -1674,13 +1638,13 @@ LABEL(gobble_ashr_11): movdqa (%rdi, %rcx), %xmm2 movdqa %xmm2, %xmm4 -# ifndef USE_SSSE3 +#ifndef USE_SSSE3 psrldq $11, %xmm3 pslldq $5, %xmm2 por %xmm3, %xmm2 /* merge into one 16byte value */ -# else +#else palignr $11, %xmm3, %xmm2 /* merge into one 16byte value */ -# endif +#endif TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm0 @@ -1690,10 +1654,10 @@ LABEL(gobble_ashr_11): sub $0xffff, %edx jnz LABEL(exit) -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) -# endif +#endif add $16, %rcx movdqa %xmm4, %xmm3 @@ -1706,10 +1670,10 @@ LABEL(nibble_ashr_11): test $0xf800, %edx jnz LABEL(ashr_11_exittail) -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L cmp $5, %r11 jbe LABEL(ashr_11_exittail) -# endif +#endif pxor %xmm0, %xmm0 sub $0x1000, %r10 @@ -1768,13 +1732,13 @@ LABEL(gobble_ashr_12): movdqa (%rdi, %rcx), %xmm2 movdqa %xmm2, %xmm4 -# ifndef USE_SSSE3 +#ifndef USE_SSSE3 psrldq $12, %xmm3 pslldq $4, %xmm2 por %xmm3, %xmm2 /* merge into one 16byte value */ -# else +#else palignr $12, %xmm3, %xmm2 /* merge into one 16byte value */ -# endif +#endif TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm0 @@ -1784,10 +1748,10 @@ LABEL(gobble_ashr_12): sub $0xffff, %edx jnz LABEL(exit) -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if 
defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) -# endif +#endif add $16, %rcx movdqa %xmm4, %xmm3 @@ -1799,13 +1763,13 @@ LABEL(gobble_ashr_12): movdqa (%rdi, %rcx), %xmm2 movdqa %xmm2, %xmm4 -# ifndef USE_SSSE3 +#ifndef USE_SSSE3 psrldq $12, %xmm3 pslldq $4, %xmm2 por %xmm3, %xmm2 /* merge into one 16byte value */ -# else +#else palignr $12, %xmm3, %xmm2 /* merge into one 16byte value */ -# endif +#endif TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm0 @@ -1815,10 +1779,10 @@ LABEL(gobble_ashr_12): sub $0xffff, %edx jnz LABEL(exit) -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) -# endif +#endif add $16, %rcx movdqa %xmm4, %xmm3 @@ -1831,10 +1795,10 @@ LABEL(nibble_ashr_12): test $0xf000, %edx jnz LABEL(ashr_12_exittail) -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L cmp $4, %r11 jbe LABEL(ashr_12_exittail) -# endif +#endif pxor %xmm0, %xmm0 sub $0x1000, %r10 @@ -1893,13 +1857,13 @@ LABEL(gobble_ashr_13): movdqa (%rdi, %rcx), %xmm2 movdqa %xmm2, %xmm4 -# ifndef USE_SSSE3 +#ifndef USE_SSSE3 psrldq $13, %xmm3 pslldq $3, %xmm2 por %xmm3, %xmm2 /* merge into one 16byte value */ -# else +#else palignr $13, %xmm3, %xmm2 /* merge into one 16byte value */ -# endif +#endif TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm0 @@ -1909,10 +1873,10 @@ LABEL(gobble_ashr_13): sub $0xffff, %edx jnz LABEL(exit) -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) -# endif +#endif add $16, %rcx movdqa %xmm4, %xmm3 @@ -1924,13 +1888,13 @@ LABEL(gobble_ashr_13): movdqa (%rdi, %rcx), %xmm2 movdqa %xmm2, %xmm4 -# ifndef USE_SSSE3 +#ifndef USE_SSSE3 psrldq $13, %xmm3 pslldq $3, %xmm2 por %xmm3, %xmm2 /* merge into one 16byte value */ -# else +#else palignr $13, %xmm3, 
%xmm2 /* merge into one 16byte value */ -# endif +#endif TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm0 @@ -1940,10 +1904,10 @@ LABEL(gobble_ashr_13): sub $0xffff, %edx jnz LABEL(exit) -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) -# endif +#endif add $16, %rcx movdqa %xmm4, %xmm3 @@ -1956,10 +1920,10 @@ LABEL(nibble_ashr_13): test $0xe000, %edx jnz LABEL(ashr_13_exittail) -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L cmp $3, %r11 jbe LABEL(ashr_13_exittail) -# endif +#endif pxor %xmm0, %xmm0 sub $0x1000, %r10 @@ -2018,13 +1982,13 @@ LABEL(gobble_ashr_14): movdqa (%rdi, %rcx), %xmm2 movdqa %xmm2, %xmm4 -# ifndef USE_SSSE3 +#ifndef USE_SSSE3 psrldq $14, %xmm3 pslldq $2, %xmm2 por %xmm3, %xmm2 /* merge into one 16byte value */ -# else +#else palignr $14, %xmm3, %xmm2 /* merge into one 16byte value */ -# endif +#endif TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm0 @@ -2034,10 +1998,10 @@ LABEL(gobble_ashr_14): sub $0xffff, %edx jnz LABEL(exit) -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) -# endif +#endif add $16, %rcx movdqa %xmm4, %xmm3 @@ -2049,13 +2013,13 @@ LABEL(gobble_ashr_14): movdqa (%rdi, %rcx), %xmm2 movdqa %xmm2, %xmm4 -# ifndef USE_SSSE3 +#ifndef USE_SSSE3 psrldq $14, %xmm3 pslldq $2, %xmm2 por %xmm3, %xmm2 /* merge into one 16byte value */ -# else +#else palignr $14, %xmm3, %xmm2 /* merge into one 16byte value */ -# endif +#endif TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm0 @@ -2065,10 +2029,10 @@ LABEL(gobble_ashr_14): sub $0xffff, %edx jnz LABEL(exit) -# if defined USE_AS_STRNCMP | defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP | defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) -# endif +#endif add $16, %rcx movdqa %xmm4, %xmm3 @@ 
-2081,10 +2045,10 @@ LABEL(nibble_ashr_14): test $0xc000, %edx jnz LABEL(ashr_14_exittail) -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L cmp $2, %r11 jbe LABEL(ashr_14_exittail) -# endif +#endif pxor %xmm0, %xmm0 sub $0x1000, %r10 @@ -2145,13 +2109,13 @@ LABEL(gobble_ashr_15): movdqa (%rdi, %rcx), %xmm2 movdqa %xmm2, %xmm4 -# ifndef USE_SSSE3 +#ifndef USE_SSSE3 psrldq $15, %xmm3 pslldq $1, %xmm2 por %xmm3, %xmm2 /* merge into one 16byte value */ -# else +#else palignr $15, %xmm3, %xmm2 /* merge into one 16byte value */ -# endif +#endif TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm0 @@ -2161,10 +2125,10 @@ LABEL(gobble_ashr_15): sub $0xffff, %edx jnz LABEL(exit) -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) -# endif +#endif add $16, %rcx movdqa %xmm4, %xmm3 @@ -2176,13 +2140,13 @@ LABEL(gobble_ashr_15): movdqa (%rdi, %rcx), %xmm2 movdqa %xmm2, %xmm4 -# ifndef USE_SSSE3 +#ifndef USE_SSSE3 psrldq $15, %xmm3 pslldq $1, %xmm2 por %xmm3, %xmm2 /* merge into one 16byte value */ -# else +#else palignr $15, %xmm3, %xmm2 /* merge into one 16byte value */ -# endif +#endif TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm0 @@ -2192,10 +2156,10 @@ LABEL(gobble_ashr_15): sub $0xffff, %edx jnz LABEL(exit) -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) -# endif +#endif add $16, %rcx movdqa %xmm4, %xmm3 @@ -2208,10 +2172,10 @@ LABEL(nibble_ashr_15): test $0x8000, %edx jnz LABEL(ashr_15_exittail) -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L cmpq $1, %r11 jbe LABEL(ashr_15_exittail) -# endif +#endif pxor %xmm0, %xmm0 sub $0x1000, %r10 @@ -2246,18 +2210,18 @@ LABEL(ret): LABEL(less16bytes): bsf %rdx, %rdx /* find and store 
bit index in %rdx */ -# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub %rdx, %r11 jbe LABEL(strcmp_exitz) -# endif +#endif movzbl (%rsi, %rdx), %ecx movzbl (%rdi, %rdx), %eax -# if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L leaq _nl_C_LC_CTYPE_tolower+128*4(%rip), %rdx movl (%rdx,%rcx,4), %ecx movl (%rdx,%rax,4), %eax -# endif +#endif sub %ecx, %eax ret @@ -2271,11 +2235,11 @@ LABEL(Byte0): movzx (%rsi), %ecx movzx (%rdi), %eax -# if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L leaq _nl_C_LC_CTYPE_tolower+128*4(%rip), %rdx movl (%rdx,%rcx,4), %ecx movl (%rdx,%rax,4), %eax -# endif +#endif sub %ecx, %eax ret @@ -2300,5 +2264,4 @@ LABEL(unaligned_table): .int LABEL(ashr_14) - LABEL(unaligned_table) .int LABEL(ashr_15) - LABEL(unaligned_table) .int LABEL(ashr_0) - LABEL(unaligned_table) -#endif /* !IS_IN (libc) */ libc_hidden_builtin_def (STRCMP) -- cgit 1.4.1