about summary refs log tree commit diff
diff options
context:
space:
mode:
authorOndřej Bílka <neleai@seznam.cz>2015-06-26 22:19:29 +0200
committerOndřej Bílka <neleai@seznam.cz>2015-06-26 22:19:29 +0200
commit165308eb2c66542c88d002d63dc68df112f5c818 (patch)
tree661b192903d8ad1638deab4250b36eab3653fa56
parent0b69916d3c02dfab7987e26325a100815217faa1 (diff)
downloadglibc-165308eb2c66542c88d002d63dc68df112f5c818.tar.gz
glibc-165308eb2c66542c88d002d63dc68df112f5c818.tar.xz
glibc-165308eb2c66542c88d002d63dc68df112f5c818.zip
Optimize sse4 strspn/strcspn/strpbrk
-rw-r--r--sysdeps/x86_64/multiarch/Makefile6
-rw-r--r--sysdeps/x86_64/multiarch/strcspn-c.c173
-rw-r--r--sysdeps/x86_64/multiarch/strcspn_sse42.S3
-rw-r--r--sysdeps/x86_64/multiarch/strpbrk-c.c8
-rw-r--r--sysdeps/x86_64/multiarch/strpbrk_sse42.S204
-rw-r--r--sysdeps/x86_64/multiarch/strspn-c.c145
-rw-r--r--sysdeps/x86_64/multiarch/strspn_sse42.S3
-rw-r--r--sysdeps/x86_64/multiarch/varshift.c25
-rw-r--r--sysdeps/x86_64/multiarch/varshift.h30
9 files changed, 211 insertions, 386 deletions
diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile
index 80941621eb..05d5c9ba30 100644
--- a/sysdeps/x86_64/multiarch/Makefile
+++ b/sysdeps/x86_64/multiarch/Makefile
@@ -22,11 +22,7 @@ sysdep_routines += strncat-c stpncpy-c strncpy-c strcmp-ssse3 \
 		   strchr-sse2-no-bsf memcmp-ssse3 strstr-sse2-unaligned
 
 ifeq (yes,$(config-cflags-sse4))
-sysdep_routines += strcspn-c strpbrk-c strspn-c varshift
-CFLAGS-varshift.c += -msse4
-CFLAGS-strcspn-c.c += -msse4
-CFLAGS-strpbrk-c.c += -msse4
-CFLAGS-strspn-c.c += -msse4
+sysdep_routines += strcspn_sse42 strpbrk_sse42 strspn_sse42
 endif
 
 ifeq (yes,$(config-cflags-avx2))
diff --git a/sysdeps/x86_64/multiarch/strcspn-c.c b/sysdeps/x86_64/multiarch/strcspn-c.c
deleted file mode 100644
index 60b2ed7a3f..0000000000
--- a/sysdeps/x86_64/multiarch/strcspn-c.c
+++ /dev/null
@@ -1,173 +0,0 @@
-/* strcspn with SSE4.2 intrinsics
-   Copyright (C) 2009-2015 Free Software Foundation, Inc.
-   Contributed by Intel Corporation.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <nmmintrin.h>
-#include <string.h>
-#include "varshift.h"
-
-/* We use 0x2:
-	_SIDD_SBYTE_OPS
-	| _SIDD_CMP_EQUAL_ANY
-	| _SIDD_POSITIVE_POLARITY
-	| _SIDD_LEAST_SIGNIFICANT
-   on pcmpistri to compare xmm/mem128
-
-   0 1 2 3 4 5 6 7 8 9 A B C D E F
-   X X X X X X X X X X X X X X X X
-
-   against xmm
-
-   0 1 2 3 4 5 6 7 8 9 A B C D E F
-   A A A A A A A A A A A A A A A A
-
-   to find out if the first 16byte data element has any byte A and
-   the offset of the first byte.  There are 3 cases:
-
-   1. The first 16byte data element has the byte A at the offset X.
-   2. The first 16byte data element has EOS and doesn't have the byte A.
-   3. The first 16byte data element is valid and doesn't have the byte A.
-
-   Here is the table of ECX, CFlag, ZFlag and SFlag for 2 cases:
-
-    1		 X	  1	 0/1	  0
-    2		16	  0	  1	  0
-    3		16	  0	  0	  0
-
-   We exit from the loop for cases 1 and 2 with jbe which branches
-   when either CFlag or ZFlag is 1.  If CFlag == 1, ECX has the offset
-   X for case 1.  */
-
-#ifndef STRCSPN_SSE2
-# define STRCSPN_SSE2 __strcspn_sse2
-# define STRCSPN_SSE42 __strcspn_sse42
-#endif
-
-#ifdef USE_AS_STRPBRK
-# define RETURN(val1, val2) return val1
-#else
-# define RETURN(val1, val2) return val2
-#endif
-
-extern
-#ifdef USE_AS_STRPBRK
-char *
-#else
-size_t
-#endif
-STRCSPN_SSE2 (const char *, const char *);
-
-
-#ifdef USE_AS_STRPBRK
-char *
-#else
-size_t
-#endif
-__attribute__ ((section (".text.sse4.2")))
-STRCSPN_SSE42 (const char *s, const char *a)
-{
-  if (*a == 0)
-    RETURN (NULL, strlen (s));
-
-  const char *aligned;
-  __m128i mask;
-  int offset = (int) ((size_t) a & 15);
-  if (offset != 0)
-    {
-      /* Load masks.  */
-      aligned = (const char *) ((size_t) a & -16L);
-      __m128i mask0 = _mm_load_si128 ((__m128i *) aligned);
-
-      mask = __m128i_shift_right (mask0, offset);
-
-      /* Find where the NULL terminator is.  */
-      int length = _mm_cmpistri (mask, mask, 0x3a);
-      if (length == 16 - offset)
-	{
-	  /* There is no NULL terminator.  */
-	  __m128i mask1 = _mm_load_si128 ((__m128i *) (aligned + 16));
-	  int index = _mm_cmpistri (mask1, mask1, 0x3a);
-	  length += index;
-
-	  /* Don't use SSE4.2 if the length of A > 16.  */
-	  if (length > 16)
-	    return STRCSPN_SSE2 (s, a);
-
-	  if (index != 0)
-	    {
-	      /* Combine mask0 and mask1.  We could play games with
-		 palignr, but frankly this data should be in L1 now
-		 so do the merge via an unaligned load.  */
-	      mask = _mm_loadu_si128 ((__m128i *) a);
-	    }
-	}
-    }
-  else
-    {
-      /* A is aligned.  */
-      mask = _mm_load_si128 ((__m128i *) a);
-
-      /* Find where the NULL terminator is.  */
-      int length = _mm_cmpistri (mask, mask, 0x3a);
-      if (length == 16)
-	{
-	  /* There is no NULL terminator.  Don't use SSE4.2 if the length
-	     of A > 16.  */
-	  if (a[16] != 0)
-	    return STRCSPN_SSE2 (s, a);
-	}
-    }
-
-  offset = (int) ((size_t) s & 15);
-  if (offset != 0)
-    {
-      /* Check partial string.  */
-      aligned = (const char *) ((size_t) s & -16L);
-      __m128i value = _mm_load_si128 ((__m128i *) aligned);
-
-      value = __m128i_shift_right (value, offset);
-
-      int length = _mm_cmpistri (mask, value, 0x2);
-      /* No need to check ZFlag since ZFlag is always 1.  */
-      int cflag = _mm_cmpistrc (mask, value, 0x2);
-      if (cflag)
-	RETURN ((char *) (s + length), length);
-      /* Find where the NULL terminator is.  */
-      int index = _mm_cmpistri (value, value, 0x3a);
-      if (index < 16 - offset)
-	RETURN (NULL, index);
-      aligned += 16;
-    }
-  else
-    aligned = s;
-
-  while (1)
-    {
-      __m128i value = _mm_load_si128 ((__m128i *) aligned);
-      int index = _mm_cmpistri (mask, value, 0x2);
-      int cflag = _mm_cmpistrc (mask, value, 0x2);
-      int zflag = _mm_cmpistrz (mask, value, 0x2);
-      if (cflag)
-	RETURN ((char *) (aligned + index), (size_t) (aligned + index - s));
-      if (zflag)
-	RETURN (NULL,
-		/* Find where the NULL terminator is.  */
-		(size_t) (aligned + _mm_cmpistri (value, value, 0x3a) - s));
-      aligned += 16;
-    }
-}
diff --git a/sysdeps/x86_64/multiarch/strcspn_sse42.S b/sysdeps/x86_64/multiarch/strcspn_sse42.S
new file mode 100644
index 0000000000..3e4e659937
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strcspn_sse42.S
@@ -0,0 +1,3 @@
+#define AS_STRCSPN	/* Select the length-returning paths of the template.  */
+#define __strpbrk_sse42 __strcspn_sse42	/* Rename the ENTRY/END symbol.  */
+#include "strpbrk_sse42.S"
diff --git a/sysdeps/x86_64/multiarch/strpbrk-c.c b/sysdeps/x86_64/multiarch/strpbrk-c.c
deleted file mode 100644
index bbf5c49d89..0000000000
--- a/sysdeps/x86_64/multiarch/strpbrk-c.c
+++ /dev/null
@@ -1,8 +0,0 @@
-/* Don't define multiple versions for strpbrk in static library since we
-   need strpbrk before the initialization happened.  */
-#ifdef SHARED
-# define USE_AS_STRPBRK
-# define STRCSPN_SSE2 __strpbrk_sse2
-# define STRCSPN_SSE42 __strpbrk_sse42
-# include "strcspn-c.c"
-#endif
diff --git a/sysdeps/x86_64/multiarch/strpbrk_sse42.S b/sysdeps/x86_64/multiarch/strpbrk_sse42.S
new file mode 100644
index 0000000000..512ac19d68
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strpbrk_sse42.S
@@ -0,0 +1,204 @@
+/* strpbrk, strcspn and strspn (str, ss) -- Scan STR with SSE4.2 for the
+			first byte that is in SS (strspn: that is not in SS).
+   Copyright (C) 1994-2015 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+
+#ifdef AS_STRSPN	/* strspn also returns a length, so it shares the strcspn paths.  */
+# define AS_STRCSPN
+# define MATCH_ALL $18	/* 0x12: signed bytes, equal-any, negative polarity.  */
+#else
+# define MATCH_ALL $2	/* 0x02: signed bytes, equal-any, positive polarity.  */
+#endif
+
+ENTRY(__strpbrk_sse42)
+	movq	%rdi, %rax
+	andl	$4095, %eax	/* Offset of STR (rdi) within a 4096-byte page.  */
+	cmp	$4032, %eax	/* Above 4096 - 64, the 64-byte load below would cross it.  */
+	ja	L(cross_page)
+	movq	%rsi, %rax
+	andl	$4095, %eax	/* Same test for the 16-byte load of SS (rsi).  */
+	cmp	$4080, %eax
+	ja	L(cross_page)
+	movdqu	(%rsi), %xmm4	/* xmm4 = first 16 bytes of SS.  */
+	movdqu	(%rdi), %xmm1	/* xmm1, xmm5, xmm6, xmm7 = first 64 bytes of STR.  */
+	movdqu	16(%rdi), %xmm5
+	movdqu	32(%rdi), %xmm6
+	movdqu	48(%rdi), %xmm7
+
+L(back_from_crosspage):	/* In: xmm4 = SS bytes, xmm1/5/6/7 = STR bytes 0..63.  */
+	pxor	%xmm3, %xmm3	/* xmm3 is not read anywhere below.  */
+	pxor	%xmm2, %xmm2
+
+	pcmpeqb	%xmm4, %xmm2	/* Flag the NUL bytes among the 16 bytes of SS.  */
+	pmovmskb %xmm2, %eax
+	testl	%eax, %eax
+	je	L(call)		/* No NUL: SS has 16 or more bytes, hand over.  */
+	pcmpistri MATCH_ALL, %xmm1, %xmm4	/* Set = xmm4, text = STR bytes 0..15.  */
+	jc	L(rx0)		/* CF: stopping byte found at index ecx.  */
+	je	L(ret0)		/* ZF: STR ends in this chunk, nothing found.  */
+	pcmpistri MATCH_ALL, %xmm5, %xmm4	/* STR bytes 16..31.  */
+	jc	L(rx16)
+	je	L(ret16)
+	pcmpistri MATCH_ALL, %xmm6, %xmm4	/* STR bytes 32..47.  */
+	jc	L(rx32)
+	je	L(ret32)
+	pcmpistri MATCH_ALL, %xmm7, %xmm4	/* STR bytes 48..63.  */
+	jc	L(rx48)
+	je	L(ret48)
+
+	/* Bytes 0..63 of STR are done: resume at the 16-byte boundary <= STR+64.  */
+	leaq	64(%rdi), %rax
+	andq	$-16, %rax
+	.p2align 4,,10
+	.p2align 3
+L(loop):
+	pcmpistri MATCH_ALL, (%rax), %xmm4	/* Aligned 16-byte chunk of STR.  */
+	lea	16(%rax), %rax		/* lea leaves the flags untouched.  */
+	jc	L(rx_loop)		/* CF: stopping byte at index ecx.  */
+	jne	L(loop)			/* ZF clear: no NUL in the chunk, go on.  */
+#ifdef AS_STRCSPN
+	movdqa -16(%rax), %xmm1		/* Reload the chunk that holds the NUL.  */
+	pcmpistri $58, %xmm1, %xmm1	/* 0x3a: ecx = index of that NUL.  */
+	lea	-16(%rcx, %rax), %rax	/* Address of the NUL ...  */
+	sub	%rdi, %rax		/* ... minus STR gives the length.  */
+#else
+	xor	%eax, %eax		/* strpbrk: nothing found, return NULL.  */
+#endif
+	ret
+L(rx_loop):
+	lea	-16(%rcx, %rax), %rax	/* Chunk address + ecx = stopping byte.  */
+#ifdef AS_STRCSPN
+	sub	%rdi, %rax		/* Lengths are relative to STR.  */
+#endif
+	ret
+	.p2align 4,,10
+	.p2align 3
+#ifndef AS_STRCSPN
+L(ret0):	/* strpbrk: STR ended within its first 64 bytes, nothing found.  */
+L(ret16):
+L(ret32):
+L(ret48):
+	xorl	%eax, %eax	/* Return NULL.  */
+	ret
+#endif
+L(call):	/* Reached when the 16 bytes of SS in xmm4 hold no NUL.  */
+#ifdef AS_STRCSPN
+# ifdef AS_STRSPN
+	jmp	__strspn_sse2	/* Tail jump; rdi and rsi still hold STR and SS.  */
+# else
+	jmp	__strcspn_sse2
+# endif
+#else
+	jmp	__strpbrk_sse2
+#endif
+	.p2align 4,,10
+	.p2align 3
+#ifdef AS_STRCSPN
+L(ret0):
+	pcmpistri $58, %xmm1, %xmm1	/* 0x3a: ecx = index of the NUL in bytes 0..15.  */
+L(rx0):
+	lea	0(%rcx), %rax	/* Length = chunk offset 0 + ecx.  */
+#else
+L(rx0):
+	leaq	(%rdi,%rcx), %rax	/* Pointer = STR + chunk offset 0 + ecx.  */
+#endif
+	ret
+#ifdef AS_STRCSPN
+L(ret16):
+	pcmpistri $58, %xmm5, %xmm5	/* ecx = index of the NUL in bytes 16..31.  */
+L(rx16):
+	lea	16(%rcx), %rax	/* Length = 16 + ecx.  */
+#else
+L(rx16):
+	leaq	16(%rdi,%rcx), %rax	/* Pointer = STR + 16 + ecx.  */
+#endif
+	ret
+#ifdef AS_STRCSPN
+L(ret32):
+	pcmpistri $58, %xmm6, %xmm6	/* ecx = index of the NUL in bytes 32..47.  */
+L(rx32):
+	lea	32(%rcx), %rax	/* Length = 32 + ecx.  */
+#else
+L(rx32):
+	leaq	32(%rdi,%rcx), %rax	/* Pointer = STR + 32 + ecx.  */
+#endif
+	ret
+#ifdef AS_STRCSPN
+L(ret48):
+	pcmpistri $58, %xmm7, %xmm7	/* ecx = index of the NUL in bytes 48..63.  */
+L(rx48):
+	lea	48(%rcx), %rax	/* Length = 48 + ecx.  */
+#else
+L(rx48):
+	leaq	48(%rdi,%rcx), %rax	/* Pointer = STR + 48 + ecx.  */
+#endif
+	ret
+
+	.p2align 4,,10
+	.p2align 3
+L(cross_page):	/* Slow path: build both vectors through a byte-wise copy.  */
+	movzbl	(%rdi), %ecx
+	xorl	%eax, %eax
+	leaq	-80(%rsp), %r8	/* NOTE(review): scratch below rsp, presumably the red zone; confirm.  */
+	testb	%cl, %cl
+	je	L(sloop_end)	/* STR is empty: the copy is just a NUL.  */
+	leaq	-80(%rsp), %r8	/* r8 and eax (below) already hold these values.  */
+	xorl	%edx, %edx
+	xorl	%eax, %eax
+	.p2align 4,,10
+	.p2align 3
+L(sloop):	/* Copy STR byte by byte; stop at its NUL or after 64 bytes.  */
+	movb	%cl, (%r8,%rdx)
+	movzbl	1(%rdi,%rdx), %ecx
+	addl	$1, %eax	/* eax = number of bytes copied so far.  */
+	testb	%cl, %cl
+	je	L(sloop_end)
+	addq	$1, %rdx
+	cmpl	$64, %eax
+	jne	L(sloop)
+L(sloop_end):
+	movzbl	(%rsi), %ecx	/* First byte of SS, used by the second copy.  */
+	cltq
+	movb	$0, -80(%rsp,%rax)	/* NUL-terminate the copy of STR.  */
+	movdqu	(%r8), %xmm1	/* Same register layout as the fast path.  */
+	movdqu	16(%r8), %xmm5
+	movdqu	32(%r8), %xmm6
+	movdqu	48(%r8), %xmm7
+
+	xorl	%eax, %eax
+	testb	%cl, %cl
+	je	L(aloop_end)	/* SS is empty.  */
+	xorl	%edx, %edx
+	.p2align 4,,10
+	.p2align 3
+L(aloop):	/* Copy SS into the same buffer; stop at its NUL or after 16 bytes.  */
+	movb	%cl, (%r8,%rdx)
+	movzbl	1(%rsi,%rdx), %ecx
+	addl	$1, %eax
+	testb	%cl, %cl
+	je	L(aloop_end)
+	addq	$1, %rdx
+	cmpl	$16, %eax
+	jne	L(aloop)
+L(aloop_end):
+	cltq
+	movb	$0, -80(%rsp,%rax)	/* At eax = 16 this NUL falls outside xmm4.  */
+	movdqu	(%r8), %xmm4	/* No NUL in xmm4 sends the common code to L(call).  */
+	jmp	L(back_from_crosspage)
+END(__strpbrk_sse42)
diff --git a/sysdeps/x86_64/multiarch/strspn-c.c b/sysdeps/x86_64/multiarch/strspn-c.c
deleted file mode 100644
index 6b0c80aa43..0000000000
--- a/sysdeps/x86_64/multiarch/strspn-c.c
+++ /dev/null
@@ -1,145 +0,0 @@
-/* strspn with SSE4.2 intrinsics
-   Copyright (C) 2009-2015 Free Software Foundation, Inc.
-   Contributed by Intel Corporation.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <nmmintrin.h>
-#include <string.h>
-#include "varshift.h"
-
-/* We use 0x12:
-	_SIDD_SBYTE_OPS
-	| _SIDD_CMP_EQUAL_ANY
-	| _SIDD_NEGATIVE_POLARITY
-	| _SIDD_LEAST_SIGNIFICANT
-   on pcmpistri to compare xmm/mem128
-
-   0 1 2 3 4 5 6 7 8 9 A B C D E F
-   X X X X X X X X X X X X X X X X
-
-   against xmm
-
-   0 1 2 3 4 5 6 7 8 9 A B C D E F
-   A A A A A A A A A A A A A A A A
-
-   to find out if the first 16byte data element has any non-A byte and
-   the offset of the first byte.  There are 2 cases:
-
-   1. The first 16byte data element has the non-A byte, including
-      EOS, at the offset X.
-   2. The first 16byte data element is valid and doesn't have the non-A
-      byte.
-
-   Here is the table of ECX, CFlag, ZFlag and SFlag for 2 cases:
-
-   case		ECX	CFlag	ZFlag	SFlag
-    1		 X	  1	 0/1	  0
-    2		16	  0	  0	  0
-
-   We exit from the loop for case 1.  */
-
-extern size_t __strspn_sse2 (const char *, const char *);
-
-
-size_t
-__attribute__ ((section (".text.sse4.2")))
-__strspn_sse42 (const char *s, const char *a)
-{
-  if (*a == 0)
-    return 0;
-
-  const char *aligned;
-  __m128i mask;
-  int offset = (int) ((size_t) a & 15);
-  if (offset != 0)
-    {
-      /* Load masks.  */
-      aligned = (const char *) ((size_t) a & -16L);
-      __m128i mask0 = _mm_load_si128 ((__m128i *) aligned);
-
-      mask = __m128i_shift_right (mask0, offset);
-
-      /* Find where the NULL terminator is.  */
-      int length = _mm_cmpistri (mask, mask, 0x3a);
-      if (length == 16 - offset)
-	{
-	  /* There is no NULL terminator.  */
-	  __m128i mask1 = _mm_load_si128 ((__m128i *) (aligned + 16));
-	  int index = _mm_cmpistri (mask1, mask1, 0x3a);
-	  length += index;
-
-	  /* Don't use SSE4.2 if the length of A > 16.  */
-	  if (length > 16)
-	    return __strspn_sse2 (s, a);
-
-	  if (index != 0)
-	    {
-	      /* Combine mask0 and mask1.  We could play games with
-		 palignr, but frankly this data should be in L1 now
-		 so do the merge via an unaligned load.  */
-	      mask = _mm_loadu_si128 ((__m128i *) a);
-	    }
-	}
-    }
-  else
-    {
-      /* A is aligned.  */
-      mask = _mm_load_si128 ((__m128i *) a);
-
-      /* Find where the NULL terminator is.  */
-      int length = _mm_cmpistri (mask, mask, 0x3a);
-      if (length == 16)
-	{
-	  /* There is no NULL terminator.  Don't use SSE4.2 if the length
-	     of A > 16.  */
-	  if (a[16] != 0)
-	    return __strspn_sse2 (s, a);
-	}
-    }
-
-  offset = (int) ((size_t) s & 15);
-  if (offset != 0)
-    {
-      /* Check partial string.  */
-      aligned = (const char *) ((size_t) s & -16L);
-      __m128i value = _mm_load_si128 ((__m128i *) aligned);
-
-      value = __m128i_shift_right (value, offset);
-
-      int length = _mm_cmpistri (mask, value, 0x12);
-      /* No need to check CFlag since it is always 1.  */
-      if (length < 16 - offset)
-	return length;
-      /* Find where the NULL terminator is.  */
-      int index = _mm_cmpistri (value, value, 0x3a);
-      if (index < 16 - offset)
-	return length;
-      aligned += 16;
-    }
-  else
-    aligned = s;
-
-  while (1)
-    {
-      __m128i value = _mm_load_si128 ((__m128i *) aligned);
-      int index = _mm_cmpistri (mask, value, 0x12);
-      int cflag = _mm_cmpistrc (mask, value, 0x12);
-      if (cflag)
-	return (size_t) (aligned + index - s);
-      aligned += 16;
-    }
-}
diff --git a/sysdeps/x86_64/multiarch/strspn_sse42.S b/sysdeps/x86_64/multiarch/strspn_sse42.S
new file mode 100644
index 0000000000..d460167773
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strspn_sse42.S
@@ -0,0 +1,3 @@
+#define AS_STRSPN	/* Select negative-polarity matching in the template.  */
+#define __strpbrk_sse42 __strspn_sse42	/* Rename the ENTRY/END symbol.  */
+#include "strpbrk_sse42.S"
diff --git a/sysdeps/x86_64/multiarch/varshift.c b/sysdeps/x86_64/multiarch/varshift.c
deleted file mode 100644
index 0007ef79e5..0000000000
--- a/sysdeps/x86_64/multiarch/varshift.c
+++ /dev/null
@@ -1,25 +0,0 @@
-/* Helper for variable shifts of SSE registers.
-   Copyright (C) 2010-2015 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include "varshift.h"
-
-const int8_t ___m128i_shift_right[31] attribute_hidden =
-  {
-    0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
-    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
-  };
diff --git a/sysdeps/x86_64/multiarch/varshift.h b/sysdeps/x86_64/multiarch/varshift.h
deleted file mode 100644
index 30ace3d914..0000000000
--- a/sysdeps/x86_64/multiarch/varshift.h
+++ /dev/null
@@ -1,30 +0,0 @@
-/* Helper for variable shifts of SSE registers.
-   Copyright (C) 2010-2015 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <stdint.h>
-#include <tmmintrin.h>
-
-extern const int8_t ___m128i_shift_right[31] attribute_hidden;
-
-static __inline__ __m128i
-__m128i_shift_right (__m128i value, unsigned long int offset)
-{
-  return _mm_shuffle_epi8 (value,
-			   _mm_loadu_si128 ((__m128i *) (___m128i_shift_right
-							 + offset)));
-}