about summary refs log tree commit diff
diff options
context:
space:
mode:
authorH.J. Lu <hjl.tools@gmail.com>2011-09-21 15:21:28 -0700
committerH.J. Lu <hjl.tools@gmail.com>2016-03-28 06:11:19 -0700
commitc835e4cad72cfda9c2702047e6ac31a8aba8e61d (patch)
tree32a42fdc011570ed3d0bec28b3e1d3c4836c2a5c
parente41b395523040fcb58c7d378475720c2836d280c (diff)
downloadglibc-hjl/erms/i386.tar.gz
glibc-hjl/erms/i386.tar.xz
glibc-hjl/erms/i386.zip
Add 32-bit Enhanced REP MOVSB/STOSB (ERMS) memcpy/memset hjl/erms/i386
Add and test 32-bit memcpy/memset with Enhanced REP MOVSB/STOSB (ERMS).

	* sysdeps/i386/i686/multiarch/Makefile (sysdep_routines): Add
	bcopy-erms, memcpy-erms, memmove-erms, mempcpy-erms, bzero-erms
	and memset-erms.
	* sysdeps/i386/i686/multiarch/bcopy-erms.S: New file.
	* sysdeps/i386/i686/multiarch/bzero-erms.S: Likewise.
	* sysdeps/i386/i686/multiarch/memcpy-erms.S: Likewise.
	* sysdeps/i386/i686/multiarch/memmove-erms.S: Likewise.
	* sysdeps/i386/i686/multiarch/mempcpy-erms.S: Likewise.
	* sysdeps/i386/i686/multiarch/memset-erms.S: Likewise.
	* sysdeps/i386/i686/multiarch/ifunc-impl-list.c
	(__libc_ifunc_impl_list): Add __bcopy_erms, __bzero_erms,
	__memmove_chk_erms, __memmove_erms, __memset_chk_erms,
	__memset_erms, __memcpy_chk_erms, __memcpy_erms,
	__mempcpy_chk_erms and __mempcpy_erms.
-rw-r--r--sysdeps/i386/i686/multiarch/Makefile4
-rw-r--r--sysdeps/i386/i686/multiarch/bcopy-erms.S4
-rw-r--r--sysdeps/i386/i686/multiarch/bzero-erms.S3
-rw-r--r--sysdeps/i386/i686/multiarch/ifunc-impl-list.c14
-rw-r--r--sysdeps/i386/i686/multiarch/memcpy-erms.S102
-rw-r--r--sysdeps/i386/i686/multiarch/memmove-erms.S4
-rw-r--r--sysdeps/i386/i686/multiarch/mempcpy-erms.S4
-rw-r--r--sysdeps/i386/i686/multiarch/memset-erms.S69
8 files changed, 203 insertions, 1 deletions
diff --git a/sysdeps/i386/i686/multiarch/Makefile b/sysdeps/i386/i686/multiarch/Makefile
index 700010d969..6bcef4c21d 100644
--- a/sysdeps/i386/i686/multiarch/Makefile
+++ b/sysdeps/i386/i686/multiarch/Makefile
@@ -25,7 +25,9 @@ sysdep_routines += bzero-sse2 memset-sse2 memcpy-ssse3 mempcpy-ssse3 \
 		   strcasecmp_l-sse4 strncase_l-sse4 \
 		   bcopy-sse2-unaligned memcpy-sse2-unaligned \
 		   mempcpy-sse2-unaligned memmove-sse2-unaligned \
-		   strcspn-c strpbrk-c strspn-c
+		   strcspn-c strpbrk-c strspn-c \
+		   bcopy-erms memcpy-erms memmove-erms mempcpy-erms \
+		   bzero-erms memset-erms
 CFLAGS-varshift.c += -msse4
 CFLAGS-strcspn-c.c += -msse4
 CFLAGS-strpbrk-c.c += -msse4
diff --git a/sysdeps/i386/i686/multiarch/bcopy-erms.S b/sysdeps/i386/i686/multiarch/bcopy-erms.S
new file mode 100644
index 0000000000..da9e160b53
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/bcopy-erms.S
@@ -0,0 +1,4 @@
+#define USE_AS_MEMMOVE
+#define USE_AS_BCOPY
+#define MEMCPY		__bcopy_erms
+#include "memcpy-erms.S"
diff --git a/sysdeps/i386/i686/multiarch/bzero-erms.S b/sysdeps/i386/i686/multiarch/bzero-erms.S
new file mode 100644
index 0000000000..2c3bed666e
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/bzero-erms.S
@@ -0,0 +1,3 @@
+#define USE_AS_BZERO
+#define __memset_erms __bzero_erms
+#include "memset-erms.S"
diff --git a/sysdeps/i386/i686/multiarch/ifunc-impl-list.c b/sysdeps/i386/i686/multiarch/ifunc-impl-list.c
index ef30a95432..f3cbca0fad 100644
--- a/sysdeps/i386/i686/multiarch/ifunc-impl-list.c
+++ b/sysdeps/i386/i686/multiarch/ifunc-impl-list.c
@@ -44,6 +44,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 			      __bcopy_ssse3)
 	      IFUNC_IMPL_ADD (array, i, bcopy, HAS_CPU_FEATURE (SSE2),
 			      __bcopy_sse2_unaligned)
+	      IFUNC_IMPL_ADD (array, i, bcopy, 1, __bcopy_erms)
 	      IFUNC_IMPL_ADD (array, i, bcopy, 1, __bcopy_ia32))
 
   /* Support sysdeps/i386/i686/multiarch/bzero.S.  */
@@ -52,6 +53,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 			      __bzero_sse2_rep)
 	      IFUNC_IMPL_ADD (array, i, bzero, HAS_CPU_FEATURE (SSE2),
 			      __bzero_sse2)
+	      IFUNC_IMPL_ADD (array, i, bzero, 1, __bzero_erms)
 	      IFUNC_IMPL_ADD (array, i, bzero, 1, __bzero_ia32))
 
   /* Support sysdeps/i386/i686/multiarch/memchr.S.  */
@@ -82,6 +84,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 			      HAS_CPU_FEATURE (SSE2),
 			      __memmove_chk_sse2_unaligned)
 	      IFUNC_IMPL_ADD (array, i, __memmove_chk, 1,
+			      __memmove_chk_erms)
+	      IFUNC_IMPL_ADD (array, i, __memmove_chk, 1,
 			      __memmove_chk_ia32))
 
   /* Support sysdeps/i386/i686/multiarch/memmove.S.  */
@@ -92,6 +96,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 			      __memmove_ssse3)
 	      IFUNC_IMPL_ADD (array, i, memmove, HAS_CPU_FEATURE (SSE2),
 			      __memmove_sse2_unaligned)
+	      IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_erms)
 	      IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_ia32))
 
   /* Support sysdeps/i386/i686/multiarch/memrchr.S.  */
@@ -111,6 +116,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 			      HAS_CPU_FEATURE (SSE2),
 			      __memset_chk_sse2)
 	      IFUNC_IMPL_ADD (array, i, __memset_chk, 1,
+			      __memset_chk_erms)
+	      IFUNC_IMPL_ADD (array, i, __memset_chk, 1,
 			      __memset_chk_ia32))
 
   /* Support sysdeps/i386/i686/multiarch/memset.S.  */
@@ -119,6 +126,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 			      __memset_sse2_rep)
 	      IFUNC_IMPL_ADD (array, i, memset, HAS_CPU_FEATURE (SSE2),
 			      __memset_sse2)
+	      IFUNC_IMPL_ADD (array, i, memset, 1, __memset_erms)
 	      IFUNC_IMPL_ADD (array, i, memset, 1, __memset_ia32))
 
   /* Support sysdeps/i386/i686/multiarch/rawmemchr.S.  */
@@ -319,6 +327,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 			      HAS_CPU_FEATURE (SSE2),
 			      __memcpy_chk_sse2_unaligned)
 	      IFUNC_IMPL_ADD (array, i, __memcpy_chk, 1,
+			      __memcpy_chk_erms)
+	      IFUNC_IMPL_ADD (array, i, __memcpy_chk, 1,
 			      __memcpy_chk_ia32))
 
   /* Support sysdeps/i386/i686/multiarch/memcpy.S.  */
@@ -329,6 +339,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 			      __memcpy_ssse3)
 	      IFUNC_IMPL_ADD (array, i, memcpy, HAS_CPU_FEATURE (SSE2),
 			      __memcpy_sse2_unaligned)
+	      IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_erms)
 	      IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_ia32))
 
   /* Support sysdeps/i386/i686/multiarch/mempcpy_chk.S.  */
@@ -343,6 +354,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 			      HAS_CPU_FEATURE (SSE2),
 			      __mempcpy_chk_sse2_unaligned)
 	      IFUNC_IMPL_ADD (array, i, __mempcpy_chk, 1,
+			      __mempcpy_chk_erms)
+	      IFUNC_IMPL_ADD (array, i, __mempcpy_chk, 1,
 			      __mempcpy_chk_ia32))
 
   /* Support sysdeps/i386/i686/multiarch/mempcpy.S.  */
@@ -353,6 +366,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 			      __mempcpy_ssse3)
 	      IFUNC_IMPL_ADD (array, i, mempcpy, HAS_CPU_FEATURE (SSE2),
 			      __mempcpy_sse2_unaligned)
+	      IFUNC_IMPL_ADD (array, i, mempcpy, 1, __mempcpy_erms)
 	      IFUNC_IMPL_ADD (array, i, mempcpy, 1, __mempcpy_ia32))
 
   /* Support sysdeps/i386/i686/multiarch/strlen.S.  */
diff --git a/sysdeps/i386/i686/multiarch/memcpy-erms.S b/sysdeps/i386/i686/multiarch/memcpy-erms.S
new file mode 100644
index 0000000000..f134e79160
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/memcpy-erms.S
@@ -0,0 +1,102 @@
+/* memcpy with Enhanced REP MOVSB/STOSB
+   Copyright (C) 2014 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <sysdep.h>
+
+#if !defined NOT_IN_libc \
+    && (defined SHARED \
+	|| defined USE_AS_MEMMOVE \
+	|| !defined USE_MULTIARCH)
+
+#include "asm-syntax.h"
+
+#ifndef MEMCPY
+# define MEMCPY		__memcpy_erms
+# define MEMCPY_CHK	__memcpy_chk_erms
+#endif
+
+#ifdef USE_AS_BCOPY
+# define STR2		12
+# define STR1		STR2+4
+# define N     		STR1+4
+#else
+# define STR1		12
+# define STR2		STR1+4
+# define N     		STR2+4
+#endif
+
+#define CFI_PUSH(REG)						\
+  cfi_adjust_cfa_offset (4);					\
+  cfi_rel_offset (REG, 0)
+
+#define CFI_POP(REG)						\
+  cfi_adjust_cfa_offset (-4);					\
+  cfi_restore (REG)
+
+#define PUSH(REG)	pushl REG; CFI_PUSH (REG)
+#define POP(REG)	popl REG; CFI_POP (REG)
+
+	.section .text.erms,"ax",@progbits
+#if !defined USE_AS_BCOPY
+ENTRY (MEMCPY_CHK)
+	movl	12(%esp), %eax
+	cmpl	%eax, 16(%esp)
+	jb	HIDDEN_JUMPTARGET (__chk_fail)
+END (MEMCPY_CHK)
+#endif
+ENTRY (MEMCPY)
+	PUSH	(%esi)
+	PUSH	(%edi)
+	movl	N(%esp), %ecx
+	movl	STR1(%esp), %edi
+	movl	STR2(%esp), %esi
+	mov	%edi, %eax
+#ifdef USE_AS_MEMPCPY
+	add	%ecx, %eax
+#endif
+
+#ifdef USE_AS_MEMMOVE
+	cmp	%esi, %edi
+	jbe	L(copy_forward)
+	lea	(%esi,%ecx), %edx
+	cmp	%edx, %edi
+	jb	L(copy_backward)
+L(copy_forward):
+#endif
+
+	rep	movsb
+	POP	(%edi)
+	POP	(%esi)
+	ret
+
+#ifdef USE_AS_MEMMOVE
+L(copy_backward):
+	lea	-1(%edi,%ecx), %edi
+	lea	-1(%esi,%ecx), %esi
+	std
+	rep	movsb
+	cld
+	POP	(%edi)
+	POP	(%esi)
+	ret
+#endif
+
+END (MEMCPY)
+
+#endif
diff --git a/sysdeps/i386/i686/multiarch/memmove-erms.S b/sysdeps/i386/i686/multiarch/memmove-erms.S
new file mode 100644
index 0000000000..357289a548
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/memmove-erms.S
@@ -0,0 +1,4 @@
+#define USE_AS_MEMMOVE
+#define MEMCPY		__memmove_erms
+#define MEMCPY_CHK	__memmove_chk_erms
+#include "memcpy-erms.S"
diff --git a/sysdeps/i386/i686/multiarch/mempcpy-erms.S b/sysdeps/i386/i686/multiarch/mempcpy-erms.S
new file mode 100644
index 0000000000..01d3bf8a99
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/mempcpy-erms.S
@@ -0,0 +1,4 @@
+#define USE_AS_MEMPCPY
+#define MEMCPY		__mempcpy_erms
+#define MEMCPY_CHK	__mempcpy_chk_erms
+#include "memcpy-erms.S"
diff --git a/sysdeps/i386/i686/multiarch/memset-erms.S b/sysdeps/i386/i686/multiarch/memset-erms.S
new file mode 100644
index 0000000000..807a6e4f7c
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/memset-erms.S
@@ -0,0 +1,69 @@
+/* memset with Enhanced REP MOVSB/STOSB
+   Copyright (C) 2014 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#ifndef NOT_IN_libc
+
+#include <sysdep.h>
+
+#define CFI_PUSH(REG)						\
+  cfi_adjust_cfa_offset (4);					\
+  cfi_rel_offset (REG, 0)
+
+#define CFI_POP(REG)						\
+  cfi_adjust_cfa_offset (-4);					\
+  cfi_restore (REG)
+
+#define PUSH(REG)	pushl REG; CFI_PUSH (REG)
+#define POP(REG)	popl REG; CFI_POP (REG)
+
+#define STR1  8
+#ifdef USE_AS_BZERO
+#define N     STR1+4
+#else
+#define STR2  STR1+4
+#define N     STR2+4
+#endif
+
+	.section .text.erms,"ax",@progbits
+#if defined SHARED && !defined NOT_IN_libc && !defined USE_AS_BZERO
+ENTRY (__memset_chk_erms)
+	movl	12(%esp), %eax
+	cmpl	%eax, 16(%esp)
+	jb	HIDDEN_JUMPTARGET (__chk_fail)
+END (__memset_chk_erms)
+#endif
+ENTRY (__memset_erms)
+	PUSH    (%edi)
+	movl	N(%esp), %ecx
+	movl	STR1(%esp), %edi
+#ifdef USE_AS_BZERO
+	xor	%eax, %eax
+#else
+	movzbl	STR2(%esp), %eax
+	mov	%edi, %edx
+#endif
+	rep	stosb
+#ifndef USE_AS_BZERO
+	mov	%edx, %eax
+#endif
+	POP     (%edi)
+	ret
+END (__memset_erms)
+
+#endif