diff options
author | H.J. Lu <hjl.tools@gmail.com> | 2011-09-21 15:21:28 -0700 |
---|---|---|
committer | H.J. Lu <hjl.tools@gmail.com> | 2016-03-28 06:11:19 -0700 |
commit | c835e4cad72cfda9c2702047e6ac31a8aba8e61d (patch) | |
tree | 32a42fdc011570ed3d0bec28b3e1d3c4836c2a5c | |
parent | e41b395523040fcb58c7d378475720c2836d280c (diff) | |
download | glibc-hjl/erms/i386.tar.gz glibc-hjl/erms/i386.tar.xz glibc-hjl/erms/i386.zip |
Add 32-bit Enhanced REP MOVSB/STOSB (ERMS) memcpy/memset (branch: hjl/erms/i386)
Add and test 32-bit memcpy/memset with Enhanced REP MOVSB/STOSB (ERMS).

* sysdeps/i386/i686/multiarch/Makefile (sysdep_routines): Add bcopy-erms, memcpy-erms, memmove-erms, mempcpy-erms, bzero-erms and memset-erms.
* sysdeps/i386/i686/multiarch/bcopy-erms.S: New file.
* sysdeps/i386/i686/multiarch/bzero-erms.S: Likewise.
* sysdeps/i386/i686/multiarch/memcpy-erms.S: Likewise.
* sysdeps/i386/i686/multiarch/memmove-erms.S: Likewise.
* sysdeps/i386/i686/multiarch/mempcpy-erms.S: Likewise.
* sysdeps/i386/i686/multiarch/memset-erms.S: Likewise.
* sysdeps/i386/i686/multiarch/ifunc-impl-list.c (__libc_ifunc_impl_list): Add __bcopy_erms, __bzero_erms, __memmove_chk_erms, __memmove_erms, __memset_chk_erms, __memset_erms, __memcpy_chk_erms, __memcpy_erms, __mempcpy_chk_erms and __mempcpy_erms.
-rw-r--r-- | sysdeps/i386/i686/multiarch/Makefile | 4 | ||||
-rw-r--r-- | sysdeps/i386/i686/multiarch/bcopy-erms.S | 4 | ||||
-rw-r--r-- | sysdeps/i386/i686/multiarch/bzero-erms.S | 3 | ||||
-rw-r--r-- | sysdeps/i386/i686/multiarch/ifunc-impl-list.c | 14 | ||||
-rw-r--r-- | sysdeps/i386/i686/multiarch/memcpy-erms.S | 102 | ||||
-rw-r--r-- | sysdeps/i386/i686/multiarch/memmove-erms.S | 4 | ||||
-rw-r--r-- | sysdeps/i386/i686/multiarch/mempcpy-erms.S | 4 | ||||
-rw-r--r-- | sysdeps/i386/i686/multiarch/memset-erms.S | 69 |
8 files changed, 203 insertions, 1 deletion
diff --git a/sysdeps/i386/i686/multiarch/Makefile b/sysdeps/i386/i686/multiarch/Makefile index 700010d969..6bcef4c21d 100644 --- a/sysdeps/i386/i686/multiarch/Makefile +++ b/sysdeps/i386/i686/multiarch/Makefile @@ -25,7 +25,9 @@ sysdep_routines += bzero-sse2 memset-sse2 memcpy-ssse3 mempcpy-ssse3 \ strcasecmp_l-sse4 strncase_l-sse4 \ bcopy-sse2-unaligned memcpy-sse2-unaligned \ mempcpy-sse2-unaligned memmove-sse2-unaligned \ - strcspn-c strpbrk-c strspn-c + strcspn-c strpbrk-c strspn-c \ + bcopy-erms memcpy-erms memmove-erms mempcpy-erms \ + bzero-erms memset-erms CFLAGS-varshift.c += -msse4 CFLAGS-strcspn-c.c += -msse4 CFLAGS-strpbrk-c.c += -msse4 diff --git a/sysdeps/i386/i686/multiarch/bcopy-erms.S b/sysdeps/i386/i686/multiarch/bcopy-erms.S new file mode 100644 index 0000000000..da9e160b53 --- /dev/null +++ b/sysdeps/i386/i686/multiarch/bcopy-erms.S @@ -0,0 +1,4 @@ +#define USE_AS_MEMMOVE +#define USE_AS_BCOPY +#define MEMCPY __bcopy_erms +#include "memcpy-erms.S" diff --git a/sysdeps/i386/i686/multiarch/bzero-erms.S b/sysdeps/i386/i686/multiarch/bzero-erms.S new file mode 100644 index 0000000000..2c3bed666e --- /dev/null +++ b/sysdeps/i386/i686/multiarch/bzero-erms.S @@ -0,0 +1,3 @@ +#define USE_AS_BZERO +#define __memset_erms __bzero_erms +#include "memset-erms.S" diff --git a/sysdeps/i386/i686/multiarch/ifunc-impl-list.c b/sysdeps/i386/i686/multiarch/ifunc-impl-list.c index ef30a95432..f3cbca0fad 100644 --- a/sysdeps/i386/i686/multiarch/ifunc-impl-list.c +++ b/sysdeps/i386/i686/multiarch/ifunc-impl-list.c @@ -44,6 +44,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, __bcopy_ssse3) IFUNC_IMPL_ADD (array, i, bcopy, HAS_CPU_FEATURE (SSE2), __bcopy_sse2_unaligned) + IFUNC_IMPL_ADD (array, i, bcopy, 1, __bcopy_erms) IFUNC_IMPL_ADD (array, i, bcopy, 1, __bcopy_ia32)) /* Support sysdeps/i386/i686/multiarch/bzero.S. 
*/ @@ -52,6 +53,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, __bzero_sse2_rep) IFUNC_IMPL_ADD (array, i, bzero, HAS_CPU_FEATURE (SSE2), __bzero_sse2) + IFUNC_IMPL_ADD (array, i, bzero, 1, __bzero_erms) IFUNC_IMPL_ADD (array, i, bzero, 1, __bzero_ia32)) /* Support sysdeps/i386/i686/multiarch/memchr.S. */ @@ -82,6 +84,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, HAS_CPU_FEATURE (SSE2), __memmove_chk_sse2_unaligned) IFUNC_IMPL_ADD (array, i, __memmove_chk, 1, + __memmove_chk_erms) + IFUNC_IMPL_ADD (array, i, __memmove_chk, 1, __memmove_chk_ia32)) /* Support sysdeps/i386/i686/multiarch/memmove.S. */ @@ -92,6 +96,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, __memmove_ssse3) IFUNC_IMPL_ADD (array, i, memmove, HAS_CPU_FEATURE (SSE2), __memmove_sse2_unaligned) + IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_erms) IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_ia32)) /* Support sysdeps/i386/i686/multiarch/memrchr.S. */ @@ -111,6 +116,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, HAS_CPU_FEATURE (SSE2), __memset_chk_sse2) IFUNC_IMPL_ADD (array, i, __memset_chk, 1, + __memset_chk_erms) + IFUNC_IMPL_ADD (array, i, __memset_chk, 1, __memset_chk_ia32)) /* Support sysdeps/i386/i686/multiarch/memset.S. */ @@ -119,6 +126,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, __memset_sse2_rep) IFUNC_IMPL_ADD (array, i, memset, HAS_CPU_FEATURE (SSE2), __memset_sse2) + IFUNC_IMPL_ADD (array, i, memset, 1, __memset_erms) IFUNC_IMPL_ADD (array, i, memset, 1, __memset_ia32)) /* Support sysdeps/i386/i686/multiarch/rawmemchr.S. 
*/ @@ -319,6 +327,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, HAS_CPU_FEATURE (SSE2), __memcpy_chk_sse2_unaligned) IFUNC_IMPL_ADD (array, i, __memcpy_chk, 1, + __memcpy_chk_erms) + IFUNC_IMPL_ADD (array, i, __memcpy_chk, 1, __memcpy_chk_ia32)) /* Support sysdeps/i386/i686/multiarch/memcpy.S. */ @@ -329,6 +339,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, __memcpy_ssse3) IFUNC_IMPL_ADD (array, i, memcpy, HAS_CPU_FEATURE (SSE2), __memcpy_sse2_unaligned) + IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_erms) IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_ia32)) /* Support sysdeps/i386/i686/multiarch/mempcpy_chk.S. */ @@ -343,6 +354,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, HAS_CPU_FEATURE (SSE2), __mempcpy_chk_sse2_unaligned) IFUNC_IMPL_ADD (array, i, __mempcpy_chk, 1, + __mempcpy_chk_erms) + IFUNC_IMPL_ADD (array, i, __mempcpy_chk, 1, __mempcpy_chk_ia32)) /* Support sysdeps/i386/i686/multiarch/mempcpy.S. */ @@ -353,6 +366,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, __mempcpy_ssse3) IFUNC_IMPL_ADD (array, i, mempcpy, HAS_CPU_FEATURE (SSE2), __mempcpy_sse2_unaligned) + IFUNC_IMPL_ADD (array, i, mempcpy, 1, __mempcpy_erms) IFUNC_IMPL_ADD (array, i, mempcpy, 1, __mempcpy_ia32)) /* Support sysdeps/i386/i686/multiarch/strlen.S. */ diff --git a/sysdeps/i386/i686/multiarch/memcpy-erms.S b/sysdeps/i386/i686/multiarch/memcpy-erms.S new file mode 100644 index 0000000000..f134e79160 --- /dev/null +++ b/sysdeps/i386/i686/multiarch/memcpy-erms.S @@ -0,0 +1,102 @@ +/* memcpy with Enhanced REP MOVSB/STOSB + Copyright (C) 2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include <sysdep.h> + +#if !defined NOT_IN_libc \ + && (defined SHARED \ + || defined USE_AS_MEMMOVE \ + || !defined USE_MULTIARCH) + +#include "asm-syntax.h" + +#ifndef MEMCPY +# define MEMCPY __memcpy_erms +# define MEMCPY_CHK __memcpy_chk_erms +#endif + +#ifdef USE_AS_BCOPY +# define STR2 12 +# define STR1 STR2+4 +# define N STR1+4 +#else +# define STR1 12 +# define STR2 STR1+4 +# define N STR2+4 +#endif + +#define CFI_PUSH(REG) \ + cfi_adjust_cfa_offset (4); \ + cfi_rel_offset (REG, 0) + +#define CFI_POP(REG) \ + cfi_adjust_cfa_offset (-4); \ + cfi_restore (REG) + +#define PUSH(REG) pushl REG; CFI_PUSH (REG) +#define POP(REG) popl REG; CFI_POP (REG) + + .section .text.erms,"ax",@progbits +#if !defined USE_AS_BCOPY +ENTRY (MEMCPY_CHK) + movl 12(%esp), %eax + cmpl %eax, 16(%esp) + jb HIDDEN_JUMPTARGET (__chk_fail) +END (MEMCPY_CHK) +#endif +ENTRY (MEMCPY) + PUSH (%esi) + PUSH (%edi) + movl N(%esp), %ecx + movl STR1(%esp), %edi + movl STR2(%esp), %esi + mov %edi, %eax +#ifdef USE_AS_MEMPCPY + add %ecx, %eax +#endif + +#ifdef USE_AS_MEMMOVE + cmp %esi, %edi + jbe L(copy_forward) + lea (%esi,%ecx), %edx + cmp %edx, %edi + jb L(copy_backward) +L(copy_forward): +#endif + + rep movsb + POP (%edi) + POP (%esi) + ret + +#ifdef 
USE_AS_MEMMOVE +L(copy_backward): + lea -1(%edi,%ecx), %edi + lea -1(%esi,%ecx), %esi + std + rep movsb + cld + POP (%edi) + POP (%esi) + ret +#endif + +END (MEMCPY) + +#endif diff --git a/sysdeps/i386/i686/multiarch/memmove-erms.S b/sysdeps/i386/i686/multiarch/memmove-erms.S new file mode 100644 index 0000000000..357289a548 --- /dev/null +++ b/sysdeps/i386/i686/multiarch/memmove-erms.S @@ -0,0 +1,4 @@ +#define USE_AS_MEMMOVE +#define MEMCPY __memmove_erms +#define MEMCPY_CHK __memmove_chk_erms +#include "memcpy-erms.S" diff --git a/sysdeps/i386/i686/multiarch/mempcpy-erms.S b/sysdeps/i386/i686/multiarch/mempcpy-erms.S new file mode 100644 index 0000000000..01d3bf8a99 --- /dev/null +++ b/sysdeps/i386/i686/multiarch/mempcpy-erms.S @@ -0,0 +1,4 @@ +#define USE_AS_MEMPCPY +#define MEMCPY __mempcpy_erms +#define MEMCPY_CHK __mempcpy_chk_erms +#include "memcpy-erms.S" diff --git a/sysdeps/i386/i686/multiarch/memset-erms.S b/sysdeps/i386/i686/multiarch/memset-erms.S new file mode 100644 index 0000000000..807a6e4f7c --- /dev/null +++ b/sysdeps/i386/i686/multiarch/memset-erms.S @@ -0,0 +1,69 @@ +/* memset with Enhanced REP MOVSB/STOSB + Copyright (C) 2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. 
*/ + +#ifndef NOT_IN_libc + +#include <sysdep.h> + +#define CFI_PUSH(REG) \ + cfi_adjust_cfa_offset (4); \ + cfi_rel_offset (REG, 0) + +#define CFI_POP(REG) \ + cfi_adjust_cfa_offset (-4); \ + cfi_restore (REG) + +#define PUSH(REG) pushl REG; CFI_PUSH (REG) +#define POP(REG) popl REG; CFI_POP (REG) + +#define STR1 8 +#ifdef USE_AS_BZERO +#define N STR1+4 +#else +#define STR2 STR1+4 +#define N STR2+4 +#endif + + .section .text.erms,"ax",@progbits +#if defined SHARED && !defined NOT_IN_libc && !defined USE_AS_BZERO +ENTRY (__memset_chk_erms) + movl 12(%esp), %eax + cmpl %eax, 16(%esp) + jb HIDDEN_JUMPTARGET (__chk_fail) +END (__memset_chk_erms) +#endif +ENTRY (__memset_erms) + PUSH (%edi) + movl N(%esp), %ecx + movl STR1(%esp), %edi +#ifdef USE_AS_BZERO + xor %eax, %eax +#else + movzbl STR2(%esp), %eax + mov %edi, %edx +#endif + rep stosb +#ifndef USE_AS_BZERO + mov %edx, %eax +#endif + POP (%edi) + ret +END (__memset_erms) + +#endif |