diff options
author | H.J. Lu <hongjiu.lu@intel.com> | 2010-01-12 11:22:03 -0800 |
---|---|---|
committer | Ulrich Drepper <drepper@redhat.com> | 2010-01-12 11:22:03 -0800 |
commit | 3af48cbdfaeb8bc389de1caeb33bc29811da80e8 (patch) | |
tree | 94a209777ab8c7e24cff9e50660a4075e6338594 /sysdeps/i386/i686/multiarch/memset_chk.S | |
parent | 4bfc6ab9ae3b259caa8b12229f0c67b4b514e9cd (diff) | |
download | glibc-3af48cbdfaeb8bc389de1caeb33bc29811da80e8.tar.gz glibc-3af48cbdfaeb8bc389de1caeb33bc29811da80e8.tar.xz glibc-3af48cbdfaeb8bc389de1caeb33bc29811da80e8.zip |
Optimize 32bit memset/memcpy with SSE2/SSSE3.
Diffstat (limited to 'sysdeps/i386/i686/multiarch/memset_chk.S')
-rw-r--r-- | sysdeps/i386/i686/multiarch/memset_chk.S | 116 |
1 files changed, 116 insertions, 0 deletions
diff --git a/sysdeps/i386/i686/multiarch/memset_chk.S b/sysdeps/i386/i686/multiarch/memset_chk.S
new file mode 100644
index 0000000000..e3665be7b2
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/memset_chk.S
@@ -0,0 +1,116 @@
+/* Multiple versions of __memset_chk
+   Copyright (C) 2010 Free Software Foundation, Inc.
+   Contributed by Intel Corporation.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA. */
+
+#include <sysdep.h>
+#include <init-arch.h>
+
+/* Define multiple versions only for the definition in libc, i.e. when NOT_IN_libc is not defined. */
+#ifndef NOT_IN_libc
+# ifdef SHARED
+	.section	.gnu.linkonce.t.__i686.get_pc_thunk.bx,"ax",@progbits
+	.globl	__i686.get_pc_thunk.bx
+	.hidden	__i686.get_pc_thunk.bx
+	.p2align 4
+	.type	__i686.get_pc_thunk.bx,@function
+__i686.get_pc_thunk.bx:		/* PIC helper: hands its own return address back in %ebx. */
+	movl	(%esp), %ebx	/* (%esp) holds the return address on entry. */
+	ret
+/* Shared build: resolver for __memset_chk; leaves the address of the chosen implementation in %eax. */
+	.text
+ENTRY(__memset_chk)
+	.type	__memset_chk, @gnu_indirect_function	/* Symbol is an IFUNC: this code selects the real target. */
+	pushl	%ebx		/* %ebx becomes the PIC base below; restored before ret. */
+	cfi_adjust_cfa_offset (4)
+	cfi_rel_offset (ebx, 0)
+	call	__i686.get_pc_thunk.bx
+	addl	$_GLOBAL_OFFSET_TABLE_, %ebx	/* %ebx = base for the @GOTOFF references that follow. */
+	cmpl	$0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)	/* Zero presumably means __cpu_features is not set up yet; confirm in init-arch.h. */
+	jne	1f
+	call	__init_cpu_features
+1:	leal	__memset_chk_ia32@GOTOFF(%ebx), %eax	/* Default choice. */
+	testl	$bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features@GOTOFF(%ebx)
+	jz	2f		/* SSE2 bit clear: keep the ia32 version. */
+	leal	__memset_chk_sse2@GOTOFF(%ebx), %eax
+	testl	$bit_Fast_Rep_String, FEATURE_OFFSET+index_Fast_Rep_String+__cpu_features@GOTOFF(%ebx)
+	jz	2f		/* Fast_Rep_String bit clear: keep the sse2 version. */
+	leal	__memset_chk_sse2_rep@GOTOFF(%ebx), %eax
+2:	popl	%ebx
+	cfi_adjust_cfa_offset (-4)
+	cfi_restore (ebx)
+	ret			/* %eax = address selected above. */
+END(__memset_chk)
+/* Alias of __memset_chk plus a .gnu.warning section holding the message tied to that alias (GNU ld reports such sections when the symbol is referenced). */
+strong_alias (__memset_chk, __memset_zero_constant_len_parameter)
+	.section .gnu.warning.__memset_zero_constant_len_parameter
+	.string "memset used with constant zero length parameter; this could be due to transposed parameters"
+# else	/* !SHARED */
+	.text
+ENTRY(__memset_chk)		/* Non-shared build: same selection order, using absolute addresses instead of @GOTOFF. */
+	.type	__memset_chk, @gnu_indirect_function
+	cmpl	$0, KIND_OFFSET+__cpu_features	/* Zero presumably means not initialized yet; confirm in init-arch.h. */
+	jne	1f
+	call	__init_cpu_features
+1:	leal	__memset_chk_ia32, %eax	/* Default choice. */
+	testl	$bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features
+	jz	2f		/* SSE2 bit clear: keep the ia32 version. */
+	leal	__memset_chk_sse2, %eax
+	testl	$bit_Fast_Rep_String, FEATURE_OFFSET+index_Fast_Rep_String+__cpu_features
+	jz	2f		/* Fast_Rep_String bit clear: keep the sse2 version. */
+	leal	__memset_chk_sse2_rep, %eax
+2:	ret			/* %eax = address selected above. */
+END(__memset_chk)
+/* Non-shared build only: checking wrappers defined in this file; each jumps to __chk_fail or on to the matching __memset_* routine. */
+	.type	__memset_chk_sse2, @function
+	.p2align 4;
+__memset_chk_sse2:
+	cfi_startproc
+	CALL_MCOUNT		/* Macro defined outside this file (presumably a profiling hook; confirm in sysdep.h). */
+	movl	12(%esp), %eax	/* Stack slot at +12 (presumably the length argument; confirm against the __memset_chk prototype). */
+	cmpl	%eax, 16(%esp)	/* Compare with the slot at +16 (presumably the destination object size). */
+	jb	__chk_fail	/* Unsigned test: taken when 16(%esp) is below 12(%esp). */
+	jmp	__memset_sse2	/* Otherwise continue in __memset_sse2 via a jump rather than a call. */
+	cfi_endproc
+	.size	__memset_chk_sse2, .-__memset_chk_sse2
+/* Same comparison as __memset_chk_sse2, continuing in __memset_sse2_rep. */
+	.type	__memset_chk_sse2_rep, @function
+	.p2align 4;
+__memset_chk_sse2_rep:
+	cfi_startproc
+	CALL_MCOUNT		/* Macro defined outside this file (presumably a profiling hook; confirm in sysdep.h). */
+	movl	12(%esp), %eax	/* Stack slot at +12 (presumably the length argument). */
+	cmpl	%eax, 16(%esp)	/* Compare with the slot at +16 (presumably the destination object size). */
+	jb	__chk_fail	/* Unsigned test: taken when 16(%esp) is below 12(%esp). */
+	jmp	__memset_sse2_rep	/* Otherwise continue in __memset_sse2_rep via a jump rather than a call. */
+	cfi_endproc
+	.size	__memset_chk_sse2_rep, .-__memset_chk_sse2_rep
+/* Same comparison as __memset_chk_sse2, continuing in __memset_ia32. */
+	.type	__memset_chk_ia32, @function
+	.p2align 4;
+__memset_chk_ia32:
+	cfi_startproc
+	CALL_MCOUNT		/* Macro defined outside this file (presumably a profiling hook; confirm in sysdep.h). */
+	movl	12(%esp), %eax	/* Stack slot at +12 (presumably the length argument). */
+	cmpl	%eax, 16(%esp)	/* Compare with the slot at +16 (presumably the destination object size). */
+	jb	__chk_fail	/* Unsigned test: taken when 16(%esp) is below 12(%esp). */
+	jmp	__memset_ia32	/* Otherwise continue in __memset_ia32 via a jump rather than a call. */
+	cfi_endproc
+	.size	__memset_chk_ia32, .-__memset_chk_ia32
+# endif
+#endif |