about summary refs log tree commit diff
path: root/sysdeps/x86_64/multiarch
diff options
context:
space:
mode:
authorH.J. Lu <hongjiu.lu@intel.com>2010-11-08 03:41:34 -0500
committerUlrich Drepper <drepper@gmail.com>2010-11-08 03:41:34 -0500
commitff02d5280bf252e86d325ff4348feaf531ede831 (patch)
tree243484af328916c3945588aab649615521ceebc6 /sysdeps/x86_64/multiarch
parent344d0b545d0a0a0ab737ff333d807969721ce381 (diff)
downloadglibc-ff02d5280bf252e86d325ff4348feaf531ede831.tar.gz
glibc-ff02d5280bf252e86d325ff4348feaf531ede831.tar.xz
glibc-ff02d5280bf252e86d325ff4348feaf531ede831.zip
Use IFUNC on x86-64 memset
Diffstat (limited to 'sysdeps/x86_64/multiarch')
-rw-r--r--sysdeps/x86_64/multiarch/Makefile3
-rw-r--r--sysdeps/x86_64/multiarch/bzero.S56
-rw-r--r--sysdeps/x86_64/multiarch/cacheinfo.c2
-rw-r--r--sysdeps/x86_64/multiarch/init-arch.c5
-rw-r--r--sysdeps/x86_64/multiarch/init-arch.h12
-rw-r--r--sysdeps/x86_64/multiarch/memset-x86-64.S18
-rw-r--r--sysdeps/x86_64/multiarch/memset.S74
-rw-r--r--sysdeps/x86_64/multiarch/memset_chk.S44
8 files changed, 210 insertions, 4 deletions
diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile
index 5d2e34ebc8..19aa4be4cf 100644
--- a/sysdeps/x86_64/multiarch/Makefile
+++ b/sysdeps/x86_64/multiarch/Makefile
@@ -8,7 +8,8 @@ sysdep_routines += stpncpy-c strncpy-c strcmp-ssse3 strncmp-ssse3 \
 		   strend-sse4 memcmp-sse4 memcpy-ssse3 mempcpy-ssse3 \
 		   memmove-ssse3 memcpy-ssse3-back mempcpy-ssse3-back \
 		   memmove-ssse3-back strcasestr-nonascii strcasecmp_l-ssse3 \
-		   strncase_l-ssse3 strlen-sse4 strlen-no-bsf
+		   strncase_l-ssse3 strlen-sse4 strlen-no-bsf \
+		   memset-x86-64
 ifeq (yes,$(config-cflags-sse4))
 sysdep_routines += strcspn-c strpbrk-c strspn-c strstr-c strcasestr-c varshift
 CFLAGS-varshift.c += -msse4
diff --git a/sysdeps/x86_64/multiarch/bzero.S b/sysdeps/x86_64/multiarch/bzero.S
new file mode 100644
index 0000000000..9c9eebd5ef
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/bzero.S
@@ -0,0 +1,56 @@
+/* Multiple versions of bzero
+   Copyright (C) 2010 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <sysdep.h>
+#include <init-arch.h>
+
+	.text
+ENTRY(__bzero)
+	.type	__bzero, @gnu_indirect_function
+	cmpl	$0, __cpu_features+KIND_OFFSET(%rip)
+	jne	1f
+	call	__init_cpu_features
+1:	leaq	__bzero_x86_64(%rip), %rax
+	testl	$bit_Prefer_SSE_for_memop, __cpu_features+FEATURE_OFFSET+index_Prefer_SSE_for_memop(%rip)
+	jz	2f
+	leaq	__bzero_sse2(%rip), %rax
+2:	ret
+END(__bzero)
+
+	.type	__bzero_sse2, @function
+__bzero_sse2:
+	cfi_startproc
+	CALL_MCOUNT
+	mov	%rsi,%rdx	/* Adjust parameter.  */
+	xorl	%esi,%esi	/* Fill with 0s.  */
+	jmp	__memset_sse2
+	cfi_endproc
+	.size __bzero_sse2, .-__bzero_sse2
+
+	.type	__bzero_x86_64, @function
+__bzero_x86_64:
+	cfi_startproc
+	CALL_MCOUNT
+	mov	%rsi,%rdx	/* Adjust parameter.  */
+	xorl	%esi,%esi	/* Fill with 0s.  */
+	jmp	__memset_x86_64
+	cfi_endproc
+	.size __bzero_x86_64, .-__bzero_x86_64
+
+weak_alias (__bzero, bzero)
diff --git a/sysdeps/x86_64/multiarch/cacheinfo.c b/sysdeps/x86_64/multiarch/cacheinfo.c
new file mode 100644
index 0000000000..f87b8dce6b
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/cacheinfo.c
@@ -0,0 +1,2 @@
+#define DISABLE_PREFERRED_MEMORY_INSTRUCTION
+#include "../cacheinfo.c"
diff --git a/sysdeps/x86_64/multiarch/init-arch.c b/sysdeps/x86_64/multiarch/init-arch.c
index 786466d5fd..15bc9046e3 100644
--- a/sysdeps/x86_64/multiarch/init-arch.c
+++ b/sysdeps/x86_64/multiarch/init-arch.c
@@ -59,6 +59,11 @@ __init_cpu_features (void)
 
       get_common_indeces (&family, &model);
 
+      /* Intel processors prefer SSE instruction for memory/string
+	 routines if they are avaiable.  */
+      __cpu_features.feature[index_Prefer_SSE_for_memop]
+	|= bit_Prefer_SSE_for_memop;
+
       unsigned int eax = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax;
       unsigned int extended_family = (eax >> 20) & 0xff;
       unsigned int extended_model = (eax >> 12) & 0xf0;
diff --git a/sysdeps/x86_64/multiarch/init-arch.h b/sysdeps/x86_64/multiarch/init-arch.h
index 783b02015e..6e409b8f17 100644
--- a/sysdeps/x86_64/multiarch/init-arch.h
+++ b/sysdeps/x86_64/multiarch/init-arch.h
@@ -16,9 +16,10 @@
    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307 USA.  */
 
-#define bit_Fast_Rep_String	(1 << 0)
-#define bit_Fast_Copy_Backward	(1 << 1)
-#define bit_Slow_BSF		(1 << 2)
+#define bit_Fast_Rep_String		(1 << 0)
+#define bit_Fast_Copy_Backward		(1 << 1)
+#define bit_Slow_BSF			(1 << 2)
+#define bit_Prefer_SSE_for_memop	(1 << 3)
 
 #ifdef	__ASSEMBLER__
 
@@ -37,6 +38,7 @@
 # define index_Fast_Rep_String		FEATURE_INDEX_1*FEATURE_SIZE
 # define index_Fast_Copy_Backward	FEATURE_INDEX_1*FEATURE_SIZE
 # define index_Slow_BSF			FEATURE_INDEX_1*FEATURE_SIZE
+# define index_Prefer_SSE_for_memop	FEATURE_INDEX_1*FEATURE_SIZE
 
 #else	/* __ASSEMBLER__ */
 
@@ -109,6 +111,7 @@ extern const struct cpu_features *__get_cpu_features (void)
 # define index_Fast_Rep_String		FEATURE_INDEX_1
 # define index_Fast_Copy_Backward	FEATURE_INDEX_1
 # define index_Slow_BSF			FEATURE_INDEX_1
+# define index_Prefer_SSE_for_memop	FEATURE_INDEX_1
 
 #define HAS_ARCH_FEATURE(idx, bit) \
   ((__get_cpu_features ()->feature[idx] & (bit)) != 0)
@@ -122,4 +125,7 @@ extern const struct cpu_features *__get_cpu_features (void)
 #define HAS_SLOW_BSF \
   HAS_ARCH_FEATURE (index_Slow_BSF, bit_Slow_BSF)
 
+#define HAS_PREFER_SSE_FOR_MEMOP \
+  HAS_ARCH_FEATURE (index_Prefer_SSE_for_memop, bit_Prefer_SSE_for_memop)
+
 #endif	/* __ASSEMBLER__ */
diff --git a/sysdeps/x86_64/multiarch/memset-x86-64.S b/sysdeps/x86_64/multiarch/memset-x86-64.S
new file mode 100644
index 0000000000..5e8cfb3e9b
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memset-x86-64.S
@@ -0,0 +1,18 @@
+#include <sysdep.h>
+
+#ifndef NOT_IN_libc
+# undef ENTRY_CHK
+# define ENTRY_CHK(name) \
+	.type __memset_chk_x86_64, @function; \
+	.globl __memset_chk_x86_64; \
+	.p2align 4; \
+	__memset_chk_x86_64: cfi_startproc; \
+	CALL_MCOUNT
+# undef END_CHK
+# define END_CHK(name) \
+	cfi_endproc; .size __memset_chk_x86_64, .-__memset_chk_x86_64
+
+# define libc_hidden_builtin_def(name)
+# define memset __memset_x86_64
+# include "../memset.S"
+#endif
diff --git a/sysdeps/x86_64/multiarch/memset.S b/sysdeps/x86_64/multiarch/memset.S
new file mode 100644
index 0000000000..a8d0e9ea22
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memset.S
@@ -0,0 +1,74 @@
+/* Multiple versions of memset
+   Copyright (C) 2010 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <sysdep.h>
+#include <init-arch.h>
+
+/* Define multiple versions only for the definition in lib.  */
+#ifndef NOT_IN_libc
+ENTRY(memset)
+	.type	memset, @gnu_indirect_function
+	cmpl	$0, __cpu_features+KIND_OFFSET(%rip)
+	jne	1f
+	call	__init_cpu_features
+1:	leaq	__memset_x86_64(%rip), %rax
+	testl	$bit_Prefer_SSE_for_memop, __cpu_features+FEATURE_OFFSET+index_Prefer_SSE_for_memop(%rip)
+	jz	2f
+	leaq	__memset_sse2(%rip), %rax
+2:	ret
+END(memset)
+
+# define USE_SSE2 1
+
+# undef ENTRY
+# define ENTRY(name) \
+	.type __memset_sse2, @function; \
+	.globl __memset_sse2; \
+	.p2align 4; \
+	__memset_sse2: cfi_startproc; \
+	CALL_MCOUNT
+# undef END
+# define END(name) \
+	cfi_endproc; .size __memset_sse2, .-__memset_sse2
+
+# undef ENTRY_CHK
+# define ENTRY_CHK(name) \
+	.type __memset_chk_sse2, @function; \
+	.globl __memset_chk_sse2; \
+	.p2align 4; \
+	__memset_chk_sse2: cfi_startproc; \
+	CALL_MCOUNT
+# undef END_CHK
+# define END_CHK(name) \
+	cfi_endproc; .size __memset_chk_sse2, .-__memset_chk_sse2
+
+# ifdef SHARED
+#  undef libc_hidden_builtin_def
+/* It doesn't make sense to send libc-internal memset calls through a PLT.
+   The speedup we get from using GPR instruction is likely eaten away
+   by the indirect call in the PLT.  */
+#  define libc_hidden_builtin_def(name) \
+	.globl __GI_memset; __GI_memset = __memset_sse2
+# endif
+
+# undef strong_alias
+# define strong_alias(original, alias)
+#endif
+
+#include "../memset.S"
diff --git a/sysdeps/x86_64/multiarch/memset_chk.S b/sysdeps/x86_64/multiarch/memset_chk.S
new file mode 100644
index 0000000000..16afe60c66
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memset_chk.S
@@ -0,0 +1,44 @@
+/* Multiple versions of __memset_chk
+   Copyright (C) 2010 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <sysdep.h>
+#include <init-arch.h>
+
+/* Define multiple versions only for the definition in lib.  */
+#ifndef NOT_IN_libc
+# ifdef SHARED
+ENTRY(__memset_chk)
+	.type	__memset_chk, @gnu_indirect_function
+	cmpl	$0, __cpu_features+KIND_OFFSET(%rip)
+	jne	1f
+	call	__init_cpu_features
+1:	leaq	__memset_chk_x86_64(%rip), %rax
+	testl	$bit_Prefer_SSE_for_memop, __cpu_features+FEATURE_OFFSET+index_Prefer_SSE_for_memop(%rip)
+	jz	2f
+	leaq	__memset_chk_sse2(%rip), %rax
+2:	ret
+END(__memset_chk)
+
+strong_alias (__memset_chk, __memset_zero_constant_len_parameter)
+	.section .gnu.warning.__memset_zero_constant_len_parameter
+	.string "memset used with constant zero length parameter; this could be due to transposed parameters"
+# else
+#  include "../memset_chk.S"
+# endif
+#endif