about summary refs log tree commit diff
diff options
context:
space:
mode:
authorH.J. Lu <hjl.tools@gmail.com>2015-07-31 16:52:19 -0700
committerH.J. Lu <hjl.tools@gmail.com>2015-08-05 07:32:49 -0700
commit51fb7715d7d0ee666c3a24b82976f2c7379aa9de (patch)
tree3b226e8a67e883241b477064b98a5c4eb9babbc7
parent23c0d903e9c3e770ef16e57230fe432e5d86bb5c (diff)
downloadglibc-51fb7715d7d0ee666c3a24b82976f2c7379aa9de.tar.gz
glibc-51fb7715d7d0ee666c3a24b82976f2c7379aa9de.tar.xz
glibc-51fb7715d7d0ee666c3a24b82976f2c7379aa9de.zip
Update i686 multiarch functions for <cpu-features.h>
This patch updates i686 multiarch functions to use the newly defined
HAS_CPU_FEATURE, HAS_ARCH_FEATURE, LOAD_GOT_AND_RTLD_GLOBAL_RO and
LOAD_FUNC_GOT_EAX from <cpu-features.h>.

	* sysdeps/i386/i686/fpu/multiarch/e_expf.c: Replace HAS_XXX
	with HAS_CPU_FEATURE/HAS_ARCH_FEATURE (XXX).
	* sysdeps/i386/i686/fpu/multiarch/s_cosf.c: Likewise.
	* sysdeps/i386/i686/fpu/multiarch/s_cosf.c: Likewise.
	* sysdeps/i386/i686/fpu/multiarch/s_sincosf.c: Likewise.
	* sysdeps/i386/i686/fpu/multiarch/s_sinf.c: Likewise.
	* sysdeps/i386/i686/multiarch/ifunc-impl-list.c: Likewise.
	* sysdeps/i386/i686/multiarch/s_fma.c: Likewise.
	* sysdeps/i386/i686/multiarch/s_fmaf.c: Likewise.
	* sysdeps/i386/i686/multiarch/bcopy.S: Remove __init_cpu_features
	call.  Merge SHARED and !SHARED.  Add LOAD_GOT_AND_RTLD_GLOBAL_RO.
	Use LOAD_FUNC_GOT_EAX to load function address.  Replace HAS_XXX
	with HAS_CPU_FEATURE/HAS_ARCH_FEATURE (XXX).
	* sysdeps/i386/i686/multiarch/bzero.S: Likewise.
	* sysdeps/i386/i686/multiarch/memchr.S: Likewise.
	* sysdeps/i386/i686/multiarch/memcmp.S: Likewise.
	* sysdeps/i386/i686/multiarch/memcpy.S: Likewise.
	* sysdeps/i386/i686/multiarch/memcpy_chk.S: Likewise.
	* sysdeps/i386/i686/multiarch/memmove.S: Likewise.
	* sysdeps/i386/i686/multiarch/memmove_chk.S: Likewise.
	* sysdeps/i386/i686/multiarch/mempcpy.S: Likewise.
	* sysdeps/i386/i686/multiarch/mempcpy_chk.S: Likewise.
	* sysdeps/i386/i686/multiarch/memrchr.S: Likewise.
	* sysdeps/i386/i686/multiarch/memset.S: Likewise.
	* sysdeps/i386/i686/multiarch/memset_chk.S: Likewise.
	* sysdeps/i386/i686/multiarch/rawmemchr.S: Likewise.
	* sysdeps/i386/i686/multiarch/strcasecmp.S: Likewise.
	* sysdeps/i386/i686/multiarch/strcat.S: Likewise.
	* sysdeps/i386/i686/multiarch/strchr.S: Likewise.
	* sysdeps/i386/i686/multiarch/strcmp.S: Likewise.
	* sysdeps/i386/i686/multiarch/strcpy.S: Likewise.
	* sysdeps/i386/i686/multiarch/strcspn.S: Likewise.
	* sysdeps/i386/i686/multiarch/strlen.S: Likewise.
	* sysdeps/i386/i686/multiarch/strncase.S: Likewise.
	* sysdeps/i386/i686/multiarch/strnlen.S: Likewise.
	* sysdeps/i386/i686/multiarch/strrchr.S: Likewise.
	* sysdeps/i386/i686/multiarch/strspn.S: Likewise.
	* sysdeps/i386/i686/multiarch/wcschr.S: Likewise.
	* sysdeps/i386/i686/multiarch/wcscmp.S: Likewise.
	* sysdeps/i386/i686/multiarch/wcscpy.S: Likewise.
	* sysdeps/i386/i686/multiarch/wcslen.S: Likewise.
	* sysdeps/i386/i686/multiarch/wcsrchr.S: Likewise.
	* sysdeps/i386/i686/multiarch/wmemcmp.S: Likewise.
-rw-r--r--sysdeps/i386/i686/fpu/multiarch/e_expf.c8
-rw-r--r--sysdeps/i386/i686/fpu/multiarch/s_cosf.c2
-rw-r--r--sysdeps/i386/i686/fpu/multiarch/s_sincosf.c3
-rw-r--r--sysdeps/i386/i686/fpu/multiarch/s_sinf.c2
-rw-r--r--sysdeps/i386/i686/multiarch/bcopy.S45
-rw-r--r--sysdeps/i386/i686/multiarch/bzero.S39
-rw-r--r--sysdeps/i386/i686/multiarch/ifunc-impl-list.c199
-rw-r--r--sysdeps/i386/i686/multiarch/memchr.S36
-rw-r--r--sysdeps/i386/i686/multiarch/memcmp.S39
-rw-r--r--sysdeps/i386/i686/multiarch/memcpy.S29
-rw-r--r--sysdeps/i386/i686/multiarch/memcpy_chk.S29
-rw-r--r--sysdeps/i386/i686/multiarch/memmove.S60
-rw-r--r--sysdeps/i386/i686/multiarch/memmove_chk.S50
-rw-r--r--sysdeps/i386/i686/multiarch/mempcpy.S29
-rw-r--r--sysdeps/i386/i686/multiarch/mempcpy_chk.S29
-rw-r--r--sysdeps/i386/i686/multiarch/memrchr.S36
-rw-r--r--sysdeps/i386/i686/multiarch/memset.S39
-rw-r--r--sysdeps/i386/i686/multiarch/memset_chk.S40
-rw-r--r--sysdeps/i386/i686/multiarch/rawmemchr.S36
-rw-r--r--sysdeps/i386/i686/multiarch/s_fma.c3
-rw-r--r--sysdeps/i386/i686/multiarch/s_fmaf.c3
-rw-r--r--sysdeps/i386/i686/multiarch/strcasecmp.S43
-rw-r--r--sysdeps/i386/i686/multiarch/strcat.S44
-rw-r--r--sysdeps/i386/i686/multiarch/strchr.S23
-rw-r--r--sysdeps/i386/i686/multiarch/strcmp.S43
-rw-r--r--sysdeps/i386/i686/multiarch/strcpy.S44
-rw-r--r--sysdeps/i386/i686/multiarch/strcspn.S32
-rw-r--r--sysdeps/i386/i686/multiarch/strlen.S23
-rw-r--r--sysdeps/i386/i686/multiarch/strncase.S43
-rw-r--r--sysdeps/i386/i686/multiarch/strnlen.S19
-rw-r--r--sysdeps/i386/i686/multiarch/strrchr.S23
-rw-r--r--sysdeps/i386/i686/multiarch/strspn.S32
-rw-r--r--sysdeps/i386/i686/multiarch/wcschr.S19
-rw-r--r--sysdeps/i386/i686/multiarch/wcscmp.S19
-rw-r--r--sysdeps/i386/i686/multiarch/wcscpy.S19
-rw-r--r--sysdeps/i386/i686/multiarch/wcslen.S19
-rw-r--r--sysdeps/i386/i686/multiarch/wcsrchr.S19
-rw-r--r--sysdeps/i386/i686/multiarch/wmemcmp.S23
38 files changed, 356 insertions, 887 deletions
diff --git a/sysdeps/i386/i686/fpu/multiarch/e_expf.c b/sysdeps/i386/i686/fpu/multiarch/e_expf.c
index 5102dae3bb..697888343b 100644
--- a/sysdeps/i386/i686/fpu/multiarch/e_expf.c
+++ b/sysdeps/i386/i686/fpu/multiarch/e_expf.c
@@ -23,11 +23,15 @@ extern double __ieee754_expf_ia32 (double);
 
 double __ieee754_expf (double);
 libm_ifunc (__ieee754_expf,
-	    HAS_SSE2 ? __ieee754_expf_sse2 : __ieee754_expf_ia32);
+	    HAS_CPU_FEATURE (SSE2)
+	    ? __ieee754_expf_sse2
+	    : __ieee754_expf_ia32);
 
 extern double __expf_finite_sse2 (double);
 extern double __expf_finite_ia32 (double);
 
 double __expf_finite (double);
 libm_ifunc (__expf_finite,
-	    HAS_SSE2 ? __expf_finite_sse2 : __expf_finite_ia32);
+	    HAS_CPU_FEATURE (SSE2)
+	    ? __expf_finite_sse2
+	    : __expf_finite_ia32);
diff --git a/sysdeps/i386/i686/fpu/multiarch/s_cosf.c b/sysdeps/i386/i686/fpu/multiarch/s_cosf.c
index 0799dca3ef..e32b2f4529 100644
--- a/sysdeps/i386/i686/fpu/multiarch/s_cosf.c
+++ b/sysdeps/i386/i686/fpu/multiarch/s_cosf.c
@@ -22,7 +22,7 @@ extern float __cosf_sse2 (float);
 extern float __cosf_ia32 (float);
 float __cosf (float);
 
-libm_ifunc (__cosf, HAS_SSE2 ? __cosf_sse2 : __cosf_ia32);
+libm_ifunc (__cosf, HAS_CPU_FEATURE (SSE2) ? __cosf_sse2 : __cosf_ia32);
 weak_alias (__cosf, cosf);
 
 #define COSF __cosf_ia32
diff --git a/sysdeps/i386/i686/fpu/multiarch/s_sincosf.c b/sysdeps/i386/i686/fpu/multiarch/s_sincosf.c
index 384d84470f..0d827e0daa 100644
--- a/sysdeps/i386/i686/fpu/multiarch/s_sincosf.c
+++ b/sysdeps/i386/i686/fpu/multiarch/s_sincosf.c
@@ -22,7 +22,8 @@ extern void __sincosf_sse2 (float, float *, float *);
 extern void __sincosf_ia32 (float, float *, float *);
 void __sincosf (float, float *, float *);
 
-libm_ifunc (__sincosf, HAS_SSE2 ? __sincosf_sse2 : __sincosf_ia32);
+libm_ifunc (__sincosf,
+	    HAS_CPU_FEATURE (SSE2) ? __sincosf_sse2 : __sincosf_ia32);
 weak_alias (__sincosf, sincosf);
 
 #define SINCOSF __sincosf_ia32
diff --git a/sysdeps/i386/i686/fpu/multiarch/s_sinf.c b/sysdeps/i386/i686/fpu/multiarch/s_sinf.c
index 6b627720cf..18afaa2714 100644
--- a/sysdeps/i386/i686/fpu/multiarch/s_sinf.c
+++ b/sysdeps/i386/i686/fpu/multiarch/s_sinf.c
@@ -22,7 +22,7 @@ extern float __sinf_sse2 (float);
 extern float __sinf_ia32 (float);
 float __sinf (float);
 
-libm_ifunc (__sinf, HAS_SSE2 ? __sinf_sse2 : __sinf_ia32);
+libm_ifunc (__sinf, HAS_CPU_FEATURE (SSE2) ? __sinf_sse2 : __sinf_ia32);
 weak_alias (__sinf, sinf);
 #define SINF __sinf_ia32
 #include <sysdeps/ieee754/flt-32/s_sinf.c>
diff --git a/sysdeps/i386/i686/multiarch/bcopy.S b/sysdeps/i386/i686/multiarch/bcopy.S
index e767d97019..3fc95dcba9 100644
--- a/sysdeps/i386/i686/multiarch/bcopy.S
+++ b/sysdeps/i386/i686/multiarch/bcopy.S
@@ -23,51 +23,24 @@
 
 /* Define multiple versions only for the definition in lib.  */
 #if IS_IN (libc)
-# ifdef SHARED
 	.text
 ENTRY(bcopy)
 	.type	bcopy, @gnu_indirect_function
-	pushl	%ebx
-	cfi_adjust_cfa_offset (4)
-	cfi_rel_offset (ebx, 0)
-	LOAD_PIC_REG(bx)
-	cmpl	$0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
-	jne	1f
-	call	__init_cpu_features
-1:	leal	__bcopy_ia32@GOTOFF(%ebx), %eax
-	testl	$bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features@GOTOFF(%ebx)
+	LOAD_GOT_AND_RTLD_GLOBAL_RO
+	LOAD_FUNC_GOT_EAX (__bcopy_ia32)
+	HAS_CPU_FEATURE (SSE2)
 	jz	2f
-	leal	__bcopy_sse2_unaligned@GOTOFF(%ebx), %eax
-	testl	$bit_Fast_Unaligned_Load, FEATURE_OFFSET+index_Fast_Unaligned_Load+__cpu_features@GOTOFF(%ebx)
+	LOAD_FUNC_GOT_EAX (__bcopy_sse2_unaligned)
+	HAS_ARCH_FEATURE (Fast_Unaligned_Load)
 	jnz	2f
-	testl	$bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features@GOTOFF(%ebx)
+	HAS_CPU_FEATURE (SSSE3)
 	jz	2f
-	leal	__bcopy_ssse3@GOTOFF(%ebx), %eax
-	testl	$bit_Fast_Rep_String, FEATURE_OFFSET+index_Fast_Rep_String+__cpu_features@GOTOFF(%ebx)
+	LOAD_FUNC_GOT_EAX (__bcopy_ssse3)
+	HAS_CPU_FEATURE (Fast_Rep_String)
 	jz	2f
-	leal	__bcopy_ssse3_rep@GOTOFF(%ebx), %eax
-2:	popl	%ebx
-	cfi_adjust_cfa_offset (-4)
-	cfi_restore (ebx)
-	ret
-END(bcopy)
-# else
-	.text
-ENTRY(bcopy)
-	.type	bcopy, @gnu_indirect_function
-	cmpl	$0, KIND_OFFSET+__cpu_features
-	jne	1f
-	call	__init_cpu_features
-1:	leal	__bcopy_ia32, %eax
-	testl	$bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features
-	jz	2f
-	leal	__bcopy_ssse3, %eax
-	testl	$bit_Fast_Rep_String, FEATURE_OFFSET+index_Fast_Rep_String+__cpu_features
-	jz	2f
-	leal	__bcopy_ssse3_rep, %eax
+	LOAD_FUNC_GOT_EAX (__bcopy_ssse3_rep)
 2:	ret
 END(bcopy)
-# endif
 
 # undef ENTRY
 # define ENTRY(name) \
diff --git a/sysdeps/i386/i686/multiarch/bzero.S b/sysdeps/i386/i686/multiarch/bzero.S
index e8dc85fc9f..95c96a81ea 100644
--- a/sysdeps/i386/i686/multiarch/bzero.S
+++ b/sysdeps/i386/i686/multiarch/bzero.S
@@ -23,46 +23,19 @@
 
 /* Define multiple versions only for the definition in lib.  */
 #if IS_IN (libc)
-# ifdef SHARED
-	.text
-ENTRY(__bzero)
-	.type	__bzero, @gnu_indirect_function
-	pushl	%ebx
-	cfi_adjust_cfa_offset (4)
-	cfi_rel_offset (ebx, 0)
-	LOAD_PIC_REG(bx)
-	cmpl	$0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
-	jne	1f
-	call	__init_cpu_features
-1:	leal	__bzero_ia32@GOTOFF(%ebx), %eax
-	testl	$bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features@GOTOFF(%ebx)
-	jz	2f
-	leal	__bzero_sse2@GOTOFF(%ebx), %eax
-	testl	$bit_Fast_Rep_String, FEATURE_OFFSET+index_Fast_Rep_String+__cpu_features@GOTOFF(%ebx)
-	jz	2f
-	leal	__bzero_sse2_rep@GOTOFF(%ebx), %eax
-2:	popl	%ebx
-	cfi_adjust_cfa_offset (-4)
-	cfi_restore (ebx)
-	ret
-END(__bzero)
-# else
 	.text
 ENTRY(__bzero)
 	.type	__bzero, @gnu_indirect_function
-	cmpl	$0, KIND_OFFSET+__cpu_features
-	jne	1f
-	call	__init_cpu_features
-1:	leal	__bzero_ia32, %eax
-	testl	$bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features
+	LOAD_GOT_AND_RTLD_GLOBAL_RO
+	LOAD_FUNC_GOT_EAX (__bzero_ia32)
+	HAS_CPU_FEATURE (SSE2)
 	jz	2f
-	leal	__bzero_sse2, %eax
-	testl	$bit_Fast_Rep_String, FEATURE_OFFSET+index_Fast_Rep_String+__cpu_features
+	LOAD_FUNC_GOT_EAX ( __bzero_sse2)
+	HAS_CPU_FEATURE (Fast_Rep_String)
 	jz	2f
-	leal	__bzero_sse2_rep, %eax
+	LOAD_FUNC_GOT_EAX (__bzero_sse2_rep)
 2:	ret
 END(__bzero)
-# endif
 
 # undef ENTRY
 # define ENTRY(name) \
diff --git a/sysdeps/i386/i686/multiarch/ifunc-impl-list.c b/sysdeps/i386/i686/multiarch/ifunc-impl-list.c
index 92366a7b46..a6735a804b 100644
--- a/sysdeps/i386/i686/multiarch/ifunc-impl-list.c
+++ b/sysdeps/i386/i686/multiarch/ifunc-impl-list.c
@@ -38,152 +38,179 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 
   /* Support sysdeps/i386/i686/multiarch/bcopy.S.  */
   IFUNC_IMPL (i, name, bcopy,
-	      IFUNC_IMPL_ADD (array, i, bcopy, HAS_SSSE3,
+	      IFUNC_IMPL_ADD (array, i, bcopy, HAS_CPU_FEATURE (SSSE3),
 			      __bcopy_ssse3_rep)
-	      IFUNC_IMPL_ADD (array, i, bcopy, HAS_SSSE3, __bcopy_ssse3)
-	      IFUNC_IMPL_ADD (array, i, bcopy, HAS_SSE2,
+	      IFUNC_IMPL_ADD (array, i, bcopy, HAS_CPU_FEATURE (SSSE3),
+			      __bcopy_ssse3)
+	      IFUNC_IMPL_ADD (array, i, bcopy, HAS_CPU_FEATURE (SSE2),
 			      __bcopy_sse2_unaligned)
 	      IFUNC_IMPL_ADD (array, i, bcopy, 1, __bcopy_ia32))
 
   /* Support sysdeps/i386/i686/multiarch/bzero.S.  */
   IFUNC_IMPL (i, name, bzero,
-	      IFUNC_IMPL_ADD (array, i, bzero, HAS_SSE2, __bzero_sse2_rep)
-	      IFUNC_IMPL_ADD (array, i, bzero, HAS_SSE2, __bzero_sse2)
+	      IFUNC_IMPL_ADD (array, i, bzero, HAS_CPU_FEATURE (SSE2),
+			      __bzero_sse2_rep)
+	      IFUNC_IMPL_ADD (array, i, bzero, HAS_CPU_FEATURE (SSE2),
+			      __bzero_sse2)
 	      IFUNC_IMPL_ADD (array, i, bzero, 1, __bzero_ia32))
 
   /* Support sysdeps/i386/i686/multiarch/memchr.S.  */
   IFUNC_IMPL (i, name, memchr,
-	      IFUNC_IMPL_ADD (array, i, memchr, HAS_SSE2,
+	      IFUNC_IMPL_ADD (array, i, memchr, HAS_CPU_FEATURE (SSE2),
 			      __memchr_sse2_bsf)
-	      IFUNC_IMPL_ADD (array, i, memchr, HAS_SSE2, __memchr_sse2)
+	      IFUNC_IMPL_ADD (array, i, memchr, HAS_CPU_FEATURE (SSE2),
+			      __memchr_sse2)
 	      IFUNC_IMPL_ADD (array, i, memchr, 1, __memchr_ia32))
 
   /* Support sysdeps/i386/i686/multiarch/memcmp.S.  */
   IFUNC_IMPL (i, name, memcmp,
-	      IFUNC_IMPL_ADD (array, i, memcmp, HAS_SSE4_2,
+	      IFUNC_IMPL_ADD (array, i, memcmp, HAS_CPU_FEATURE (SSE4_2),
 			      __memcmp_sse4_2)
-	      IFUNC_IMPL_ADD (array, i, memcmp, HAS_SSSE3, __memcmp_ssse3)
+	      IFUNC_IMPL_ADD (array, i, memcmp, HAS_CPU_FEATURE (SSSE3),
+			      __memcmp_ssse3)
 	      IFUNC_IMPL_ADD (array, i, memcmp, 1, __memcmp_ia32))
 
   /* Support sysdeps/i386/i686/multiarch/memmove_chk.S.  */
   IFUNC_IMPL (i, name, __memmove_chk,
-	      IFUNC_IMPL_ADD (array, i, __memmove_chk, HAS_SSSE3,
+	      IFUNC_IMPL_ADD (array, i, __memmove_chk,
+			      HAS_CPU_FEATURE (SSSE3),
 			      __memmove_chk_ssse3_rep)
-	      IFUNC_IMPL_ADD (array, i, __memmove_chk, HAS_SSSE3,
+	      IFUNC_IMPL_ADD (array, i, __memmove_chk,
+			      HAS_CPU_FEATURE (SSSE3),
 			      __memmove_chk_ssse3)
-	      IFUNC_IMPL_ADD (array, i, __memmove_chk, HAS_SSE2,
+	      IFUNC_IMPL_ADD (array, i, __memmove_chk,
+			      HAS_CPU_FEATURE (SSE2),
 			      __memmove_chk_sse2_unaligned)
 	      IFUNC_IMPL_ADD (array, i, __memmove_chk, 1,
 			      __memmove_chk_ia32))
 
   /* Support sysdeps/i386/i686/multiarch/memmove.S.  */
   IFUNC_IMPL (i, name, memmove,
-	      IFUNC_IMPL_ADD (array, i, memmove, HAS_SSSE3,
+	      IFUNC_IMPL_ADD (array, i, memmove, HAS_CPU_FEATURE (SSSE3),
 			      __memmove_ssse3_rep)
-	      IFUNC_IMPL_ADD (array, i, memmove, HAS_SSSE3,
+	      IFUNC_IMPL_ADD (array, i, memmove, HAS_CPU_FEATURE (SSSE3),
 			      __memmove_ssse3)
-	      IFUNC_IMPL_ADD (array, i, memmove, HAS_SSE2,
+	      IFUNC_IMPL_ADD (array, i, memmove, HAS_CPU_FEATURE (SSE2),
 			      __memmove_sse2_unaligned)
 	      IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_ia32))
 
   /* Support sysdeps/i386/i686/multiarch/memrchr.S.  */
   IFUNC_IMPL (i, name, memrchr,
-	      IFUNC_IMPL_ADD (array, i, memrchr, HAS_SSE2,
+	      IFUNC_IMPL_ADD (array, i, memrchr, HAS_CPU_FEATURE (SSE2),
 			      __memrchr_sse2_bsf)
-	      IFUNC_IMPL_ADD (array, i, memrchr, HAS_SSE2, __memrchr_sse2)
+	      IFUNC_IMPL_ADD (array, i, memrchr, HAS_CPU_FEATURE (SSE2),
+			      __memrchr_sse2)
 	      IFUNC_IMPL_ADD (array, i, memrchr, 1, __memrchr_ia32))
 
   /* Support sysdeps/i386/i686/multiarch/memset_chk.S.  */
   IFUNC_IMPL (i, name, __memset_chk,
-	      IFUNC_IMPL_ADD (array, i, __memset_chk, HAS_SSE2,
+	      IFUNC_IMPL_ADD (array, i, __memset_chk,
+			      HAS_CPU_FEATURE (SSE2),
 			      __memset_chk_sse2_rep)
-	      IFUNC_IMPL_ADD (array, i, __memset_chk, HAS_SSE2,
+	      IFUNC_IMPL_ADD (array, i, __memset_chk,
+			      HAS_CPU_FEATURE (SSE2),
 			      __memset_chk_sse2)
 	      IFUNC_IMPL_ADD (array, i, __memset_chk, 1,
 			      __memset_chk_ia32))
 
   /* Support sysdeps/i386/i686/multiarch/memset.S.  */
   IFUNC_IMPL (i, name, memset,
-	      IFUNC_IMPL_ADD (array, i, memset, HAS_SSE2,
+	      IFUNC_IMPL_ADD (array, i, memset, HAS_CPU_FEATURE (SSE2),
 			      __memset_sse2_rep)
-	      IFUNC_IMPL_ADD (array, i, memset, HAS_SSE2, __memset_sse2)
+	      IFUNC_IMPL_ADD (array, i, memset, HAS_CPU_FEATURE (SSE2),
+			      __memset_sse2)
 	      IFUNC_IMPL_ADD (array, i, memset, 1, __memset_ia32))
 
   /* Support sysdeps/i386/i686/multiarch/rawmemchr.S.  */
   IFUNC_IMPL (i, name, rawmemchr,
-	      IFUNC_IMPL_ADD (array, i, rawmemchr, HAS_SSE2,
+	      IFUNC_IMPL_ADD (array, i, rawmemchr, HAS_CPU_FEATURE (SSE2),
 			      __rawmemchr_sse2_bsf)
-	      IFUNC_IMPL_ADD (array, i, rawmemchr, HAS_SSE2,
+	      IFUNC_IMPL_ADD (array, i, rawmemchr, HAS_CPU_FEATURE (SSE2),
 			      __rawmemchr_sse2)
 	      IFUNC_IMPL_ADD (array, i, rawmemchr, 1, __rawmemchr_ia32))
 
   /* Support sysdeps/i386/i686/multiarch/stpncpy.S.  */
   IFUNC_IMPL (i, name, stpncpy,
-	      IFUNC_IMPL_ADD (array, i, stpncpy, HAS_SSSE3,
+	      IFUNC_IMPL_ADD (array, i, stpncpy, HAS_CPU_FEATURE (SSSE3),
 			      __stpncpy_ssse3)
-	      IFUNC_IMPL_ADD (array, i, stpncpy, HAS_SSE2, __stpncpy_sse2)
+	      IFUNC_IMPL_ADD (array, i, stpncpy, HAS_CPU_FEATURE (SSE2),
+			      __stpncpy_sse2)
 	      IFUNC_IMPL_ADD (array, i, stpncpy, 1, __stpncpy_ia32))
 
   /* Support sysdeps/i386/i686/multiarch/stpcpy.S.  */
   IFUNC_IMPL (i, name, stpcpy,
-	      IFUNC_IMPL_ADD (array, i, stpcpy, HAS_SSSE3, __stpcpy_ssse3)
-	      IFUNC_IMPL_ADD (array, i, stpcpy, HAS_SSE2, __stpcpy_sse2)
+	      IFUNC_IMPL_ADD (array, i, stpcpy, HAS_CPU_FEATURE (SSSE3),
+			      __stpcpy_ssse3)
+	      IFUNC_IMPL_ADD (array, i, stpcpy, HAS_CPU_FEATURE (SSE2),
+			      __stpcpy_sse2)
 	      IFUNC_IMPL_ADD (array, i, stpcpy, 1, __stpcpy_ia32))
 
   /* Support sysdeps/i386/i686/multiarch/strcasecmp.S.  */
   IFUNC_IMPL (i, name, strcasecmp,
-	      IFUNC_IMPL_ADD (array, i, strcasecmp, HAS_SSE4_2,
+	      IFUNC_IMPL_ADD (array, i, strcasecmp,
+			      HAS_CPU_FEATURE (SSE4_2),
 			      __strcasecmp_sse4_2)
-	      IFUNC_IMPL_ADD (array, i, strcasecmp, HAS_SSSE3,
+	      IFUNC_IMPL_ADD (array, i, strcasecmp,
+			      HAS_CPU_FEATURE (SSSE3),
 			      __strcasecmp_ssse3)
 	      IFUNC_IMPL_ADD (array, i, strcasecmp, 1, __strcasecmp_ia32))
 
   /* Support sysdeps/i386/i686/multiarch/strcasecmp_l.S.  */
   IFUNC_IMPL (i, name, strcasecmp_l,
-	      IFUNC_IMPL_ADD (array, i, strcasecmp_l, HAS_SSE4_2,
+	      IFUNC_IMPL_ADD (array, i, strcasecmp_l,
+			      HAS_CPU_FEATURE (SSE4_2),
 			      __strcasecmp_l_sse4_2)
-	      IFUNC_IMPL_ADD (array, i, strcasecmp_l, HAS_SSSE3,
+	      IFUNC_IMPL_ADD (array, i, strcasecmp_l,
+			      HAS_CPU_FEATURE (SSSE3),
 			      __strcasecmp_l_ssse3)
 	      IFUNC_IMPL_ADD (array, i, strcasecmp_l, 1,
 			      __strcasecmp_l_ia32))
 
   /* Support sysdeps/i386/i686/multiarch/strcat.S.  */
   IFUNC_IMPL (i, name, strcat,
-	      IFUNC_IMPL_ADD (array, i, strcat, HAS_SSSE3, __strcat_ssse3)
-	      IFUNC_IMPL_ADD (array, i, strcat, HAS_SSE2, __strcat_sse2)
+	      IFUNC_IMPL_ADD (array, i, strcat, HAS_CPU_FEATURE (SSSE3),
+			      __strcat_ssse3)
+	      IFUNC_IMPL_ADD (array, i, strcat, HAS_CPU_FEATURE (SSE2),
+			      __strcat_sse2)
 	      IFUNC_IMPL_ADD (array, i, strcat, 1, __strcat_ia32))
 
   /* Support sysdeps/i386/i686/multiarch/strchr.S.  */
   IFUNC_IMPL (i, name, strchr,
-	      IFUNC_IMPL_ADD (array, i, strchr, HAS_SSE2,
+	      IFUNC_IMPL_ADD (array, i, strchr, HAS_CPU_FEATURE (SSE2),
 			      __strchr_sse2_bsf)
-	      IFUNC_IMPL_ADD (array, i, strchr, HAS_SSE2, __strchr_sse2)
+	      IFUNC_IMPL_ADD (array, i, strchr, HAS_CPU_FEATURE (SSE2),
+			      __strchr_sse2)
 	      IFUNC_IMPL_ADD (array, i, strchr, 1, __strchr_ia32))
 
   /* Support sysdeps/i386/i686/multiarch/strcmp.S.  */
   IFUNC_IMPL (i, name, strcmp,
-	      IFUNC_IMPL_ADD (array, i, strcmp, HAS_SSE4_2,
+	      IFUNC_IMPL_ADD (array, i, strcmp, HAS_CPU_FEATURE (SSE4_2),
 			      __strcmp_sse4_2)
-	      IFUNC_IMPL_ADD (array, i, strcmp, HAS_SSSE3, __strcmp_ssse3)
+	      IFUNC_IMPL_ADD (array, i, strcmp, HAS_CPU_FEATURE (SSSE3),
+			      __strcmp_ssse3)
 	      IFUNC_IMPL_ADD (array, i, strcmp, 1, __strcmp_ia32))
 
   /* Support sysdeps/i386/i686/multiarch/strcpy.S.  */
   IFUNC_IMPL (i, name, strcpy,
-	      IFUNC_IMPL_ADD (array, i, strcpy, HAS_SSSE3, __strcpy_ssse3)
-	      IFUNC_IMPL_ADD (array, i, strcpy, HAS_SSE2, __strcpy_sse2)
+	      IFUNC_IMPL_ADD (array, i, strcpy, HAS_CPU_FEATURE (SSSE3),
+			      __strcpy_ssse3)
+	      IFUNC_IMPL_ADD (array, i, strcpy, HAS_CPU_FEATURE (SSE2),
+			      __strcpy_sse2)
 	      IFUNC_IMPL_ADD (array, i, strcpy, 1, __strcpy_ia32))
 
   /* Support sysdeps/i386/i686/multiarch/strcspn.S.  */
   IFUNC_IMPL (i, name, strcspn,
-	      IFUNC_IMPL_ADD (array, i, strcspn, HAS_SSE4_2,
+	      IFUNC_IMPL_ADD (array, i, strcspn, HAS_CPU_FEATURE (SSE4_2),
 			      __strcspn_sse42)
 	      IFUNC_IMPL_ADD (array, i, strcspn, 1, __strcspn_ia32))
 
   /* Support sysdeps/i386/i686/multiarch/strncase.S.  */
   IFUNC_IMPL (i, name, strncasecmp,
-	      IFUNC_IMPL_ADD (array, i, strncasecmp, HAS_SSE4_2,
+	      IFUNC_IMPL_ADD (array, i, strncasecmp,
+			      HAS_CPU_FEATURE (SSE4_2),
 			      __strncasecmp_sse4_2)
-	      IFUNC_IMPL_ADD (array, i, strncasecmp, HAS_SSSE3,
+	      IFUNC_IMPL_ADD (array, i, strncasecmp,
+			      HAS_CPU_FEATURE (SSSE3),
 			      __strncasecmp_ssse3)
 	      IFUNC_IMPL_ADD (array, i, strncasecmp, 1,
 			      __strncasecmp_ia32))
@@ -191,136 +218,156 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
   /* Support sysdeps/i386/i686/multiarch/strncase_l.S.  */
   IFUNC_IMPL (i, name, strncasecmp_l,
 	      IFUNC_IMPL_ADD (array, i, strncasecmp_l,
-			      HAS_SSE4_2, __strncasecmp_l_sse4_2)
+			      HAS_CPU_FEATURE (SSE4_2),
+			      __strncasecmp_l_sse4_2)
 	      IFUNC_IMPL_ADD (array, i, strncasecmp_l,
-			      HAS_SSSE3, __strncasecmp_l_ssse3)
+			      HAS_CPU_FEATURE (SSSE3),
+			      __strncasecmp_l_ssse3)
 	      IFUNC_IMPL_ADD (array, i, strncasecmp_l, 1,
 			      __strncasecmp_l_ia32))
 
   /* Support sysdeps/i386/i686/multiarch/strncat.S.  */
   IFUNC_IMPL (i, name, strncat,
-	      IFUNC_IMPL_ADD (array, i, strncat, HAS_SSSE3,
+	      IFUNC_IMPL_ADD (array, i, strncat, HAS_CPU_FEATURE (SSSE3),
 			      __strncat_ssse3)
-	      IFUNC_IMPL_ADD (array, i, strncat, HAS_SSE2, __strncat_sse2)
+	      IFUNC_IMPL_ADD (array, i, strncat, HAS_CPU_FEATURE (SSE2),
+			      __strncat_sse2)
 	      IFUNC_IMPL_ADD (array, i, strncat, 1, __strncat_ia32))
 
   /* Support sysdeps/i386/i686/multiarch/strncpy.S.  */
   IFUNC_IMPL (i, name, strncpy,
-	      IFUNC_IMPL_ADD (array, i, strncpy, HAS_SSSE3,
+	      IFUNC_IMPL_ADD (array, i, strncpy, HAS_CPU_FEATURE (SSSE3),
 			      __strncpy_ssse3)
-	      IFUNC_IMPL_ADD (array, i, strncpy, HAS_SSE2, __strncpy_sse2)
+	      IFUNC_IMPL_ADD (array, i, strncpy, HAS_CPU_FEATURE (SSE2),
+			      __strncpy_sse2)
 	      IFUNC_IMPL_ADD (array, i, strncpy, 1, __strncpy_ia32))
 
   /* Support sysdeps/i386/i686/multiarch/strnlen.S.  */
   IFUNC_IMPL (i, name, strnlen,
-	      IFUNC_IMPL_ADD (array, i, strnlen, HAS_SSE2, __strnlen_sse2)
+	      IFUNC_IMPL_ADD (array, i, strnlen, HAS_CPU_FEATURE (SSE2),
+			      __strnlen_sse2)
 	      IFUNC_IMPL_ADD (array, i, strnlen, 1, __strnlen_ia32))
 
   /* Support sysdeps/i386/i686/multiarch/strpbrk.S.  */
   IFUNC_IMPL (i, name, strpbrk,
-	      IFUNC_IMPL_ADD (array, i, strpbrk, HAS_SSE4_2,
+	      IFUNC_IMPL_ADD (array, i, strpbrk, HAS_CPU_FEATURE (SSE4_2),
 			      __strpbrk_sse42)
 	      IFUNC_IMPL_ADD (array, i, strpbrk, 1, __strpbrk_ia32))
 
   /* Support sysdeps/i386/i686/multiarch/strrchr.S.  */
   IFUNC_IMPL (i, name, strrchr,
-	      IFUNC_IMPL_ADD (array, i, strrchr, HAS_SSE2,
+	      IFUNC_IMPL_ADD (array, i, strrchr, HAS_CPU_FEATURE (SSE2),
 			      __strrchr_sse2_bsf)
-	      IFUNC_IMPL_ADD (array, i, strrchr, HAS_SSE2, __strrchr_sse2)
+	      IFUNC_IMPL_ADD (array, i, strrchr, HAS_CPU_FEATURE (SSE2),
+			      __strrchr_sse2)
 	      IFUNC_IMPL_ADD (array, i, strrchr, 1, __strrchr_ia32))
 
   /* Support sysdeps/i386/i686/multiarch/strspn.S.  */
   IFUNC_IMPL (i, name, strspn,
-	      IFUNC_IMPL_ADD (array, i, strspn, HAS_SSE4_2, __strspn_sse42)
+	      IFUNC_IMPL_ADD (array, i, strspn, HAS_CPU_FEATURE (SSE4_2),
+			      __strspn_sse42)
 	      IFUNC_IMPL_ADD (array, i, strspn, 1, __strspn_ia32))
 
   /* Support sysdeps/i386/i686/multiarch/wcschr.S.  */
   IFUNC_IMPL (i, name, wcschr,
-	      IFUNC_IMPL_ADD (array, i, wcschr, HAS_SSE2, __wcschr_sse2)
+	      IFUNC_IMPL_ADD (array, i, wcschr, HAS_CPU_FEATURE (SSE2),
+			      __wcschr_sse2)
 	      IFUNC_IMPL_ADD (array, i, wcschr, 1, __wcschr_ia32))
 
   /* Support sysdeps/i386/i686/multiarch/wcscmp.S.  */
   IFUNC_IMPL (i, name, wcscmp,
-	      IFUNC_IMPL_ADD (array, i, wcscmp, HAS_SSE2, __wcscmp_sse2)
+	      IFUNC_IMPL_ADD (array, i, wcscmp, HAS_CPU_FEATURE (SSE2),
+			      __wcscmp_sse2)
 	      IFUNC_IMPL_ADD (array, i, wcscmp, 1, __wcscmp_ia32))
 
   /* Support sysdeps/i386/i686/multiarch/wcscpy.S.  */
   IFUNC_IMPL (i, name, wcscpy,
-	      IFUNC_IMPL_ADD (array, i, wcscpy, HAS_SSSE3, __wcscpy_ssse3)
+	      IFUNC_IMPL_ADD (array, i, wcscpy, HAS_CPU_FEATURE (SSSE3),
+			      __wcscpy_ssse3)
 	      IFUNC_IMPL_ADD (array, i, wcscpy, 1, __wcscpy_ia32))
 
   /* Support sysdeps/i386/i686/multiarch/wcslen.S.  */
   IFUNC_IMPL (i, name, wcslen,
-	      IFUNC_IMPL_ADD (array, i, wcslen, HAS_SSE2, __wcslen_sse2)
+	      IFUNC_IMPL_ADD (array, i, wcslen, HAS_CPU_FEATURE (SSE2),
+			      __wcslen_sse2)
 	      IFUNC_IMPL_ADD (array, i, wcslen, 1, __wcslen_ia32))
 
   /* Support sysdeps/i386/i686/multiarch/wcsrchr.S.  */
   IFUNC_IMPL (i, name, wcsrchr,
-	      IFUNC_IMPL_ADD (array, i, wcsrchr, HAS_SSE2, __wcsrchr_sse2)
+	      IFUNC_IMPL_ADD (array, i, wcsrchr, HAS_CPU_FEATURE (SSE2),
+			      __wcsrchr_sse2)
 	      IFUNC_IMPL_ADD (array, i, wcsrchr, 1, __wcsrchr_ia32))
 
   /* Support sysdeps/i386/i686/multiarch/wmemcmp.S.  */
   IFUNC_IMPL (i, name, wmemcmp,
-	      IFUNC_IMPL_ADD (array, i, wmemcmp, HAS_SSE4_2,
+	      IFUNC_IMPL_ADD (array, i, wmemcmp, HAS_CPU_FEATURE (SSE4_2),
 			      __wmemcmp_sse4_2)
-	      IFUNC_IMPL_ADD (array, i, wmemcmp, HAS_SSSE3,
+	      IFUNC_IMPL_ADD (array, i, wmemcmp, HAS_CPU_FEATURE (SSSE3),
 			      __wmemcmp_ssse3)
 	      IFUNC_IMPL_ADD (array, i, wmemcmp, 1, __wmemcmp_ia32))
 
 #ifdef SHARED
   /* Support sysdeps/i386/i686/multiarch/memcpy_chk.S.  */
   IFUNC_IMPL (i, name, __memcpy_chk,
-	      IFUNC_IMPL_ADD (array, i, __memcpy_chk, HAS_SSSE3,
+	      IFUNC_IMPL_ADD (array, i, __memcpy_chk,
+			      HAS_CPU_FEATURE (SSSE3),
 			      __memcpy_chk_ssse3_rep)
-	      IFUNC_IMPL_ADD (array, i, __memcpy_chk, HAS_SSSE3,
+	      IFUNC_IMPL_ADD (array, i, __memcpy_chk,
+			      HAS_CPU_FEATURE (SSSE3),
 			      __memcpy_chk_ssse3)
-	      IFUNC_IMPL_ADD (array, i, __memcpy_chk, HAS_SSE2,
+	      IFUNC_IMPL_ADD (array, i, __memcpy_chk,
+			      HAS_CPU_FEATURE (SSE2),
 			      __memcpy_chk_sse2_unaligned)
 	      IFUNC_IMPL_ADD (array, i, __memcpy_chk, 1,
 			      __memcpy_chk_ia32))
 
   /* Support sysdeps/i386/i686/multiarch/memcpy.S.  */
   IFUNC_IMPL (i, name, memcpy,
-	      IFUNC_IMPL_ADD (array, i, memcpy, HAS_SSSE3,
+	      IFUNC_IMPL_ADD (array, i, memcpy, HAS_CPU_FEATURE (SSSE3),
 			      __memcpy_ssse3_rep)
-	      IFUNC_IMPL_ADD (array, i, memcpy, HAS_SSSE3, __memcpy_ssse3)
-	      IFUNC_IMPL_ADD (array, i, memcpy, HAS_SSE2,
+	      IFUNC_IMPL_ADD (array, i, memcpy, HAS_CPU_FEATURE (SSSE3),
+			      __memcpy_ssse3)
+	      IFUNC_IMPL_ADD (array, i, memcpy, HAS_CPU_FEATURE (SSE2),
 			      __memcpy_sse2_unaligned)
 	      IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_ia32))
 
   /* Support sysdeps/i386/i686/multiarch/mempcpy_chk.S.  */
   IFUNC_IMPL (i, name, __mempcpy_chk,
-	      IFUNC_IMPL_ADD (array, i, __mempcpy_chk, HAS_SSSE3,
+	      IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
+			      HAS_CPU_FEATURE (SSSE3),
 			      __mempcpy_chk_ssse3_rep)
-	      IFUNC_IMPL_ADD (array, i, __mempcpy_chk, HAS_SSSE3,
+	      IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
+			      HAS_CPU_FEATURE (SSSE3),
 			      __mempcpy_chk_ssse3)
-	      IFUNC_IMPL_ADD (array, i, __mempcpy_chk, HAS_SSE2,
+	      IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
+			      HAS_CPU_FEATURE (SSE2),
 			      __mempcpy_chk_sse2_unaligned)
 	      IFUNC_IMPL_ADD (array, i, __mempcpy_chk, 1,
 			      __mempcpy_chk_ia32))
 
   /* Support sysdeps/i386/i686/multiarch/mempcpy.S.  */
   IFUNC_IMPL (i, name, mempcpy,
-	      IFUNC_IMPL_ADD (array, i, mempcpy, HAS_SSSE3,
+	      IFUNC_IMPL_ADD (array, i, mempcpy, HAS_CPU_FEATURE (SSSE3),
 			      __mempcpy_ssse3_rep)
-	      IFUNC_IMPL_ADD (array, i, mempcpy, HAS_SSSE3,
+	      IFUNC_IMPL_ADD (array, i, mempcpy, HAS_CPU_FEATURE (SSSE3),
 			      __mempcpy_ssse3)
-	      IFUNC_IMPL_ADD (array, i, mempcpy, HAS_SSE2,
+	      IFUNC_IMPL_ADD (array, i, mempcpy, HAS_CPU_FEATURE (SSE2),
 			      __mempcpy_sse2_unaligned)
 	      IFUNC_IMPL_ADD (array, i, mempcpy, 1, __mempcpy_ia32))
 
   /* Support sysdeps/i386/i686/multiarch/strlen.S.  */
   IFUNC_IMPL (i, name, strlen,
-	      IFUNC_IMPL_ADD (array, i, strlen, HAS_SSE2,
+	      IFUNC_IMPL_ADD (array, i, strlen, HAS_CPU_FEATURE (SSE2),
 			      __strlen_sse2_bsf)
-	      IFUNC_IMPL_ADD (array, i, strlen, HAS_SSE2, __strlen_sse2)
+	      IFUNC_IMPL_ADD (array, i, strlen, HAS_CPU_FEATURE (SSE2),
+			      __strlen_sse2)
 	      IFUNC_IMPL_ADD (array, i, strlen, 1, __strlen_ia32))
 
   /* Support sysdeps/i386/i686/multiarch/strncmp.S.  */
   IFUNC_IMPL (i, name, strncmp,
-	      IFUNC_IMPL_ADD (array, i, strncmp, HAS_SSE4_2,
+	      IFUNC_IMPL_ADD (array, i, strncmp, HAS_CPU_FEATURE (SSE4_2),
 			      __strncmp_sse4_2)
-	      IFUNC_IMPL_ADD (array, i, strncmp, HAS_SSSE3,
+	      IFUNC_IMPL_ADD (array, i, strncmp, HAS_CPU_FEATURE (SSSE3),
 			      __strncmp_ssse3)
 	      IFUNC_IMPL_ADD (array, i, strncmp, 1, __strncmp_ia32))
 #endif
diff --git a/sysdeps/i386/i686/multiarch/memchr.S b/sysdeps/i386/i686/multiarch/memchr.S
index 02994d055e..65e6b96f3b 100644
--- a/sysdeps/i386/i686/multiarch/memchr.S
+++ b/sysdeps/i386/i686/multiarch/memchr.S
@@ -22,46 +22,22 @@
 #include <init-arch.h>
 
 #if IS_IN (libc)
-# define CFI_POP(REG) \
-	cfi_adjust_cfa_offset (-4); \
-	cfi_restore (REG)
-
-# define CFI_PUSH(REG) \
-	cfi_adjust_cfa_offset (4); \
-	cfi_rel_offset (REG, 0)
-
 	.text
 ENTRY(__memchr)
 	.type	__memchr, @gnu_indirect_function
-	pushl	%ebx
-	CFI_PUSH (%ebx)
-	LOAD_PIC_REG(bx)
-	cmpl	$0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
-	jne	1f
-	call	__init_cpu_features
-
-1:	testl	$bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features@GOTOFF(%ebx)
+	LOAD_GOT_AND_RTLD_GLOBAL_RO
+	HAS_CPU_FEATURE (SSE2)
 	jz	2f
-	testl	$bit_Slow_BSF, FEATURE_OFFSET+index_Slow_BSF+__cpu_features@GOTOFF(%ebx)
+	HAS_ARCH_FEATURE (Slow_BSF)
 	jz	3f
 
-	leal	__memchr_sse2@GOTOFF(%ebx), %eax
-	popl	%ebx
-	CFI_POP	(%ebx)
+	LOAD_FUNC_GOT_EAX ( __memchr_sse2)
 	ret
 
-	CFI_PUSH (%ebx)
-
-2:	leal	__memchr_ia32@GOTOFF(%ebx), %eax
-	popl	%ebx
-	CFI_POP	(%ebx)
+2:	LOAD_FUNC_GOT_EAX (__memchr_ia32)
 	ret
 
-	CFI_PUSH (%ebx)
-
-3:	leal	__memchr_sse2_bsf@GOTOFF(%ebx), %eax
-	popl	%ebx
-	CFI_POP	(%ebx)
+3:	LOAD_FUNC_GOT_EAX (__memchr_sse2_bsf)
 	ret
 END(__memchr)
 
diff --git a/sysdeps/i386/i686/multiarch/memcmp.S b/sysdeps/i386/i686/multiarch/memcmp.S
index 6b607ebd3b..d4d7d2e91d 100644
--- a/sysdeps/i386/i686/multiarch/memcmp.S
+++ b/sysdeps/i386/i686/multiarch/memcmp.S
@@ -23,46 +23,19 @@
 
 /* Define multiple versions only for the definition in libc. */
 #if IS_IN (libc)
-# ifdef SHARED
-	.text
-ENTRY(memcmp)
-	.type	memcmp, @gnu_indirect_function
-	pushl	%ebx
-	cfi_adjust_cfa_offset (4)
-	cfi_rel_offset (ebx, 0)
-	LOAD_PIC_REG(bx)
-	cmpl	$0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
-	jne	1f
-	call	__init_cpu_features
-1:	leal	__memcmp_ia32@GOTOFF(%ebx), %eax
-	testl	$bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features@GOTOFF(%ebx)
-	jz	2f
-	leal	__memcmp_ssse3@GOTOFF(%ebx), %eax
-	testl	$bit_SSE4_2, CPUID_OFFSET+index_SSE4_2+__cpu_features@GOTOFF(%ebx)
-	jz	2f
-	leal	__memcmp_sse4_2@GOTOFF(%ebx), %eax
-2:	popl	%ebx
-	cfi_adjust_cfa_offset (-4)
-	cfi_restore (ebx)
-	ret
-END(memcmp)
-# else
 	.text
 ENTRY(memcmp)
 	.type	memcmp, @gnu_indirect_function
-	cmpl	$0, KIND_OFFSET+__cpu_features
-	jne	1f
-	call	__init_cpu_features
-1:	leal	__memcmp_ia32, %eax
-	testl	$bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features
+	LOAD_GOT_AND_RTLD_GLOBAL_RO
+	LOAD_FUNC_GOT_EAX (__memcmp_ia32)
+	HAS_CPU_FEATURE (SSSE3)
 	jz	2f
-	leal	__memcmp_ssse3, %eax
-	testl	$bit_SSE4_2, CPUID_OFFSET+index_SSE4_2+__cpu_features
+	LOAD_FUNC_GOT_EAX (__memcmp_ssse3)
+	HAS_CPU_FEATURE (SSE4_2)
 	jz	2f
-	leal	__memcmp_sse4_2, %eax
+	LOAD_FUNC_GOT_EAX (__memcmp_sse4_2)
 2:	ret
 END(memcmp)
-# endif
 
 # undef ENTRY
 # define ENTRY(name) \
diff --git a/sysdeps/i386/i686/multiarch/memcpy.S b/sysdeps/i386/i686/multiarch/memcpy.S
index c6d20bd8e0..9a4d183e01 100644
--- a/sysdeps/i386/i686/multiarch/memcpy.S
+++ b/sysdeps/i386/i686/multiarch/memcpy.S
@@ -28,29 +28,20 @@
 	.text
 ENTRY(memcpy)
 	.type	memcpy, @gnu_indirect_function
-	pushl	%ebx
-	cfi_adjust_cfa_offset (4)
-	cfi_rel_offset (ebx, 0)
-	LOAD_PIC_REG(bx)
-	cmpl	$0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
-	jne	1f
-	call	__init_cpu_features
-1:	leal	__memcpy_ia32@GOTOFF(%ebx), %eax
-	testl	$bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features@GOTOFF(%ebx)
+	LOAD_GOT_AND_RTLD_GLOBAL_RO
+	LOAD_FUNC_GOT_EAX (__memcpy_ia32)
+	HAS_CPU_FEATURE (SSE2)
 	jz	2f
-	leal	__memcpy_sse2_unaligned@GOTOFF(%ebx), %eax
-	testl	$bit_Fast_Unaligned_Load, FEATURE_OFFSET+index_Fast_Unaligned_Load+__cpu_features@GOTOFF(%ebx)
+	LOAD_FUNC_GOT_EAX (__memcpy_sse2_unaligned)
+	HAS_ARCH_FEATURE (Fast_Unaligned_Load)
 	jnz	2f
-	testl	$bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features@GOTOFF(%ebx)
+	HAS_CPU_FEATURE (SSSE3)
 	jz	2f
-	leal	__memcpy_ssse3@GOTOFF(%ebx), %eax
-	testl	$bit_Fast_Rep_String, FEATURE_OFFSET+index_Fast_Rep_String+__cpu_features@GOTOFF(%ebx)
+	LOAD_FUNC_GOT_EAX (__memcpy_ssse3)
+	HAS_CPU_FEATURE (Fast_Rep_String)
 	jz	2f
-	leal	__memcpy_ssse3_rep@GOTOFF(%ebx), %eax
-2:	popl	%ebx
-	cfi_adjust_cfa_offset (-4)
-	cfi_restore (ebx)
-	ret
+	LOAD_FUNC_GOT_EAX (__memcpy_ssse3_rep)
+2:	ret
 END(memcpy)
 
 # undef ENTRY
diff --git a/sysdeps/i386/i686/multiarch/memcpy_chk.S b/sysdeps/i386/i686/multiarch/memcpy_chk.S
index 93995879f8..3bbd921555 100644
--- a/sysdeps/i386/i686/multiarch/memcpy_chk.S
+++ b/sysdeps/i386/i686/multiarch/memcpy_chk.S
@@ -29,29 +29,20 @@
 	.text
 ENTRY(__memcpy_chk)
 	.type	__memcpy_chk, @gnu_indirect_function
-	pushl	%ebx
-	cfi_adjust_cfa_offset (4)
-	cfi_rel_offset (ebx, 0)
-	LOAD_PIC_REG(bx)
-	cmpl	$0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
-	jne	1f
-	call	__init_cpu_features
-1:	leal	__memcpy_chk_ia32@GOTOFF(%ebx), %eax
-	testl	$bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features@GOTOFF(%ebx)
+	LOAD_GOT_AND_RTLD_GLOBAL_RO
+	LOAD_FUNC_GOT_EAX (__memcpy_chk_ia32)
+	HAS_CPU_FEATURE (SSE2)
 	jz	2f
-	leal	__memcpy_chk_sse2_unaligned@GOTOFF(%ebx), %eax
-	testl	$bit_Fast_Unaligned_Load, FEATURE_OFFSET+index_Fast_Unaligned_Load+__cpu_features@GOTOFF(%ebx)
+	LOAD_FUNC_GOT_EAX (__memcpy_chk_sse2_unaligned)
+	HAS_ARCH_FEATURE (Fast_Unaligned_Load)
 	jnz	2f
-	testl	$bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features@GOTOFF(%ebx)
+	HAS_CPU_FEATURE (SSSE3)
 	jz	2f
-	leal	__memcpy_chk_ssse3@GOTOFF(%ebx), %eax
-	testl	$bit_Fast_Rep_String, FEATURE_OFFSET+index_Fast_Rep_String+__cpu_features@GOTOFF(%ebx)
+	LOAD_FUNC_GOT_EAX (__memcpy_chk_ssse3)
+	HAS_CPU_FEATURE (Fast_Rep_String)
 	jz	2f
-	leal	__memcpy_chk_ssse3_rep@GOTOFF(%ebx), %eax
-2:	popl	%ebx
-	cfi_adjust_cfa_offset (-4)
-	cfi_restore (ebx)
-	ret
+	LOAD_FUNC_GOT_EAX (__memcpy_chk_ssse3_rep)
+2:	ret
 END(__memcpy_chk)
 # else
 #  include "../memcpy_chk.S"
diff --git a/sysdeps/i386/i686/multiarch/memmove.S b/sysdeps/i386/i686/multiarch/memmove.S
index 70334639ba..2bf427fe93 100644
--- a/sysdeps/i386/i686/multiarch/memmove.S
+++ b/sysdeps/i386/i686/multiarch/memmove.S
@@ -23,37 +23,28 @@
 
 /* Define multiple versions only for the definition in lib.  */
 #if IS_IN (libc)
-# ifdef SHARED
 	.text
 ENTRY(memmove)
 	.type	memmove, @gnu_indirect_function
-	pushl	%ebx
-	cfi_adjust_cfa_offset (4)
-	cfi_rel_offset (ebx, 0)
-	LOAD_PIC_REG(bx)
-	cmpl	$0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
-	jne	1f
-	call	__init_cpu_features
-1:	leal	__memmove_ia32@GOTOFF(%ebx), %eax
-	testl	$bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features@GOTOFF(%ebx)
+	LOAD_GOT_AND_RTLD_GLOBAL_RO
+	LOAD_FUNC_GOT_EAX (__memmove_ia32)
+	HAS_CPU_FEATURE (SSE2)
 	jz	2f
-	leal	__memmove_sse2_unaligned@GOTOFF(%ebx), %eax
-	testl	$bit_Fast_Unaligned_Load, FEATURE_OFFSET+index_Fast_Unaligned_Load+__cpu_features@GOTOFF(%ebx)
+	LOAD_FUNC_GOT_EAX (__memmove_sse2_unaligned)
+	HAS_ARCH_FEATURE (Fast_Unaligned_Load)
 	jnz	2f
-	testl	$bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features@GOTOFF(%ebx)
+	HAS_CPU_FEATURE (SSSE3)
 	jz	2f
-	leal	__memmove_ssse3@GOTOFF(%ebx), %eax
-	testl	$bit_Fast_Rep_String, FEATURE_OFFSET+index_Fast_Rep_String+__cpu_features@GOTOFF(%ebx)
+	LOAD_FUNC_GOT_EAX (__memmove_ssse3)
+	HAS_ARCH_FEATURE (Fast_Rep_String)
 	jz	2f
-	leal	__memmove_ssse3_rep@GOTOFF(%ebx), %eax
-2:	popl	%ebx
-	cfi_adjust_cfa_offset (-4)
-	cfi_restore (ebx)
-	ret
+	LOAD_FUNC_GOT_EAX (__memmove_ssse3_rep)
+2:	ret
 END(memmove)
 
-# undef ENTRY
-# define ENTRY(name) \
+# ifdef SHARED
+#  undef ENTRY
+#  define ENTRY(name) \
 	.type __memmove_ia32, @function; \
 	.p2align 4; \
 	.globl __memmove_ia32; \
@@ -61,29 +52,8 @@ END(memmove)
 	__memmove_ia32: cfi_startproc; \
 	CALL_MCOUNT
 # else
-	.text
-ENTRY(memmove)
-	.type	memmove, @gnu_indirect_function
-	cmpl	$0, KIND_OFFSET+__cpu_features
-	jne	1f
-	call	__init_cpu_features
-1:	leal	__memmove_ia32, %eax
-	testl	$bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features
-	jz	2f
-	leal	__memmove_sse2_unaligned, %eax
-	testl	$bit_Fast_Unaligned_Load, FEATURE_OFFSET+index_Fast_Unaligned_Load+__cpu_features
-	jnz	2f
-	testl	$bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features
-	jz	2f
-	leal	__memmove_ssse3, %eax
-	testl	$bit_Fast_Rep_String, FEATURE_OFFSET+index_Fast_Rep_String+__cpu_features
-	jz	2f
-	leal	__memmove_ssse3_rep, %eax
-2:	ret
-END(memmove)
-
-# undef ENTRY
-# define ENTRY(name) \
+#  undef ENTRY
+#  define ENTRY(name) \
 	.type __memmove_ia32, @function; \
 	.globl __memmove_ia32; \
 	.p2align 4; \
diff --git a/sysdeps/i386/i686/multiarch/memmove_chk.S b/sysdeps/i386/i686/multiarch/memmove_chk.S
index 2b576d4960..b17f6edbdc 100644
--- a/sysdeps/i386/i686/multiarch/memmove_chk.S
+++ b/sysdeps/i386/i686/multiarch/memmove_chk.S
@@ -23,56 +23,26 @@
 
 /* Define multiple versions only for the definition in lib.  */
 #if IS_IN (libc)
-# ifdef SHARED
 	.text
 ENTRY(__memmove_chk)
 	.type	__memmove_chk, @gnu_indirect_function
-	pushl	%ebx
-	cfi_adjust_cfa_offset (4)
-	cfi_rel_offset (ebx, 0)
-	LOAD_PIC_REG(bx)
-	cmpl	$0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
-	jne	1f
-	call	__init_cpu_features
-1:	leal	__memmove_chk_ia32@GOTOFF(%ebx), %eax
-	testl	$bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features@GOTOFF(%ebx)
+	LOAD_GOT_AND_RTLD_GLOBAL_RO
+	LOAD_FUNC_GOT_EAX (__memmove_chk_ia32)
+	HAS_CPU_FEATURE (SSE2)
 	jz	2f
-	leal	__memmove_chk_sse2_unaligned@GOTOFF(%ebx), %eax
-	testl	$bit_Fast_Unaligned_Load, FEATURE_OFFSET+index_Fast_Unaligned_Load+__cpu_features@GOTOFF(%ebx)
+	LOAD_FUNC_GOT_EAX (__memmove_chk_sse2_unaligned)
+	HAS_ARCH_FEATURE (Fast_Unaligned_Load)
 	jnz	2f
-	testl	$bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features@GOTOFF(%ebx)
+	HAS_CPU_FEATURE (SSSE3)
 	jz	2f
-	leal	__memmove_chk_ssse3@GOTOFF(%ebx), %eax
-	testl	$bit_Fast_Rep_String, FEATURE_OFFSET+index_Fast_Rep_String+__cpu_features@GOTOFF(%ebx)
+	LOAD_FUNC_GOT_EAX (__memmove_chk_ssse3)
+	HAS_CPU_FEATURE (Fast_Rep_String)
 	jz	2f
-	leal	__memmove_chk_ssse3_rep@GOTOFF(%ebx), %eax
-2:	popl	%ebx
-	cfi_adjust_cfa_offset (-4)
-	cfi_restore (ebx)
-	ret
-END(__memmove_chk)
-# else
-	.text
-ENTRY(__memmove_chk)
-	.type	__memmove_chk, @gnu_indirect_function
-	cmpl	$0, KIND_OFFSET+__cpu_features
-	jne	1f
-	call	__init_cpu_features
-1:	leal	__memmove_chk_ia32, %eax
-	testl	$bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features
-	jz	2f
-	leal	__memmove_chk_sse2_unaligned, %eax
-	testl	$bit_Fast_Unaligned_Load, FEATURE_OFFSET+index_Fast_Unaligned_Load+__cpu_features
-	jnz	2f
-	testl	$bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features
-	jz	2f
-	leal	__memmove_chk_ssse3, %eax
-	testl	$bit_Fast_Rep_String, FEATURE_OFFSET+index_Fast_Rep_String+__cpu_features
-	jz	2f
-	leal	__memmove_chk_ssse3_rep, %eax
+	LOAD_FUNC_GOT_EAX (__memmove_chk_ssse3_rep)
 2:	ret
 END(__memmove_chk)
 
+# ifndef SHARED
 	.type __memmove_chk_sse2_unaligned, @function
 	.p2align 4;
 __memmove_chk_sse2_unaligned:
diff --git a/sysdeps/i386/i686/multiarch/mempcpy.S b/sysdeps/i386/i686/multiarch/mempcpy.S
index 39c934eb49..021558a5b0 100644
--- a/sysdeps/i386/i686/multiarch/mempcpy.S
+++ b/sysdeps/i386/i686/multiarch/mempcpy.S
@@ -28,29 +28,20 @@
 	.text
 ENTRY(__mempcpy)
 	.type	__mempcpy, @gnu_indirect_function
-	pushl	%ebx
-	cfi_adjust_cfa_offset (4)
-	cfi_rel_offset (ebx, 0)
-	LOAD_PIC_REG(bx)
-	cmpl	$0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
-	jne	1f
-	call	__init_cpu_features
-1:	leal	__mempcpy_ia32@GOTOFF(%ebx), %eax
-	testl	$bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features@GOTOFF(%ebx)
+	LOAD_GOT_AND_RTLD_GLOBAL_RO
+	LOAD_FUNC_GOT_EAX (__mempcpy_ia32)
+	HAS_CPU_FEATURE (SSE2)
 	jz	2f
-	leal	__mempcpy_sse2_unaligned@GOTOFF(%ebx), %eax
-	testl	$bit_Fast_Unaligned_Load, FEATURE_OFFSET+index_Fast_Unaligned_Load+__cpu_features@GOTOFF(%ebx)
+	LOAD_FUNC_GOT_EAX (__mempcpy_sse2_unaligned)
+	HAS_ARCH_FEATURE (Fast_Unaligned_Load)
 	jnz	2f
-	testl	$bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features@GOTOFF(%ebx)
+	HAS_CPU_FEATURE (SSSE3)
 	jz	2f
-	leal	__mempcpy_ssse3@GOTOFF(%ebx), %eax
-	testl	$bit_Fast_Rep_String, FEATURE_OFFSET+index_Fast_Rep_String+__cpu_features@GOTOFF(%ebx)
+	LOAD_FUNC_GOT_EAX (__mempcpy_ssse3)
+	HAS_CPU_FEATURE (Fast_Rep_String)
 	jz	2f
-	leal	__mempcpy_ssse3_rep@GOTOFF(%ebx), %eax
-2:	popl	%ebx
-	cfi_adjust_cfa_offset (-4)
-	cfi_restore (ebx)
-	ret
+	LOAD_FUNC_GOT_EAX (__mempcpy_ssse3_rep)
+2:	ret
 END(__mempcpy)
 
 # undef ENTRY
diff --git a/sysdeps/i386/i686/multiarch/mempcpy_chk.S b/sysdeps/i386/i686/multiarch/mempcpy_chk.S
index b6fa202833..1bea6eab38 100644
--- a/sysdeps/i386/i686/multiarch/mempcpy_chk.S
+++ b/sysdeps/i386/i686/multiarch/mempcpy_chk.S
@@ -29,29 +29,20 @@
 	.text
 ENTRY(__mempcpy_chk)
 	.type	__mempcpy_chk, @gnu_indirect_function
-	pushl	%ebx
-	cfi_adjust_cfa_offset (4)
-	cfi_rel_offset (ebx, 0)
-	LOAD_PIC_REG(bx)
-	cmpl	$0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
-	jne	1f
-	call	__init_cpu_features
-1:	leal	__mempcpy_chk_ia32@GOTOFF(%ebx), %eax
-	testl	$bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features@GOTOFF(%ebx)
+	LOAD_GOT_AND_RTLD_GLOBAL_RO
+	LOAD_FUNC_GOT_EAX (__mempcpy_chk_ia32)
+	HAS_CPU_FEATURE (SSE2)
 	jz	2f
-	leal	__mempcpy_chk_sse2_unaligned@GOTOFF(%ebx), %eax
-	testl	$bit_Fast_Unaligned_Load, FEATURE_OFFSET+index_Fast_Unaligned_Load+__cpu_features@GOTOFF(%ebx)
+	LOAD_FUNC_GOT_EAX (__mempcpy_chk_sse2_unaligned)
+	HAS_ARCH_FEATURE (Fast_Unaligned_Load)
 	jnz	2f
-	testl	$bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features@GOTOFF(%ebx)
+	HAS_CPU_FEATURE (SSSE3)
 	jz	2f
-	leal	__mempcpy_chk_ssse3@GOTOFF(%ebx), %eax
-	testl	$bit_Fast_Rep_String, FEATURE_OFFSET+index_Fast_Rep_String+__cpu_features@GOTOFF(%ebx)
+	LOAD_FUNC_GOT_EAX (__mempcpy_chk_ssse3)
+	HAS_CPU_FEATURE (Fast_Rep_String)
 	jz	2f
-	leal	__mempcpy_chk_ssse3_rep@GOTOFF(%ebx), %eax
-2:	popl	%ebx
-	cfi_adjust_cfa_offset (-4)
-	cfi_restore (ebx)
-	ret
+	LOAD_FUNC_GOT_EAX (__mempcpy_chk_ssse3_rep)
+2:	ret
 END(__mempcpy_chk)
 # else
 #  include "../mempcpy_chk.S"
diff --git a/sysdeps/i386/i686/multiarch/memrchr.S b/sysdeps/i386/i686/multiarch/memrchr.S
index 321e0b7e4e..32fb1a6792 100644
--- a/sysdeps/i386/i686/multiarch/memrchr.S
+++ b/sysdeps/i386/i686/multiarch/memrchr.S
@@ -22,46 +22,22 @@
 #include <init-arch.h>
 
 #if IS_IN (libc)
-# define CFI_POP(REG) \
-	cfi_adjust_cfa_offset (-4); \
-	cfi_restore (REG)
-
-# define CFI_PUSH(REG) \
-	cfi_adjust_cfa_offset (4); \
-	cfi_rel_offset (REG, 0)
-
 	.text
 ENTRY(__memrchr)
 	.type	__memrchr, @gnu_indirect_function
-	pushl	%ebx
-	CFI_PUSH (%ebx)
-	LOAD_PIC_REG(bx)
-	cmpl	$0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
-	jne	1f
-	call	__init_cpu_features
-
-1:	testl	$bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features@GOTOFF(%ebx)
+	LOAD_GOT_AND_RTLD_GLOBAL_RO
+	HAS_CPU_FEATURE (SSE2)
 	jz	2f
-	testl	$bit_Slow_BSF, FEATURE_OFFSET+index_Slow_BSF+__cpu_features@GOTOFF(%ebx)
+	HAS_ARCH_FEATURE (Slow_BSF)
 	jz	3f
 
-	leal	__memrchr_sse2@GOTOFF(%ebx), %eax
-	popl	%ebx
-	CFI_POP	(%ebx)
+	LOAD_FUNC_GOT_EAX (__memrchr_sse2)
 	ret
 
-	CFI_PUSH (%ebx)
-
-2:	leal	__memrchr_ia32@GOTOFF(%ebx), %eax
-	popl	%ebx
-	CFI_POP	(%ebx)
+2:	LOAD_FUNC_GOT_EAX (__memrchr_ia32)
 	ret
 
-	CFI_PUSH (%ebx)
-
-3:	leal	__memrchr_sse2_bsf@GOTOFF(%ebx), %eax
-	popl	%ebx
-	CFI_POP	(%ebx)
+3:	LOAD_FUNC_GOT_EAX (__memrchr_sse2_bsf)
 	ret
 END(__memrchr)
 
diff --git a/sysdeps/i386/i686/multiarch/memset.S b/sysdeps/i386/i686/multiarch/memset.S
index 6d7d919409..8015d57894 100644
--- a/sysdeps/i386/i686/multiarch/memset.S
+++ b/sysdeps/i386/i686/multiarch/memset.S
@@ -23,46 +23,19 @@
 
 /* Define multiple versions only for the definition in lib.  */
 #if IS_IN (libc)
-# ifdef SHARED
-	.text
-ENTRY(memset)
-	.type	memset, @gnu_indirect_function
-	pushl	%ebx
-	cfi_adjust_cfa_offset (4)
-	cfi_rel_offset (ebx, 0)
-	LOAD_PIC_REG(bx)
-	cmpl	$0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
-	jne	1f
-	call	__init_cpu_features
-1:	leal	__memset_ia32@GOTOFF(%ebx), %eax
-	testl	$bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features@GOTOFF(%ebx)
-	jz	2f
-	leal	__memset_sse2@GOTOFF(%ebx), %eax
-	testl	$bit_Fast_Rep_String, FEATURE_OFFSET+index_Fast_Rep_String+__cpu_features@GOTOFF(%ebx)
-	jz	2f
-	leal	__memset_sse2_rep@GOTOFF(%ebx), %eax
-2:	popl	%ebx
-	cfi_adjust_cfa_offset (-4)
-	cfi_restore (ebx)
-	ret
-END(memset)
-# else
 	.text
 ENTRY(memset)
 	.type	memset, @gnu_indirect_function
-	cmpl	$0, KIND_OFFSET+__cpu_features
-	jne	1f
-	call	__init_cpu_features
-1:	leal	__memset_ia32, %eax
-	testl	$bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features
+	LOAD_GOT_AND_RTLD_GLOBAL_RO
+	LOAD_FUNC_GOT_EAX (__memset_ia32)
+	HAS_CPU_FEATURE (SSE2)
 	jz	2f
-	leal	__memset_sse2, %eax
-	testl	$bit_Fast_Rep_String, FEATURE_OFFSET+index_Fast_Rep_String+__cpu_features
+	LOAD_FUNC_GOT_EAX (__memset_sse2)
+	HAS_CPU_FEATURE (Fast_Rep_String)
 	jz	2f
-	leal	__memset_sse2_rep, %eax
+	LOAD_FUNC_GOT_EAX (__memset_sse2_rep)
 2:	ret
 END(memset)
-# endif
 
 # undef ENTRY
 # define ENTRY(name) \
diff --git a/sysdeps/i386/i686/multiarch/memset_chk.S b/sysdeps/i386/i686/multiarch/memset_chk.S
index a770c0d9e2..7be45e710e 100644
--- a/sysdeps/i386/i686/multiarch/memset_chk.S
+++ b/sysdeps/i386/i686/multiarch/memset_chk.S
@@ -23,50 +23,26 @@
 
 /* Define multiple versions only for the definition in lib.  */
 #if IS_IN (libc)
-# ifdef SHARED
 	.text
 ENTRY(__memset_chk)
 	.type	__memset_chk, @gnu_indirect_function
-	pushl	%ebx
-	cfi_adjust_cfa_offset (4)
-	cfi_rel_offset (ebx, 0)
-	LOAD_PIC_REG(bx)
-	cmpl	$0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
-	jne	1f
-	call	__init_cpu_features
-1:	leal	__memset_chk_ia32@GOTOFF(%ebx), %eax
-	testl	$bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features@GOTOFF(%ebx)
+	LOAD_GOT_AND_RTLD_GLOBAL_RO
+	LOAD_FUNC_GOT_EAX (__memset_chk_ia32)
+	HAS_CPU_FEATURE (SSE2)
 	jz	2f
-	leal	__memset_chk_sse2@GOTOFF(%ebx), %eax
-	testl	$bit_Fast_Rep_String, FEATURE_OFFSET+index_Fast_Rep_String+__cpu_features@GOTOFF(%ebx)
+	LOAD_FUNC_GOT_EAX (__memset_chk_sse2)
+	HAS_CPU_FEATURE (Fast_Rep_String)
 	jz	2f
-	leal	__memset_chk_sse2_rep@GOTOFF(%ebx), %eax
-2:	popl	%ebx
-	cfi_adjust_cfa_offset (-4)
-	cfi_restore (ebx)
-	ret
+	LOAD_FUNC_GOT_EAX (__memset_chk_sse2_rep)
+2:	ret
 END(__memset_chk)
 
+# ifdef SHARED
 strong_alias (__memset_chk, __memset_zero_constant_len_parameter)
 	.section .gnu.warning.__memset_zero_constant_len_parameter
 	.string "memset used with constant zero length parameter; this could be due to transposed parameters"
 # else
 	.text
-ENTRY(__memset_chk)
-	.type	__memset_chk, @gnu_indirect_function
-	cmpl	$0, KIND_OFFSET+__cpu_features
-	jne	1f
-	call	__init_cpu_features
-1:	leal	__memset_chk_ia32, %eax
-	testl	$bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features
-	jz	2f
-	leal	__memset_chk_sse2, %eax
-	testl	$bit_Fast_Rep_String, FEATURE_OFFSET+index_Fast_Rep_String+__cpu_features
-	jz	2f
-	leal	__memset_chk_sse2_rep, %eax
-2:	ret
-END(__memset_chk)
-
 	.type __memset_chk_sse2, @function
 	.p2align 4;
 __memset_chk_sse2:
diff --git a/sysdeps/i386/i686/multiarch/rawmemchr.S b/sysdeps/i386/i686/multiarch/rawmemchr.S
index c2b7ee647e..2cfbe1b3fc 100644
--- a/sysdeps/i386/i686/multiarch/rawmemchr.S
+++ b/sysdeps/i386/i686/multiarch/rawmemchr.S
@@ -22,46 +22,22 @@
 #include <init-arch.h>
 
 #if IS_IN (libc)
-# define CFI_POP(REG) \
-	cfi_adjust_cfa_offset (-4); \
-	cfi_restore (REG)
-
-# define CFI_PUSH(REG) \
-	cfi_adjust_cfa_offset (4); \
-	cfi_rel_offset (REG, 0)
-
 	.text
 ENTRY(__rawmemchr)
 	.type	__rawmemchr, @gnu_indirect_function
-	pushl	%ebx
-	CFI_PUSH (%ebx)
-	LOAD_PIC_REG(bx)
-	cmpl	$0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
-	jne	1f
-	call	__init_cpu_features
-
-1:	testl	$bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features@GOTOFF(%ebx)
+	LOAD_GOT_AND_RTLD_GLOBAL_RO
+	HAS_CPU_FEATURE (SSE2)
 	jz	2f
-	testl	$bit_Slow_BSF, FEATURE_OFFSET+index_Slow_BSF+__cpu_features@GOTOFF(%ebx)
+	HAS_ARCH_FEATURE (Slow_BSF)
 	jz	3f
 
-	leal	__rawmemchr_sse2@GOTOFF(%ebx), %eax
-	popl	%ebx
-	CFI_POP	(%ebx)
+	LOAD_FUNC_GOT_EAX (__rawmemchr_sse2)
 	ret
 
-	CFI_PUSH (%ebx)
-
-2:	leal	__rawmemchr_ia32@GOTOFF(%ebx), %eax
-	popl	%ebx
-	CFI_POP	(%ebx)
+2:	LOAD_FUNC_GOT_EAX (__rawmemchr_ia32)
 	ret
 
-	CFI_PUSH (%ebx)
-
-3:	leal	__rawmemchr_sse2_bsf@GOTOFF(%ebx), %eax
-	popl	%ebx
-	CFI_POP	(%ebx)
+3:	LOAD_FUNC_GOT_EAX (__rawmemchr_sse2_bsf)
 	ret
 END(__rawmemchr)
 
diff --git a/sysdeps/i386/i686/multiarch/s_fma.c b/sysdeps/i386/i686/multiarch/s_fma.c
index dd70f78fd7..cf2ede5309 100644
--- a/sysdeps/i386/i686/multiarch/s_fma.c
+++ b/sysdeps/i386/i686/multiarch/s_fma.c
@@ -26,7 +26,8 @@
 extern double __fma_ia32 (double x, double y, double z) attribute_hidden;
 extern double __fma_fma (double x, double y, double z) attribute_hidden;
 
-libm_ifunc (__fma, HAS_FMA ? __fma_fma : __fma_ia32);
+libm_ifunc (__fma,
+	    HAS_ARCH_FEATURE (FMA_Usable) ? __fma_fma : __fma_ia32);
 weak_alias (__fma, fma)
 
 # define __fma __fma_ia32
diff --git a/sysdeps/i386/i686/multiarch/s_fmaf.c b/sysdeps/i386/i686/multiarch/s_fmaf.c
index 9ffa4f15d9..526cdf16e4 100644
--- a/sysdeps/i386/i686/multiarch/s_fmaf.c
+++ b/sysdeps/i386/i686/multiarch/s_fmaf.c
@@ -26,7 +26,8 @@
 extern float __fmaf_ia32 (float x, float y, float z) attribute_hidden;
 extern float __fmaf_fma (float x, float y, float z) attribute_hidden;
 
-libm_ifunc (__fmaf, HAS_FMA ? __fmaf_fma : __fmaf_ia32);
+libm_ifunc (__fmaf,
+	    HAS_ARCH_FEATURE (FMA_Usable) ? __fmaf_fma : __fmaf_ia32);
 weak_alias (__fmaf, fmaf)
 
 # define __fmaf __fmaf_ia32
diff --git a/sysdeps/i386/i686/multiarch/strcasecmp.S b/sysdeps/i386/i686/multiarch/strcasecmp.S
index c30ac3a8bd..e4b3cf5b46 100644
--- a/sysdeps/i386/i686/multiarch/strcasecmp.S
+++ b/sysdeps/i386/i686/multiarch/strcasecmp.S
@@ -20,49 +20,20 @@
 #include <sysdep.h>
 #include <init-arch.h>
 
-#ifdef SHARED
 	.text
 ENTRY(__strcasecmp)
 	.type	__strcasecmp, @gnu_indirect_function
-	pushl	%ebx
-	cfi_adjust_cfa_offset (4)
-	cfi_rel_offset (ebx, 0)
-	LOAD_PIC_REG(bx)
-	cmpl	$0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
-	jne	1f
-	call	__init_cpu_features
-1:	leal	__strcasecmp_ia32@GOTOFF(%ebx), %eax
-	testl	$bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features@GOTOFF(%ebx)
+	LOAD_GOT_AND_RTLD_GLOBAL_RO
+	LOAD_FUNC_GOT_EAX (__strcasecmp_ia32)
+	HAS_CPU_FEATURE (SSSE3)
 	jz	2f
-	leal	__strcasecmp_ssse3@GOTOFF(%ebx), %eax
-	testl	$bit_SSE4_2, CPUID_OFFSET+index_SSE4_2+__cpu_features@GOTOFF(%ebx)
+	LOAD_FUNC_GOT_EAX (__strcasecmp_ssse3)
+	HAS_CPU_FEATURE (SSE4_2)
 	jz	2f
-	testl	$bit_Slow_SSE4_2, FEATURE_OFFSET+index_Slow_SSE4_2+__cpu_features@GOTOFF(%ebx)
+	HAS_ARCH_FEATURE (Slow_SSE4_2)
 	jnz	2f
-	leal	__strcasecmp_sse4_2@GOTOFF(%ebx), %eax
-2:	popl	%ebx
-	cfi_adjust_cfa_offset (-4)
-	cfi_restore (ebx)
-	ret
-END(__strcasecmp)
-#else
-	.text
-ENTRY(__strcasecmp)
-	.type	__strcasecmp, @gnu_indirect_function
-	cmpl	$0, KIND_OFFSET+__cpu_features
-	jne	1f
-	call	__init_cpu_features
-1:	leal	__strcasecmp_ia32, %eax
-	testl	$bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features
-	jz	2f
-	leal	__strcasecmp_ssse3, %eax
-	testl	$bit_SSE4_2, CPUID_OFFSET+index_SSE4_2+__cpu_features
-	jz	2f
-	testl	$bit_Slow_SSE4_2, FEATURE_OFFSET+index_Slow_SSE4_2+__cpu_features
-	jnz	2f
-	leal	__strcasecmp_sse4_2, %eax
+	LOAD_FUNC_GOT_EAX (__strcasecmp_sse4_2)
 2:	ret
 END(__strcasecmp)
-#endif
 
 weak_alias (__strcasecmp, strcasecmp)
diff --git a/sysdeps/i386/i686/multiarch/strcat.S b/sysdeps/i386/i686/multiarch/strcat.S
index 474f753f3f..45d84cdf4e 100644
--- a/sysdeps/i386/i686/multiarch/strcat.S
+++ b/sysdeps/i386/i686/multiarch/strcat.S
@@ -45,52 +45,22 @@
    need strncat before the initialization happened.  */
 #if IS_IN (libc)
 
-# ifdef SHARED
 	.text
 ENTRY(STRCAT)
 	.type	STRCAT, @gnu_indirect_function
-	pushl	%ebx
-	cfi_adjust_cfa_offset (4)
-	cfi_rel_offset (ebx, 0)
-	LOAD_PIC_REG(bx)
-	cmpl	$0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
-	jne	1f
-	call	__init_cpu_features
-1:	leal	STRCAT_IA32@GOTOFF(%ebx), %eax
-	testl	$bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features@GOTOFF(%ebx)
-	jz	2f
-	leal	STRCAT_SSE2@GOTOFF(%ebx), %eax
-	testl	$bit_Fast_Unaligned_Load, FEATURE_OFFSET+index_Fast_Unaligned_Load+__cpu_features@GOTOFF(%ebx)
-	jnz	2f
-	testl	$bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features@GOTOFF(%ebx)
-	jz	2f
-	leal	STRCAT_SSSE3@GOTOFF(%ebx), %eax
-2:	popl	%ebx
-	cfi_adjust_cfa_offset (-4)
-	cfi_restore (ebx)
-	ret
-END(STRCAT)
-# else
-
-ENTRY(STRCAT)
-	.type	STRCAT, @gnu_indirect_function
-	cmpl	$0, KIND_OFFSET+__cpu_features
-	jne	1f
-	call	__init_cpu_features
-1:	leal	STRCAT_IA32, %eax
-	testl	$bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features
+	LOAD_GOT_AND_RTLD_GLOBAL_RO
+	LOAD_FUNC_GOT_EAX (STRCAT_IA32)
+	HAS_CPU_FEATURE (SSE2)
 	jz	2f
-	leal	STRCAT_SSE2, %eax
-	testl	$bit_Fast_Unaligned_Load, FEATURE_OFFSET+index_Fast_Unaligned_Load+__cpu_features
+	LOAD_FUNC_GOT_EAX (STRCAT_SSE2)
+	HAS_ARCH_FEATURE (Fast_Unaligned_Load)
 	jnz	2f
-	testl	$bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features
+	HAS_CPU_FEATURE (SSSE3)
 	jz	2f
-	leal	STRCAT_SSSE3, %eax
+	LOAD_FUNC_GOT_EAX (STRCAT_SSSE3)
 2:	ret
 END(STRCAT)
 
-# endif
-
 # undef ENTRY
 # define ENTRY(name) \
 	.type STRCAT_IA32, @function; \
diff --git a/sysdeps/i386/i686/multiarch/strchr.S b/sysdeps/i386/i686/multiarch/strchr.S
index 45624fdee0..6b4656582a 100644
--- a/sysdeps/i386/i686/multiarch/strchr.S
+++ b/sysdeps/i386/i686/multiarch/strchr.S
@@ -25,24 +25,15 @@
 	.text
 ENTRY(strchr)
 	.type	strchr, @gnu_indirect_function
-	pushl	%ebx
-	cfi_adjust_cfa_offset (4)
-	cfi_rel_offset (ebx, 0)
-	LOAD_PIC_REG(bx)
-	cmpl	$0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
-	jne	1f
-	call	__init_cpu_features
-1:	leal	__strchr_ia32@GOTOFF(%ebx), %eax
-	testl	$bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features@GOTOFF(%ebx)
+	LOAD_GOT_AND_RTLD_GLOBAL_RO
+	LOAD_FUNC_GOT_EAX (__strchr_ia32)
+	HAS_CPU_FEATURE (SSE2)
 	jz	2f
-	leal	__strchr_sse2_bsf@GOTOFF(%ebx), %eax
-	testl	$bit_Slow_BSF, FEATURE_OFFSET+index_Slow_BSF+__cpu_features@GOTOFF(%ebx)
+	LOAD_FUNC_GOT_EAX (__strchr_sse2_bsf)
+	HAS_ARCH_FEATURE (Slow_BSF)
 	jz	2f
-	leal	__strchr_sse2@GOTOFF(%ebx), %eax
-2:	popl	%ebx
-	cfi_adjust_cfa_offset (-4);
-	cfi_restore (ebx)
-	ret
+	LOAD_FUNC_GOT_EAX (__strchr_sse2)
+2:	ret
 END(strchr)
 
 # undef ENTRY
diff --git a/sysdeps/i386/i686/multiarch/strcmp.S b/sysdeps/i386/i686/multiarch/strcmp.S
index 9df40089a7..cad179d8f2 100644
--- a/sysdeps/i386/i686/multiarch/strcmp.S
+++ b/sysdeps/i386/i686/multiarch/strcmp.S
@@ -51,50 +51,21 @@
    define multiple versions for strncmp in static library since we
    need strncmp before the initialization happened.  */
 #if (defined SHARED || !defined USE_AS_STRNCMP) && IS_IN (libc)
-# ifdef SHARED
 	.text
 ENTRY(STRCMP)
 	.type	STRCMP, @gnu_indirect_function
-	pushl	%ebx
-	cfi_adjust_cfa_offset (4)
-	cfi_rel_offset (ebx, 0)
-	LOAD_PIC_REG(bx)
-	cmpl	$0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
-	jne	1f
-	call	__init_cpu_features
-1:	leal	__STRCMP_IA32@GOTOFF(%ebx), %eax
-	testl	$bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features@GOTOFF(%ebx)
+	LOAD_GOT_AND_RTLD_GLOBAL_RO
+	LOAD_FUNC_GOT_EAX (__STRCMP_IA32)
+	HAS_CPU_FEATURE (SSSE3)
 	jz	2f
-	leal	__STRCMP_SSSE3@GOTOFF(%ebx), %eax
-	testl	$bit_SSE4_2, CPUID_OFFSET+index_SSE4_2+__cpu_features@GOTOFF(%ebx)
+	LOAD_FUNC_GOT_EAX (__STRCMP_SSSE3)
+	HAS_CPU_FEATURE (SSE4_2)
 	jz	2f
-	testl	$bit_Slow_SSE4_2, FEATURE_OFFSET+index_Slow_SSE4_2+__cpu_features@GOTOFF(%ebx)
+	HAS_ARCH_FEATURE (Slow_SSE4_2)
 	jnz	2f
-	leal	__STRCMP_SSE4_2@GOTOFF(%ebx), %eax
-2:	popl	%ebx
-	cfi_adjust_cfa_offset (-4)
-	cfi_restore (ebx)
-	ret
-END(STRCMP)
-# else
-	.text
-ENTRY(STRCMP)
-	.type	STRCMP, @gnu_indirect_function
-	cmpl	$0, KIND_OFFSET+__cpu_features
-	jne	1f
-	call	__init_cpu_features
-1:	leal	__STRCMP_IA32, %eax
-	testl	$bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features
-	jz	2f
-	leal	__STRCMP_SSSE3, %eax
-	testl	$bit_SSE4_2, CPUID_OFFSET+index_SSE4_2+__cpu_features
-	jz	2f
-	testl	$bit_Slow_SSE4_2, FEATURE_OFFSET+index_Slow_SSE4_2+__cpu_features
-	jnz	2f
-	leal	__STRCMP_SSE4_2, %eax
+	LOAD_FUNC_GOT_EAX (__STRCMP_SSE4_2)
 2:	ret
 END(STRCMP)
-# endif
 
 # undef ENTRY
 # define ENTRY(name) \
diff --git a/sysdeps/i386/i686/multiarch/strcpy.S b/sysdeps/i386/i686/multiarch/strcpy.S
index c279d468cb..e9db766347 100644
--- a/sysdeps/i386/i686/multiarch/strcpy.S
+++ b/sysdeps/i386/i686/multiarch/strcpy.S
@@ -61,52 +61,22 @@
    need strncpy before the initialization happened.  */
 #if IS_IN (libc)
 
-# ifdef SHARED
 	.text
 ENTRY(STRCPY)
 	.type	STRCPY, @gnu_indirect_function
-	pushl	%ebx
-	cfi_adjust_cfa_offset (4)
-	cfi_rel_offset (ebx, 0)
-	LOAD_PIC_REG(bx)
-	cmpl	$0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
-	jne	1f
-	call	__init_cpu_features
-1:	leal	STRCPY_IA32@GOTOFF(%ebx), %eax
-	testl	$bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features@GOTOFF(%ebx)
+	LOAD_GOT_AND_RTLD_GLOBAL_RO
+	LOAD_FUNC_GOT_EAX (STRCPY_IA32)
+	HAS_CPU_FEATURE (SSE2)
 	jz	2f
-	leal	STRCPY_SSE2@GOTOFF(%ebx), %eax
-	testl	$bit_Fast_Unaligned_Load, FEATURE_OFFSET+index_Fast_Unaligned_Load+__cpu_features@GOTOFF(%ebx)
+	LOAD_FUNC_GOT_EAX (STRCPY_SSE2)
+	HAS_ARCH_FEATURE (Fast_Unaligned_Load)
 	jnz	2f
-	testl	$bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features@GOTOFF(%ebx)
+	HAS_CPU_FEATURE (SSSE3)
 	jz	2f
-	leal	STRCPY_SSSE3@GOTOFF(%ebx), %eax
-2:	popl	%ebx
-	cfi_adjust_cfa_offset (-4)
-	cfi_restore (ebx)
-	ret
-END(STRCPY)
-# else
-
-ENTRY(STRCPY)
-	.type	STRCPY, @gnu_indirect_function
-	cmpl	$0, KIND_OFFSET+__cpu_features
-	jne	1f
-	call	__init_cpu_features
-1:	leal	STRCPY_IA32, %eax
-	testl	$bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features
-	jz	2f
-	leal	STRCPY_SSE2, %eax
-	testl	$bit_Fast_Unaligned_Load, FEATURE_OFFSET+index_Fast_Unaligned_Load+__cpu_features
-	jnz	2f
-	testl	$bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features
-	jz	2f
-	leal	STRCPY_SSSE3, %eax
+	LOAD_FUNC_GOT_EAX (STRCPY_SSSE3)
 2:	ret
 END(STRCPY)
 
-# endif
-
 # undef ENTRY
 # define ENTRY(name) \
 	.type STRCPY_IA32, @function; \
diff --git a/sysdeps/i386/i686/multiarch/strcspn.S b/sysdeps/i386/i686/multiarch/strcspn.S
index e6ea454150..b669b9797a 100644
--- a/sysdeps/i386/i686/multiarch/strcspn.S
+++ b/sysdeps/i386/i686/multiarch/strcspn.S
@@ -42,40 +42,16 @@
    define multiple versions for strpbrk in static library since we
    need strpbrk before the initialization happened.  */
 #if (defined SHARED || !defined USE_AS_STRPBRK) && IS_IN (libc)
-# ifdef SHARED
 	.text
 ENTRY(STRCSPN)
 	.type	STRCSPN, @gnu_indirect_function
-	pushl	%ebx
-	cfi_adjust_cfa_offset (4)
-	cfi_rel_offset (ebx, 0)
-	LOAD_PIC_REG(bx)
-	cmpl	$0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
-	jne	1f
-	call	__init_cpu_features
-1:	leal	STRCSPN_IA32@GOTOFF(%ebx), %eax
-	testl	$bit_SSE4_2, CPUID_OFFSET+index_SSE4_2+__cpu_features@GOTOFF(%ebx)
+	LOAD_GOT_AND_RTLD_GLOBAL_RO
+	LOAD_FUNC_GOT_EAX (STRCSPN_IA32)
+	HAS_CPU_FEATURE (SSE4_2)
 	jz	2f
-	leal	STRCSPN_SSE42@GOTOFF(%ebx), %eax
-2:	popl	%ebx
-	cfi_adjust_cfa_offset (-4);
-	cfi_restore (ebx)
-	ret
-END(STRCSPN)
-# else
-	.text
-ENTRY(STRCSPN)
-	.type	STRCSPN, @gnu_indirect_function
-	cmpl	$0, KIND_OFFSET+__cpu_features
-	jne	1f
-	call	__init_cpu_features
-1:	leal	STRCSPN_IA32, %eax
-	testl	$bit_SSE4_2, CPUID_OFFSET+index_SSE4_2+__cpu_features
-	jz	2f
-	leal	STRCSPN_SSE42, %eax
+	LOAD_FUNC_GOT_EAX (STRCSPN_SSE42)
 2:	ret
 END(STRCSPN)
-# endif
 
 # undef ENTRY
 # define ENTRY(name) \
diff --git a/sysdeps/i386/i686/multiarch/strlen.S b/sysdeps/i386/i686/multiarch/strlen.S
index 2e6993b738..613559ca01 100644
--- a/sysdeps/i386/i686/multiarch/strlen.S
+++ b/sysdeps/i386/i686/multiarch/strlen.S
@@ -28,24 +28,15 @@
 	.text
 ENTRY(strlen)
 	.type	strlen, @gnu_indirect_function
-	pushl	%ebx
-	cfi_adjust_cfa_offset (4)
-	cfi_rel_offset (ebx, 0)
-	LOAD_PIC_REG(bx)
-	cmpl	$0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
-	jne	1f
-	call	__init_cpu_features
-1:	leal	__strlen_ia32@GOTOFF(%ebx), %eax
-	testl	$bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features@GOTOFF(%ebx)
+	LOAD_GOT_AND_RTLD_GLOBAL_RO
+	LOAD_FUNC_GOT_EAX (__strlen_ia32)
+	HAS_CPU_FEATURE (SSE2)
 	jz	2f
-	leal	__strlen_sse2_bsf@GOTOFF(%ebx), %eax
-	testl	$bit_Slow_BSF, FEATURE_OFFSET+index_Slow_BSF+__cpu_features@GOTOFF(%ebx)
+	LOAD_FUNC_GOT_EAX (__strlen_sse2_bsf)
+	HAS_ARCH_FEATURE (Slow_BSF)
 	jz	2f
-	leal	__strlen_sse2@GOTOFF(%ebx), %eax
-2:	popl	%ebx
-	cfi_adjust_cfa_offset (-4);
-	cfi_restore (ebx)
-	ret
+	LOAD_FUNC_GOT_EAX (__strlen_sse2)
+2:	ret
 END(strlen)
 
 # undef ENTRY
diff --git a/sysdeps/i386/i686/multiarch/strncase.S b/sysdeps/i386/i686/multiarch/strncase.S
index c2cb03c2bb..0cdbeff42a 100644
--- a/sysdeps/i386/i686/multiarch/strncase.S
+++ b/sysdeps/i386/i686/multiarch/strncase.S
@@ -20,49 +20,20 @@
 #include <sysdep.h>
 #include <init-arch.h>
 
-#ifdef SHARED
 	.text
 ENTRY(__strncasecmp)
 	.type	__strncasecmp, @gnu_indirect_function
-	pushl	%ebx
-	cfi_adjust_cfa_offset (4)
-	cfi_rel_offset (ebx, 0)
-	LOAD_PIC_REG(bx)
-	cmpl	$0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
-	jne	1f
-	call	__init_cpu_features
-1:	leal	__strncasecmp_ia32@GOTOFF(%ebx), %eax
-	testl	$bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features@GOTOFF(%ebx)
+	LOAD_GOT_AND_RTLD_GLOBAL_RO
+	LOAD_FUNC_GOT_EAX (__strncasecmp_ia32)
+	HAS_CPU_FEATURE (SSSE3)
 	jz	2f
-	leal	__strncasecmp_ssse3@GOTOFF(%ebx), %eax
-	testl	$bit_SSE4_2, CPUID_OFFSET+index_SSE4_2+__cpu_features@GOTOFF(%ebx)
+	LOAD_FUNC_GOT_EAX (__strncasecmp_ssse3)
+	HAS_CPU_FEATURE (SSE4_2)
 	jz	2f
-	testl	$bit_Slow_SSE4_2, FEATURE_OFFSET+index_Slow_SSE4_2+__cpu_features@GOTOFF(%ebx)
+	HAS_ARCH_FEATURE (Slow_SSE4_2)
 	jnz	2f
-	leal	__strncasecmp_sse4_2@GOTOFF(%ebx), %eax
-2:	popl	%ebx
-	cfi_adjust_cfa_offset (-4)
-	cfi_restore (ebx)
-	ret
-END(__strncasecmp)
-#else
-	.text
-ENTRY(__strncasecmp)
-	.type	__strncasecmp, @gnu_indirect_function
-	cmpl	$0, KIND_OFFSET+__cpu_features
-	jne	1f
-	call	__init_cpu_features
-1:	leal	__strncasecmp_ia32, %eax
-	testl	$bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features
-	jz	2f
-	leal	__strncasecmp_ssse3, %eax
-	testl	$bit_SSE4_2, CPUID_OFFSET+index_SSE4_2+__cpu_features
-	jz	2f
-	testl	$bit_Slow_SSE4_2, FEATURE_OFFSET+index_Slow_SSE4_2+__cpu_features
-	jnz	2f
-	leal	__strncasecmp_sse4_2, %eax
+	LOAD_FUNC_GOT_EAX (__strncasecmp_sse4_2)
 2:	ret
 END(__strncasecmp)
-#endif
 
 weak_alias (__strncasecmp, strncasecmp)
diff --git a/sysdeps/i386/i686/multiarch/strnlen.S b/sysdeps/i386/i686/multiarch/strnlen.S
index 56a5136606..baf21fc205 100644
--- a/sysdeps/i386/i686/multiarch/strnlen.S
+++ b/sysdeps/i386/i686/multiarch/strnlen.S
@@ -25,21 +25,12 @@
 	.text
 ENTRY(__strnlen)
 	.type	__strnlen, @gnu_indirect_function
-	pushl	%ebx
-	cfi_adjust_cfa_offset (4)
-	cfi_rel_offset (ebx, 0)
-	LOAD_PIC_REG(bx)
-	cmpl	$0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
-	jne	1f
-	call	__init_cpu_features
-1:	leal	__strnlen_ia32@GOTOFF(%ebx), %eax
-	testl	$bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features@GOTOFF(%ebx)
+	LOAD_GOT_AND_RTLD_GLOBAL_RO
+	LOAD_FUNC_GOT_EAX (__strnlen_ia32)
+	HAS_CPU_FEATURE (SSE2)
 	jz	2f
-	leal	__strnlen_sse2@GOTOFF(%ebx), %eax
-2:	popl	%ebx
-	cfi_adjust_cfa_offset (-4);
-	cfi_restore (ebx)
-	ret
+	LOAD_FUNC_GOT_EAX (__strnlen_sse2)
+2:	ret
 END(__strnlen)
 
 weak_alias(__strnlen, strnlen)
diff --git a/sysdeps/i386/i686/multiarch/strrchr.S b/sysdeps/i386/i686/multiarch/strrchr.S
index 91074b4315..6aa3321907 100644
--- a/sysdeps/i386/i686/multiarch/strrchr.S
+++ b/sysdeps/i386/i686/multiarch/strrchr.S
@@ -25,24 +25,15 @@
 	.text
 ENTRY(strrchr)
 	.type	strrchr, @gnu_indirect_function
-	pushl	%ebx
-	cfi_adjust_cfa_offset (4)
-	cfi_rel_offset (ebx, 0)
-	LOAD_PIC_REG(bx)
-	cmpl	$0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
-	jne	1f
-	call	__init_cpu_features
-1:	leal	__strrchr_ia32@GOTOFF(%ebx), %eax
-	testl	$bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features@GOTOFF(%ebx)
+	LOAD_GOT_AND_RTLD_GLOBAL_RO
+	LOAD_FUNC_GOT_EAX (__strrchr_ia32)
+	HAS_CPU_FEATURE (SSE2)
 	jz	2f
-	leal	__strrchr_sse2_bsf@GOTOFF(%ebx), %eax
-	testl	$bit_Slow_BSF, FEATURE_OFFSET+index_Slow_BSF+__cpu_features@GOTOFF(%ebx)
+	LOAD_FUNC_GOT_EAX (__strrchr_sse2_bsf)
+	HAS_ARCH_FEATURE (Slow_BSF)
 	jz	2f
-	leal	__strrchr_sse2@GOTOFF(%ebx), %eax
-2:	popl	%ebx
-	cfi_adjust_cfa_offset (-4);
-	cfi_restore (ebx)
-	ret
+	LOAD_FUNC_GOT_EAX (__strrchr_sse2)
+2:	ret
 END(strrchr)
 
 # undef ENTRY
diff --git a/sysdeps/i386/i686/multiarch/strspn.S b/sysdeps/i386/i686/multiarch/strspn.S
index 9d353a2735..4ba87be45b 100644
--- a/sysdeps/i386/i686/multiarch/strspn.S
+++ b/sysdeps/i386/i686/multiarch/strspn.S
@@ -27,40 +27,16 @@
 
 /* Define multiple versions only for the definition in libc.  */
 #if IS_IN (libc)
-# ifdef SHARED
 	.text
 ENTRY(strspn)
 	.type	strspn, @gnu_indirect_function
-	pushl	%ebx
-	cfi_adjust_cfa_offset (4)
-	cfi_rel_offset (ebx, 0)
-	LOAD_PIC_REG(bx)
-	cmpl	$0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
-	jne	1f
-	call	__init_cpu_features
-1:	leal	__strspn_ia32@GOTOFF(%ebx), %eax
-	testl	$bit_SSE4_2, CPUID_OFFSET+index_SSE4_2+__cpu_features@GOTOFF(%ebx)
+	LOAD_GOT_AND_RTLD_GLOBAL_RO
+	LOAD_FUNC_GOT_EAX (__strspn_ia32)
+	HAS_CPU_FEATURE (SSE4_2)
 	jz	2f
-	leal	__strspn_sse42@GOTOFF(%ebx), %eax
-2:	popl	%ebx
-	cfi_adjust_cfa_offset (-4);
-	cfi_restore (ebx)
-	ret
-END(strspn)
-# else
-	.text
-ENTRY(strspn)
-	.type	strspn, @gnu_indirect_function
-	cmpl	$0, KIND_OFFSET+__cpu_features
-	jne	1f
-	call	__init_cpu_features
-1:	leal	__strspn_ia32, %eax
-	testl	$bit_SSE4_2, CPUID_OFFSET+index_SSE4_2+__cpu_features
-	jz	2f
-	leal	__strspn_sse42, %eax
+	LOAD_FUNC_GOT_EAX (__strspn_sse42)
 2:	ret
 END(strspn)
-# endif
 
 # undef ENTRY
 # define ENTRY(name) \
diff --git a/sysdeps/i386/i686/multiarch/wcschr.S b/sysdeps/i386/i686/multiarch/wcschr.S
index 603d7d79c1..5918b127a6 100644
--- a/sysdeps/i386/i686/multiarch/wcschr.S
+++ b/sysdeps/i386/i686/multiarch/wcschr.S
@@ -25,21 +25,12 @@
 	.text
 ENTRY(__wcschr)
 	.type	wcschr, @gnu_indirect_function
-	pushl	%ebx
-	cfi_adjust_cfa_offset (4)
-	cfi_rel_offset (ebx, 0)
-	LOAD_PIC_REG(bx)
-	cmpl	$0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
-	jne	1f
-	call	__init_cpu_features
-1:	leal	__wcschr_ia32@GOTOFF(%ebx), %eax
-	testl	$bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features@GOTOFF(%ebx)
+	LOAD_GOT_AND_RTLD_GLOBAL_RO
+	LOAD_FUNC_GOT_EAX (__wcschr_ia32)
+	HAS_CPU_FEATURE (SSE2)
 	jz	2f
-	leal	__wcschr_sse2@GOTOFF(%ebx), %eax
-2:	popl	%ebx
-	cfi_adjust_cfa_offset (-4);
-	cfi_restore (ebx)
-	ret
+	LOAD_FUNC_GOT_EAX (__wcschr_sse2)
+2:	ret
 END(__wcschr)
 weak_alias (__wcschr, wcschr)
 #endif
diff --git a/sysdeps/i386/i686/multiarch/wcscmp.S b/sysdeps/i386/i686/multiarch/wcscmp.S
index 92c2c84ad4..db9c05af0d 100644
--- a/sysdeps/i386/i686/multiarch/wcscmp.S
+++ b/sysdeps/i386/i686/multiarch/wcscmp.S
@@ -28,21 +28,12 @@
 	.text
 ENTRY(__wcscmp)
 	.type	__wcscmp, @gnu_indirect_function
-	pushl	%ebx
-	cfi_adjust_cfa_offset (4)
-	cfi_rel_offset (ebx, 0)
-	LOAD_PIC_REG(bx)
-	cmpl	$0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
-	jne	1f
-	call	__init_cpu_features
-1:	leal	__wcscmp_ia32@GOTOFF(%ebx), %eax
-	testl	$bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features@GOTOFF(%ebx)
+	LOAD_GOT_AND_RTLD_GLOBAL_RO
+	LOAD_FUNC_GOT_EAX (__wcscmp_ia32)
+	HAS_CPU_FEATURE (SSE2)
 	jz	2f
-	leal	__wcscmp_sse2@GOTOFF(%ebx), %eax
-2:	popl	%ebx
-	cfi_adjust_cfa_offset (-4);
-	cfi_restore (ebx)
-	ret
+	LOAD_FUNC_GOT_EAX (__wcscmp_sse2)
+2:	ret
 END(__wcscmp)
 weak_alias (__wcscmp, wcscmp)
 #endif
diff --git a/sysdeps/i386/i686/multiarch/wcscpy.S b/sysdeps/i386/i686/multiarch/wcscpy.S
index f7253c7f45..5f149701d3 100644
--- a/sysdeps/i386/i686/multiarch/wcscpy.S
+++ b/sysdeps/i386/i686/multiarch/wcscpy.S
@@ -26,20 +26,11 @@
 	.text
 ENTRY(wcscpy)
 	.type	wcscpy, @gnu_indirect_function
-	pushl	%ebx
-	cfi_adjust_cfa_offset (4)
-	cfi_rel_offset (ebx, 0)
-	LOAD_PIC_REG(bx)
-	cmpl	$0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
-	jne	1f
-	call	__init_cpu_features
-1:	leal	__wcscpy_ia32@GOTOFF(%ebx), %eax
-	testl	$bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features@GOTOFF(%ebx)
+	LOAD_GOT_AND_RTLD_GLOBAL_RO
+	LOAD_FUNC_GOT_EAX (__wcscpy_ia32)
+	HAS_CPU_FEATURE (SSSE3)
 	jz	2f
-	leal	__wcscpy_ssse3@GOTOFF(%ebx), %eax
-2:	popl	%ebx
-	cfi_adjust_cfa_offset (-4)
-	cfi_restore (ebx)
-	ret
+	LOAD_FUNC_GOT_EAX (__wcscpy_ssse3)
+2:	ret
 END(wcscpy)
 #endif
diff --git a/sysdeps/i386/i686/multiarch/wcslen.S b/sysdeps/i386/i686/multiarch/wcslen.S
index 3926a50a87..7740404a82 100644
--- a/sysdeps/i386/i686/multiarch/wcslen.S
+++ b/sysdeps/i386/i686/multiarch/wcslen.S
@@ -25,21 +25,12 @@
 	.text
 ENTRY(__wcslen)
 	.type	__wcslen, @gnu_indirect_function
-	pushl	%ebx
-	cfi_adjust_cfa_offset (4)
-	cfi_rel_offset (ebx, 0)
-	LOAD_PIC_REG(bx)
-	cmpl	$0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
-	jne	1f
-	call	__init_cpu_features
-1:	leal	__wcslen_ia32@GOTOFF(%ebx), %eax
-	testl	$bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features@GOTOFF(%ebx)
+	LOAD_GOT_AND_RTLD_GLOBAL_RO
+	LOAD_FUNC_GOT_EAX (__wcslen_ia32)
+	HAS_CPU_FEATURE (SSE2)
 	jz	2f
-	leal	__wcslen_sse2@GOTOFF(%ebx), %eax
-2:	popl	%ebx
-	cfi_adjust_cfa_offset (-4);
-	cfi_restore (ebx)
-	ret
+	LOAD_FUNC_GOT_EAX (__wcslen_sse2)
+2:	ret
 END(__wcslen)
 
 weak_alias(__wcslen, wcslen)
diff --git a/sysdeps/i386/i686/multiarch/wcsrchr.S b/sysdeps/i386/i686/multiarch/wcsrchr.S
index 5c96129fbe..9ed68101e1 100644
--- a/sysdeps/i386/i686/multiarch/wcsrchr.S
+++ b/sysdeps/i386/i686/multiarch/wcsrchr.S
@@ -25,20 +25,11 @@
 	.text
 ENTRY(wcsrchr)
 	.type	wcsrchr, @gnu_indirect_function
-	pushl	%ebx
-	cfi_adjust_cfa_offset (4)
-	cfi_rel_offset (ebx, 0)
-	LOAD_PIC_REG(bx)
-	cmpl	$0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
-	jne	1f
-	call	__init_cpu_features
-1:	leal	__wcsrchr_ia32@GOTOFF(%ebx), %eax
-	testl	$bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features@GOTOFF(%ebx)
+	LOAD_GOT_AND_RTLD_GLOBAL_RO
+	LOAD_FUNC_GOT_EAX (__wcsrchr_ia32)
+	HAS_CPU_FEATURE (SSE2)
 	jz	2f
-	leal	__wcsrchr_sse2@GOTOFF(%ebx), %eax
-2:	popl	%ebx
-	cfi_adjust_cfa_offset (-4);
-	cfi_restore (ebx)
-	ret
+	LOAD_FUNC_GOT_EAX (__wcsrchr_sse2)
+2:	ret
 END(wcsrchr)
 #endif
diff --git a/sysdeps/i386/i686/multiarch/wmemcmp.S b/sysdeps/i386/i686/multiarch/wmemcmp.S
index 6ca60531db..6025942fb6 100644
--- a/sysdeps/i386/i686/multiarch/wmemcmp.S
+++ b/sysdeps/i386/i686/multiarch/wmemcmp.S
@@ -27,23 +27,14 @@
 	.text
 ENTRY(wmemcmp)
 	.type	wmemcmp, @gnu_indirect_function
-	pushl	%ebx
-	cfi_adjust_cfa_offset (4)
-	cfi_rel_offset (ebx, 0)
-	LOAD_PIC_REG(bx)
-	cmpl	$0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
-	jne	1f
-	call	__init_cpu_features
-1:	leal	__wmemcmp_ia32@GOTOFF(%ebx), %eax
-	testl	$bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features@GOTOFF(%ebx)
+	LOAD_GOT_AND_RTLD_GLOBAL_RO
+	LOAD_FUNC_GOT_EAX (__wmemcmp_ia32)
+	HAS_CPU_FEATURE (SSSE3)
 	jz	2f
-	leal	__wmemcmp_ssse3@GOTOFF(%ebx), %eax
-	testl	$bit_SSE4_2, CPUID_OFFSET+index_SSE4_2+__cpu_features@GOTOFF(%ebx)
+	LOAD_FUNC_GOT_EAX (__wmemcmp_ssse3)
+	HAS_CPU_FEATURE (SSE4_2)
 	jz	2f
-	leal	__wmemcmp_sse4_2@GOTOFF(%ebx), %eax
-2:	popl	%ebx
-	cfi_adjust_cfa_offset (-4)
-	cfi_restore (ebx)
-	ret
+	LOAD_FUNC_GOT_EAX (__wmemcmp_sse4_2)
+2:	ret
 END(wmemcmp)
 #endif