about summary refs log tree commit diff
diff options
context:
space:
mode:
authorRoland McGrath <roland@hack.frob.com>2014-10-22 14:20:35 -0700
committerRoland McGrath <roland@hack.frob.com>2014-10-22 14:20:35 -0700
commit8c2b1ed8bbd20d35314c2a602b903159fa567ffb (patch)
tree4a9e06786fdb32c5558cdae218af3b4ff0b78af6
parentb5af9297d51a43f96c5be1bafab032184690dd6f (diff)
downloadglibc-8c2b1ed8bbd20d35314c2a602b903159fa567ffb.tar.gz
glibc-8c2b1ed8bbd20d35314c2a602b903159fa567ffb.tar.xz
glibc-8c2b1ed8bbd20d35314c2a602b903159fa567ffb.zip
ARM: Use movw/movt more when available
-rw-r--r--ChangeLog27
-rw-r--r--config.h.in3
-rw-r--r--setjmp/Makefile3
-rw-r--r--setjmp/tst-setjmp-static.c1
-rw-r--r--sysdeps/arm/__longjmp.S35
-rw-r--r--sysdeps/arm/configure52
-rw-r--r--sysdeps/arm/configure.ac44
-rw-r--r--sysdeps/arm/setjmp.S35
-rw-r--r--sysdeps/arm/sysdep.h78
9 files changed, 209 insertions, 69 deletions
diff --git a/ChangeLog b/ChangeLog
index 589da6b2f4..e7fc13428c 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,30 @@
+2014-10-22  Roland McGrath  <roland@hack.frob.com>
+
+	* sysdeps/arm/__longjmp.S [NEED_HWCAP] [IS_IN_rtld]: Use LDST_PCREL
+	macro to get at the _rtld_local_ro field.
+	[NEED_HWCAP] [!IS_IN_rtld]: Use LDR_GLOBAL to get at _rtld_global_ro
+	([PIC] case) or _dl_hwcap ([!PIC] case).
+	* sysdeps/arm/setjmp.S: Likewise.
+
+	* config.h.in (ARM_PCREL_MOVW_OK): New macro.
+	* sysdeps/arm/configure.ac: New check to define it.
+	* sysdeps/arm/configure: Regenerated.
+	* sysdeps/arm/sysdep.h [__ASSEMBLER__]: Include <arm-features.h>.
+	(LDST_INDEXED_NOINDEX, LDST_INDEXED_INDEX): New macros.
+	(LDST_INDEXED, LDST_PC_INDEXED): New macros, differing definitions
+	depending on [ARM_NO_INDEX_REGISTER] and [__thumb2__].
+	(LDST_PCREL) [!__thumb2__ && ARCH_HAS_T2 && ARM_PCREL_MOVW_OK]:
+	Use movw/movt pair instead of a load.
+	(LDST_GLOBAL): Macro removed.
+	(LDR_GLOBAL): New macro replaces it.
+	(LDR_HIDDEN): New macro.
+	(PTR_MANGLE_LOAD): Use LDR_GLOBAL rather than LDST_GLOBAL.
+	Use LDR_HIDDEN instead for __pointer_chk_guard_local.
+
+	* setjmp/tst-setjmp-static.c: New file.
+	* setjmp/Makefile (tests): Add it.
+	(tests-static): New variable.
+
 2014-10-22  Maciej W. Rozycki  <macro@codesourcery.com>
 
 	[BZ #17485]
diff --git a/config.h.in b/config.h.in
index 20c082586d..695ca35dc0 100644
--- a/config.h.in
+++ b/config.h.in
@@ -243,6 +243,9 @@
 /* The ARM hard-float ABI is being used.  */
 #undef HAVE_ARM_PCS_VFP
 
+/* The ARM movw/movt instructions using PC-relative relocs work right.  */
+#define ARM_PCREL_MOVW_OK 0
+
 /* The pt_chown binary is being built and used by grantpt.  */
 #define HAVE_PT_CHOWN 0
 
diff --git a/setjmp/Makefile b/setjmp/Makefile
index 047b9ec9f2..8006d16d13 100644
--- a/setjmp/Makefile
+++ b/setjmp/Makefile
@@ -28,7 +28,8 @@ routines	:= setjmp sigjmp bsd-setjmp bsd-_setjmp \
 		   longjmp __longjmp jmp-unwind
 
 tests		:= tst-setjmp jmpbug bug269-setjmp tst-setjmp-fp \
-		   tst-sigsetjmp
+		   tst-sigsetjmp tst-setjmp-static
+tests-static	:= tst-setjmp-static
 
 
 include ../Rules
diff --git a/setjmp/tst-setjmp-static.c b/setjmp/tst-setjmp-static.c
new file mode 100644
index 0000000000..5ca5df8cb4
--- /dev/null
+++ b/setjmp/tst-setjmp-static.c
@@ -0,0 +1 @@
+#include "tst-setjmp.c"
diff --git a/sysdeps/arm/__longjmp.S b/sysdeps/arm/__longjmp.S
index 27d1b713de..a98395797a 100644
--- a/sysdeps/arm/__longjmp.S
+++ b/sysdeps/arm/__longjmp.S
@@ -77,21 +77,15 @@ ENTRY (__longjmp)
 
 #ifdef NEED_HWCAP
 # ifdef IS_IN_rtld
-	ldr	a4, 1f
-	ldr	a3, .Lrtld_local_ro
-0:	add	a4, pc, a4
-	add	a4, a4, a3
-	ldr	a4, [a4, #RTLD_GLOBAL_RO_DL_HWCAP_OFFSET]
+	LDST_PCREL (ldr, a4, a3, \
+		    C_SYMBOL_NAME(_rtld_local_ro) \
+		    + RTLD_GLOBAL_RO_DL_HWCAP_OFFSET)
 # else
 #  ifdef PIC
-	ldr	a4, 1f
-	ldr	a3, .Lrtld_global_ro
-0:	add	a4, pc, a4
-	ldr	a4, [a4, a3]
-	ldr	a4, [a4, #RTLD_GLOBAL_RO_DL_HWCAP_OFFSET]
+	LDR_GLOBAL (a4, a3, C_SYMBOL_NAME(_rtld_global_ro), \
+		    RTLD_GLOBAL_RO_DL_HWCAP_OFFSET)
 #  else
-	ldr	a4, .Lhwcap
-	ldr	a4, [a4, #0]
+	LDR_GLOBAL (a4, a3, C_SYMBOL_NAME(_dl_hwcap), 0)
 #  endif
 # endif
 #endif
@@ -138,21 +132,4 @@ ENTRY (__longjmp)
 
 	DO_RET(lr)
 
-#ifdef NEED_HWCAP
-# ifdef IS_IN_rtld
-1:	.long	_GLOBAL_OFFSET_TABLE_ - 0b - PC_OFS
-.Lrtld_local_ro:
-	.long	C_SYMBOL_NAME(_rtld_local_ro)(GOTOFF)
-# else
-#  ifdef PIC
-1:	.long	_GLOBAL_OFFSET_TABLE_ - 0b - PC_OFS
-.Lrtld_global_ro:
-	.long	C_SYMBOL_NAME(_rtld_global_ro)(GOT)
-#  else
-.Lhwcap:
-	.long	C_SYMBOL_NAME(_dl_hwcap)
-#  endif
-# endif
-#endif
-
 END (__longjmp)
diff --git a/sysdeps/arm/configure b/sysdeps/arm/configure
index 238b335f7d..45667cc522 100644
--- a/sysdeps/arm/configure
+++ b/sysdeps/arm/configure
@@ -150,8 +150,8 @@ else
   cat confdefs.h - <<_ACEOF >conftest.$ac_ext
 /* end confdefs.h.  */
 #ifdef __ARM_PCS_VFP
-                      yes
-                     #endif
+		      yes
+		     #endif
 
 _ACEOF
 if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
@@ -211,6 +211,54 @@ else
 have-arm-tls-desc = no"
 fi
 
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether PC-relative relocs in movw/movt work properly" >&5
+$as_echo_n "checking whether PC-relative relocs in movw/movt work properly... " >&6; }
+if ${libc_cv_arm_pcrel_movw+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+
+cat > conftest.s <<\EOF
+	.syntax unified
+	.arm
+	.arch armv7-a
+
+	.text
+	.globl foo
+	.type foo,%function
+foo:	movw r0, #:lower16:symbol - 1f - 8
+	movt r0, #:upper16:symbol - 1f - 8
+1:	add r0, pc
+	@ And now a case with a local symbol.
+	movw r0, #:lower16:3f - 2f - 8
+	movt r0, #:upper16:3f - 2f - 8
+2:	add r0, pc
+	bx lr
+
+.data
+	.globl symbol
+	.hidden symbol
+symbol:	.long 23
+3:	.long 17
+EOF
+libc_cv_arm_pcrel_movw=no
+${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS \
+	 -nostartfiles -nostdlib -shared \
+	 -o conftest.so conftest.s 1>&5 2>&5 &&
+LC_ALL=C $READELF -dr conftest.so > conftest.dr 2>&5 &&
+{
+  cat conftest.dr 1>&5
+  fgrep 'TEXTREL
+R_ARM_NONE' conftest.dr > /dev/null || libc_cv_arm_pcrel_movw=yes
+}
+rm -f conftest*
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_arm_pcrel_movw" >&5
+$as_echo "$libc_cv_arm_pcrel_movw" >&6; }
+if test $libc_cv_arm_pcrel_movw = yes; then
+  $as_echo "#define ARM_PCREL_MOVW_OK 1" >>confdefs.h
+
+fi
+
 libc_cv_gcc_unwind_find_fde=no
 
 # Remove -fno-unwind-tables that was added in sysdeps/arm/preconfigure.ac.
diff --git a/sysdeps/arm/configure.ac b/sysdeps/arm/configure.ac
index 86c0c08317..002b8ef365 100644
--- a/sysdeps/arm/configure.ac
+++ b/sysdeps/arm/configure.ac
@@ -17,8 +17,8 @@ dnl it.  Until we do, don't define it.
 AC_CACHE_CHECK([whether the compiler is using the ARM hard-float ABI],
   [libc_cv_arm_pcs_vfp],
   [AC_EGREP_CPP(yes,[#ifdef __ARM_PCS_VFP
-                      yes
-                     #endif
+		      yes
+		     #endif
   ], libc_cv_arm_pcs_vfp=yes, libc_cv_arm_pcs_vfp=no)])
 if test $libc_cv_arm_pcs_vfp = yes; then
   AC_DEFINE(HAVE_ARM_PCS_VFP)
@@ -40,6 +40,46 @@ else
   LIBC_CONFIG_VAR([have-arm-tls-desc], [no])
 fi
 
+AC_CACHE_CHECK([whether PC-relative relocs in movw/movt work properly],
+	       libc_cv_arm_pcrel_movw, [
+cat > conftest.s <<\EOF
+	.syntax unified
+	.arm
+	.arch armv7-a
+
+	.text
+	.globl foo
+	.type foo,%function
+foo:	movw r0, #:lower16:symbol - 1f - 8
+	movt r0, #:upper16:symbol - 1f - 8
+1:	add r0, pc
+	@ And now a case with a local symbol.
+	movw r0, #:lower16:3f - 2f - 8
+	movt r0, #:upper16:3f - 2f - 8
+2:	add r0, pc
+	bx lr
+
+.data
+	.globl symbol
+	.hidden symbol
+symbol:	.long 23
+3:	.long 17
+EOF
+libc_cv_arm_pcrel_movw=no
+${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS \
+	 -nostartfiles -nostdlib -shared \
+	 -o conftest.so conftest.s 1>&AS_MESSAGE_LOG_FD 2>&AS_MESSAGE_LOG_FD &&
+LC_ALL=C $READELF -dr conftest.so > conftest.dr 2>&AS_MESSAGE_LOG_FD &&
+{
+  cat conftest.dr 1>&AS_MESSAGE_LOG_FD
+  fgrep 'TEXTREL
+R_ARM_NONE' conftest.dr > /dev/null || libc_cv_arm_pcrel_movw=yes
+}
+rm -f conftest*])
+if test $libc_cv_arm_pcrel_movw = yes; then
+  AC_DEFINE([ARM_PCREL_MOVW_OK])
+fi
+
 libc_cv_gcc_unwind_find_fde=no
 
 # Remove -fno-unwind-tables that was added in sysdeps/arm/preconfigure.ac.
diff --git a/sysdeps/arm/setjmp.S b/sysdeps/arm/setjmp.S
index 17a16c9b5d..6f54ab3b70 100644
--- a/sysdeps/arm/setjmp.S
+++ b/sysdeps/arm/setjmp.S
@@ -58,21 +58,15 @@ ENTRY (__sigsetjmp)
 #ifdef NEED_HWCAP
 	/* Check if we have a VFP unit.  */
 # ifdef IS_IN_rtld
-	ldr	a3, 1f
-	ldr	a4, .Lrtld_local_ro
-0:	add	a3, pc, a3
-	add	a3, a3, a4
-	ldr	a3, [a3, #RTLD_GLOBAL_RO_DL_HWCAP_OFFSET]
+	LDST_PCREL (ldr, a3, a4, \
+		    C_SYMBOL_NAME(_rtld_local_ro) \
+		    + RTLD_GLOBAL_RO_DL_HWCAP_OFFSET)
 # else
 #  ifdef PIC
-	ldr	a3, 1f
-	ldr	a4, .Lrtld_global_ro
-0:	add	a3, pc, a3
-	ldr	a3, [a3, a4]
-	ldr	a3, [a3, #RTLD_GLOBAL_RO_DL_HWCAP_OFFSET]
+	LDR_GLOBAL (a3, a4, C_SYMBOL_NAME(_rtld_global_ro), \
+		    RTLD_GLOBAL_RO_DL_HWCAP_OFFSET)
 #  else
-	ldr	a3, .Lhwcap
-	ldr	a3, [a3, #0]
+	LDR_GLOBAL (a3, a4, C_SYMBOL_NAME(_dl_hwcap), 0)
 #  endif
 # endif
 #endif
@@ -114,23 +108,6 @@ ENTRY (__sigsetjmp)
 	/* Make a tail call to __sigjmp_save; it takes the same args.  */
 	B	PLTJMP(C_SYMBOL_NAME(__sigjmp_save))
 
-#ifdef NEED_HWCAP
-# ifdef IS_IN_rtld
-1:	.long	_GLOBAL_OFFSET_TABLE_ - 0b - PC_OFS
-.Lrtld_local_ro:
-	.long	C_SYMBOL_NAME(_rtld_local_ro)(GOTOFF)
-# else
-#  ifdef PIC
-1:	.long	_GLOBAL_OFFSET_TABLE_ - 0b - PC_OFS
-.Lrtld_global_ro:
-	.long	C_SYMBOL_NAME(_rtld_global_ro)(GOT)
-#  else
-.Lhwcap:
-	.long	C_SYMBOL_NAME(_dl_hwcap)
-#  endif
-# endif
-#endif
-
 END (__sigsetjmp)
 
 hidden_def (__sigsetjmp)
diff --git a/sysdeps/arm/sysdep.h b/sysdeps/arm/sysdep.h
index 4c41213c37..8614b4a058 100644
--- a/sysdeps/arm/sysdep.h
+++ b/sysdeps/arm/sysdep.h
@@ -21,6 +21,8 @@
 
 #ifndef __ASSEMBLER__
 # include <stdint.h>
+#else
+# include <arm-features.h>
 #endif
 
 /* The __ARM_ARCH define is provided by gcc 4.8.  Construct it otherwise.  */
@@ -157,6 +159,32 @@
 	.arm
 # endif
 
+/* Load or store to/from address X + Y into/from R, (maybe) using T.
+   X or Y can use T freely; T can be R if OP is a load.  The first
+   version eschews the two-register addressing mode, while the
+   second version uses it.  */
+# define LDST_INDEXED_NOINDEX(OP, R, T, X, Y)		\
+	add	T, X, Y;				\
+	sfi_breg T,					\
+	OP	R, [T]
+# define LDST_INDEXED_INDEX(OP, R, X, Y)		\
+	OP	R, [X, Y]
+
+# ifdef ARM_NO_INDEX_REGISTER
+/* We're never using the two-register addressing mode, so this
+   always uses an intermediate add.  */
+#  define LDST_INDEXED(OP, R, T, X, Y)	LDST_INDEXED_NOINDEX (OP, R, T, X, Y)
+#  define LDST_PC_INDEXED(OP, R, T, X)	LDST_INDEXED_NOINDEX (OP, R, T, pc, X)
+# else
+/* The two-register addressing mode is OK, except on Thumb with pc.  */
+#  define LDST_INDEXED(OP, R, T, X, Y)	LDST_INDEXED_INDEX (OP, R, X, Y)
+#  ifdef __thumb2__
+#   define LDST_PC_INDEXED(OP, R, T, X)	LDST_INDEXED_NOINDEX (OP, R, T, pc, X)
+#  else
+#   define LDST_PC_INDEXED(OP, R, T, X)	LDST_INDEXED_INDEX (OP, R, pc, X)
+#  endif
+# endif
+
 /* Load or store to/from a pc-relative EXPR into/from R, using T.  */
 # ifdef __thumb2__
 #  define LDST_PCREL(OP, R, T, EXPR) \
@@ -166,6 +194,11 @@
 	.previous;					\
 99:	add	T, T, pc;				\
 	OP	R, [T]
+# elif defined (ARCH_HAS_T2) && ARM_PCREL_MOVW_OK
+#  define LDST_PCREL(OP, R, T, EXPR)			\
+	movw	T, #:lower16:EXPR - 99f - PC_OFS;	\
+	movt	T, #:upper16:EXPR - 99f - PC_OFS;	\
+99:	LDST_PC_INDEXED (OP, R, T, T)
 # else
 #  define LDST_PCREL(OP, R, T, EXPR) \
 	ldr	T, 98f;					\
@@ -175,17 +208,50 @@
 99:	OP	R, [pc, T]
 # endif
 
-/* Load or store to/from a global EXPR into/from R, using T.  */
-# define LDST_GLOBAL(OP, R, T, EXPR)			\
+/* Load from a global SYMBOL + CONSTANT into R, using T.  */
+# if defined (ARCH_HAS_T2) && !defined (PIC)
+#  define LDR_GLOBAL(R, T, SYMBOL, CONSTANT)				\
+	movw	T, #:lower16:SYMBOL;					\
+	movt	T, #:upper16:SYMBOL;					\
+	ldr	R, [T, $CONSTANT]
+# elif defined (ARCH_HAS_T2) && defined (PIC) && ARM_PCREL_MOVW_OK
+#  define LDR_GLOBAL(R, T, SYMBOL, CONSTANT)				\
+	movw	R, #:lower16:_GLOBAL_OFFSET_TABLE_ - 97f - PC_OFS;	\
+	movw	T, #:lower16:99f - 98f - PC_OFS;			\
+	movt	R, #:upper16:_GLOBAL_OFFSET_TABLE_ - 97f - PC_OFS;	\
+	movt	T, #:upper16:99f - 98f - PC_OFS;			\
+	.pushsection .rodata.cst4, "aM", %progbits, 4;			\
+	.balign 4;							\
+99:	.word	SYMBOL##(GOT);						\
+	.popsection;							\
+97:	add	R, R, pc;						\
+98:	LDST_PC_INDEXED (ldr, T, T, T);					\
+	LDST_INDEXED (ldr, R, T, R, T);					\
+	ldr	R, [R, $CONSTANT]
+# else
+#  define LDR_GLOBAL(R, T, SYMBOL, CONSTANT)		\
 	ldr	T, 99f;					\
 	ldr	R, 100f;				\
 98:	add	T, T, pc;				\
 	ldr	T, [T, R];				\
 	.subsection 2;					\
 99:	.word	_GLOBAL_OFFSET_TABLE_ - 98b - PC_OFS;	\
-100:	.word	EXPR##(GOT);				\
+100:	.word	SYMBOL##(GOT);				\
 	.previous;					\
-	OP	R, [T]
+	ldr	R, [T, $CONSTANT]
+# endif
+
+/* This is the same as LDR_GLOBAL, but for a SYMBOL that is known to
+   be in the same linked object (as for one with hidden visibility).
+   We can avoid the GOT indirection in the PIC case.  For the pure
+   static case, LDR_GLOBAL is already optimal.  */
+# ifdef PIC
+#  define LDR_HIDDEN(R, T, SYMBOL, CONSTANT) \
+  LDST_PCREL (ldr, R, T, SYMBOL + CONSTANT)
+# else
+#  define LDR_HIDDEN(R, T, SYMBOL, CONSTANT) \
+  LDR_GLOBAL (R, T, SYMBOL, CONSTANT)
+# endif
 
 /* Cope with negative memory offsets, which thumb can't encode.
    Use NEGOFF_ADJ_BASE to (conditionally) alter the base register,
@@ -296,7 +362,7 @@
      (!defined SHARED && (!defined NOT_IN_libc || defined IS_IN_libpthread)))
 # ifdef __ASSEMBLER__
 #  define PTR_MANGLE_LOAD(guard, tmp)					\
-  LDST_PCREL(ldr, guard, tmp, C_SYMBOL_NAME(__pointer_chk_guard_local));
+  LDR_HIDDEN (guard, tmp, C_SYMBOL_NAME(__pointer_chk_guard_local), 0)
 #  define PTR_MANGLE(dst, src, guard, tmp)				\
   PTR_MANGLE_LOAD(guard, tmp);						\
   PTR_MANGLE2(dst, src, guard)
@@ -316,7 +382,7 @@ extern uintptr_t __pointer_chk_guard_local attribute_relro attribute_hidden;
 #else
 # ifdef __ASSEMBLER__
 #  define PTR_MANGLE_LOAD(guard, tmp)					\
-  LDST_GLOBAL(ldr, guard, tmp, C_SYMBOL_NAME(__pointer_chk_guard));
+  LDR_GLOBAL (guard, tmp, C_SYMBOL_NAME(__pointer_chk_guard), 0);
 #  define PTR_MANGLE(dst, src, guard, tmp)				\
   PTR_MANGLE_LOAD(guard, tmp);						\
   PTR_MANGLE2(dst, src, guard)