about summary refs log tree commit diff
path: root/sysdeps/arm/sysdep.h
diff options
context:
space:
mode:
authorRoland McGrath <roland@hack.frob.com>2014-10-22 14:20:35 -0700
committerRoland McGrath <roland@hack.frob.com>2014-10-22 14:20:35 -0700
commit8c2b1ed8bbd20d35314c2a602b903159fa567ffb (patch)
tree4a9e06786fdb32c5558cdae218af3b4ff0b78af6 /sysdeps/arm/sysdep.h
parentb5af9297d51a43f96c5be1bafab032184690dd6f (diff)
downloadglibc-8c2b1ed8bbd20d35314c2a602b903159fa567ffb.tar.gz
glibc-8c2b1ed8bbd20d35314c2a602b903159fa567ffb.tar.xz
glibc-8c2b1ed8bbd20d35314c2a602b903159fa567ffb.zip
ARM: Use movw/movt more when available
Diffstat (limited to 'sysdeps/arm/sysdep.h')
-rw-r--r--sysdeps/arm/sysdep.h78
1 files changed, 72 insertions, 6 deletions
diff --git a/sysdeps/arm/sysdep.h b/sysdeps/arm/sysdep.h
index 4c41213c37..8614b4a058 100644
--- a/sysdeps/arm/sysdep.h
+++ b/sysdeps/arm/sysdep.h
@@ -21,6 +21,8 @@
 
 #ifndef __ASSEMBLER__
 # include <stdint.h>
+#else
+# include <arm-features.h>
 #endif
 
 /* The __ARM_ARCH define is provided by gcc 4.8.  Construct it otherwise.  */
@@ -157,6 +159,32 @@
 	.arm
 # endif
 
+/* Load or store to/from address X + Y into/from R, (maybe) using T.
+   X or Y can use T freely; T can be R if OP is a load.  The first
+   version eschews the two-register addressing mode, while the
+   second version uses it.  */
+# define LDST_INDEXED_NOINDEX(OP, R, T, X, Y)		\
+	add	T, X, Y;				\
+	sfi_breg T,					\
+	OP	R, [T]
+# define LDST_INDEXED_INDEX(OP, R, X, Y)		\
+	OP	R, [X, Y]
+
+# ifdef ARM_NO_INDEX_REGISTER
+/* We're never using the two-register addressing mode, so this
+   always uses an intermediate add.  */
+#  define LDST_INDEXED(OP, R, T, X, Y)	LDST_INDEXED_NOINDEX (OP, R, T, X, Y)
+#  define LDST_PC_INDEXED(OP, R, T, X)	LDST_INDEXED_NOINDEX (OP, R, T, pc, X)
+# else
+/* The two-register addressing mode is OK, except on Thumb with pc.  */
+#  define LDST_INDEXED(OP, R, T, X, Y)	LDST_INDEXED_INDEX (OP, R, X, Y)
+#  ifdef __thumb2__
+#   define LDST_PC_INDEXED(OP, R, T, X)	LDST_INDEXED_NOINDEX (OP, R, T, pc, X)
+#  else
+#   define LDST_PC_INDEXED(OP, R, T, X)	LDST_INDEXED_INDEX (OP, R, pc, X)
+#  endif
+# endif
+
 /* Load or store to/from a pc-relative EXPR into/from R, using T.  */
 # ifdef __thumb2__
 #  define LDST_PCREL(OP, R, T, EXPR) \
@@ -166,6 +194,11 @@
 	.previous;					\
 99:	add	T, T, pc;				\
 	OP	R, [T]
+# elif defined (ARCH_HAS_T2) && ARM_PCREL_MOVW_OK
+#  define LDST_PCREL(OP, R, T, EXPR)			\
+	movw	T, #:lower16:EXPR - 99f - PC_OFS;	\
+	movt	T, #:upper16:EXPR - 99f - PC_OFS;	\
+99:	LDST_PC_INDEXED (OP, R, T, T)
 # else
 #  define LDST_PCREL(OP, R, T, EXPR) \
 	ldr	T, 98f;					\
@@ -175,17 +208,50 @@
 99:	OP	R, [pc, T]
 # endif
 
-/* Load or store to/from a global EXPR into/from R, using T.  */
-# define LDST_GLOBAL(OP, R, T, EXPR)			\
+/* Load from a global SYMBOL + CONSTANT into R, using T.  */
+# if defined (ARCH_HAS_T2) && !defined (PIC)
+#  define LDR_GLOBAL(R, T, SYMBOL, CONSTANT)				\
+	movw	T, #:lower16:SYMBOL;					\
+	movt	T, #:upper16:SYMBOL;					\
+	ldr	R, [T, $CONSTANT]
+# elif defined (ARCH_HAS_T2) && defined (PIC) && ARM_PCREL_MOVW_OK
+#  define LDR_GLOBAL(R, T, SYMBOL, CONSTANT)				\
+	movw	R, #:lower16:_GLOBAL_OFFSET_TABLE_ - 97f - PC_OFS;	\
+	movw	T, #:lower16:99f - 98f - PC_OFS;			\
+	movt	R, #:upper16:_GLOBAL_OFFSET_TABLE_ - 97f - PC_OFS;	\
+	movt	T, #:upper16:99f - 98f - PC_OFS;			\
+	.pushsection .rodata.cst4, "aM", %progbits, 4;			\
+	.balign 4;							\
+99:	.word	SYMBOL##(GOT);						\
+	.popsection;							\
+97:	add	R, R, pc;						\
+98:	LDST_PC_INDEXED (ldr, T, T, T);					\
+	LDST_INDEXED (ldr, R, T, R, T);					\
+	ldr	R, [R, $CONSTANT]
+# else
+#  define LDR_GLOBAL(R, T, SYMBOL, CONSTANT)		\
 	ldr	T, 99f;					\
 	ldr	R, 100f;				\
 98:	add	T, T, pc;				\
 	ldr	T, [T, R];				\
 	.subsection 2;					\
 99:	.word	_GLOBAL_OFFSET_TABLE_ - 98b - PC_OFS;	\
-100:	.word	EXPR##(GOT);				\
+100:	.word	SYMBOL##(GOT);				\
 	.previous;					\
-	OP	R, [T]
+	ldr	R, [T, $CONSTANT]
+# endif
+
+/* This is the same as LDR_GLOBAL, but for a SYMBOL that is known to
+   be in the same linked object (as for one with hidden visibility).
+   We can avoid the GOT indirection in the PIC case.  For the pure
+   static case, LDR_GLOBAL is already optimal.  */
+# ifdef PIC
+#  define LDR_HIDDEN(R, T, SYMBOL, CONSTANT) \
+  LDST_PCREL (ldr, R, T, SYMBOL + CONSTANT)
+# else
+#  define LDR_HIDDEN(R, T, SYMBOL, CONSTANT) \
+  LDR_GLOBAL (R, T, SYMBOL, CONSTANT)
+# endif
 
 /* Cope with negative memory offsets, which thumb can't encode.
    Use NEGOFF_ADJ_BASE to (conditionally) alter the base register,
@@ -296,7 +362,7 @@
      (!defined SHARED && (!defined NOT_IN_libc || defined IS_IN_libpthread)))
 # ifdef __ASSEMBLER__
 #  define PTR_MANGLE_LOAD(guard, tmp)					\
-  LDST_PCREL(ldr, guard, tmp, C_SYMBOL_NAME(__pointer_chk_guard_local));
+  LDR_HIDDEN (guard, tmp, C_SYMBOL_NAME(__pointer_chk_guard_local), 0)
 #  define PTR_MANGLE(dst, src, guard, tmp)				\
   PTR_MANGLE_LOAD(guard, tmp);						\
   PTR_MANGLE2(dst, src, guard)
@@ -316,7 +382,7 @@ extern uintptr_t __pointer_chk_guard_local attribute_relro attribute_hidden;
 #else
 # ifdef __ASSEMBLER__
 #  define PTR_MANGLE_LOAD(guard, tmp)					\
-  LDST_GLOBAL(ldr, guard, tmp, C_SYMBOL_NAME(__pointer_chk_guard));
+  LDR_GLOBAL (guard, tmp, C_SYMBOL_NAME(__pointer_chk_guard), 0);
 #  define PTR_MANGLE(dst, src, guard, tmp)				\
   PTR_MANGLE_LOAD(guard, tmp);						\
   PTR_MANGLE2(dst, src, guard)