about summary refs log tree commit diff
path: root/sysdeps/x86
diff options
context:
space:
mode:
authorH.J. Lu <hjl.tools@gmail.com>2023-01-26 08:26:18 -0800
committerH.J. Lu <hjl.tools@gmail.com>2023-02-22 18:28:37 -0800
commit317f1c0a8a71a862b1e600ff5386b08e02cf4b95 (patch)
treec380e1c78d06fbb479bb39c82e8dd28119eef13f /sysdeps/x86
parentbde121872001d8f3224eeafa5b7effb871c3fbca (diff)
downloadglibc-317f1c0a8a71a862b1e600ff5386b08e02cf4b95.tar.gz
glibc-317f1c0a8a71a862b1e600ff5386b08e02cf4b95.tar.xz
glibc-317f1c0a8a71a862b1e600ff5386b08e02cf4b95.zip
x86-64: Add glibc.cpu.prefer_map_32bit_exec [BZ #28656]
Crossing 2GB boundaries with indirect calls and jumps can use more
branch prediction resources on Intel Golden Cove CPU (see the
"Misprediction for Branches >2GB" section in Intel 64 and IA-32
Architectures Optimization Reference Manual.)  There is visible
performance improvement on workloads with many PLT calls when executable
and shared libraries are mmapped below 2GB.  Add the Prefer_MAP_32BIT_EXEC
bit so that mmap will try to map executable or denywrite pages in shared
libraries with MAP_32BIT first.

NB: Prefer_MAP_32BIT_EXEC reduces bits available for address space
layout randomization (ASLR), which is always disabled for SUID programs
and can only be enabled by the tunable, glibc.cpu.prefer_map_32bit_exec,
or the environment variable, LD_PREFER_MAP_32BIT_EXEC.  This works only
between shared libraries or between shared libraries and executables with
addresses below 2GB.  PIEs are usually loaded at a random address above
4GB by the kernel.
Diffstat (limited to 'sysdeps/x86')
-rw-r--r--sysdeps/x86/cpu-features.c15
-rw-r--r--sysdeps/x86/include/cpu-features-preferred_feature_index_1.def1
2 files changed, 16 insertions, 0 deletions
diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
index a2197ed211..822688e21f 100644
--- a/sysdeps/x86/cpu-features.c
+++ b/sysdeps/x86/cpu-features.c
@@ -27,6 +27,16 @@
 extern void TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *)
   attribute_hidden;
 
+# ifdef __LP64__
+static void
+TUNABLE_CALLBACK (set_prefer_map_32bit_exec) (tunable_val_t *valp)
+{
+  if (valp->numval)
+    GLRO(dl_x86_cpu_features).preferred[index_arch_Prefer_MAP_32BIT_EXEC]
+      |= bit_arch_Prefer_MAP_32BIT_EXEC;
+}
+# endif
+
 # if CET_ENABLED
 extern void TUNABLE_CALLBACK (set_x86_ibt) (tunable_val_t *)
   attribute_hidden;
@@ -705,6 +715,11 @@ no_cpuid:
 #if HAVE_TUNABLES
   TUNABLE_GET (hwcaps, tunable_val_t *, TUNABLE_CALLBACK (set_hwcaps));
 
+# ifdef __LP64__
+  TUNABLE_GET (prefer_map_32bit_exec, tunable_val_t *,
+	       TUNABLE_CALLBACK (set_prefer_map_32bit_exec));
+# endif
+
   bool disable_xsave_features = false;
 
   if (!CPU_FEATURE_USABLE_P (cpu_features, OSXSAVE))
diff --git a/sysdeps/x86/include/cpu-features-preferred_feature_index_1.def b/sysdeps/x86/include/cpu-features-preferred_feature_index_1.def
index e45f9cb159..d20c5b3196 100644
--- a/sysdeps/x86/include/cpu-features-preferred_feature_index_1.def
+++ b/sysdeps/x86/include/cpu-features-preferred_feature_index_1.def
@@ -26,6 +26,7 @@ BIT (I586)
 BIT (I686)
 BIT (Slow_SSE4_2)
 BIT (AVX_Fast_Unaligned_Load)
+BIT (Prefer_MAP_32BIT_EXEC)
 BIT (Prefer_No_VZEROUPPER)
 BIT (Prefer_ERMS)
 BIT (Prefer_No_AVX512)