about summary refs log tree commit diff
path: root/sysdeps/unix/sysv
diff options
context:
space:
mode:
author Matheus Castanho <msc@linux.ibm.com> 2020-12-03 14:15:27 -0300
committer Tulio Magno Quites Machado Filho <tuliom@linux.ibm.com> 2020-12-30 18:26:25 -0300
commit 68ab82f56690ada86ac1e0c46bad06ba189a10ef (patch)
tree e76e999e3b9ccda3133e2dc09b53161a6dec8460 /sysdeps/unix/sysv
parent 9835632cf43fd6d1f8b8f40a88892a45b6bfad6e (diff)
download glibc-68ab82f56690ada86ac1e0c46bad06ba189a10ef.tar.gz
glibc-68ab82f56690ada86ac1e0c46bad06ba189a10ef.tar.xz
glibc-68ab82f56690ada86ac1e0c46bad06ba189a10ef.zip
powerpc: Runtime selection between sc and scv for syscalls
Linux kernel v5.9 added support for system calls using the scv
instruction on POWER9 and later.  The new codepath provides better
performance (see below) than using sc.  For the foreseeable future,
both the sc and scv mechanisms will co-exist, so this patch enables
glibc to do a runtime check and use scv when it is
available.

Before issuing the system call to the kernel, we check hwcap2 in the TCB
for PPC_FEATURE2_SCV to see if scv is supported by the kernel.  If not,
we fall back to sc and keep the old behavior.

The kernel implements a different error return convention for scv, so
when returning from a system call we need to handle the return value
differently depending on the instruction we used to enter the kernel.

For syscalls implemented in ASM, entry and exit are implemented by
different macros (PSEUDO and PSEUDO_RET, resp.), which may be used in
sequence (e.g. for templated syscalls) or with other instructions in
between (e.g. clone).  To avoid accessing the TCB a second time in
PSEUDO_RET to check which instruction we used, the value read from
hwcap2 is cached in a non-volatile register.

This is not needed when using the INTERNAL_SYSCALL macro, since entry and
exit are bundled into the same inline asm directive.

The dynamic loader may issue syscalls before the TCB has been set up,
so it always uses sc with no extra checks.  For the static case, there
is no compile-time way to determine if we are inside startup code,
so we also check the value of the thread pointer before actually
accessing the TCB.  For such situations in which the availability of
scv cannot be determined, sc is always used.

Support for scv in syscalls implemented in their own ASM file (clone and
vfork) will be added later.  For now, they simply use sc as before.

Average performance over 1M calls for each syscall "type":
  - stat: C wrapper calling INTERNAL_SYSCALL
  - getpid: templated ASM syscall
  - syscall: call to gettid using syscall function

  Standard:
     stat : 1.573445 us / ~3619 cycles
   getpid : 0.164986 us / ~379 cycles
  syscall : 0.162743 us / ~374 cycles

  With scv:
     stat : 1.537049 us / ~3535 cycles <~ -84 cycles  / -2.32%
   getpid : 0.109923 us / ~253 cycles  <~ -126 cycles / -33.25%
  syscall : 0.116410 us / ~268 cycles  <~ -106 cycles / -28.34%

Tested on powerpc, powerpc64, powerpc64le (with and without scv)

Tested-by: Lucas A. M. Magalhães <lamm@linux.ibm.com>
Reviewed-by: Tulio Magno Quites Machado Filho <tuliom@linux.ibm.com>
Diffstat (limited to 'sysdeps/unix/sysv')
-rw-r--r-- sysdeps/unix/sysv/linux/powerpc/powerpc64/clone.S 9
-rw-r--r-- sysdeps/unix/sysv/linux/powerpc/powerpc64/vfork.S 6
-rw-r--r-- sysdeps/unix/sysv/linux/powerpc/syscall.S 11
-rw-r--r-- sysdeps/unix/sysv/linux/powerpc/sysdep.h 93
4 files changed, 88 insertions, 31 deletions
diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/clone.S b/sysdeps/unix/sysv/linux/powerpc/powerpc64/clone.S
index b30641c805..fc496fa671 100644
--- a/sysdeps/unix/sysv/linux/powerpc/powerpc64/clone.S
+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/clone.S
@@ -68,7 +68,8 @@ ENTRY (__clone)
 	cfi_endproc
 
 	/* Do the call.  */
-	DO_CALL(SYS_ify(clone))
+	li 	r0,SYS_ify(clone)
+	DO_CALL_SC
 
 	/* Check for child process.  */
 	cmpdi	cr1,r3,0
@@ -82,7 +83,8 @@ ENTRY (__clone)
 	bctrl
 	ld	r2,FRAME_TOC_SAVE(r1)
 
-	DO_CALL(SYS_ify(exit))
+	li	r0,(SYS_ify(exit))
+	DO_CALL_SC
 	/* We won't ever get here but provide a nop so that the linker
 	   will insert a toc adjusting stub if necessary.  */
 	nop
@@ -104,7 +106,8 @@ L(parent):
 	cfi_restore(r30)
 	cfi_restore(r31)
 
-	PSEUDO_RET
+	RET_SC
+	TAIL_CALL_SYSCALL_ERROR
 
 END (__clone)
 
diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/vfork.S b/sysdeps/unix/sysv/linux/powerpc/powerpc64/vfork.S
index 17199fb14a..a71f69e929 100644
--- a/sysdeps/unix/sysv/linux/powerpc/powerpc64/vfork.S
+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/vfork.S
@@ -28,9 +28,11 @@
 ENTRY (__vfork)
 	CALL_MCOUNT 0
 
-	DO_CALL (SYS_ify (vfork))
+	li r0,SYS_ify (vfork)
+	DO_CALL_SC
 
-	PSEUDO_RET
+	RET_SC
+	TAIL_CALL_SYSCALL_ERROR
 
 PSEUDO_END (__vfork)
 libc_hidden_def (__vfork)
diff --git a/sysdeps/unix/sysv/linux/powerpc/syscall.S b/sysdeps/unix/sysv/linux/powerpc/syscall.S
index 48dade4642..9fc4ddd3cb 100644
--- a/sysdeps/unix/sysv/linux/powerpc/syscall.S
+++ b/sysdeps/unix/sysv/linux/powerpc/syscall.S
@@ -25,6 +25,13 @@ ENTRY (syscall)
 	mr   r6,r7
 	mr   r7,r8
 	mr   r8,r9
-	sc
-	PSEUDO_RET
+#if !IS_IN(rtld) && (defined(__PPC64__) || defined(__powerpc64__))
+	CHECK_SCV_SUPPORT r9 0f
+	DO_CALL_SCV
+	RET_SCV
+	b 1f
+#endif
+0:	DO_CALL_SC
+	RET_SC
+1:	TAIL_CALL_SYSCALL_ERROR
 PSEUDO_END (syscall)
diff --git a/sysdeps/unix/sysv/linux/powerpc/sysdep.h b/sysdeps/unix/sysv/linux/powerpc/sysdep.h
index b2bca598b9..7f69804edc 100644
--- a/sysdeps/unix/sysv/linux/powerpc/sysdep.h
+++ b/sysdeps/unix/sysv/linux/powerpc/sysdep.h
@@ -64,39 +64,84 @@
 #define INTERNAL_VSYSCALL_CALL(funcptr, nr, args...)			\
   INTERNAL_VSYSCALL_CALL_TYPE(funcptr, long int, nr, args)
 
+#define DECLARE_REGS				\
+  register long int r0  __asm__ ("r0");		\
+  register long int r3  __asm__ ("r3");		\
+  register long int r4  __asm__ ("r4");		\
+  register long int r5  __asm__ ("r5");		\
+  register long int r6  __asm__ ("r6");		\
+  register long int r7  __asm__ ("r7");		\
+  register long int r8  __asm__ ("r8");
 
-#undef INTERNAL_SYSCALL
-#define INTERNAL_SYSCALL_NCS(name, nr, args...) \
-  ({									\
-    register long int r0  __asm__ ("r0");				\
-    register long int r3  __asm__ ("r3");				\
-    register long int r4  __asm__ ("r4");				\
-    register long int r5  __asm__ ("r5");				\
-    register long int r6  __asm__ ("r6");				\
-    register long int r7  __asm__ ("r7");				\
-    register long int r8  __asm__ ("r8");				\
-    LOADARGS_##nr (name, ##args);					\
-    __asm__ __volatile__						\
-      ("sc\n\t"								\
-       "mfcr  %0\n\t"							\
-       "0:"								\
-       : "=&r" (r0),							\
-         "=&r" (r3), "=&r" (r4), "=&r" (r5),				\
-         "=&r" (r6), "=&r" (r7), "=&r" (r8)				\
-       : ASM_INPUT_##nr							\
-       : "r9", "r10", "r11", "r12",					\
-         "cr0", "ctr", "memory");					\
-    r0 & (1 << 28) ? -r3 : r3;						\
+#define SYSCALL_SCV(nr)				\
+  ({						\
+    __asm__ __volatile__			\
+      ("scv 0\n\t"				\
+       "0:"					\
+       : "=&r" (r0),				\
+	 "=&r" (r3), "=&r" (r4), "=&r" (r5),	\
+	 "=&r" (r6), "=&r" (r7), "=&r" (r8)	\
+       : ASM_INPUT_##nr			\
+       : "r9", "r10", "r11", "r12",		\
+	 "lr", "ctr", "memory");		\
+    r3;					\
   })
-#define INTERNAL_SYSCALL(name, nr, args...)				\
-  INTERNAL_SYSCALL_NCS (__NR_##name, nr, args)
+
+#define SYSCALL_SC(nr)				\
+  ({						\
+    __asm__ __volatile__			\
+      ("sc\n\t"				\
+       "mfcr %0\n\t"				\
+       "0:"					\
+       : "=&r" (r0),				\
+	 "=&r" (r3), "=&r" (r4), "=&r" (r5),	\
+	 "=&r" (r6), "=&r" (r7), "=&r" (r8)	\
+       : ASM_INPUT_##nr			\
+       : "r9", "r10", "r11", "r12",		\
+	 "cr0", "ctr", "memory");		\
+    r0 & (1 << 28) ? -r3 : r3;			\
+  })
+
+/* This will only be non-empty for 64-bit systems, see below.  */
+#define TRY_SYSCALL_SCV(nr)
 
 #if defined(__PPC64__) || defined(__powerpc64__)
 # define SYSCALL_ARG_SIZE 8
+
+/* For the static case, unlike the dynamic loader, there is no compile-time way
+   to check if we are inside startup code.  So we need to check if the thread
+   pointer has already been setup before trying to access the TLS.  */
+# ifndef SHARED
+#  define CHECK_THREAD_POINTER (__thread_register != 0)
+# else
+#  define CHECK_THREAD_POINTER (1)
+# endif
+
+/* When inside the dynamic loader, the thread pointer may not have been
+   initialized yet, so don't check for scv support in that case.  */
+# if !IS_IN(rtld)
+#  undef TRY_SYSCALL_SCV
+#  define TRY_SYSCALL_SCV(nr)						\
+  CHECK_THREAD_POINTER && THREAD_GET_HWCAP() & PPC_FEATURE2_SCV ?	\
+      SYSCALL_SCV(nr) :
+# endif
+
 #else
 # define SYSCALL_ARG_SIZE 4
 #endif
 
+# define INTERNAL_SYSCALL_NCS(name, nr, args...)	\
+  ({							\
+    DECLARE_REGS;					\
+    LOADARGS_##nr (name, ##args);			\
+    TRY_SYSCALL_SCV(nr)					\
+    SYSCALL_SC(nr);					\
+  })
+
+#undef INTERNAL_SYSCALL
+#define INTERNAL_SYSCALL(name, nr, args...)				\
+  INTERNAL_SYSCALL_NCS (__NR_##name, nr, args)
+
 #define LOADARGS_0(name, dummy) \
 	r0 = name
 #define LOADARGS_1(name, __arg1) \