diff options
Diffstat (limited to 'sysdeps/unix/sysv')
44 files changed, 685 insertions, 109 deletions
diff --git a/sysdeps/unix/sysv/linux/Makefile b/sysdeps/unix/sysv/linux/Makefile index 7df51a325c..527c7a5ae8 100644 --- a/sysdeps/unix/sysv/linux/Makefile +++ b/sysdeps/unix/sysv/linux/Makefile @@ -230,6 +230,7 @@ tests += \ tst-scm_rights \ tst-sigtimedwait \ tst-sync_file_range \ + tst-syscall-restart \ tst-sysconf-iov_max \ tst-sysvmsg-linux \ tst-sysvsem-linux \ diff --git a/sysdeps/unix/sysv/linux/aarch64/sysdep.h b/sysdeps/unix/sysv/linux/aarch64/sysdep.h index bbbe35723c..974b503b2f 100644 --- a/sysdeps/unix/sysv/linux/aarch64/sysdep.h +++ b/sysdeps/unix/sysv/linux/aarch64/sysdep.h @@ -164,6 +164,7 @@ # define HAVE_CLOCK_GETRES64_VSYSCALL "__kernel_clock_getres" # define HAVE_CLOCK_GETTIME64_VSYSCALL "__kernel_clock_gettime" # define HAVE_GETTIMEOFDAY_VSYSCALL "__kernel_gettimeofday" +# define HAVE_GETRANDOM_VSYSCALL "__kernel_getrandom" # define HAVE_CLONE3_WRAPPER 1 diff --git a/sysdeps/unix/sysv/linux/arm/bits/hwcap.h b/sysdeps/unix/sysv/linux/arm/bits/hwcap.h index b104de473c..7b573facb9 100644 --- a/sysdeps/unix/sysv/linux/arm/bits/hwcap.h +++ b/sysdeps/unix/sysv/linux/arm/bits/hwcap.h @@ -49,3 +49,11 @@ #define HWCAP_ASIMDFHM 33554432 #define HWCAP_ASIMDBF16 67108864 #define HWCAP_I8MM 134217728 + +#define HWCAP2_AES 1 +#define HWCAP2_PMULL 2 +#define HWCAP2_SHA1 4 +#define HWCAP2_SHA2 8 +#define HWCAP2_CRC32 16 +#define HWCAP2_SB 32 +#define HWCAP2_SSBS 64 diff --git a/sysdeps/unix/sysv/linux/bits/mman-linux.h b/sysdeps/unix/sysv/linux/bits/mman-linux.h index 522333c50a..161a885096 100644 --- a/sysdeps/unix/sysv/linux/bits/mman-linux.h +++ b/sysdeps/unix/sysv/linux/bits/mman-linux.h @@ -43,6 +43,7 @@ #define MAP_PRIVATE 0x02 /* Changes are private. */ #define MAP_SHARED_VALIDATE 0x03 /* Share changes and validate extension flags. */ +#define MAP_DROPPABLE 0x08 /* Zero memory under memory pressure. */ #define MAP_TYPE 0x0f /* Mask for type of mapping. */ /* Other flags. */ diff --git a/sysdeps/unix/sysv/linux/bits/mman-shared.h b/sysdeps/unix/sysv/linux/bits/mman-shared.h index d8ed4436b6..7a9874204d 100644 --- a/sysdeps/unix/sysv/linux/bits/mman-shared.h +++ b/sysdeps/unix/sysv/linux/bits/mman-shared.h @@ -42,7 +42,7 @@ # define MLOCK_ONFAULT 1U # endif -/* Access rights for pkey_alloc. */ +/* Access restrictions for pkey_alloc. */ # ifndef PKEY_DISABLE_ACCESS # define PKEY_DISABLE_ACCESS 0x1 # define PKEY_DISABLE_WRITE 0x2 @@ -59,16 +59,16 @@ int memfd_create (const char *__name, unsigned int __flags) __THROW; int mlock2 (const void *__addr, size_t __length, unsigned int __flags) __THROW; /* Allocate a new protection key, with the PKEY_DISABLE_* bits - specified in ACCESS_RIGHTS. The protection key mask for the + specified in ACCESS_RESTRICTIONS. The protection key mask for the current thread is updated to match the access privilege for the new key. */ -int pkey_alloc (unsigned int __flags, unsigned int __access_rights) __THROW; +int pkey_alloc (unsigned int __flags, unsigned int __access_restrictions) __THROW; -/* Update the access rights for the current thread for KEY, which must +/* Update the access restrictions for the current thread for KEY, which must have been allocated using pkey_alloc. */ -int pkey_set (int __key, unsigned int __access_rights) __THROW; +int pkey_set (int __key, unsigned int __access_restrictions) __THROW; -/* Return the access rights for the current thread for KEY, which must +/* Return the access restrictions for the current thread for KEY, which must have been allocated using pkey_alloc. */ int pkey_get (int __key) __THROW; diff --git a/sysdeps/unix/sysv/linux/bits/sched.h b/sysdeps/unix/sysv/linux/bits/sched.h index a02cb69de7..f03f5f5ab3 100644 --- a/sysdeps/unix/sysv/linux/bits/sched.h +++ b/sysdeps/unix/sysv/linux/bits/sched.h @@ -29,7 +29,7 @@ #define SCHED_FIFO 1 #define SCHED_RR 2 #ifdef __USE_GNU -# define SCHED_NORMAL SCHED_OTHER +# define SCHED_NORMAL 0 # define SCHED_BATCH 3 # define SCHED_ISO 4 # define SCHED_IDLE 5 @@ -48,8 +48,10 @@ #define SCHED_FLAG_UTIL_CLAMP_MAX 0x40 /* Combinations of sched_flags fields. */ -#define SCHED_FLAG_KEEP_ALL 0x18 -#define SCHED_FLAG_UTIL_CLAMP 0x60 +#define SCHED_FLAG_KEEP_ALL \ + (SCHED_FLAG_KEEP_POLICY | SCHED_FLAG_KEEP_PARAMS) +#define SCHED_FLAG_UTIL_CLAMP \ + (SCHED_FLAG_UTIL_CLAMP_MIN | SCHED_FLAG_UTIL_CLAMP_MAX) /* Use "" to work around incorrect macro expansion of the __has_include argument (GCC PR 80005). */ diff --git a/sysdeps/unix/sysv/linux/bits/uio-ext.h b/sysdeps/unix/sysv/linux/bits/uio-ext.h index ead7a09156..85ed21bac5 100644 --- a/sysdeps/unix/sysv/linux/bits/uio-ext.h +++ b/sysdeps/unix/sysv/linux/bits/uio-ext.h @@ -48,6 +48,8 @@ extern ssize_t process_vm_writev (pid_t __pid, const struct iovec *__lvec, #define RWF_NOWAIT 0x00000008 /* per-IO nonblocking mode. */ #define RWF_APPEND 0x00000010 /* per-IO O_APPEND. */ #define RWF_NOAPPEND 0x00000020 /* per-IO negation of O_APPEND */ +#define RWF_ATOMIC 0x00000040 /* Write is to be issued with torn-write + prevention. */ __END_DECLS diff --git a/sysdeps/unix/sysv/linux/configure b/sysdeps/unix/sysv/linux/configure index 4d642e953b..8cda8e9451 100644..100755 --- a/sysdeps/unix/sysv/linux/configure +++ b/sysdeps/unix/sysv/linux/configure @@ -1,3 +1,4 @@ +ac_default_prefix=/usr # This file is generated from configure.ac by Autoconf. DO NOT EDIT! # Local configure fragment for sysdeps/unix/sysv/linux. @@ -116,7 +117,9 @@ if test -n "$sysheaders"; then CPPFLAGS=$OLD_CPPFLAGS fi -if test "$prefix" = "/usr/local" -o "$prefix" = "/usr/local/" -o "$prefix" = "NONE"; then + + +if test "$prefix" = "/usr/local" -o "$prefix" = "/usr/local/"; then if test $enable_sanity = yes; then echo "\ *** On GNU/Linux systems the GNU C Library should not be installed into diff --git a/sysdeps/unix/sysv/linux/configure.ac b/sysdeps/unix/sysv/linux/configure.ac index c177e082a9..4c43cda877 100644 --- a/sysdeps/unix/sysv/linux/configure.ac +++ b/sysdeps/unix/sysv/linux/configure.ac @@ -71,7 +71,9 @@ if test -n "$sysheaders"; then CPPFLAGS=$OLD_CPPFLAGS fi -if test "$prefix" = "/usr/local" -o "$prefix" = "/usr/local/" -o "$prefix" = "NONE"; then +AC_PREFIX_DEFAULT([/usr]) + +if test "$prefix" = "/usr/local" -o "$prefix" = "/usr/local/"; then if test $enable_sanity = yes; then echo "\ *** On GNU/Linux systems the GNU C Library should not be installed into diff --git a/sysdeps/unix/sysv/linux/dl-vdso-setup.c b/sysdeps/unix/sysv/linux/dl-vdso-setup.c index 3a44944dbb..476c6db75a 100644 --- a/sysdeps/unix/sysv/linux/dl-vdso-setup.c +++ b/sysdeps/unix/sysv/linux/dl-vdso-setup.c @@ -66,6 +66,11 @@ PROCINFO_CLASS int (*_dl_vdso_clock_getres) (clockid_t, PROCINFO_CLASS int (*_dl_vdso_clock_getres_time64) (clockid_t, struct __timespec64 *) RELRO; # endif +# ifdef HAVE_GETRANDOM_VSYSCALL +PROCINFO_CLASS ssize_t (*_dl_vdso_getrandom) (void *buffer, size_t len, + unsigned int flags, void *state, + size_t state_len) RELRO; +# endif /* PowerPC specific ones. */ # ifdef HAVE_GET_TBFREQ diff --git a/sysdeps/unix/sysv/linux/dl-vdso-setup.h b/sysdeps/unix/sysv/linux/dl-vdso-setup.h index 8aee5a8212..cde99f608c 100644 --- a/sysdeps/unix/sysv/linux/dl-vdso-setup.h +++ b/sysdeps/unix/sysv/linux/dl-vdso-setup.h @@ -50,6 +50,9 @@ setup_vdso_pointers (void) #ifdef HAVE_RISCV_HWPROBE GLRO(dl_vdso_riscv_hwprobe) = dl_vdso_vsym (HAVE_RISCV_HWPROBE); #endif +#ifdef HAVE_GETRANDOM_VSYSCALL + GLRO(dl_vdso_getrandom) = dl_vdso_vsym (HAVE_GETRANDOM_VSYSCALL); +#endif } #endif diff --git a/sysdeps/unix/sysv/linux/getrandom-internal.h b/sysdeps/unix/sysv/linux/getrandom-internal.h new file mode 100644 index 0000000000..37e6c9bc15 --- /dev/null +++ b/sysdeps/unix/sysv/linux/getrandom-internal.h @@ -0,0 +1,29 @@ +/* Internal definitions for Linux getrandom implementation. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#ifndef _GETRANDOM_INTERNAL_H +#define _GETRANDOM_INTERNAL_H + +#include <pthreadP.h> + +extern void __getrandom_early_init (_Bool) attribute_hidden; + +extern void __getrandom_fork_subprocess (void) attribute_hidden; +extern void __getrandom_vdso_release (struct pthread *curp) attribute_hidden; +extern void __getrandom_reset_state (struct pthread *curp) attribute_hidden; +#endif diff --git a/sysdeps/unix/sysv/linux/getrandom.c b/sysdeps/unix/sysv/linux/getrandom.c index 777d1decf0..c8c578263d 100644 --- a/sysdeps/unix/sysv/linux/getrandom.c +++ b/sysdeps/unix/sysv/linux/getrandom.c @@ -21,12 +21,314 @@ #include <unistd.h> #include <sysdep-cancel.h> +static inline ssize_t +getrandom_syscall (void *buffer, size_t length, unsigned int flags, + bool cancel) +{ + return cancel + ? SYSCALL_CANCEL (getrandom, buffer, length, flags) + : INLINE_SYSCALL_CALL (getrandom, buffer, length, flags); +} + +#ifdef HAVE_GETRANDOM_VSYSCALL +# include <assert.h> +# include <ldsodefs.h> +# include <libc-lock.h> +# include <list.h> +# include <setvmaname.h> +# include <sys/mman.h> +# include <sys/sysinfo.h> +# include <tls-internal.h> + +/* These values will be initialized at loading time by calling the + _dl_vdso_getrandom with a special value. The 'state_size' is the opaque + state size per-thread allocated with a mmap using 'mmap_prot' and + 'mmap_flags' argument. */ +static uint32_t state_size; +static uint32_t state_size_cache_aligned; +static uint32_t mmap_prot; +static uint32_t mmap_flags; + +/* The function below are used on reentracy handling with (i.e. SA_NODEFER). + Before allocating a new state or issue the vDSO, atomically read the + current thread buffer, and if this is already reserved (is_reserved_ptr) + fallback to the syscall. Otherwise, reserve the buffer by atomically + setting the LSB of the opaque state pointer. The bit is cleared after the + vDSO is called, or before issuing the fallback syscall. */ + +static inline void *reserve_ptr (void *p) +{ + return (void *) ((uintptr_t) (p) | 1UL); +} + +static inline void *release_ptr (void *p) +{ + return (void *) ((uintptr_t) (p) & ~1UL); +} + +static inline bool is_reserved_ptr (void *p) +{ + return (uintptr_t) (p) & 1UL; +} + +static struct +{ + __libc_lock_define (, lock); + + void **states; /* Queue of opaque states allocated with the kernel + provided flags and used on getrandom vDSO call. */ + size_t len; /* Number of available free states in the queue. */ + size_t total; /* Number of states allocated from the kernel. */ + size_t cap; /* Total number of states that 'states' can hold before + needed to be resized. */ +} grnd_alloc = { + .lock = LLL_LOCK_INITIALIZER +}; + +static bool +vgetrandom_get_state_alloc (void) +{ + /* Start by allocating one page for the opaque states. */ + size_t block_size = ALIGN_UP (state_size_cache_aligned, GLRO(dl_pagesize)); + size_t states_per_page = GLRO (dl_pagesize) / state_size_cache_aligned; + void *block = __mmap (NULL, GLRO(dl_pagesize), mmap_prot, mmap_flags, -1, 0); + if (block == MAP_FAILED) + return false; + __set_vma_name (block, block_size, " glibc: getrandom"); + + if (grnd_alloc.total + states_per_page > grnd_alloc.cap) + { + /* Use a new mmap instead of trying to mremap. It avoids a + potential multithread fork issue where fork is called just after + mremap returns but before assigning to the grnd_alloc.states, + thus making the its value invalid in the child. */ + void *old_states = grnd_alloc.states; + size_t new_states_size = ALIGN_UP ((grnd_alloc.total + states_per_page) + * sizeof (*grnd_alloc.states), + GLRO(dl_pagesize)); + + /* There is no need to memcpy any opaque state information because + all the allocated opaque states are assigned to running threads + (meaning that if we iterate over them we can reconstruct the state + list). */ + void **states = __mmap (NULL, new_states_size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (states == MAP_FAILED) + { + __munmap (block, block_size); + return false; + } + + /* Atomically replace the old state, so if a fork happens the child + process will see a consistent free state buffer. The size might + not be updated, but it does not really matter since the buffer is + always increased. */ + grnd_alloc.states = states; + atomic_thread_fence_seq_cst (); + if (old_states != NULL) + __munmap (old_states, grnd_alloc.cap * sizeof (*grnd_alloc.states)); + + __set_vma_name (states, new_states_size, " glibc: getrandom states"); + grnd_alloc.cap = new_states_size / sizeof (*grnd_alloc.states); + atomic_thread_fence_seq_cst (); + } + + for (size_t i = 0; i < states_per_page; ++i) + { + /* There is no need to handle states that straddle a page because + we allocate only one page. */ + grnd_alloc.states[i] = block; + block += state_size_cache_aligned; + } + /* Concurrent fork should not observe the previous pointer value. */ + grnd_alloc.len = states_per_page; + grnd_alloc.total += states_per_page; + atomic_thread_fence_seq_cst (); + + return true; +} + +/* Allocate an opaque state for vgetrandom. If the grnd_alloc does not have + any, mmap() another page of them using the vgetrandom parameters. */ +static void * +vgetrandom_get_state (void) +{ + void *state = NULL; + + /* The signal blocking avoid the potential issue where _Fork() (which is + async-signal-safe) is called with the lock taken. The function is + called only once during thread lifetime, so the overhead should be + minimal. */ + internal_sigset_t set; + internal_signal_block_all (&set); + __libc_lock_lock (grnd_alloc.lock); + + if (grnd_alloc.len > 0 || vgetrandom_get_state_alloc ()) + state = grnd_alloc.states[--grnd_alloc.len]; + + __libc_lock_unlock (grnd_alloc.lock); + internal_signal_restore_set (&set); + + return state; +} + +/* Returns true when vgetrandom is used successfully. Returns false if the + syscall fallback should be issued in the case the vDSO is not present, in + the case of reentrancy, or if any memory allocation fails. */ +static ssize_t +getrandom_vdso (void *buffer, size_t length, unsigned int flags, bool cancel) +{ + if (__glibc_unlikely (state_size == 0)) + return getrandom_syscall (buffer, length, flags, cancel); + + struct pthread *self = THREAD_SELF; + + void *state = atomic_load_relaxed (&self->getrandom_buf); + if (is_reserved_ptr (state)) + return getrandom_syscall (buffer, length, flags, cancel); + atomic_store_relaxed (&self->getrandom_buf, reserve_ptr (state)); + __atomic_signal_fence (__ATOMIC_ACQ_REL); + + bool r = false; + if (state == NULL) + { + state = vgetrandom_get_state (); + if (state == NULL) + goto out; + } + + /* Since the vDSO implementation does not issue the syscall with the + cancellation bridge (__syscall_cancel_arch), use GRND_NONBLOCK so there + is no potential unbounded blocking in the kernel. It should be a rare + situation, only at system startup when RNG is not initialized. */ + ssize_t ret = GLRO (dl_vdso_getrandom) (buffer, + length, + flags | GRND_NONBLOCK, + state, + state_size); + if (INTERNAL_SYSCALL_ERROR_P (ret)) + { + /* Fallback to the syscall if the kernel would block. */ + int err = INTERNAL_SYSCALL_ERRNO (ret); + if (err == EAGAIN && !(flags & GRND_NONBLOCK)) + goto out; + + __set_errno (err); + ret = -1; + } + r = true; + +out: + __atomic_signal_fence (__ATOMIC_ACQ_REL); + atomic_store_relaxed (&self->getrandom_buf, state); + return r ? ret : getrandom_syscall (buffer, length, flags, cancel); +} +#endif + +void +__getrandom_early_init (_Bool initial) +{ +#ifdef HAVE_GETRANDOM_VSYSCALL + /* libcs loaded for audit modules, dlmopen, etc. fallback to syscall. */ + if (initial && (GLRO (dl_vdso_getrandom) != NULL)) + { + /* Used to query the vDSO for the required mmap flags and the opaque + per-thread state size. Defined by linux/random.h. */ + struct vgetrandom_opaque_params + { + uint32_t size_of_opaque_state; + uint32_t mmap_prot; + uint32_t mmap_flags; + uint32_t reserved[13]; + } params; + if (GLRO(dl_vdso_getrandom) (NULL, 0, 0, ¶ms, ~0UL) == 0) + { + /* Align each opaque state to L1 data cache size to avoid false + sharing. If the size can not be obtained, use the kernel + provided one. */ + state_size = params.size_of_opaque_state; + + long int ld1sz = __sysconf (_SC_LEVEL1_DCACHE_LINESIZE); + if (ld1sz <= 0) + ld1sz = 1; + state_size_cache_aligned = ALIGN_UP (state_size, ld1sz); + /* Do not enable vDSO if the required opaque state size is larger + than a page because we only allocate one page per time to hold + the states. */ + if (state_size_cache_aligned > GLRO(dl_pagesize)) + { + state_size = 0; + return; + } + mmap_prot = params.mmap_prot; + mmap_flags = params.mmap_flags; + } + } +#endif +} + +/* Re-add the state state from CURP on the free list. This function is + called after fork returns in the child, so no locking is required. */ +void +__getrandom_reset_state (struct pthread *curp) +{ +#ifdef HAVE_GETRANDOM_VSYSCALL + if (grnd_alloc.states == NULL || curp->getrandom_buf == NULL) + return; + assert (grnd_alloc.len < grnd_alloc.cap); + grnd_alloc.states[grnd_alloc.len++] = release_ptr (curp->getrandom_buf); + curp->getrandom_buf = NULL; +#endif +} + +/* Called when a thread terminates, and adds its random buffer back into the + allocator pool for use in a future thread. This is called by + pthread_create during thread termination, and after signal has been + blocked. */ +void +__getrandom_vdso_release (struct pthread *curp) +{ +#ifdef HAVE_GETRANDOM_VSYSCALL + if (curp->getrandom_buf == NULL) + return; + + __libc_lock_lock (grnd_alloc.lock); + grnd_alloc.states[grnd_alloc.len++] = curp->getrandom_buf; + __libc_lock_unlock (grnd_alloc.lock); +#endif +} + +/* Reset the internal lock state in case another thread has locked while + this thread calls fork. The stale thread states will be handled by + reclaim_stacks which calls __getrandom_reset_state on each thread. */ +void +__getrandom_fork_subprocess (void) +{ +#ifdef HAVE_GETRANDOM_VSYSCALL + grnd_alloc.lock = LLL_LOCK_INITIALIZER; +#endif +} + +ssize_t +__getrandom_nocancel (void *buffer, size_t length, unsigned int flags) +{ +#ifdef HAVE_GETRANDOM_VSYSCALL + return getrandom_vdso (buffer, length, flags, false); +#else + return getrandom_syscall (buffer, length, flags, false); +#endif +} + /* Write up to LENGTH bytes of randomness starting at BUFFER. Return the number of bytes written, or -1 on error. */ ssize_t __getrandom (void *buffer, size_t length, unsigned int flags) { - return SYSCALL_CANCEL (getrandom, buffer, length, flags); +#ifdef HAVE_GETRANDOM_VSYSCALL + return getrandom_vdso (buffer, length, flags, true); +#else + return getrandom_syscall (buffer, length, flags, true); +#endif } libc_hidden_def (__getrandom) weak_alias (__getrandom, getrandom) diff --git a/sysdeps/unix/sysv/linux/gettimeofday.c b/sysdeps/unix/sysv/linux/gettimeofday.c index 7ab147c614..5ca8916611 100644 --- a/sysdeps/unix/sysv/linux/gettimeofday.c +++ b/sysdeps/unix/sysv/linux/gettimeofday.c @@ -37,11 +37,11 @@ __gettimeofday_syscall (struct timeval *restrict tv, void *restrict tz) } # undef INIT_ARCH -# define INIT_ARCH() \ - void *vdso_gettimeofday = dl_vdso_vsym (HAVE_GETTIMEOFDAY_VSYSCALL) +# define INIT_ARCH() libc_ifunc (__gettimeofday, - vdso_gettimeofday ? VDSO_IFUNC_RET (vdso_gettimeofday) - : (void *) __gettimeofday_syscall) + GLRO(dl_vdso_gettimeofday) != NULL + ? VDSO_IFUNC_RET (GLRO(dl_vdso_gettimeofday)) + : (void *) __gettimeofday_syscall) # else int diff --git a/sysdeps/unix/sysv/linux/internal-signals.h b/sysdeps/unix/sysv/linux/internal-signals.h index a6fae59aaa..6e3a3d7692 100644 --- a/sysdeps/unix/sysv/linux/internal-signals.h +++ b/sysdeps/unix/sysv/linux/internal-signals.h @@ -90,6 +90,15 @@ internal_signal_restore_set (const internal_sigset_t *set) __NSIG_BYTES); } +static inline void +internal_signal_unblock_signal (int sig) +{ + internal_sigset_t set; + internal_sigemptyset (&set); + internal_sigaddset (&set, sig); + INTERNAL_SYSCALL_CALL (rt_sigprocmask, SIG_UNBLOCK, &set, NULL, + __NSIG_BYTES); +} /* It is used on timer_create code directly on sigwaitinfo call, so it can not use the internal_sigset_t definitions. */ diff --git a/sysdeps/unix/sysv/linux/internal-sigset.h b/sysdeps/unix/sysv/linux/internal-sigset.h index 5d7020b42d..4b19affd75 100644 --- a/sysdeps/unix/sysv/linux/internal-sigset.h +++ b/sysdeps/unix/sysv/linux/internal-sigset.h @@ -21,7 +21,7 @@ #include <sigsetops.h> -typedef struct +typedef struct _internal_sigset_t { unsigned long int __val[__NSIG_WORDS]; } internal_sigset_t; diff --git a/sysdeps/unix/sysv/linux/loongarch/bits/rseq.h b/sysdeps/unix/sysv/linux/loongarch/bits/rseq.h new file mode 100644 index 0000000000..7468a05243 --- /dev/null +++ b/sysdeps/unix/sysv/linux/loongarch/bits/rseq.h @@ -0,0 +1,36 @@ +/* Restartable Sequences Linux LoongArch architecture header. + Copyright (C) 2024 Free Software Foundation, Inc. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#ifndef _SYS_RSEQ_H +# error "Never use <bits/rseq.h> directly; include <sys/rseq.h> instead." +#endif + +/* RSEQ_SIG is a signature required before each abort handler code. + + It is a 32-bit value that maps to actual architecture code compiled + into applications and libraries. It needs to be defined for each + architecture. When choosing this value, it needs to be taken into + account that generating invalid instructions may have ill effects on + tools like objdump, and may also have impact on the CPU speculative + execution efficiency in some cases. + + RSEQ_SIG uses the following break instruction: + + 0x002a0010 break 0x10 +*/ + +#define RSEQ_SIG 0x002a0010 diff --git a/sysdeps/unix/sysv/linux/loongarch/sysdep.h b/sysdeps/unix/sysv/linux/loongarch/sysdep.h index eb0ba790da..e2d853ae3e 100644 --- a/sysdeps/unix/sysv/linux/loongarch/sysdep.h +++ b/sysdeps/unix/sysv/linux/loongarch/sysdep.h @@ -119,6 +119,7 @@ #define HAVE_CLOCK_GETTIME64_VSYSCALL "__vdso_clock_gettime" #define HAVE_GETTIMEOFDAY_VSYSCALL "__vdso_gettimeofday" #define HAVE_GETCPU_VSYSCALL "__vdso_getcpu" +#define HAVE_GETRANDOM_VSYSCALL "__vdso_getrandom" #define HAVE_CLONE3_WRAPPER 1 diff --git a/sysdeps/unix/sysv/linux/nios2/arch-syscall.h b/sysdeps/unix/sysv/linux/nios2/arch-syscall.h index f94e212995..b1d0fb6f6e 100644 --- a/sysdeps/unix/sysv/linux/nios2/arch-syscall.h +++ b/sysdeps/unix/sysv/linux/nios2/arch-syscall.h @@ -24,6 +24,7 @@ #define __NR_clock_settime 112 #define __NR_clock_settime64 404 #define __NR_clone 220 +#define __NR_clone3 435 #define __NR_close 57 #define __NR_close_range 436 #define __NR_connect 203 diff --git a/sysdeps/unix/sysv/linux/not-cancel.h b/sysdeps/unix/sysv/linux/not-cancel.h index 2a7585b73f..12f26912d3 100644 --- a/sysdeps/unix/sysv/linux/not-cancel.h +++ b/sysdeps/unix/sysv/linux/not-cancel.h @@ -27,6 +27,7 @@ #include <sys/syscall.h> #include <sys/wait.h> #include <time.h> +#include <sys/random.h> /* Non cancellable open syscall. */ __typeof (open) __open_nocancel; @@ -84,15 +85,17 @@ __writev_nocancel_nostatus (int fd, const struct iovec *iov, int iovcnt) } static inline ssize_t -__getrandom_nocancel (void *buf, size_t buflen, unsigned int flags) +__getrandom_nocancel_direct (void *buf, size_t buflen, unsigned int flags) { return INLINE_SYSCALL_CALL (getrandom, buf, buflen, flags); } +__typeof (getrandom) __getrandom_nocancel attribute_hidden; + /* Non cancellable getrandom syscall that does not also set errno in case of failure. */ static inline ssize_t -__getrandom_nocancel_nostatus (void *buf, size_t buflen, unsigned int flags) +__getrandom_nocancel_nostatus_direct (void *buf, size_t buflen, unsigned int flags) { return INTERNAL_SYSCALL_CALL (getrandom, buf, buflen, flags); } diff --git a/sysdeps/unix/sysv/linux/pkey_set.c b/sysdeps/unix/sysv/linux/pkey_set.c index 30463ef89b..9cb14af0ec 100644 --- a/sysdeps/unix/sysv/linux/pkey_set.c +++ b/sysdeps/unix/sysv/linux/pkey_set.c @@ -20,7 +20,7 @@ #include <sys/mman.h> int -__pkey_set (int key, unsigned int access_rights) +__pkey_set (int key, unsigned int access_restrictions) { __set_errno (ENOSYS); return -1; diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/clone.S b/sysdeps/unix/sysv/linux/powerpc/powerpc64/clone.S index 164311d2bd..e57cb6e82e 100644 --- a/sysdeps/unix/sysv/linux/powerpc/powerpc64/clone.S +++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/clone.S @@ -56,7 +56,6 @@ ENTRY (__clone) /* Save fn, args, stack across syscall. */ mr r30,r3 /* Function in r30. */ - mr r29,r5 /* Flags in r29. */ mr r31,r6 /* Argument in r31. */ /* 'flags' argument is first parameter to clone syscall. @@ -77,14 +76,12 @@ ENTRY (__clone) CHECK_SCV_SUPPORT r28 0f /* This is equivalent to DO_CALL_SCV, but we cannot use the macro here because it uses CFI directives and we just called cfi_endproc. */ - mflr r9 - std r9,FRAME_LR_SAVE(r1) + mflr r29 .machine "push" .machine "power9" scv 0 .machine "pop" - ld r9,FRAME_LR_SAVE(r1) - mtlr r9 + mtlr r29 /* Check for child process. */ /* When using scv, error is indicated by negative r3. */ diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/clone3.S b/sysdeps/unix/sysv/linux/powerpc/powerpc64/clone3.S index 900c354c9c..913f341d7d 100644 --- a/sysdeps/unix/sysv/linux/powerpc/powerpc64/clone3.S +++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/clone3.S @@ -39,14 +39,12 @@ because it uses CFI directives and we just called cfi_endproc. */ # define DO_CLONE3_SVC_CALL(jumpfalse) \ CHECK_SCV_SUPPORT r28 jumpfalse; \ - mflr r9; \ - std r9, FRAME_LR_SAVE(r1); \ + mflr r31; \ .machine "push"; \ .machine "power9"; \ scv 0; \ .machine "pop"; \ - ld r9, FRAME_LR_SAVE(r1); \ - mtlr r9; \ + mtlr r31; \ /* With scv an, an error is a value -4095 <= x < 0. */ \ cmpdi cr1, r3, 0; \ b 1f; @@ -66,13 +64,15 @@ ENTRY(__clone3) /* Save some regs in the "red zone". */ #ifdef USE_PPC_SCV - std r28, -24(r1) - cfi_offset (r28, -24) + std r28, -32(r1) + cfi_offset (r28, -32) #endif - std r29, -16(r1) - std r30, -8(r1) - cfi_offset (r29, -16) - cfi_offset (r30, -8) + std r29, -24(r1) + std r30, -16(r1) + std r31, -8(r1) + cfi_offset (r29, -24) + cfi_offset (r30, -16) + cfi_offset (r31, -8) /* Save func and arg across syscall. */ mr r30, r5 /* Function in r30. */ @@ -132,16 +132,19 @@ L(parent): /* Parent. Restore registers & return. */ #ifdef USE_PPC_SCV - cfi_offset (r28, -24) - ld r28, -24(r1) + cfi_offset (r28, -32) + ld r28, -32(r1) cfi_restore (r28) #endif - cfi_offset (r29,-16) - cfi_offset (r30,-8) - ld r29, -16(r1) - ld r30, -8(r1) + cfi_offset (r29,-24) + cfi_offset (r30,-16) + cfi_offset (r31,-8) + ld r29, -24(r1) + ld r30, -16(r1) + ld r31, -8(r1) cfi_restore (r29) cfi_restore (r30) + cfi_restore (r31) #ifdef USE_PPC_SCV beq cr1, 0f diff --git a/sysdeps/unix/sysv/linux/powerpc/sysdep.h b/sysdeps/unix/sysv/linux/powerpc/sysdep.h index a69b7db338..62b77a79f0 100644 --- a/sysdeps/unix/sysv/linux/powerpc/sysdep.h +++ b/sysdeps/unix/sysv/linux/powerpc/sysdep.h @@ -32,6 +32,18 @@ #undef SYS_ify #define SYS_ify(syscall_name) __NR_##syscall_name +#define tostring(s) #s +#define stringify(s) tostring(s) + +#ifdef _ARCH_PWR4 +/* Power4 and later cpus introduced a faster instruction to copy one + CR field, rather than the slower microcoded mfcr which copies all + CR fields. */ +# define MFCR0(REG) "mfocrf " stringify(REG) ",0x80" +#else +# define MFCR0(REG) "mfcr " stringify(REG) +#endif + /* Define a macro which expands inline into the wrapper code for a system call. This use is for internal calls that do not need to handle errors normally. It will never touch errno. This returns just what the kernel @@ -52,7 +64,7 @@ __asm__ __volatile__ \ ("mtctr %0\n\t" \ "bctrl\n\t" \ - "mfcr %0\n\t" \ + MFCR0(%0) "\n\t" \ "0:" \ : "+r" (r0), "+r" (r3), "+r" (r4), "+r" (r5), "+r" (r6), \ "+r" (r7), "+r" (r8) \ @@ -83,11 +95,10 @@ "scv 0\n\t" \ ".machine \"pop\"\n\t" \ "0:" \ - : "=&r" (r0), \ - "=&r" (r3), "=&r" (r4), "=&r" (r5), \ - "=&r" (r6), "=&r" (r7), "=&r" (r8) \ - : ASM_INPUT_##nr \ - : "r9", "r10", "r11", "r12", \ + : "+r" (r0), \ + "+r" (r3), "+r" (r4), "+r" (r5), \ + "+r" (r6), "+r" (r7), "+r" (r8) \ + : : "r9", "r10", "r11", "r12", \ "cr0", "cr1", "cr5", "cr6", "cr7", \ "xer", "lr", "ctr", "memory"); \ r3; \ @@ -97,13 +108,12 @@ ({ \ __asm__ __volatile__ \ ("sc\n\t" \ - "mfcr %0\n\t" \ + MFCR0(%0) "\n\t" \ "0:" \ - : "=&r" (r0), \ - "=&r" (r3), "=&r" (r4), "=&r" (r5), \ - "=&r" (r6), "=&r" (r7), "=&r" (r8) \ - : ASM_INPUT_##nr \ - : "r9", "r10", "r11", "r12", \ + : "+r" (r0), \ + "+r" (r3), "+r" (r4), "+r" (r5), \ + "+r" (r6), "+r" (r7), "+r" (r8) \ + : : "r9", "r10", "r11", "r12", \ "xer", "cr0", "ctr", "memory"); \ r0 & (1 << 28) ? -r3 : r3; \ }) @@ -199,14 +209,6 @@ __illegally_sized_syscall_arg6 (); \ r8 = _arg6 -#define ASM_INPUT_0 "0" (r0) -#define ASM_INPUT_1 ASM_INPUT_0, "1" (r3) -#define ASM_INPUT_2 ASM_INPUT_1, "2" (r4) -#define ASM_INPUT_3 ASM_INPUT_2, "3" (r5) -#define ASM_INPUT_4 ASM_INPUT_3, "4" (r6) -#define ASM_INPUT_5 ASM_INPUT_4, "5" (r7) -#define ASM_INPUT_6 ASM_INPUT_5, "6" (r8) - /* List of system calls which are supported as vsyscalls. */ #define VDSO_NAME "LINUX_2.6.15" #define VDSO_HASH 123718565 @@ -223,5 +225,6 @@ #define HAVE_TIME_VSYSCALL "__kernel_time" #define HAVE_GETTIMEOFDAY_VSYSCALL "__kernel_gettimeofday" #define HAVE_GET_TBFREQ "__kernel_get_tbfreq" +#define HAVE_GETRANDOM_VSYSCALL "__kernel_getrandom" #endif /* _LINUX_POWERPC_SYSDEP_H */ diff --git a/sysdeps/unix/sysv/linux/rseq-internal.h b/sysdeps/unix/sysv/linux/rseq-internal.h index 7ea935b4ad..37a8f630b6 100644 --- a/sysdeps/unix/sysv/linux/rseq-internal.h +++ b/sysdeps/unix/sysv/linux/rseq-internal.h @@ -51,11 +51,21 @@ rseq_register_current_thread (struct pthread *self, bool do_rseq) /* The initial implementation used only 20 bytes out of 32, but still expected size 32. */ size = RSEQ_AREA_SIZE_INITIAL; + + /* Initialize the rseq fields that are read by the kernel on + registration, there is no guarantee that struct pthread is + cleared on all architectures. */ + THREAD_SETMEM (self, rseq_area.cpu_id, RSEQ_CPU_ID_UNINITIALIZED); + THREAD_SETMEM (self, rseq_area.rseq_cs, 0); + THREAD_SETMEM (self, rseq_area.flags, 0); + int ret = INTERNAL_SYSCALL_CALL (rseq, &self->rseq_area, size, 0, RSEQ_SIG); if (!INTERNAL_SYSCALL_ERROR_P (ret)) return true; } + /* When rseq is disabled by tunables or the registration fails, inform + userspace by setting 'cpu_id' to RSEQ_CPU_ID_REGISTRATION_FAILED. */ THREAD_SETMEM (self, rseq_area.cpu_id, RSEQ_CPU_ID_REGISTRATION_FAILED); return false; } diff --git a/sysdeps/unix/sysv/linux/s390/sysdep.h b/sysdeps/unix/sysv/linux/s390/sysdep.h index 9b3000ca62..9698c57a03 100644 --- a/sysdeps/unix/sysv/linux/s390/sysdep.h +++ b/sysdeps/unix/sysv/linux/s390/sysdep.h @@ -72,6 +72,7 @@ #ifdef __s390x__ #define HAVE_CLOCK_GETRES64_VSYSCALL "__kernel_clock_getres" #define HAVE_CLOCK_GETTIME64_VSYSCALL "__kernel_clock_gettime" +#define HAVE_GETRANDOM_VSYSCALL "__kernel_getrandom" #else #define HAVE_CLOCK_GETRES_VSYSCALL "__kernel_clock_getres" #define HAVE_CLOCK_GETTIME_VSYSCALL "__kernel_clock_gettime" diff --git a/sysdeps/unix/sysv/linux/sparc/sparc32/clone.S b/sysdeps/unix/sysv/linux/sparc/sparc32/clone.S index c9cf9bb055..c84244f56b 100644 --- a/sysdeps/unix/sysv/linux/sparc/sparc32/clone.S +++ b/sysdeps/unix/sysv/linux/sparc/sparc32/clone.S @@ -29,7 +29,11 @@ ENTRY (__clone) save %sp,-96,%sp save %sp,-96,%sp +#ifdef __sparcv9 flushw +#else + ta 3 +#endif restore cfi_def_cfa_register(%fp) cfi_window_save diff --git a/sysdeps/unix/sysv/linux/sparc/sparc32/syscall_cancel.S b/sysdeps/unix/sysv/linux/sparc/sparc32/syscall_cancel.S index 0db93c77bf..45c6ae7a86 100644 --- a/sysdeps/unix/sysv/linux/sparc/sparc32/syscall_cancel.S +++ b/sysdeps/unix/sysv/linux/sparc/sparc32/syscall_cancel.S @@ -48,13 +48,13 @@ __syscall_cancel_arch_start: bne 2f #endif /* Issue a 6 argument syscall. */ - mov %i1, %g1 - mov %i2, %o0 + mov %i2, %o0 mov %i3, %o1 mov %i4, %o2 mov %i5, %o3 ld [%fp+92], %o4 ld [%fp+96], %o5 + mov %i1, %g1 ta 0x10 .globl __syscall_cancel_arch_end diff --git a/sysdeps/unix/sysv/linux/sparc/sparc32/sysdep.h b/sysdeps/unix/sysv/linux/sparc/sparc32/sysdep.h index d2d68f5312..c2ffbb5c8f 100644 --- a/sysdeps/unix/sysv/linux/sparc/sparc32/sysdep.h +++ b/sysdeps/unix/sysv/linux/sparc/sparc32/sysdep.h @@ -107,6 +107,7 @@ ENTRY(name); \ #else /* __ASSEMBLER__ */ #define __SYSCALL_STRING \ + "mov %[scn], %%g1;" \ "ta 0x10;" \ "bcc 1f;" \ " nop;" \ @@ -114,7 +115,7 @@ ENTRY(name); \ "1:" #define __SYSCALL_CLOBBERS \ - "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", \ + "g1", "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", \ "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15", \ "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", \ "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31", \ diff --git a/sysdeps/unix/sysv/linux/sparc/sparc64/syscall_cancel.S b/sysdeps/unix/sysv/linux/sparc/sparc64/syscall_cancel.S index 21b0728d5a..6c8d1330cb 100644 --- a/sysdeps/unix/sysv/linux/sparc/sparc64/syscall_cancel.S +++ b/sysdeps/unix/sysv/linux/sparc/sparc64/syscall_cancel.S @@ -46,13 +46,13 @@ __syscall_cancel_arch_start: andcc %g2, TCB_CANCELED_BITMASK, %g0 bne,pn %xcc, 2f /* Issue a 6 argument syscall. */ - mov %i1, %g1 - mov %i2, %o0 + mov %i2, %o0 mov %i3, %o1 mov %i4, %o2 mov %i5, %o3 ldx [%fp + STACK_BIAS + 176], %o4 ldx [%fp + STACK_BIAS + 184], %o5 + mov %i1, %g1 ta 0x6d .global __syscall_cancel_arch_end diff --git a/sysdeps/unix/sysv/linux/sparc/sparc64/sysdep.h b/sysdeps/unix/sysv/linux/sparc/sparc64/sysdep.h index 96047424e9..5598fab08a 100644 --- a/sysdeps/unix/sysv/linux/sparc/sparc64/sysdep.h +++ b/sysdeps/unix/sysv/linux/sparc/sparc64/sysdep.h @@ -106,6 +106,7 @@ ENTRY(name); \ #else /* __ASSEMBLER__ */ #define __SYSCALL_STRING \ + "mov %[scn], %%g1;" \ "ta 0x6d;" \ "bcc,pt %%xcc, 1f;" \ " nop;" \ @@ -113,7 +114,7 @@ ENTRY(name); \ "1:" #define __SYSCALL_CLOBBERS \ - "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", \ + "g1", "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", \ "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15", \ "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", \ "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31", \ diff --git a/sysdeps/unix/sysv/linux/sparc/sysdep.h b/sysdeps/unix/sysv/linux/sparc/sysdep.h index dcabb57fe2..c287740a8c 100644 --- a/sysdeps/unix/sysv/linux/sparc/sysdep.h +++ b/sysdeps/unix/sysv/linux/sparc/sysdep.h @@ -50,97 +50,109 @@ #undef INTERNAL_SYSCALL_NCS #define INTERNAL_SYSCALL_NCS(name, nr, args...) \ - internal_syscall##nr(__SYSCALL_STRING, name, args) + _internal_syscall##nr(__SYSCALL_STRING, "p", name, args) -#define internal_syscall0(string,name,dummy...) \ +#define _internal_syscall0(string,nc,name,dummy...) \ ({ \ - register long int __g1 __asm__ ("g1") = (name); \ register long __o0 __asm__ ("o0"); \ + long int _name = (long int) (name); \ __asm __volatile (string : "=r" (__o0) : \ - "r" (__g1) : \ + [scn] nc (_name) : \ __SYSCALL_CLOBBERS); \ __o0; \ }) +#define internal_syscall0(string,name,args...) \ + _internal_syscall0(string, "i", name, args) -#define internal_syscall1(string,name,arg1) \ +#define _internal_syscall1(string,nc,name,arg1) \ ({ \ long int _arg1 = (long int) (arg1); \ - register long int __g1 __asm__("g1") = (name); \ + long int _name = (long int) (name); \ register long int __o0 __asm__ ("o0") = _arg1; \ - __asm __volatile (string : "=r" (__o0) : \ - "r" (__g1), "0" (__o0) : \ + __asm __volatile (string : "+r" (__o0) : \ + [scn] nc (_name) : \ __SYSCALL_CLOBBERS); \ __o0; \ }) +#define internal_syscall1(string,name,args...) \ + _internal_syscall1(string, "i", name, args) -#define internal_syscall2(string,name,arg1,arg2) \ +#define _internal_syscall2(string,nc,name,arg1,arg2) \ ({ \ long int _arg1 = (long int) (arg1); \ long int _arg2 = (long int) (arg2); \ - register long int __g1 __asm__("g1") = (name); \ + long int _name = (long int) (name); \ register long int __o0 __asm__ ("o0") = _arg1; \ register long int __o1 __asm__ ("o1") = _arg2; \ - __asm __volatile (string : "=r" (__o0) : \ - "r" (__g1), "0" (__o0), "r" (__o1) : \ + __asm __volatile (string : "+r" (__o0) : \ + [scn] nc (_name), "r" (__o1) : \ __SYSCALL_CLOBBERS); \ __o0; \ }) +#define internal_syscall2(string,name,args...) \ + _internal_syscall2(string, "i", name, args) -#define internal_syscall3(string,name,arg1,arg2,arg3) \ +#define _internal_syscall3(string,nc,name,arg1,arg2,arg3) \ ({ \ long int _arg1 = (long int) (arg1); \ long int _arg2 = (long int) (arg2); \ long int _arg3 = (long int) (arg3); \ - register long int __g1 __asm__("g1") = (name); \ + long int _name = (long int) (name); \ register long int __o0 __asm__ ("o0") = _arg1; \ register long int __o1 __asm__ ("o1") = _arg2; \ register long int __o2 __asm__ ("o2") = _arg3; \ - __asm __volatile (string : "=r" (__o0) : \ - "r" (__g1), "0" (__o0), "r" (__o1), \ + __asm __volatile (string : "+r" (__o0) : \ + [scn] nc (_name), "r" (__o1), \ "r" (__o2) : \ __SYSCALL_CLOBBERS); \ __o0; \ }) +#define internal_syscall3(string,name,args...) \ + _internal_syscall3(string, "i", name, args) -#define internal_syscall4(string,name,arg1,arg2,arg3,arg4) \ +#define _internal_syscall4(string,nc,name,arg1,arg2,arg3,arg4) \ ({ \ long int _arg1 = (long int) (arg1); \ long int _arg2 = (long int) (arg2); \ long int _arg3 = (long int) (arg3); \ long int _arg4 = (long int) (arg4); \ - register long int __g1 __asm__("g1") = (name); \ + long int _name = (long int) (name); \ register long int __o0 __asm__ ("o0") = _arg1; \ register long int __o1 __asm__ ("o1") = _arg2; \ register long int __o2 __asm__ ("o2") = _arg3; \ register long int __o3 __asm__ ("o3") = _arg4; \ - __asm __volatile (string : "=r" (__o0) : \ - "r" (__g1), "0" (__o0), "r" (__o1), \ + __asm __volatile (string : "+r" (__o0) : \ + [scn] nc (_name), "r" (__o1), \ "r" (__o2), "r" (__o3) : \ __SYSCALL_CLOBBERS); \ __o0; \ }) +#define internal_syscall4(string,name,args...) \ + _internal_syscall4(string, "i", name, args) -#define internal_syscall5(string,name,arg1,arg2,arg3,arg4,arg5) \ +#define _internal_syscall5(string,nc,name,arg1,arg2,arg3,arg4,arg5) \ ({ \ long int _arg1 = (long int) (arg1); \ long int _arg2 = (long int) (arg2); \ long int _arg3 = (long int) (arg3); \ long int _arg4 = (long int) (arg4); \ long int _arg5 = (long int) (arg5); \ - register long int __g1 __asm__("g1") = (name); \ + long int _name = (long int) (name); \ register long int __o0 __asm__ ("o0") = _arg1; \ register long int __o1 __asm__ ("o1") = _arg2; \ register long int __o2 __asm__ ("o2") = _arg3; \ register long int __o3 __asm__ ("o3") = _arg4; \ register long int __o4 __asm__ ("o4") = _arg5; \ - __asm __volatile (string : "=r" (__o0) : \ - "r" (__g1), "0" (__o0), "r" (__o1), \ + __asm __volatile (string : "+r" (__o0) : \ + [scn] nc (_name), "r" (__o1), \ "r" (__o2), "r" (__o3), "r" (__o4) : \ __SYSCALL_CLOBBERS); \ __o0; \ }) +#define internal_syscall5(string,name,args...) \ + _internal_syscall5(string, "i", name, args) -#define internal_syscall6(string,name,arg1,arg2,arg3,arg4,arg5,arg6) \ +#define _internal_syscall6(string,nc,name,arg1,arg2,arg3,arg4,arg5,arg6)\ ({ \ long int _arg1 = (long int) (arg1); \ long int _arg2 = (long int) (arg2); \ @@ -148,20 +160,22 @@ long int _arg4 = (long int) (arg4); \ long int _arg5 = (long int) (arg5); \ long int _arg6 = (long int) (arg6); \ - register long int __g1 __asm__("g1") = (name); \ + long int _name = (long int) (name); \ register long int __o0 __asm__ ("o0") = _arg1; \ register long int __o1 __asm__ ("o1") = _arg2; \ register long int __o2 __asm__ ("o2") = _arg3; \ register long int __o3 __asm__ ("o3") = _arg4; \ register long int __o4 __asm__ ("o4") = _arg5; \ register long int __o5 __asm__ ("o5") = _arg6; \ - __asm __volatile (string : "=r" (__o0) : \ - "r" (__g1), "0" (__o0), "r" (__o1), \ + __asm __volatile (string : "+r" (__o0) : \ + [scn] nc (_name), "r" (__o1), \ "r" (__o2), "r" (__o3), "r" (__o4), \ "r" (__o5) : \ __SYSCALL_CLOBBERS); \ __o0; \ }) +#define internal_syscall6(string,name,args...) \ + _internal_syscall6(string, "i", name, args) #define INLINE_CLONE_SYSCALL(arg1,arg2,arg3,arg4,arg5) \ ({ \ @@ -170,15 +184,15 @@ long int _arg3 = (long int) (arg3); \ long int _arg4 = (long int) (arg4); \ long int _arg5 = (long int) (arg5); \ + long int _name = __NR_clone; \ register long int __o0 __asm__ ("o0") = _arg1; \ register long int __o1 __asm__ ("o1") = _arg2; \ register long int __o2 __asm__ ("o2") = _arg3; \ register long int __o3 __asm__ ("o3") = _arg4; \ register long int __o4 __asm__ ("o4") = _arg5; \ - register long int __g1 __asm__ ("g1") = __NR_clone; \ __asm __volatile (__SYSCALL_STRING : \ "=r" (__o0), "=r" (__o1) : \ - "r" (__g1), "0" (__o0), "1" (__o1), \ + [scn] "i" (_name), "0" (__o0), "1" (__o1), \ "r" (__o2), "r" (__o3), "r" (__o4) : \ __SYSCALL_CLOBBERS); \ if (__glibc_unlikely ((unsigned long int) (__o0) > -4096UL)) \ diff --git a/sysdeps/unix/sysv/linux/spawni.c b/sysdeps/unix/sysv/linux/spawni.c index f57e92815e..1edf62c6d7 100644 --- a/sysdeps/unix/sysv/linux/spawni.c +++ b/sysdeps/unix/sysv/linux/spawni.c @@ -383,7 +383,11 @@ __spawnix (int *pid, const char *file, args.pidfd = 0; args.xflags = xflags; - internal_signal_block_all (&args.oldmask); + /* Avoid the potential issues if caller sets a SIG_IGN for SIGABRT, calls + abort, and another thread issues posix_spawn just after the sigaction + returns. With default options (not setting POSIX_SPAWN_SETSIGDEF), the + process can still see SIG_DFL for SIGABRT, where it should be SIG_IGN. */ + __abort_lock_rdlock (&args.oldmask); /* The clone flags used will create a new child that will run in the same memory space (CLONE_VM) and the execution of calling thread will be @@ -474,7 +478,7 @@ __spawnix (int *pid, const char *file, if ((ec == 0) && (pid != NULL)) *pid = use_pidfd ? args.pidfd : new_pid; - internal_signal_restore_set (&args.oldmask); + __abort_lock_unlock (&args.oldmask); __pthread_setcancelstate (state, NULL); diff --git a/sysdeps/unix/sysv/linux/sys/pidfd.h b/sysdeps/unix/sysv/linux/sys/pidfd.h index 9f88d297e8..85d976939b 100644 --- a/sysdeps/unix/sysv/linux/sys/pidfd.h +++ b/sysdeps/unix/sysv/linux/sys/pidfd.h @@ -20,6 +20,7 @@ #include <fcntl.h> #include <bits/types/siginfo_t.h> +#include <sys/ioctl.h> #define PIDFD_NONBLOCK O_NONBLOCK #define PIDFD_THREAD O_EXCL @@ -28,6 +29,19 @@ #define PIDFD_SIGNAL_THREAD_GROUP (1UL << 1) #define PIDFD_SIGNAL_PROCESS_GROUP (1UL << 2) +#define PIDFS_IOCTL_MAGIC 0xFF + +#define PIDFD_GET_CGROUP_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 1) +#define PIDFD_GET_IPC_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 2) +#define PIDFD_GET_MNT_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 3) +#define PIDFD_GET_NET_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 4) +#define PIDFD_GET_PID_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 5) +#define PIDFD_GET_PID_FOR_CHILDREN_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 6) +#define PIDFD_GET_TIME_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 7) +#define PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 8) +#define PIDFD_GET_USER_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 9) +#define PIDFD_GET_UTS_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 10) + /* Returns a file descriptor that refers to the process PID. The close-on-exec is set on the file descriptor. */ extern int pidfd_open (__pid_t __pid, unsigned int __flags) __THROW; diff --git a/sysdeps/unix/sysv/linux/syscall-names.list b/sysdeps/unix/sysv/linux/syscall-names.list index 7871f93b94..aa5b479e2a 100644 --- a/sysdeps/unix/sysv/linux/syscall-names.list +++ b/sysdeps/unix/sysv/linux/syscall-names.list @@ -21,8 +21,8 @@ # This file can list all potential system calls. The names are only # used if the installed kernel headers also provide them. -# The list of system calls is current as of Linux 6.10. -kernel 6.10 +# The list of system calls is current as of Linux 6.11. +kernel 6.11 FAST_atomic_update FAST_cmpxchg @@ -653,6 +653,7 @@ uname unlink unlinkat unshare +uretprobe uselib userfaultfd usr26 diff --git a/sysdeps/unix/sysv/linux/time.c b/sysdeps/unix/sysv/linux/time.c index f8b0cee41c..a56ef6fa30 100644 --- a/sysdeps/unix/sysv/linux/time.c +++ b/sysdeps/unix/sysv/linux/time.c @@ -33,11 +33,10 @@ time_syscall (time_t *t) } # undef INIT_ARCH -# define INIT_ARCH() \ - void *vdso_time = dl_vdso_vsym (HAVE_TIME_VSYSCALL); +# define INIT_ARCH() libc_ifunc (time, - vdso_time ? VDSO_IFUNC_RET (vdso_time) - : (void *) time_syscall); + GLRO(dl_vdso_time) != NULL ? VDSO_IFUNC_RET (GLRO(dl_vdso_time)) + : (void *) time_syscall); # else time_t diff --git a/sysdeps/unix/sysv/linux/tst-mman-consts.py b/sysdeps/unix/sysv/linux/tst-mman-consts.py index a1137eb1d5..2191bea36f 100644 --- a/sysdeps/unix/sysv/linux/tst-mman-consts.py +++ b/sysdeps/unix/sysv/linux/tst-mman-consts.py @@ -33,7 +33,7 @@ def main(): help='C compiler (including options) to use') args = parser.parse_args() linux_version_headers = glibcsyscalls.linux_kernel_version(args.cc) - linux_version_glibc = (6, 10) + linux_version_glibc = (6, 11) sys.exit(glibcextract.compare_macro_consts( '#define _GNU_SOURCE 1\n' '#include <sys/mman.h>\n', diff --git a/sysdeps/unix/sysv/linux/tst-mount-consts.py b/sysdeps/unix/sysv/linux/tst-mount-consts.py index 675f1790b6..b71d8a489d 100755 --- a/sysdeps/unix/sysv/linux/tst-mount-consts.py +++ b/sysdeps/unix/sysv/linux/tst-mount-consts.py @@ -42,7 +42,7 @@ def main(): # Constants in glibc were updated to match Linux v6.10. When glibc # constants are updated this value should be updated to match the # released kernel version from which the constants were taken. - linux_version_glibc = (6, 10) + linux_version_glibc = (6, 11) def check(cte, exclude=None): return glibcextract.compare_macro_consts( '#include <sys/mount.h>\n', diff --git a/sysdeps/unix/sysv/linux/tst-pidfd-consts.py b/sysdeps/unix/sysv/linux/tst-pidfd-consts.py index 9824fd214d..9d53102cbd 100644 --- a/sysdeps/unix/sysv/linux/tst-pidfd-consts.py +++ b/sysdeps/unix/sysv/linux/tst-pidfd-consts.py @@ -39,7 +39,7 @@ def main(): sys.exit (77) linux_version_headers = glibcsyscalls.linux_kernel_version(args.cc) - linux_version_glibc = (6, 10) + linux_version_glibc = (6, 11) sys.exit(glibcextract.compare_macro_consts( '#include <sys/pidfd.h>\n', '#include <asm/fcntl.h>\n' diff --git a/sysdeps/unix/sysv/linux/tst-sched-consts.py b/sysdeps/unix/sysv/linux/tst-sched-consts.py index 70071dcd97..f06ac400bd 100644 --- a/sysdeps/unix/sysv/linux/tst-sched-consts.py +++ b/sysdeps/unix/sysv/linux/tst-sched-consts.py @@ -33,7 +33,7 @@ def main(): help='C compiler (including options) to use') args = parser.parse_args() linux_version_headers = glibcsyscalls.linux_kernel_version(args.cc) - linux_version_glibc = (6, 10) + linux_version_glibc = (6, 11) sys.exit(glibcextract.compare_macro_consts( '#define _GNU_SOURCE 1\n' '#include <sched.h>\n', diff --git a/sysdeps/unix/sysv/linux/tst-syscall-restart.c b/sysdeps/unix/sysv/linux/tst-syscall-restart.c new file mode 100644 index 0000000000..0ee7dc8517 --- /dev/null +++ b/sysdeps/unix/sysv/linux/tst-syscall-restart.c @@ -0,0 +1,112 @@ +/* Test if a syscall is correctly restarted. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include <support/xsignal.h> +#include <support/check.h> +#include <support/process_state.h> +#include <support/xunistd.h> +#include <support/xthread.h> +#include <sys/wait.h> + +static int +check_pid (pid_t pid) +{ + /* Wait until the child has called pause and it blocking on kernel. */ + support_process_state_wait (pid, support_process_state_sleeping); + + TEST_COMPARE (kill (pid, SIGSTOP), 0); + + /* Adding process_state_tracing_stop ('t') allows the test to work under + trace programs such as ptrace. */ + support_process_state_wait (pid, support_process_state_stopped + | support_process_state_tracing_stop); + + TEST_COMPARE (kill (pid, SIGCONT), 0); + + enum support_process_state state + = support_process_state_wait (pid, support_process_state_sleeping + | support_process_state_zombie); + + TEST_COMPARE (state, support_process_state_sleeping); + + TEST_COMPARE (kill (pid, SIGTERM), 0); + + siginfo_t info; + TEST_COMPARE (waitid (P_PID, pid, &info, WEXITED), 0); + TEST_COMPARE (info.si_signo, SIGCHLD); + TEST_COMPARE (info.si_code, CLD_KILLED); + TEST_COMPARE (info.si_status, SIGTERM); + TEST_COMPARE (info.si_pid, pid); + + return 0; +} + +static void * +tf (void *closure) +{ + pause (); + return NULL; +} + +static void +child_mt (void) +{ + /* Let only the created thread to handle signals. */ + sigset_t set; + sigfillset (&set); + xpthread_sigmask (SIG_BLOCK, &set, NULL); + + sigdelset (&set, SIGSTOP); + sigdelset (&set, SIGCONT); + sigdelset (&set, SIGTERM); + + pthread_attr_t attr; + xpthread_attr_init (&attr); + TEST_COMPARE (pthread_attr_setsigmask_np (&attr, &set), 0); + + xpthread_join (xpthread_create (&attr, tf, NULL)); +} + +static void +do_test_syscall (bool multithread) +{ + pid_t pid = xfork (); + if (pid == 0) + { + if (multithread) + child_mt (); + else + pause (); + _exit (127); + } + + check_pid (pid); +} + +static int +do_test (void) +{ + /* Check for both single and multi thread, since they use different syscall + mechanisms. */ + do_test_syscall (false); + do_test_syscall (true); + + return 0; +} + +#include <support/test-driver.c> diff --git a/sysdeps/unix/sysv/linux/x86_64/64/arch-syscall.h b/sysdeps/unix/sysv/linux/x86_64/64/arch-syscall.h index 5d86e75dd5..dfc10d0c7e 100644 --- a/sysdeps/unix/sysv/linux/x86_64/64/arch-syscall.h +++ b/sysdeps/unix/sysv/linux/x86_64/64/arch-syscall.h @@ -359,6 +359,7 @@ #define __NR_unlink 87 #define __NR_unlinkat 263 #define __NR_unshare 272 +#define __NR_uretprobe 335 #define __NR_uselib 134 #define __NR_userfaultfd 323 #define __NR_ustat 136 diff --git a/sysdeps/unix/sysv/linux/x86_64/sysdep.h b/sysdeps/unix/sysv/linux/x86_64/sysdep.h index a2b021bd86..7dc072ae2d 100644 --- a/sysdeps/unix/sysv/linux/x86_64/sysdep.h +++ b/sysdeps/unix/sysv/linux/x86_64/sysdep.h @@ -376,6 +376,7 @@ # define HAVE_TIME_VSYSCALL "__vdso_time" # define HAVE_GETCPU_VSYSCALL "__vdso_getcpu" # define HAVE_CLOCK_GETRES64_VSYSCALL "__vdso_clock_getres" +# define HAVE_GETRANDOM_VSYSCALL "__vdso_getrandom" # define HAVE_CLONE3_WRAPPER 1 diff --git a/sysdeps/unix/sysv/linux/x86_64/x32/arch-syscall.h b/sysdeps/unix/sysv/linux/x86_64/x32/arch-syscall.h index 6c35068d8e..9340daa967 100644 --- a/sysdeps/unix/sysv/linux/x86_64/x32/arch-syscall.h +++ b/sysdeps/unix/sysv/linux/x86_64/x32/arch-syscall.h @@ -351,6 +351,7 @@ #define __NR_unlink 1073741911 #define __NR_unlinkat 1073742087 #define __NR_unshare 1073742096 +#define __NR_uretprobe 1073742159 #define __NR_userfaultfd 1073742147 #define __NR_ustat 1073741960 #define __NR_utime 1073741956 |