diff options
Diffstat (limited to 'sysdeps/unix/sysv')
60 files changed, 823 insertions, 343 deletions
diff --git a/sysdeps/unix/sysv/linux/Makefile b/sysdeps/unix/sysv/linux/Makefile index 7df51a325c..527c7a5ae8 100644 --- a/sysdeps/unix/sysv/linux/Makefile +++ b/sysdeps/unix/sysv/linux/Makefile @@ -230,6 +230,7 @@ tests += \ tst-scm_rights \ tst-sigtimedwait \ tst-sync_file_range \ + tst-syscall-restart \ tst-sysconf-iov_max \ tst-sysvmsg-linux \ tst-sysvsem-linux \ diff --git a/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist b/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist index b685106954..98687cae0d 100644 --- a/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist +++ b/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist @@ -128,3 +128,8 @@ GLIBC_2.40 _ZGVsMxvv_hypot F GLIBC_2.40 _ZGVsMxvv_hypotf F GLIBC_2.40 _ZGVsMxvv_pow F GLIBC_2.40 _ZGVsMxvv_powf F +GLIBC_2.41 _ZGVnN2v_logp1 F +GLIBC_2.41 _ZGVnN2v_logp1f F +GLIBC_2.41 _ZGVnN4v_logp1f F +GLIBC_2.41 _ZGVsMxv_logp1 F +GLIBC_2.41 _ZGVsMxv_logp1f F diff --git a/sysdeps/unix/sysv/linux/aarch64/sysdep.h b/sysdeps/unix/sysv/linux/aarch64/sysdep.h index bbbe35723c..974b503b2f 100644 --- a/sysdeps/unix/sysv/linux/aarch64/sysdep.h +++ b/sysdeps/unix/sysv/linux/aarch64/sysdep.h @@ -164,6 +164,7 @@ # define HAVE_CLOCK_GETRES64_VSYSCALL "__kernel_clock_getres" # define HAVE_CLOCK_GETTIME64_VSYSCALL "__kernel_clock_gettime" # define HAVE_GETTIMEOFDAY_VSYSCALL "__kernel_gettimeofday" +# define HAVE_GETRANDOM_VSYSCALL "__kernel_getrandom" # define HAVE_CLONE3_WRAPPER 1 diff --git a/sysdeps/unix/sysv/linux/arc/Makefile b/sysdeps/unix/sysv/linux/arc/Makefile index eca9a9fd3a..dd3da52c39 100644 --- a/sysdeps/unix/sysv/linux/arc/Makefile +++ b/sysdeps/unix/sysv/linux/arc/Makefile @@ -11,12 +11,3 @@ ifeq ($(subdir),misc) sysdep_headers += sys/cachectl.h sysdep_routines += cacheflush endif - -abi-variants := arcle arcbe - -ifeq (,$(filter $(default-abi),$(abi-variants))) -$(error Unknown ABI $(default-abi), must be one of $(abi-variants)) -endif - -abi-arcle-condition := !defined __BIG_ENDIAN__ -abi-arcbe-condition := defined 
__BIG_ENDIAN__ diff --git a/sysdeps/unix/sysv/linux/arc/shlib-versions b/sysdeps/unix/sysv/linux/arc/shlib-versions index 343c0a0450..62c1bd57b3 100644 --- a/sysdeps/unix/sysv/linux/arc/shlib-versions +++ b/sysdeps/unix/sysv/linux/arc/shlib-versions @@ -1,7 +1,3 @@ DEFAULT GLIBC_2.32 -%ifdef HAVE_ARC_BE -ld=ld-linux-arceb.so.2 -%else ld=ld-linux-arc.so.2 -%endif diff --git a/sysdeps/unix/sysv/linux/arm/bits/hwcap.h b/sysdeps/unix/sysv/linux/arm/bits/hwcap.h index b104de473c..7b573facb9 100644 --- a/sysdeps/unix/sysv/linux/arm/bits/hwcap.h +++ b/sysdeps/unix/sysv/linux/arm/bits/hwcap.h @@ -49,3 +49,11 @@ #define HWCAP_ASIMDFHM 33554432 #define HWCAP_ASIMDBF16 67108864 #define HWCAP_I8MM 134217728 + +#define HWCAP2_AES 1 +#define HWCAP2_PMULL 2 +#define HWCAP2_SHA1 4 +#define HWCAP2_SHA2 8 +#define HWCAP2_CRC32 16 +#define HWCAP2_SB 32 +#define HWCAP2_SSBS 64 diff --git a/sysdeps/unix/sysv/linux/bits/mman-linux.h b/sysdeps/unix/sysv/linux/bits/mman-linux.h index 522333c50a..161a885096 100644 --- a/sysdeps/unix/sysv/linux/bits/mman-linux.h +++ b/sysdeps/unix/sysv/linux/bits/mman-linux.h @@ -43,6 +43,7 @@ #define MAP_PRIVATE 0x02 /* Changes are private. */ #define MAP_SHARED_VALIDATE 0x03 /* Share changes and validate extension flags. */ +#define MAP_DROPPABLE 0x08 /* Zero memory under memory pressure. */ #define MAP_TYPE 0x0f /* Mask for type of mapping. */ /* Other flags. */ diff --git a/sysdeps/unix/sysv/linux/bits/mman-shared.h b/sysdeps/unix/sysv/linux/bits/mman-shared.h index d8ed4436b6..7a9874204d 100644 --- a/sysdeps/unix/sysv/linux/bits/mman-shared.h +++ b/sysdeps/unix/sysv/linux/bits/mman-shared.h @@ -42,7 +42,7 @@ # define MLOCK_ONFAULT 1U # endif -/* Access rights for pkey_alloc. */ +/* Access restrictions for pkey_alloc. 
*/ # ifndef PKEY_DISABLE_ACCESS # define PKEY_DISABLE_ACCESS 0x1 # define PKEY_DISABLE_WRITE 0x2 @@ -59,16 +59,16 @@ int memfd_create (const char *__name, unsigned int __flags) __THROW; int mlock2 (const void *__addr, size_t __length, unsigned int __flags) __THROW; /* Allocate a new protection key, with the PKEY_DISABLE_* bits - specified in ACCESS_RIGHTS. The protection key mask for the + specified in ACCESS_RESTRICTIONS. The protection key mask for the current thread is updated to match the access privilege for the new key. */ -int pkey_alloc (unsigned int __flags, unsigned int __access_rights) __THROW; +int pkey_alloc (unsigned int __flags, unsigned int __access_restrictions) __THROW; -/* Update the access rights for the current thread for KEY, which must +/* Update the access restrictions for the current thread for KEY, which must have been allocated using pkey_alloc. */ -int pkey_set (int __key, unsigned int __access_rights) __THROW; +int pkey_set (int __key, unsigned int __access_restrictions) __THROW; -/* Return the access rights for the current thread for KEY, which must +/* Return the access restrictions for the current thread for KEY, which must have been allocated using pkey_alloc. */ int pkey_get (int __key) __THROW; diff --git a/sysdeps/unix/sysv/linux/bits/sched.h b/sysdeps/unix/sysv/linux/bits/sched.h index a02cb69de7..f03f5f5ab3 100644 --- a/sysdeps/unix/sysv/linux/bits/sched.h +++ b/sysdeps/unix/sysv/linux/bits/sched.h @@ -29,7 +29,7 @@ #define SCHED_FIFO 1 #define SCHED_RR 2 #ifdef __USE_GNU -# define SCHED_NORMAL SCHED_OTHER +# define SCHED_NORMAL 0 # define SCHED_BATCH 3 # define SCHED_ISO 4 # define SCHED_IDLE 5 @@ -48,8 +48,10 @@ #define SCHED_FLAG_UTIL_CLAMP_MAX 0x40 /* Combinations of sched_flags fields. 
*/ -#define SCHED_FLAG_KEEP_ALL 0x18 -#define SCHED_FLAG_UTIL_CLAMP 0x60 +#define SCHED_FLAG_KEEP_ALL \ + (SCHED_FLAG_KEEP_POLICY | SCHED_FLAG_KEEP_PARAMS) +#define SCHED_FLAG_UTIL_CLAMP \ + (SCHED_FLAG_UTIL_CLAMP_MIN | SCHED_FLAG_UTIL_CLAMP_MAX) /* Use "" to work around incorrect macro expansion of the __has_include argument (GCC PR 80005). */ diff --git a/sysdeps/unix/sysv/linux/bits/uio-ext.h b/sysdeps/unix/sysv/linux/bits/uio-ext.h index ead7a09156..85ed21bac5 100644 --- a/sysdeps/unix/sysv/linux/bits/uio-ext.h +++ b/sysdeps/unix/sysv/linux/bits/uio-ext.h @@ -48,6 +48,8 @@ extern ssize_t process_vm_writev (pid_t __pid, const struct iovec *__lvec, #define RWF_NOWAIT 0x00000008 /* per-IO nonblocking mode. */ #define RWF_APPEND 0x00000010 /* per-IO O_APPEND. */ #define RWF_NOAPPEND 0x00000020 /* per-IO negation of O_APPEND */ +#define RWF_ATOMIC 0x00000040 /* Write is to be issued with torn-write + prevention. */ __END_DECLS diff --git a/sysdeps/unix/sysv/linux/configure b/sysdeps/unix/sysv/linux/configure index 4d642e953b..8cda8e9451 100644..100755 --- a/sysdeps/unix/sysv/linux/configure +++ b/sysdeps/unix/sysv/linux/configure @@ -1,3 +1,4 @@ +ac_default_prefix=/usr # This file is generated from configure.ac by Autoconf. DO NOT EDIT! # Local configure fragment for sysdeps/unix/sysv/linux. 
@@ -116,7 +117,9 @@ if test -n "$sysheaders"; then CPPFLAGS=$OLD_CPPFLAGS fi -if test "$prefix" = "/usr/local" -o "$prefix" = "/usr/local/" -o "$prefix" = "NONE"; then + + +if test "$prefix" = "/usr/local" -o "$prefix" = "/usr/local/"; then if test $enable_sanity = yes; then echo "\ *** On GNU/Linux systems the GNU C Library should not be installed into diff --git a/sysdeps/unix/sysv/linux/configure.ac b/sysdeps/unix/sysv/linux/configure.ac index c177e082a9..4c43cda877 100644 --- a/sysdeps/unix/sysv/linux/configure.ac +++ b/sysdeps/unix/sysv/linux/configure.ac @@ -71,7 +71,9 @@ if test -n "$sysheaders"; then CPPFLAGS=$OLD_CPPFLAGS fi -if test "$prefix" = "/usr/local" -o "$prefix" = "/usr/local/" -o "$prefix" = "NONE"; then +AC_PREFIX_DEFAULT([/usr]) + +if test "$prefix" = "/usr/local" -o "$prefix" = "/usr/local/"; then if test $enable_sanity = yes; then echo "\ *** On GNU/Linux systems the GNU C Library should not be installed into diff --git a/sysdeps/unix/sysv/linux/dl-vdso-setup.c b/sysdeps/unix/sysv/linux/dl-vdso-setup.c index 3a44944dbb..476c6db75a 100644 --- a/sysdeps/unix/sysv/linux/dl-vdso-setup.c +++ b/sysdeps/unix/sysv/linux/dl-vdso-setup.c @@ -66,6 +66,11 @@ PROCINFO_CLASS int (*_dl_vdso_clock_getres) (clockid_t, PROCINFO_CLASS int (*_dl_vdso_clock_getres_time64) (clockid_t, struct __timespec64 *) RELRO; # endif +# ifdef HAVE_GETRANDOM_VSYSCALL +PROCINFO_CLASS ssize_t (*_dl_vdso_getrandom) (void *buffer, size_t len, + unsigned int flags, void *state, + size_t state_len) RELRO; +# endif /* PowerPC specific ones. 
*/ # ifdef HAVE_GET_TBFREQ diff --git a/sysdeps/unix/sysv/linux/dl-vdso-setup.h b/sysdeps/unix/sysv/linux/dl-vdso-setup.h index 8aee5a8212..cde99f608c 100644 --- a/sysdeps/unix/sysv/linux/dl-vdso-setup.h +++ b/sysdeps/unix/sysv/linux/dl-vdso-setup.h @@ -50,6 +50,9 @@ setup_vdso_pointers (void) #ifdef HAVE_RISCV_HWPROBE GLRO(dl_vdso_riscv_hwprobe) = dl_vdso_vsym (HAVE_RISCV_HWPROBE); #endif +#ifdef HAVE_GETRANDOM_VSYSCALL + GLRO(dl_vdso_getrandom) = dl_vdso_vsym (HAVE_GETRANDOM_VSYSCALL); +#endif } #endif diff --git a/sysdeps/unix/sysv/linux/getdents64.c b/sysdeps/unix/sysv/linux/getdents64.c index 227fbf21ae..795bd935f0 100644 --- a/sysdeps/unix/sysv/linux/getdents64.c +++ b/sysdeps/unix/sysv/linux/getdents64.c @@ -33,100 +33,3 @@ __getdents64 (int fd, void *buf, size_t nbytes) } libc_hidden_def (__getdents64) weak_alias (__getdents64, getdents64) - -#if _DIRENT_MATCHES_DIRENT64 -strong_alias (__getdents64, __getdents) -#else -# include <shlib-compat.h> - -# if SHLIB_COMPAT(libc, GLIBC_2_1, GLIBC_2_2) -# include <olddirent.h> -# include <unistd.h> - -static ssize_t -handle_overflow (int fd, __off64_t offset, ssize_t count) -{ - /* If this is the first entry in the buffer, we can report the - error. */ - if (offset == 0) - { - __set_errno (EOVERFLOW); - return -1; - } - - /* Otherwise, seek to the overflowing entry, so that the next call - will report the error, and return the data read so far. */ - if (__lseek64 (fd, offset, SEEK_SET) != 0) - return -1; - return count; -} - -ssize_t -__old_getdents64 (int fd, char *buf, size_t nbytes) -{ - /* We do not move the individual directory entries. This is only - possible if the target type (struct __old_dirent64) is smaller - than the source type. 
*/ - _Static_assert (offsetof (struct __old_dirent64, d_name) - <= offsetof (struct dirent64, d_name), - "__old_dirent64 is larger than dirent64"); - _Static_assert (__alignof__ (struct __old_dirent64) - <= __alignof__ (struct dirent64), - "alignment of __old_dirent64 is larger than dirent64"); - - ssize_t retval = INLINE_SYSCALL_CALL (getdents64, fd, buf, nbytes); - if (retval > 0) - { - /* This is the marker for the first entry. Offset 0 is reserved - for the first entry (see rewinddir). Here, we use it as a - marker for the first entry in the buffer. We never actually - seek to offset 0 because handle_overflow reports the error - directly, so it does not matter that the offset is incorrect - if entries have been read from the descriptor before (so that - the descriptor is not actually at offset 0). */ - __off64_t previous_offset = 0; - - char *p = buf; - char *end = buf + retval; - while (p < end) - { - struct dirent64 *source = (struct dirent64 *) p; - - /* Copy out the fixed-size data. */ - __ino_t ino = source->d_ino; - __off64_t offset = source->d_off; - unsigned int reclen = source->d_reclen; - unsigned char type = source->d_type; - - /* Check for ino_t overflow. */ - if (__glibc_unlikely (ino != source->d_ino)) - return handle_overflow (fd, previous_offset, p - buf); - - /* Convert to the target layout. Use a separate struct and - memcpy to side-step aliasing issues. */ - struct __old_dirent64 result; - result.d_ino = ino; - result.d_off = offset; - result.d_reclen = reclen; - result.d_type = type; - - /* Write the fixed-sized part of the result to the - buffer. */ - size_t result_name_offset = offsetof (struct __old_dirent64, d_name); - memcpy (p, &result, result_name_offset); - - /* Adjust the position of the name if necessary. Copy - everything until the end of the record, including the - terminating NUL byte. 
*/ - if (result_name_offset != offsetof (struct dirent64, d_name)) - memmove (p + result_name_offset, source->d_name, - reclen - offsetof (struct dirent64, d_name)); - - p += reclen; - previous_offset = offset; - } - } - return retval; -} -# endif /* SHLIB_COMPAT(libc, GLIBC_2_1, GLIBC_2_2) */ -#endif /* _DIRENT_MATCHES_DIRENT64 */ diff --git a/sysdeps/unix/sysv/linux/getrandom-internal.h b/sysdeps/unix/sysv/linux/getrandom-internal.h new file mode 100644 index 0000000000..37e6c9bc15 --- /dev/null +++ b/sysdeps/unix/sysv/linux/getrandom-internal.h @@ -0,0 +1,29 @@ +/* Internal definitions for Linux getrandom implementation. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. 
*/ + +#ifndef _GETRANDOM_INTERNAL_H +#define _GETRANDOM_INTERNAL_H + +#include <pthreadP.h> + +extern void __getrandom_early_init (_Bool) attribute_hidden; + +extern void __getrandom_fork_subprocess (void) attribute_hidden; +extern void __getrandom_vdso_release (struct pthread *curp) attribute_hidden; +extern void __getrandom_reset_state (struct pthread *curp) attribute_hidden; +#endif diff --git a/sysdeps/unix/sysv/linux/getrandom.c b/sysdeps/unix/sysv/linux/getrandom.c index 777d1decf0..c8c578263d 100644 --- a/sysdeps/unix/sysv/linux/getrandom.c +++ b/sysdeps/unix/sysv/linux/getrandom.c @@ -21,12 +21,314 @@ #include <unistd.h> #include <sysdep-cancel.h> +static inline ssize_t +getrandom_syscall (void *buffer, size_t length, unsigned int flags, + bool cancel) +{ + return cancel + ? SYSCALL_CANCEL (getrandom, buffer, length, flags) + : INLINE_SYSCALL_CALL (getrandom, buffer, length, flags); +} + +#ifdef HAVE_GETRANDOM_VSYSCALL +# include <assert.h> +# include <ldsodefs.h> +# include <libc-lock.h> +# include <list.h> +# include <setvmaname.h> +# include <sys/mman.h> +# include <sys/sysinfo.h> +# include <tls-internal.h> + +/* These values will be initialized at loading time by calling the + _dl_vdso_getrandom with a special value. The 'state_size' is the opaque + state size per-thread allocated with a mmap using 'mmap_prot' and + 'mmap_flags' argument. */ +static uint32_t state_size; +static uint32_t state_size_cache_aligned; +static uint32_t mmap_prot; +static uint32_t mmap_flags; + +/* The function below are used on reentracy handling with (i.e. SA_NODEFER). + Before allocating a new state or issue the vDSO, atomically read the + current thread buffer, and if this is already reserved (is_reserved_ptr) + fallback to the syscall. Otherwise, reserve the buffer by atomically + setting the LSB of the opaque state pointer. The bit is cleared after the + vDSO is called, or before issuing the fallback syscall. 
*/ + +static inline void *reserve_ptr (void *p) +{ + return (void *) ((uintptr_t) (p) | 1UL); +} + +static inline void *release_ptr (void *p) +{ + return (void *) ((uintptr_t) (p) & ~1UL); +} + +static inline bool is_reserved_ptr (void *p) +{ + return (uintptr_t) (p) & 1UL; +} + +static struct +{ + __libc_lock_define (, lock); + + void **states; /* Queue of opaque states allocated with the kernel + provided flags and used on getrandom vDSO call. */ + size_t len; /* Number of available free states in the queue. */ + size_t total; /* Number of states allocated from the kernel. */ + size_t cap; /* Total number of states that 'states' can hold before + needed to be resized. */ +} grnd_alloc = { + .lock = LLL_LOCK_INITIALIZER +}; + +static bool +vgetrandom_get_state_alloc (void) +{ + /* Start by allocating one page for the opaque states. */ + size_t block_size = ALIGN_UP (state_size_cache_aligned, GLRO(dl_pagesize)); + size_t states_per_page = GLRO (dl_pagesize) / state_size_cache_aligned; + void *block = __mmap (NULL, GLRO(dl_pagesize), mmap_prot, mmap_flags, -1, 0); + if (block == MAP_FAILED) + return false; + __set_vma_name (block, block_size, " glibc: getrandom"); + + if (grnd_alloc.total + states_per_page > grnd_alloc.cap) + { + /* Use a new mmap instead of trying to mremap. It avoids a + potential multithread fork issue where fork is called just after + mremap returns but before assigning to the grnd_alloc.states, + thus making the its value invalid in the child. */ + void *old_states = grnd_alloc.states; + size_t new_states_size = ALIGN_UP ((grnd_alloc.total + states_per_page) + * sizeof (*grnd_alloc.states), + GLRO(dl_pagesize)); + + /* There is no need to memcpy any opaque state information because + all the allocated opaque states are assigned to running threads + (meaning that if we iterate over them we can reconstruct the state + list). 
*/ + void **states = __mmap (NULL, new_states_size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (states == MAP_FAILED) + { + __munmap (block, block_size); + return false; + } + + /* Atomically replace the old state, so if a fork happens the child + process will see a consistent free state buffer. The size might + not be updated, but it does not really matter since the buffer is + always increased. */ + grnd_alloc.states = states; + atomic_thread_fence_seq_cst (); + if (old_states != NULL) + __munmap (old_states, grnd_alloc.cap * sizeof (*grnd_alloc.states)); + + __set_vma_name (states, new_states_size, " glibc: getrandom states"); + grnd_alloc.cap = new_states_size / sizeof (*grnd_alloc.states); + atomic_thread_fence_seq_cst (); + } + + for (size_t i = 0; i < states_per_page; ++i) + { + /* There is no need to handle states that straddle a page because + we allocate only one page. */ + grnd_alloc.states[i] = block; + block += state_size_cache_aligned; + } + /* Concurrent fork should not observe the previous pointer value. */ + grnd_alloc.len = states_per_page; + grnd_alloc.total += states_per_page; + atomic_thread_fence_seq_cst (); + + return true; +} + +/* Allocate an opaque state for vgetrandom. If the grnd_alloc does not have + any, mmap() another page of them using the vgetrandom parameters. */ +static void * +vgetrandom_get_state (void) +{ + void *state = NULL; + + /* The signal blocking avoid the potential issue where _Fork() (which is + async-signal-safe) is called with the lock taken. The function is + called only once during thread lifetime, so the overhead should be + minimal. 
*/ + internal_sigset_t set; + internal_signal_block_all (&set); + __libc_lock_lock (grnd_alloc.lock); + + if (grnd_alloc.len > 0 || vgetrandom_get_state_alloc ()) + state = grnd_alloc.states[--grnd_alloc.len]; + + __libc_lock_unlock (grnd_alloc.lock); + internal_signal_restore_set (&set); + + return state; +} + +/* Returns true when vgetrandom is used successfully. Returns false if the + syscall fallback should be issued in the case the vDSO is not present, in + the case of reentrancy, or if any memory allocation fails. */ +static ssize_t +getrandom_vdso (void *buffer, size_t length, unsigned int flags, bool cancel) +{ + if (__glibc_unlikely (state_size == 0)) + return getrandom_syscall (buffer, length, flags, cancel); + + struct pthread *self = THREAD_SELF; + + void *state = atomic_load_relaxed (&self->getrandom_buf); + if (is_reserved_ptr (state)) + return getrandom_syscall (buffer, length, flags, cancel); + atomic_store_relaxed (&self->getrandom_buf, reserve_ptr (state)); + __atomic_signal_fence (__ATOMIC_ACQ_REL); + + bool r = false; + if (state == NULL) + { + state = vgetrandom_get_state (); + if (state == NULL) + goto out; + } + + /* Since the vDSO implementation does not issue the syscall with the + cancellation bridge (__syscall_cancel_arch), use GRND_NONBLOCK so there + is no potential unbounded blocking in the kernel. It should be a rare + situation, only at system startup when RNG is not initialized. */ + ssize_t ret = GLRO (dl_vdso_getrandom) (buffer, + length, + flags | GRND_NONBLOCK, + state, + state_size); + if (INTERNAL_SYSCALL_ERROR_P (ret)) + { + /* Fallback to the syscall if the kernel would block. */ + int err = INTERNAL_SYSCALL_ERRNO (ret); + if (err == EAGAIN && !(flags & GRND_NONBLOCK)) + goto out; + + __set_errno (err); + ret = -1; + } + r = true; + +out: + __atomic_signal_fence (__ATOMIC_ACQ_REL); + atomic_store_relaxed (&self->getrandom_buf, state); + return r ? 
ret : getrandom_syscall (buffer, length, flags, cancel); +} +#endif + +void +__getrandom_early_init (_Bool initial) +{ +#ifdef HAVE_GETRANDOM_VSYSCALL + /* libcs loaded for audit modules, dlmopen, etc. fallback to syscall. */ + if (initial && (GLRO (dl_vdso_getrandom) != NULL)) + { + /* Used to query the vDSO for the required mmap flags and the opaque + per-thread state size. Defined by linux/random.h. */ + struct vgetrandom_opaque_params + { + uint32_t size_of_opaque_state; + uint32_t mmap_prot; + uint32_t mmap_flags; + uint32_t reserved[13]; + } params; + if (GLRO(dl_vdso_getrandom) (NULL, 0, 0, &params, ~0UL) == 0) + { + /* Align each opaque state to L1 data cache size to avoid false + sharing. If the size can not be obtained, use the kernel + provided one. */ + state_size = params.size_of_opaque_state; + + long int ld1sz = __sysconf (_SC_LEVEL1_DCACHE_LINESIZE); + if (ld1sz <= 0) + ld1sz = 1; + state_size_cache_aligned = ALIGN_UP (state_size, ld1sz); + /* Do not enable vDSO if the required opaque state size is larger + than a page because we only allocate one page per time to hold + the states. */ + if (state_size_cache_aligned > GLRO(dl_pagesize)) + { + state_size = 0; + return; + } + mmap_prot = params.mmap_prot; + mmap_flags = params.mmap_flags; + } + } +#endif +} + +/* Re-add the state state from CURP on the free list. This function is + called after fork returns in the child, so no locking is required. */ +void +__getrandom_reset_state (struct pthread *curp) +{ +#ifdef HAVE_GETRANDOM_VSYSCALL + if (grnd_alloc.states == NULL || curp->getrandom_buf == NULL) + return; + assert (grnd_alloc.len < grnd_alloc.cap); + grnd_alloc.states[grnd_alloc.len++] = release_ptr (curp->getrandom_buf); + curp->getrandom_buf = NULL; +#endif +} + +/* Called when a thread terminates, and adds its random buffer back into the + allocator pool for use in a future thread. This is called by + pthread_create during thread termination, and after signal has been + blocked. 
*/ +void +__getrandom_vdso_release (struct pthread *curp) +{ +#ifdef HAVE_GETRANDOM_VSYSCALL + if (curp->getrandom_buf == NULL) + return; + + __libc_lock_lock (grnd_alloc.lock); + grnd_alloc.states[grnd_alloc.len++] = curp->getrandom_buf; + __libc_lock_unlock (grnd_alloc.lock); +#endif +} + +/* Reset the internal lock state in case another thread has locked while + this thread calls fork. The stale thread states will be handled by + reclaim_stacks which calls __getrandom_reset_state on each thread. */ +void +__getrandom_fork_subprocess (void) +{ +#ifdef HAVE_GETRANDOM_VSYSCALL + grnd_alloc.lock = LLL_LOCK_INITIALIZER; +#endif +} + +ssize_t +__getrandom_nocancel (void *buffer, size_t length, unsigned int flags) +{ +#ifdef HAVE_GETRANDOM_VSYSCALL + return getrandom_vdso (buffer, length, flags, false); +#else + return getrandom_syscall (buffer, length, flags, false); +#endif +} + /* Write up to LENGTH bytes of randomness starting at BUFFER. Return the number of bytes written, or -1 on error. */ ssize_t __getrandom (void *buffer, size_t length, unsigned int flags) { - return SYSCALL_CANCEL (getrandom, buffer, length, flags); +#ifdef HAVE_GETRANDOM_VSYSCALL + return getrandom_vdso (buffer, length, flags, true); +#else + return getrandom_syscall (buffer, length, flags, true); +#endif } libc_hidden_def (__getrandom) weak_alias (__getrandom, getrandom) diff --git a/sysdeps/unix/sysv/linux/gettimeofday.c b/sysdeps/unix/sysv/linux/gettimeofday.c index 7ab147c614..5ca8916611 100644 --- a/sysdeps/unix/sysv/linux/gettimeofday.c +++ b/sysdeps/unix/sysv/linux/gettimeofday.c @@ -37,11 +37,11 @@ __gettimeofday_syscall (struct timeval *restrict tv, void *restrict tz) } # undef INIT_ARCH -# define INIT_ARCH() \ - void *vdso_gettimeofday = dl_vdso_vsym (HAVE_GETTIMEOFDAY_VSYSCALL) +# define INIT_ARCH() libc_ifunc (__gettimeofday, - vdso_gettimeofday ? VDSO_IFUNC_RET (vdso_gettimeofday) - : (void *) __gettimeofday_syscall) + GLRO(dl_vdso_gettimeofday) != NULL + ? 
VDSO_IFUNC_RET (GLRO(dl_vdso_gettimeofday)) + : (void *) __gettimeofday_syscall) # else int diff --git a/sysdeps/unix/sysv/linux/include/bits/mman-shared.h b/sysdeps/unix/sysv/linux/include/bits/mman-shared.h new file mode 100644 index 0000000000..7c14b5df3c --- /dev/null +++ b/sysdeps/unix/sysv/linux/include/bits/mman-shared.h @@ -0,0 +1,16 @@ +#include <sysdeps/unix/sysv/linux/bits/mman-shared.h> + +#ifndef _ISOMAC + +extern __typeof (pkey_alloc) __pkey_alloc; +libc_hidden_proto (__pkey_alloc) +extern __typeof (pkey_free) __pkey_free; +libc_hidden_proto (__pkey_free) +extern __typeof (pkey_mprotect) __pkey_mprotect; +libc_hidden_proto (__pkey_mprotect) +extern __typeof (pkey_get) __pkey_get; +libc_hidden_proto (__pkey_get) +extern __typeof (pkey_set) __pkey_set; +libc_hidden_proto (__pkey_set) + +#endif diff --git a/sysdeps/unix/sysv/linux/internal-signals.h b/sysdeps/unix/sysv/linux/internal-signals.h index a6fae59aaa..6e3a3d7692 100644 --- a/sysdeps/unix/sysv/linux/internal-signals.h +++ b/sysdeps/unix/sysv/linux/internal-signals.h @@ -90,6 +90,15 @@ internal_signal_restore_set (const internal_sigset_t *set) __NSIG_BYTES); } +static inline void +internal_signal_unblock_signal (int sig) +{ + internal_sigset_t set; + internal_sigemptyset (&set); + internal_sigaddset (&set, sig); + INTERNAL_SYSCALL_CALL (rt_sigprocmask, SIG_UNBLOCK, &set, NULL, + __NSIG_BYTES); +} /* It is used on timer_create code directly on sigwaitinfo call, so it can not use the internal_sigset_t definitions. 
*/ diff --git a/sysdeps/unix/sysv/linux/internal-sigset.h b/sysdeps/unix/sysv/linux/internal-sigset.h index 5d7020b42d..4b19affd75 100644 --- a/sysdeps/unix/sysv/linux/internal-sigset.h +++ b/sysdeps/unix/sysv/linux/internal-sigset.h @@ -21,7 +21,7 @@ #include <sigsetops.h> -typedef struct +typedef struct _internal_sigset_t { unsigned long int __val[__NSIG_WORDS]; } internal_sigset_t; diff --git a/sysdeps/unix/sysv/linux/loongarch/bits/rseq.h b/sysdeps/unix/sysv/linux/loongarch/bits/rseq.h new file mode 100644 index 0000000000..7468a05243 --- /dev/null +++ b/sysdeps/unix/sysv/linux/loongarch/bits/rseq.h @@ -0,0 +1,36 @@ +/* Restartable Sequences Linux LoongArch architecture header. + Copyright (C) 2024 Free Software Foundation, Inc. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#ifndef _SYS_RSEQ_H +# error "Never use <bits/rseq.h> directly; include <sys/rseq.h> instead." +#endif + +/* RSEQ_SIG is a signature required before each abort handler code. + + It is a 32-bit value that maps to actual architecture code compiled + into applications and libraries. It needs to be defined for each + architecture. When choosing this value, it needs to be taken into + account that generating invalid instructions may have ill effects on + tools like objdump, and may also have impact on the CPU speculative + execution efficiency in some cases. 
+ + RSEQ_SIG uses the following break instruction: + + 0x002a0010 break 0x10 +*/ + +#define RSEQ_SIG 0x002a0010 diff --git a/sysdeps/unix/sysv/linux/loongarch/fixup-asm-unistd.h b/sysdeps/unix/sysv/linux/loongarch/fixup-asm-unistd.h new file mode 100644 index 0000000000..0062756b5c --- /dev/null +++ b/sysdeps/unix/sysv/linux/loongarch/fixup-asm-unistd.h @@ -0,0 +1,21 @@ +/* Regularize <asm/unistd.h> definitions. LoongArch version. + Copyright (C) 2024 Free Software Foundation, Inc. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* To avoid the messy usage of the fstat, newfstatat, and statx system calls, we +only use statx. 
*/ +#undef __NR_fstat +#undef __NR_newfstatat diff --git a/sysdeps/unix/sysv/linux/loongarch/sysdep.h b/sysdeps/unix/sysv/linux/loongarch/sysdep.h index eb0ba790da..e2d853ae3e 100644 --- a/sysdeps/unix/sysv/linux/loongarch/sysdep.h +++ b/sysdeps/unix/sysv/linux/loongarch/sysdep.h @@ -119,6 +119,7 @@ #define HAVE_CLOCK_GETTIME64_VSYSCALL "__vdso_clock_gettime" #define HAVE_GETTIMEOFDAY_VSYSCALL "__vdso_gettimeofday" #define HAVE_GETCPU_VSYSCALL "__vdso_getcpu" +#define HAVE_GETRANDOM_VSYSCALL "__vdso_getrandom" #define HAVE_CLONE3_WRAPPER 1 diff --git a/sysdeps/unix/sysv/linux/nios2/arch-syscall.h b/sysdeps/unix/sysv/linux/nios2/arch-syscall.h index f94e212995..b1d0fb6f6e 100644 --- a/sysdeps/unix/sysv/linux/nios2/arch-syscall.h +++ b/sysdeps/unix/sysv/linux/nios2/arch-syscall.h @@ -24,6 +24,7 @@ #define __NR_clock_settime 112 #define __NR_clock_settime64 404 #define __NR_clone 220 +#define __NR_clone3 435 #define __NR_close 57 #define __NR_close_range 436 #define __NR_connect 203 diff --git a/sysdeps/unix/sysv/linux/not-cancel.h b/sysdeps/unix/sysv/linux/not-cancel.h index 2a7585b73f..12f26912d3 100644 --- a/sysdeps/unix/sysv/linux/not-cancel.h +++ b/sysdeps/unix/sysv/linux/not-cancel.h @@ -27,6 +27,7 @@ #include <sys/syscall.h> #include <sys/wait.h> #include <time.h> +#include <sys/random.h> /* Non cancellable open syscall. */ __typeof (open) __open_nocancel; @@ -84,15 +85,17 @@ __writev_nocancel_nostatus (int fd, const struct iovec *iov, int iovcnt) } static inline ssize_t -__getrandom_nocancel (void *buf, size_t buflen, unsigned int flags) +__getrandom_nocancel_direct (void *buf, size_t buflen, unsigned int flags) { return INLINE_SYSCALL_CALL (getrandom, buf, buflen, flags); } +__typeof (getrandom) __getrandom_nocancel attribute_hidden; + /* Non cancellable getrandom syscall that does not also set errno in case of failure. 
*/ static inline ssize_t -__getrandom_nocancel_nostatus (void *buf, size_t buflen, unsigned int flags) +__getrandom_nocancel_nostatus_direct (void *buf, size_t buflen, unsigned int flags) { return INTERNAL_SYSCALL_CALL (getrandom, buf, buflen, flags); } diff --git a/sysdeps/unix/sysv/linux/olddirent.h b/sysdeps/unix/sysv/linux/olddirent.h index 239f790648..065ca41a6e 100644 --- a/sysdeps/unix/sysv/linux/olddirent.h +++ b/sysdeps/unix/sysv/linux/olddirent.h @@ -34,8 +34,6 @@ extern struct __old_dirent64 *__old_readdir64 (DIR *__dirp); libc_hidden_proto (__old_readdir64); extern int __old_readdir64_r (DIR *__dirp, struct __old_dirent64 *__entry, struct __old_dirent64 **__result); -extern __ssize_t __old_getdents64 (int __fd, char *__buf, size_t __nbytes) - attribute_hidden; int __old_scandir64 (const char * __dir, struct __old_dirent64 *** __namelist, int (*__selector) (const struct __old_dirent64 *), diff --git a/sysdeps/unix/sysv/linux/pkey_get.c b/sysdeps/unix/sysv/linux/pkey_get.c index 743bd09268..ed9b6297d1 100644 --- a/sysdeps/unix/sysv/linux/pkey_get.c +++ b/sysdeps/unix/sysv/linux/pkey_get.c @@ -17,10 +17,13 @@ <https://www.gnu.org/licenses/>. 
*/ #include <errno.h> +#include <sys/mman.h> int -pkey_get (int key) +__pkey_get (int key) { __set_errno (ENOSYS); return -1; } +libc_hidden_def (__pkey_get) +weak_alias (__pkey_get, pkey_get) diff --git a/sysdeps/unix/sysv/linux/pkey_mprotect.c b/sysdeps/unix/sysv/linux/pkey_mprotect.c index b7afa7946d..e321a7eb1b 100644 --- a/sysdeps/unix/sysv/linux/pkey_mprotect.c +++ b/sysdeps/unix/sysv/linux/pkey_mprotect.c @@ -22,7 +22,7 @@ #include <sysdep.h> int -pkey_mprotect (void *addr, size_t len, int prot, int pkey) +__pkey_mprotect (void *addr, size_t len, int prot, int pkey) { if (pkey == -1) /* If the key is -1, the system call is precisely equivalent to @@ -30,3 +30,5 @@ pkey_mprotect (void *addr, size_t len, int prot, int pkey) return __mprotect (addr, len, prot); return INLINE_SYSCALL_CALL (pkey_mprotect, addr, len, prot, pkey); } +libc_hidden_def (__pkey_mprotect) +weak_alias (__pkey_mprotect, pkey_mprotect) diff --git a/sysdeps/unix/sysv/linux/pkey_set.c b/sysdeps/unix/sysv/linux/pkey_set.c index d5d59e3031..9cb14af0ec 100644 --- a/sysdeps/unix/sysv/linux/pkey_set.c +++ b/sysdeps/unix/sysv/linux/pkey_set.c @@ -17,10 +17,13 @@ <https://www.gnu.org/licenses/>. */ #include <errno.h> +#include <sys/mman.h> int -pkey_set (int key, unsigned int access_rights) +__pkey_set (int key, unsigned int access_restrictions) { __set_errno (ENOSYS); return -1; } +libc_hidden_def (__pkey_set) +weak_alias (__pkey_set, pkey_set) diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/clone.S b/sysdeps/unix/sysv/linux/powerpc/powerpc64/clone.S index 164311d2bd..e57cb6e82e 100644 --- a/sysdeps/unix/sysv/linux/powerpc/powerpc64/clone.S +++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/clone.S @@ -56,7 +56,6 @@ ENTRY (__clone) /* Save fn, args, stack across syscall. */ mr r30,r3 /* Function in r30. */ - mr r29,r5 /* Flags in r29. */ mr r31,r6 /* Argument in r31. */ /* 'flags' argument is first parameter to clone syscall. 
@@ -77,14 +76,12 @@ ENTRY (__clone) CHECK_SCV_SUPPORT r28 0f /* This is equivalent to DO_CALL_SCV, but we cannot use the macro here because it uses CFI directives and we just called cfi_endproc. */ - mflr r9 - std r9,FRAME_LR_SAVE(r1) + mflr r29 .machine "push" .machine "power9" scv 0 .machine "pop" - ld r9,FRAME_LR_SAVE(r1) - mtlr r9 + mtlr r29 /* Check for child process. */ /* When using scv, error is indicated by negative r3. */ diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/clone3.S b/sysdeps/unix/sysv/linux/powerpc/powerpc64/clone3.S index 900c354c9c..913f341d7d 100644 --- a/sysdeps/unix/sysv/linux/powerpc/powerpc64/clone3.S +++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/clone3.S @@ -39,14 +39,12 @@ because it uses CFI directives and we just called cfi_endproc. */ # define DO_CLONE3_SVC_CALL(jumpfalse) \ CHECK_SCV_SUPPORT r28 jumpfalse; \ - mflr r9; \ - std r9, FRAME_LR_SAVE(r1); \ + mflr r31; \ .machine "push"; \ .machine "power9"; \ scv 0; \ .machine "pop"; \ - ld r9, FRAME_LR_SAVE(r1); \ - mtlr r9; \ + mtlr r31; \ /* With scv an, an error is a value -4095 <= x < 0. */ \ cmpdi cr1, r3, 0; \ b 1f; @@ -66,13 +64,15 @@ ENTRY(__clone3) /* Save some regs in the "red zone". */ #ifdef USE_PPC_SCV - std r28, -24(r1) - cfi_offset (r28, -24) + std r28, -32(r1) + cfi_offset (r28, -32) #endif - std r29, -16(r1) - std r30, -8(r1) - cfi_offset (r29, -16) - cfi_offset (r30, -8) + std r29, -24(r1) + std r30, -16(r1) + std r31, -8(r1) + cfi_offset (r29, -24) + cfi_offset (r30, -16) + cfi_offset (r31, -8) /* Save func and arg across syscall. */ mr r30, r5 /* Function in r30. */ @@ -132,16 +132,19 @@ L(parent): /* Parent. Restore registers & return. 
*/ #ifdef USE_PPC_SCV - cfi_offset (r28, -24) - ld r28, -24(r1) + cfi_offset (r28, -32) + ld r28, -32(r1) cfi_restore (r28) #endif - cfi_offset (r29,-16) - cfi_offset (r30,-8) - ld r29, -16(r1) - ld r30, -8(r1) + cfi_offset (r29,-24) + cfi_offset (r30,-16) + cfi_offset (r31,-8) + ld r29, -24(r1) + ld r30, -16(r1) + ld r31, -8(r1) cfi_restore (r29) cfi_restore (r30) + cfi_restore (r31) #ifdef USE_PPC_SCV beq cr1, 0f diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/pkey_get.c b/sysdeps/unix/sysv/linux/powerpc/powerpc64/pkey_get.c index 217ecd004f..ac9ea3a1c1 100644 --- a/sysdeps/unix/sysv/linux/powerpc/powerpc64/pkey_get.c +++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/pkey_get.c @@ -21,7 +21,7 @@ #include <sys/mman.h> int -pkey_get (int key) +__pkey_get (int key) { if (key < 0 || key > PKEY_MAX) { @@ -40,3 +40,5 @@ pkey_get (int key) return PKEY_DISABLE_WRITE; return 0; } +libc_hidden_def (__pkey_get) +weak_alias (__pkey_get, pkey_get) diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/pkey_set.c b/sysdeps/unix/sysv/linux/powerpc/powerpc64/pkey_set.c index cc67b0acff..ac10b5de22 100644 --- a/sysdeps/unix/sysv/linux/powerpc/powerpc64/pkey_set.c +++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/pkey_set.c @@ -21,7 +21,7 @@ #include <sys/mman.h> int -pkey_set (int key, unsigned int rights) +__pkey_set (int key, unsigned int rights) { if (key < 0 || key > PKEY_MAX || rights > 3) { @@ -46,3 +46,5 @@ pkey_set (int key, unsigned int rights) pkey_write (amr); return 0; } +libc_hidden_def (__pkey_set) +weak_alias (__pkey_set, pkey_set) diff --git a/sysdeps/unix/sysv/linux/powerpc/sysdep.h b/sysdeps/unix/sysv/linux/powerpc/sysdep.h index a69b7db338..48f3d0d1b2 100644 --- a/sysdeps/unix/sysv/linux/powerpc/sysdep.h +++ b/sysdeps/unix/sysv/linux/powerpc/sysdep.h @@ -223,5 +223,6 @@ #define HAVE_TIME_VSYSCALL "__kernel_time" #define HAVE_GETTIMEOFDAY_VSYSCALL "__kernel_gettimeofday" #define HAVE_GET_TBFREQ "__kernel_get_tbfreq" +#define HAVE_GETRANDOM_VSYSCALL 
"__kernel_getrandom" #endif /* _LINUX_POWERPC_SYSDEP_H */ diff --git a/sysdeps/unix/sysv/linux/readdir64.c b/sysdeps/unix/sysv/linux/readdir64.c index e6f5108c0a..e6b8867b7a 100644 --- a/sysdeps/unix/sysv/linux/readdir64.c +++ b/sysdeps/unix/sysv/linux/readdir64.c @@ -26,17 +26,13 @@ #undef __readdir #undef readdir -/* Read a directory entry from DIRP. */ -struct dirent64 * -__readdir64 (DIR *dirp) +/* Read a directory entry from DIRP. No locking. */ +static struct dirent64 * +__readdir64_unlocked (DIR *dirp) { struct dirent64 *dp; int saved_errno = errno; -#if IS_IN (libc) - __libc_lock_lock (dirp->lock); -#endif - if (dirp->offset >= dirp->size) { /* We've emptied out our buffer. Refill it. */ @@ -53,9 +49,6 @@ __readdir64 (DIR *dirp) do not set errno in that case, to indicate success. */ if (bytes == 0 || errno == ENOENT) __set_errno (saved_errno); -#if IS_IN (libc) - __libc_lock_unlock (dirp->lock); -#endif return NULL; } dirp->size = (size_t) bytes; @@ -68,10 +61,16 @@ __readdir64 (DIR *dirp) dirp->offset += dp->d_reclen; dirp->filepos = dp->d_off; -#if IS_IN (libc) - __libc_lock_unlock (dirp->lock); -#endif + return dp; +} +/* Read a directory entry from DIRP. */ +struct dirent64 * +__readdir64 (DIR *dirp) +{ + __libc_lock_lock (dirp->lock); + struct dirent64 *dp = __readdir64_unlocked (dirp); + __libc_lock_unlock (dirp->lock); return dp; } libc_hidden_def (__readdir64) @@ -99,45 +98,54 @@ __old_readdir64 (DIR *dirp) struct __old_dirent64 *dp; int saved_errno = errno; -#if IS_IN (libc) __libc_lock_lock (dirp->lock); -#endif - if (dirp->offset >= dirp->size) + while (1) { - /* We've emptied out our buffer. Refill it. */ + errno = 0; + struct dirent64 *newdp = __readdir64_unlocked (dirp); + if (newdp == NULL) + { + if (errno == 0 && dirp->errcode != 0) + __set_errno (dirp->errcode); + else if (errno == 0) + __set_errno (saved_errno); + dp = NULL; + break; + } - size_t maxread = dirp->allocation; - ssize_t bytes; + /* Convert to the target layout. 
Use a separate struct and + memcpy to side-step aliasing issues. */ + struct __old_dirent64 result; + result.d_ino = newdp->d_ino; + result.d_off = newdp->d_off; + result.d_reclen = newdp->d_reclen; + result.d_type = newdp->d_type; - bytes = __old_getdents64 (dirp->fd, dirp->data, maxread); - if (bytes <= 0) + /* Check for ino_t overflow. */ + if (__glibc_unlikely (result.d_ino != newdp->d_ino)) { - /* Linux may fail with ENOENT on some file systems if the - directory inode is marked as dead (deleted). POSIX - treats this as a regular end-of-directory condition, so - do not set errno in that case, to indicate success. */ - if (bytes == 0 || errno == ENOENT) - __set_errno (saved_errno); -#if IS_IN (libc) - __libc_lock_unlock (dirp->lock); -#endif - return NULL; + dirp->errcode = ENAMETOOLONG; + continue; } - dirp->size = (size_t) bytes; - /* Reset the offset into the buffer. */ - dirp->offset = 0; - } + /* Overwrite the fixed-sized part. */ + dp = (struct __old_dirent64 *) newdp; + memcpy (dp, &result, offsetof (struct __old_dirent64, d_name)); - dp = (struct __old_dirent64 *) &dirp->data[dirp->offset]; - dirp->offset += dp->d_reclen; - dirp->filepos = dp->d_off; + /* Move the name. 
*/ + _Static_assert (offsetof (struct __old_dirent64, d_name) + <= offsetof (struct dirent64, d_name), + "old struct must be smaller"); + if (offsetof (struct __old_dirent64, d_name) + != offsetof (struct dirent64, d_name)) + memmove (dp->d_name, newdp->d_name, strlen (newdp->d_name) + 1); -#if IS_IN (libc) - __libc_lock_unlock (dirp->lock); -#endif + __set_errno (saved_errno); + break; + } + __libc_lock_unlock (dirp->lock); return dp; } libc_hidden_def (__old_readdir64) diff --git a/sysdeps/unix/sysv/linux/readdir64_r.c b/sysdeps/unix/sysv/linux/readdir64_r.c index e87882ee06..c42a161ffc 100644 --- a/sysdeps/unix/sysv/linux/readdir64_r.c +++ b/sysdeps/unix/sysv/linux/readdir64_r.c @@ -37,7 +37,7 @@ __readdir64_r (DIR *dirp, struct dirent64 *entry, struct dirent64 **result) __libc_lock_lock (dirp->lock); - do + while (1) { if (dirp->offset >= dirp->size) { @@ -79,26 +79,21 @@ __readdir64_r (DIR *dirp, struct dirent64 *entry, struct dirent64 **result) dirp->filepos = dp->d_off; - if (reclen > offsetof (struct dirent64, d_name) + NAME_MAX + 1) + if (reclen <= offsetof (struct dirent64, d_name) + NAME_MAX + 1) + break; + + /* The record is very long. It could still fit into the + caller-supplied buffer if we can skip padding at the end. */ + size_t namelen = _D_EXACT_NAMLEN (dp); + if (namelen <= NAME_MAX) { - /* The record is very long. It could still fit into the - caller-supplied buffer if we can skip padding at the - end. */ - size_t namelen = _D_EXACT_NAMLEN (dp); - if (namelen <= NAME_MAX) - reclen = offsetof (struct dirent64, d_name) + namelen + 1; - else - { - /* The name is too long. Ignore this file. */ - dirp->errcode = ENAMETOOLONG; - dp->d_ino = 0; - continue; - } + reclen = offsetof (struct dirent64, d_name) + namelen + 1; + break; } - /* Skip deleted and ignored files. */ + /* The name is too long. Ignore this file. 
*/ + dirp->errcode = ENAMETOOLONG; } - while (dp->d_ino == 0); if (dp != NULL) { @@ -135,91 +130,37 @@ attribute_compat_text_section __old_readdir64_r (DIR *dirp, struct __old_dirent64 *entry, struct __old_dirent64 **result) { - struct __old_dirent64 *dp; - size_t reclen; - const int saved_errno = errno; - int ret; - - __libc_lock_lock (dirp->lock); - - do + while (1) { - if (dirp->offset >= dirp->size) - { - /* We've emptied out our buffer. Refill it. */ - - size_t maxread = dirp->allocation; - ssize_t bytes; - - maxread = dirp->allocation; - - bytes = __old_getdents64 (dirp->fd, dirp->data, maxread); - if (bytes <= 0) - { - /* On some systems getdents fails with ENOENT when the - open directory has been rmdir'd already. POSIX.1 - requires that we treat this condition like normal EOF. */ - if (bytes < 0 && errno == ENOENT) - { - bytes = 0; - __set_errno (saved_errno); - } - if (bytes < 0) - dirp->errcode = errno; - - dp = NULL; - break; - } - dirp->size = (size_t) bytes; + struct dirent64 new_entry; + struct dirent64 *newp; + int ret = __readdir64_r (dirp, &new_entry, &newp); - /* Reset the offset into the buffer. */ - dirp->offset = 0; + if (ret != 0) + return ret; + else if (newp == NULL) + { + *result = NULL; + return 0; } - - dp = (struct __old_dirent64 *) &dirp->data[dirp->offset]; - - reclen = dp->d_reclen; - - dirp->offset += reclen; - - dirp->filepos = dp->d_off; - - if (reclen > offsetof (struct __old_dirent64, d_name) + NAME_MAX + 1) + else { - /* The record is very long. It could still fit into the - caller-supplied buffer if we can skip padding at the - end. */ - size_t namelen = _D_EXACT_NAMLEN (dp); - if (namelen <= NAME_MAX) - reclen = offsetof (struct __old_dirent64, d_name) + namelen + 1; - else + entry->d_ino = newp->d_ino; + if (entry->d_ino != newp->d_ino) { - /* The name is too long. Ignore this file. 
*/ - dirp->errcode = ENAMETOOLONG; - dp->d_ino = 0; + dirp->errcode = EOVERFLOW; continue; } + size_t namelen = strlen (newp->d_name); + entry->d_off = newp->d_off; + entry->d_reclen = (offsetof (struct __old_dirent64, d_name) + + namelen + 1); + entry->d_type = newp->d_type; + memcpy (entry->d_name, newp->d_name, namelen + 1); + *result = entry; + return 0; } - - /* Skip deleted and ignored files. */ } - while (dp->d_ino == 0); - - if (dp != NULL) - { - *result = memcpy (entry, dp, reclen); - entry->d_reclen = reclen; - ret = 0; - } - else - { - *result = NULL; - ret = dirp->errcode; - } - - __libc_lock_unlock (dirp->lock); - - return ret; } compat_symbol (libc, __old_readdir64_r, readdir64_r, GLIBC_2_1); diff --git a/sysdeps/unix/sysv/linux/rseq-internal.h b/sysdeps/unix/sysv/linux/rseq-internal.h index 7ea935b4ad..37a8f630b6 100644 --- a/sysdeps/unix/sysv/linux/rseq-internal.h +++ b/sysdeps/unix/sysv/linux/rseq-internal.h @@ -51,11 +51,21 @@ rseq_register_current_thread (struct pthread *self, bool do_rseq) /* The initial implementation used only 20 bytes out of 32, but still expected size 32. */ size = RSEQ_AREA_SIZE_INITIAL; + + /* Initialize the rseq fields that are read by the kernel on + registration, there is no guarantee that struct pthread is + cleared on all architectures. */ + THREAD_SETMEM (self, rseq_area.cpu_id, RSEQ_CPU_ID_UNINITIALIZED); + THREAD_SETMEM (self, rseq_area.rseq_cs, 0); + THREAD_SETMEM (self, rseq_area.flags, 0); + int ret = INTERNAL_SYSCALL_CALL (rseq, &self->rseq_area, size, 0, RSEQ_SIG); if (!INTERNAL_SYSCALL_ERROR_P (ret)) return true; } + /* When rseq is disabled by tunables or the registration fails, inform + userspace by setting 'cpu_id' to RSEQ_CPU_ID_REGISTRATION_FAILED. 
*/ THREAD_SETMEM (self, rseq_area.cpu_id, RSEQ_CPU_ID_REGISTRATION_FAILED); return false; } diff --git a/sysdeps/unix/sysv/linux/s390/sysdep.h b/sysdeps/unix/sysv/linux/s390/sysdep.h index 9b3000ca62..9698c57a03 100644 --- a/sysdeps/unix/sysv/linux/s390/sysdep.h +++ b/sysdeps/unix/sysv/linux/s390/sysdep.h @@ -72,6 +72,7 @@ #ifdef __s390x__ #define HAVE_CLOCK_GETRES64_VSYSCALL "__kernel_clock_getres" #define HAVE_CLOCK_GETTIME64_VSYSCALL "__kernel_clock_gettime" +#define HAVE_GETRANDOM_VSYSCALL "__kernel_getrandom" #else #define HAVE_CLOCK_GETRES_VSYSCALL "__kernel_clock_getres" #define HAVE_CLOCK_GETTIME_VSYSCALL "__kernel_clock_gettime" diff --git a/sysdeps/unix/sysv/linux/sparc/sparc32/clone.S b/sysdeps/unix/sysv/linux/sparc/sparc32/clone.S index c9cf9bb055..c84244f56b 100644 --- a/sysdeps/unix/sysv/linux/sparc/sparc32/clone.S +++ b/sysdeps/unix/sysv/linux/sparc/sparc32/clone.S @@ -29,7 +29,11 @@ ENTRY (__clone) save %sp,-96,%sp save %sp,-96,%sp +#ifdef __sparcv9 flushw +#else + ta 3 +#endif restore cfi_def_cfa_register(%fp) cfi_window_save diff --git a/sysdeps/unix/sysv/linux/sparc/sparc32/syscall_cancel.S b/sysdeps/unix/sysv/linux/sparc/sparc32/syscall_cancel.S index aa5c658ce1..45c6ae7a86 100644 --- a/sysdeps/unix/sysv/linux/sparc/sparc32/syscall_cancel.S +++ b/sysdeps/unix/sysv/linux/sparc/sparc32/syscall_cancel.S @@ -42,15 +42,19 @@ __syscall_cancel_arch_start: __syscall_do_cancel() */ ld [%i0], %g2 andcc %g2, TCB_CANCELED_BITMASK, %g0 +#ifdef __sparcv9 bne,pn %icc, 2f +#else + bne 2f +#endif /* Issue a 6 argument syscall. 
*/ - mov %i1, %g1 - mov %i2, %o0 + mov %i2, %o0 mov %i3, %o1 mov %i4, %o2 mov %i5, %o3 ld [%fp+92], %o4 ld [%fp+96], %o5 + mov %i1, %g1 ta 0x10 .globl __syscall_cancel_arch_end @@ -60,8 +64,13 @@ __syscall_cancel_arch_end: sub %g0, %o0, %o0 1: mov %o0, %i0 +#ifdef __sparcv9 return %i7+8 nop +#else + jmp %i7+8 + restore +#endif 2: call __syscall_do_cancel, 0 diff --git a/sysdeps/unix/sysv/linux/sparc/sparc32/sysdep.h b/sysdeps/unix/sysv/linux/sparc/sparc32/sysdep.h index d2d68f5312..c2ffbb5c8f 100644 --- a/sysdeps/unix/sysv/linux/sparc/sparc32/sysdep.h +++ b/sysdeps/unix/sysv/linux/sparc/sparc32/sysdep.h @@ -107,6 +107,7 @@ ENTRY(name); \ #else /* __ASSEMBLER__ */ #define __SYSCALL_STRING \ + "mov %[scn], %%g1;" \ "ta 0x10;" \ "bcc 1f;" \ " nop;" \ @@ -114,7 +115,7 @@ ENTRY(name); \ "1:" #define __SYSCALL_CLOBBERS \ - "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", \ + "g1", "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", \ "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15", \ "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", \ "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31", \ diff --git a/sysdeps/unix/sysv/linux/sparc/sparc64/syscall_cancel.S b/sysdeps/unix/sysv/linux/sparc/sparc64/syscall_cancel.S index 21b0728d5a..6c8d1330cb 100644 --- a/sysdeps/unix/sysv/linux/sparc/sparc64/syscall_cancel.S +++ b/sysdeps/unix/sysv/linux/sparc/sparc64/syscall_cancel.S @@ -46,13 +46,13 @@ __syscall_cancel_arch_start: andcc %g2, TCB_CANCELED_BITMASK, %g0 bne,pn %xcc, 2f /* Issue a 6 argument syscall. 
*/ - mov %i1, %g1 - mov %i2, %o0 + mov %i2, %o0 mov %i3, %o1 mov %i4, %o2 mov %i5, %o3 ldx [%fp + STACK_BIAS + 176], %o4 ldx [%fp + STACK_BIAS + 184], %o5 + mov %i1, %g1 ta 0x6d .global __syscall_cancel_arch_end diff --git a/sysdeps/unix/sysv/linux/sparc/sparc64/sysdep.h b/sysdeps/unix/sysv/linux/sparc/sparc64/sysdep.h index 96047424e9..5598fab08a 100644 --- a/sysdeps/unix/sysv/linux/sparc/sparc64/sysdep.h +++ b/sysdeps/unix/sysv/linux/sparc/sparc64/sysdep.h @@ -106,6 +106,7 @@ ENTRY(name); \ #else /* __ASSEMBLER__ */ #define __SYSCALL_STRING \ + "mov %[scn], %%g1;" \ "ta 0x6d;" \ "bcc,pt %%xcc, 1f;" \ " nop;" \ @@ -113,7 +114,7 @@ ENTRY(name); \ "1:" #define __SYSCALL_CLOBBERS \ - "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", \ + "g1", "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", \ "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15", \ "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", \ "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31", \ diff --git a/sysdeps/unix/sysv/linux/sparc/sysdep.h b/sysdeps/unix/sysv/linux/sparc/sysdep.h index dcabb57fe2..c287740a8c 100644 --- a/sysdeps/unix/sysv/linux/sparc/sysdep.h +++ b/sysdeps/unix/sysv/linux/sparc/sysdep.h @@ -50,97 +50,109 @@ #undef INTERNAL_SYSCALL_NCS #define INTERNAL_SYSCALL_NCS(name, nr, args...) \ - internal_syscall##nr(__SYSCALL_STRING, name, args) + _internal_syscall##nr(__SYSCALL_STRING, "p", name, args) -#define internal_syscall0(string,name,dummy...) \ +#define _internal_syscall0(string,nc,name,dummy...) \ ({ \ - register long int __g1 __asm__ ("g1") = (name); \ register long __o0 __asm__ ("o0"); \ + long int _name = (long int) (name); \ __asm __volatile (string : "=r" (__o0) : \ - "r" (__g1) : \ + [scn] nc (_name) : \ __SYSCALL_CLOBBERS); \ __o0; \ }) +#define internal_syscall0(string,name,args...) 
\ + _internal_syscall0(string, "i", name, args) -#define internal_syscall1(string,name,arg1) \ +#define _internal_syscall1(string,nc,name,arg1) \ ({ \ long int _arg1 = (long int) (arg1); \ - register long int __g1 __asm__("g1") = (name); \ + long int _name = (long int) (name); \ register long int __o0 __asm__ ("o0") = _arg1; \ - __asm __volatile (string : "=r" (__o0) : \ - "r" (__g1), "0" (__o0) : \ + __asm __volatile (string : "+r" (__o0) : \ + [scn] nc (_name) : \ __SYSCALL_CLOBBERS); \ __o0; \ }) +#define internal_syscall1(string,name,args...) \ + _internal_syscall1(string, "i", name, args) -#define internal_syscall2(string,name,arg1,arg2) \ +#define _internal_syscall2(string,nc,name,arg1,arg2) \ ({ \ long int _arg1 = (long int) (arg1); \ long int _arg2 = (long int) (arg2); \ - register long int __g1 __asm__("g1") = (name); \ + long int _name = (long int) (name); \ register long int __o0 __asm__ ("o0") = _arg1; \ register long int __o1 __asm__ ("o1") = _arg2; \ - __asm __volatile (string : "=r" (__o0) : \ - "r" (__g1), "0" (__o0), "r" (__o1) : \ + __asm __volatile (string : "+r" (__o0) : \ + [scn] nc (_name), "r" (__o1) : \ __SYSCALL_CLOBBERS); \ __o0; \ }) +#define internal_syscall2(string,name,args...) 
\ + _internal_syscall2(string, "i", name, args) -#define internal_syscall3(string,name,arg1,arg2,arg3) \ +#define _internal_syscall3(string,nc,name,arg1,arg2,arg3) \ ({ \ long int _arg1 = (long int) (arg1); \ long int _arg2 = (long int) (arg2); \ long int _arg3 = (long int) (arg3); \ - register long int __g1 __asm__("g1") = (name); \ + long int _name = (long int) (name); \ register long int __o0 __asm__ ("o0") = _arg1; \ register long int __o1 __asm__ ("o1") = _arg2; \ register long int __o2 __asm__ ("o2") = _arg3; \ - __asm __volatile (string : "=r" (__o0) : \ - "r" (__g1), "0" (__o0), "r" (__o1), \ + __asm __volatile (string : "+r" (__o0) : \ + [scn] nc (_name), "r" (__o1), \ "r" (__o2) : \ __SYSCALL_CLOBBERS); \ __o0; \ }) +#define internal_syscall3(string,name,args...) \ + _internal_syscall3(string, "i", name, args) -#define internal_syscall4(string,name,arg1,arg2,arg3,arg4) \ +#define _internal_syscall4(string,nc,name,arg1,arg2,arg3,arg4) \ ({ \ long int _arg1 = (long int) (arg1); \ long int _arg2 = (long int) (arg2); \ long int _arg3 = (long int) (arg3); \ long int _arg4 = (long int) (arg4); \ - register long int __g1 __asm__("g1") = (name); \ + long int _name = (long int) (name); \ register long int __o0 __asm__ ("o0") = _arg1; \ register long int __o1 __asm__ ("o1") = _arg2; \ register long int __o2 __asm__ ("o2") = _arg3; \ register long int __o3 __asm__ ("o3") = _arg4; \ - __asm __volatile (string : "=r" (__o0) : \ - "r" (__g1), "0" (__o0), "r" (__o1), \ + __asm __volatile (string : "+r" (__o0) : \ + [scn] nc (_name), "r" (__o1), \ "r" (__o2), "r" (__o3) : \ __SYSCALL_CLOBBERS); \ __o0; \ }) +#define internal_syscall4(string,name,args...) 
\ + _internal_syscall4(string, "i", name, args) -#define internal_syscall5(string,name,arg1,arg2,arg3,arg4,arg5) \ +#define _internal_syscall5(string,nc,name,arg1,arg2,arg3,arg4,arg5) \ ({ \ long int _arg1 = (long int) (arg1); \ long int _arg2 = (long int) (arg2); \ long int _arg3 = (long int) (arg3); \ long int _arg4 = (long int) (arg4); \ long int _arg5 = (long int) (arg5); \ - register long int __g1 __asm__("g1") = (name); \ + long int _name = (long int) (name); \ register long int __o0 __asm__ ("o0") = _arg1; \ register long int __o1 __asm__ ("o1") = _arg2; \ register long int __o2 __asm__ ("o2") = _arg3; \ register long int __o3 __asm__ ("o3") = _arg4; \ register long int __o4 __asm__ ("o4") = _arg5; \ - __asm __volatile (string : "=r" (__o0) : \ - "r" (__g1), "0" (__o0), "r" (__o1), \ + __asm __volatile (string : "+r" (__o0) : \ + [scn] nc (_name), "r" (__o1), \ "r" (__o2), "r" (__o3), "r" (__o4) : \ __SYSCALL_CLOBBERS); \ __o0; \ }) +#define internal_syscall5(string,name,args...) \ + _internal_syscall5(string, "i", name, args) -#define internal_syscall6(string,name,arg1,arg2,arg3,arg4,arg5,arg6) \ +#define _internal_syscall6(string,nc,name,arg1,arg2,arg3,arg4,arg5,arg6)\ ({ \ long int _arg1 = (long int) (arg1); \ long int _arg2 = (long int) (arg2); \ @@ -148,20 +160,22 @@ long int _arg4 = (long int) (arg4); \ long int _arg5 = (long int) (arg5); \ long int _arg6 = (long int) (arg6); \ - register long int __g1 __asm__("g1") = (name); \ + long int _name = (long int) (name); \ register long int __o0 __asm__ ("o0") = _arg1; \ register long int __o1 __asm__ ("o1") = _arg2; \ register long int __o2 __asm__ ("o2") = _arg3; \ register long int __o3 __asm__ ("o3") = _arg4; \ register long int __o4 __asm__ ("o4") = _arg5; \ register long int __o5 __asm__ ("o5") = _arg6; \ - __asm __volatile (string : "=r" (__o0) : \ - "r" (__g1), "0" (__o0), "r" (__o1), \ + __asm __volatile (string : "+r" (__o0) : \ + [scn] nc (_name), "r" (__o1), \ "r" (__o2), "r" (__o3), "r" (__o4), 
\ "r" (__o5) : \ __SYSCALL_CLOBBERS); \ __o0; \ }) +#define internal_syscall6(string,name,args...) \ + _internal_syscall6(string, "i", name, args) #define INLINE_CLONE_SYSCALL(arg1,arg2,arg3,arg4,arg5) \ ({ \ @@ -170,15 +184,15 @@ long int _arg3 = (long int) (arg3); \ long int _arg4 = (long int) (arg4); \ long int _arg5 = (long int) (arg5); \ + long int _name = __NR_clone; \ register long int __o0 __asm__ ("o0") = _arg1; \ register long int __o1 __asm__ ("o1") = _arg2; \ register long int __o2 __asm__ ("o2") = _arg3; \ register long int __o3 __asm__ ("o3") = _arg4; \ register long int __o4 __asm__ ("o4") = _arg5; \ - register long int __g1 __asm__ ("g1") = __NR_clone; \ __asm __volatile (__SYSCALL_STRING : \ "=r" (__o0), "=r" (__o1) : \ - "r" (__g1), "0" (__o0), "1" (__o1), \ + [scn] "i" (_name), "0" (__o0), "1" (__o1), \ "r" (__o2), "r" (__o3), "r" (__o4) : \ __SYSCALL_CLOBBERS); \ if (__glibc_unlikely ((unsigned long int) (__o0) > -4096UL)) \ diff --git a/sysdeps/unix/sysv/linux/spawni.c b/sysdeps/unix/sysv/linux/spawni.c index f57e92815e..1edf62c6d7 100644 --- a/sysdeps/unix/sysv/linux/spawni.c +++ b/sysdeps/unix/sysv/linux/spawni.c @@ -383,7 +383,11 @@ __spawnix (int *pid, const char *file, args.pidfd = 0; args.xflags = xflags; - internal_signal_block_all (&args.oldmask); + /* Avoid the potential issues if caller sets a SIG_IGN for SIGABRT, calls + abort, and another thread issues posix_spawn just after the sigaction + returns. With default options (not setting POSIX_SPAWN_SETSIGDEF), the + process can still see SIG_DFL for SIGABRT, where it should be SIG_IGN. */ + __abort_lock_rdlock (&args.oldmask); /* The clone flags used will create a new child that will run in the same memory space (CLONE_VM) and the execution of calling thread will be @@ -474,7 +478,7 @@ __spawnix (int *pid, const char *file, if ((ec == 0) && (pid != NULL)) *pid = use_pidfd ? 
args.pidfd : new_pid; - internal_signal_restore_set (&args.oldmask); + __abort_lock_unlock (&args.oldmask); __pthread_setcancelstate (state, NULL); diff --git a/sysdeps/unix/sysv/linux/sys/pidfd.h b/sysdeps/unix/sysv/linux/sys/pidfd.h index 9f88d297e8..85d976939b 100644 --- a/sysdeps/unix/sysv/linux/sys/pidfd.h +++ b/sysdeps/unix/sysv/linux/sys/pidfd.h @@ -20,6 +20,7 @@ #include <fcntl.h> #include <bits/types/siginfo_t.h> +#include <sys/ioctl.h> #define PIDFD_NONBLOCK O_NONBLOCK #define PIDFD_THREAD O_EXCL @@ -28,6 +29,19 @@ #define PIDFD_SIGNAL_THREAD_GROUP (1UL << 1) #define PIDFD_SIGNAL_PROCESS_GROUP (1UL << 2) +#define PIDFS_IOCTL_MAGIC 0xFF + +#define PIDFD_GET_CGROUP_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 1) +#define PIDFD_GET_IPC_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 2) +#define PIDFD_GET_MNT_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 3) +#define PIDFD_GET_NET_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 4) +#define PIDFD_GET_PID_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 5) +#define PIDFD_GET_PID_FOR_CHILDREN_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 6) +#define PIDFD_GET_TIME_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 7) +#define PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 8) +#define PIDFD_GET_USER_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 9) +#define PIDFD_GET_UTS_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 10) + /* Returns a file descriptor that refers to the process PID. The close-on-exec is set on the file descriptor. */ extern int pidfd_open (__pid_t __pid, unsigned int __flags) __THROW; diff --git a/sysdeps/unix/sysv/linux/syscall-names.list b/sysdeps/unix/sysv/linux/syscall-names.list index 7871f93b94..aa5b479e2a 100644 --- a/sysdeps/unix/sysv/linux/syscall-names.list +++ b/sysdeps/unix/sysv/linux/syscall-names.list @@ -21,8 +21,8 @@ # This file can list all potential system calls. The names are only # used if the installed kernel headers also provide them. -# The list of system calls is current as of Linux 6.10. -kernel 6.10 +# The list of system calls is current as of Linux 6.11. 
+kernel 6.11 FAST_atomic_update FAST_cmpxchg @@ -653,6 +653,7 @@ uname unlink unlinkat unshare +uretprobe uselib userfaultfd usr26 diff --git a/sysdeps/unix/sysv/linux/syscalls.list b/sysdeps/unix/sysv/linux/syscalls.list index 9ac42c3436..f1cfe8dc13 100644 --- a/sysdeps/unix/sysv/linux/syscalls.list +++ b/sysdeps/unix/sysv/linux/syscalls.list @@ -102,8 +102,8 @@ name_to_handle_at EXTRA name_to_handle_at i:isppi name_to_handle_at setns EXTRA setns i:ii setns memfd_create EXTRA memfd_create i:si memfd_create -pkey_alloc EXTRA pkey_alloc i:ii pkey_alloc -pkey_free EXTRA pkey_free i:i pkey_free +pkey_alloc EXTRA pkey_alloc i:ii __pkey_alloc pkey_alloc +pkey_free EXTRA pkey_free i:i __pkey_free pkey_free gettid EXTRA gettid Ei: __gettid gettid tgkill EXTRA tgkill i:iii __tgkill tgkill close_range - close_range i:iii __close_range close_range diff --git a/sysdeps/unix/sysv/linux/time.c b/sysdeps/unix/sysv/linux/time.c index f8b0cee41c..a56ef6fa30 100644 --- a/sysdeps/unix/sysv/linux/time.c +++ b/sysdeps/unix/sysv/linux/time.c @@ -33,11 +33,10 @@ time_syscall (time_t *t) } # undef INIT_ARCH -# define INIT_ARCH() \ - void *vdso_time = dl_vdso_vsym (HAVE_TIME_VSYSCALL); +# define INIT_ARCH() libc_ifunc (time, - vdso_time ? VDSO_IFUNC_RET (vdso_time) - : (void *) time_syscall); + GLRO(dl_vdso_time) != NULL ? 
VDSO_IFUNC_RET (GLRO(dl_vdso_time)) + : (void *) time_syscall); # else time_t diff --git a/sysdeps/unix/sysv/linux/tst-mman-consts.py b/sysdeps/unix/sysv/linux/tst-mman-consts.py index a1137eb1d5..2191bea36f 100644 --- a/sysdeps/unix/sysv/linux/tst-mman-consts.py +++ b/sysdeps/unix/sysv/linux/tst-mman-consts.py @@ -33,7 +33,7 @@ def main(): help='C compiler (including options) to use') args = parser.parse_args() linux_version_headers = glibcsyscalls.linux_kernel_version(args.cc) - linux_version_glibc = (6, 10) + linux_version_glibc = (6, 11) sys.exit(glibcextract.compare_macro_consts( '#define _GNU_SOURCE 1\n' '#include <sys/mman.h>\n', diff --git a/sysdeps/unix/sysv/linux/tst-mount-consts.py b/sysdeps/unix/sysv/linux/tst-mount-consts.py index 675f1790b6..b71d8a489d 100755 --- a/sysdeps/unix/sysv/linux/tst-mount-consts.py +++ b/sysdeps/unix/sysv/linux/tst-mount-consts.py @@ -42,7 +42,7 @@ def main(): # Constants in glibc were updated to match Linux v6.10. When glibc # constants are updated this value should be updated to match the # released kernel version from which the constants were taken. 
- linux_version_glibc = (6, 10) + linux_version_glibc = (6, 11) def check(cte, exclude=None): return glibcextract.compare_macro_consts( '#include <sys/mount.h>\n', diff --git a/sysdeps/unix/sysv/linux/tst-pidfd-consts.py b/sysdeps/unix/sysv/linux/tst-pidfd-consts.py index 9824fd214d..9d53102cbd 100644 --- a/sysdeps/unix/sysv/linux/tst-pidfd-consts.py +++ b/sysdeps/unix/sysv/linux/tst-pidfd-consts.py @@ -39,7 +39,7 @@ def main(): sys.exit (77) linux_version_headers = glibcsyscalls.linux_kernel_version(args.cc) - linux_version_glibc = (6, 10) + linux_version_glibc = (6, 11) sys.exit(glibcextract.compare_macro_consts( '#include <sys/pidfd.h>\n', '#include <asm/fcntl.h>\n' diff --git a/sysdeps/unix/sysv/linux/tst-sched-consts.py b/sysdeps/unix/sysv/linux/tst-sched-consts.py index 70071dcd97..f06ac400bd 100644 --- a/sysdeps/unix/sysv/linux/tst-sched-consts.py +++ b/sysdeps/unix/sysv/linux/tst-sched-consts.py @@ -33,7 +33,7 @@ def main(): help='C compiler (including options) to use') args = parser.parse_args() linux_version_headers = glibcsyscalls.linux_kernel_version(args.cc) - linux_version_glibc = (6, 10) + linux_version_glibc = (6, 11) sys.exit(glibcextract.compare_macro_consts( '#define _GNU_SOURCE 1\n' '#include <sched.h>\n', diff --git a/sysdeps/unix/sysv/linux/tst-syscall-restart.c b/sysdeps/unix/sysv/linux/tst-syscall-restart.c new file mode 100644 index 0000000000..0ee7dc8517 --- /dev/null +++ b/sysdeps/unix/sysv/linux/tst-syscall-restart.c @@ -0,0 +1,112 @@ +/* Test if a syscall is correctly restarted. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include <support/xsignal.h> +#include <support/check.h> +#include <support/process_state.h> +#include <support/xunistd.h> +#include <support/xthread.h> +#include <sys/wait.h> + +static int +check_pid (pid_t pid) +{ + /* Wait until the child has called pause and is blocking in the kernel. */ + support_process_state_wait (pid, support_process_state_sleeping); + + TEST_COMPARE (kill (pid, SIGSTOP), 0); + + /* Adding process_state_tracing_stop ('t') allows the test to work under + trace programs such as ptrace. */ + support_process_state_wait (pid, support_process_state_stopped + | support_process_state_tracing_stop); + + TEST_COMPARE (kill (pid, SIGCONT), 0); + + enum support_process_state state + = support_process_state_wait (pid, support_process_state_sleeping + | support_process_state_zombie); + + TEST_COMPARE (state, support_process_state_sleeping); + + TEST_COMPARE (kill (pid, SIGTERM), 0); + + siginfo_t info; + TEST_COMPARE (waitid (P_PID, pid, &info, WEXITED), 0); + TEST_COMPARE (info.si_signo, SIGCHLD); + TEST_COMPARE (info.si_code, CLD_KILLED); + TEST_COMPARE (info.si_status, SIGTERM); + TEST_COMPARE (info.si_pid, pid); + + return 0; +} + +static void * +tf (void *closure) +{ + pause (); + return NULL; +} + +static void +child_mt (void) +{ + /* Let only the created thread handle signals. 
*/ + sigset_t set; + sigfillset (&set); + xpthread_sigmask (SIG_BLOCK, &set, NULL); + + sigdelset (&set, SIGSTOP); + sigdelset (&set, SIGCONT); + sigdelset (&set, SIGTERM); + + pthread_attr_t attr; + xpthread_attr_init (&attr); + TEST_COMPARE (pthread_attr_setsigmask_np (&attr, &set), 0); + + xpthread_join (xpthread_create (&attr, tf, NULL)); +} + +static void +do_test_syscall (bool multithread) +{ + pid_t pid = xfork (); + if (pid == 0) + { + if (multithread) + child_mt (); + else + pause (); + _exit (127); + } + + check_pid (pid); +} + +static int +do_test (void) +{ + /* Check for both single and multi thread, since they use different syscall + mechanisms. */ + do_test_syscall (false); + do_test_syscall (true); + + return 0; +} + +#include <support/test-driver.c> diff --git a/sysdeps/unix/sysv/linux/x86/pkey_get.c b/sysdeps/unix/sysv/linux/x86/pkey_get.c index eb988a5c45..0fb52ba18c 100644 --- a/sysdeps/unix/sysv/linux/x86/pkey_get.c +++ b/sysdeps/unix/sysv/linux/x86/pkey_get.c @@ -18,9 +18,10 @@ #include <arch-pkey.h> #include <errno.h> +#include <sys/mman.h> int -pkey_get (int key) +__pkey_get (int key) { if (key < 0 || key > 15) { @@ -31,3 +32,5 @@ pkey_get (int key) return (pkru >> (2 * key)) & 3; return 0; } +libc_hidden_def (__pkey_get) +weak_alias (__pkey_get, pkey_get) diff --git a/sysdeps/unix/sysv/linux/x86/pkey_set.c b/sysdeps/unix/sysv/linux/x86/pkey_set.c index 3fd2c95580..51e3d68d92 100644 --- a/sysdeps/unix/sysv/linux/x86/pkey_set.c +++ b/sysdeps/unix/sysv/linux/x86/pkey_set.c @@ -18,9 +18,10 @@ #include <arch-pkey.h> #include <errno.h> +#include <sys/mman.h> int -pkey_set (int key, unsigned int rights) +__pkey_set (int key, unsigned int rights) { if (key < 0 || key > 15 || rights > 3) { @@ -33,3 +34,5 @@ pkey_set (int key, unsigned int rights) pkey_write (pkru); return 0; } +libc_hidden_def (__pkey_set) +weak_alias (__pkey_set, pkey_set) diff --git a/sysdeps/unix/sysv/linux/x86_64/64/arch-syscall.h 
b/sysdeps/unix/sysv/linux/x86_64/64/arch-syscall.h index 5d86e75dd5..dfc10d0c7e 100644 --- a/sysdeps/unix/sysv/linux/x86_64/64/arch-syscall.h +++ b/sysdeps/unix/sysv/linux/x86_64/64/arch-syscall.h @@ -359,6 +359,7 @@ #define __NR_unlink 87 #define __NR_unlinkat 263 #define __NR_unshare 272 +#define __NR_uretprobe 335 #define __NR_uselib 134 #define __NR_userfaultfd 323 #define __NR_ustat 136 diff --git a/sysdeps/unix/sysv/linux/x86_64/sysdep.h b/sysdeps/unix/sysv/linux/x86_64/sysdep.h index a2b021bd86..7dc072ae2d 100644 --- a/sysdeps/unix/sysv/linux/x86_64/sysdep.h +++ b/sysdeps/unix/sysv/linux/x86_64/sysdep.h @@ -376,6 +376,7 @@ # define HAVE_TIME_VSYSCALL "__vdso_time" # define HAVE_GETCPU_VSYSCALL "__vdso_getcpu" # define HAVE_CLOCK_GETRES64_VSYSCALL "__vdso_clock_getres" +# define HAVE_GETRANDOM_VSYSCALL "__vdso_getrandom" # define HAVE_CLONE3_WRAPPER 1 diff --git a/sysdeps/unix/sysv/linux/x86_64/x32/arch-syscall.h b/sysdeps/unix/sysv/linux/x86_64/x32/arch-syscall.h index 6c35068d8e..9340daa967 100644 --- a/sysdeps/unix/sysv/linux/x86_64/x32/arch-syscall.h +++ b/sysdeps/unix/sysv/linux/x86_64/x32/arch-syscall.h @@ -351,6 +351,7 @@ #define __NR_unlink 1073741911 #define __NR_unlinkat 1073742087 #define __NR_unshare 1073742096 +#define __NR_uretprobe 1073742159 #define __NR_userfaultfd 1073742147 #define __NR_ustat 1073741960 #define __NR_utime 1073741956 |