From 7fe9e2e089f4990b7d18d0798f591ab276b15f2b Mon Sep 17 00:00:00 2001 From: Florian Weimer Date: Fri, 5 Jun 2015 10:50:38 +0200 Subject: posix_fallocate: Emulation fixes and documentation [BZ #15661] Handle signed integer overflow correctly. Detect and reject O_APPEND. Document drawbacks of emulation. This does not completely address bug 15661, but improves the situation somewhat. --- sysdeps/posix/posix_fallocate.c | 67 ++++++++++++++++++++++++++++----------- sysdeps/posix/posix_fallocate64.c | 67 ++++++++++++++++++++++++++++----------- 2 files changed, 96 insertions(+), 38 deletions(-) (limited to 'sysdeps/posix') diff --git a/sysdeps/posix/posix_fallocate.c b/sysdeps/posix/posix_fallocate.c index d15d60372f..e7fe201b68 100644 --- a/sysdeps/posix/posix_fallocate.c +++ b/sysdeps/posix/posix_fallocate.c @@ -18,26 +18,36 @@ #include #include #include +#include +#include #include #include -/* Reserve storage for the data of the file associated with FD. */ +/* Reserve storage for the data of the file associated with FD. This + emulation is far from perfect, but the kernel cannot do not much + better for network file systems, either. */ int posix_fallocate (int fd, __off_t offset, __off_t len) { struct stat64 st; - struct statfs f; - /* `off_t' is a signed type. Therefore we can determine whether - OFFSET + LEN is too large if it is a negative value. */ if (offset < 0 || len < 0) return EINVAL; - if (offset + len < 0) + + /* Perform overflow check. The outer cast relies on a GCC + extension. */ + if ((__off_t) ((uint64_t) offset) + ((uint64_t) len) < 0) return EFBIG; - /* First thing we have to make sure is that this is really a regular - file. */ + /* pwrite below will not do the right thing in O_APPEND mode. */ + { + int flags = __fcntl (fd, F_GETFL, 0); + if (flags < 0 || (flags & O_APPEND) != 0) + return EBADF; + } + + /* We have to make sure that this is really a regular file. */ if (__fxstat64 (_STAT_VER, fd, &st) != 0) return EBADF; if (S_ISFIFO (st.st_mode)) @@ -47,6 +57,8 @@ posix_fallocate (int fd, __off_t offset, __off_t len) if (len == 0) { + /* This is racy, but there is no good way to satisfy a + zero-length allocation request. */ if (st.st_size < offset) { int ret = __ftruncate (fd, offset); @@ -58,19 +70,36 @@ posix_fallocate (int fd, __off_t offset, __off_t len) return 0; } - /* We have to know the block size of the filesystem to get at least some - sort of performance. */ - if (__fstatfs (fd, &f) != 0) - return errno; - - /* Try to play safe. */ - if (f.f_bsize == 0) - f.f_bsize = 512; - - /* Write something to every block. */ - for (offset += (len - 1) % f.f_bsize; len > 0; offset += f.f_bsize) + /* Minimize data transfer for network file systems, by issuing + single-byte write requests spaced by the file system block size. + (Most local file systems have fallocate support, so this fallback + code is not used there.) */ + + unsigned increment; + { + struct statfs64 f; + + if (__fstatfs64 (fd, &f) != 0) + return errno; + if (f.f_bsize == 0) + increment = 512; + else if (f.f_bsize < 4096) + increment = f.f_bsize; + else + /* NFS does not propagate the block size of the underlying + storage and may report a much larger value which would still + leave holes after the loop below, so we cap the increment at + 4096. */ + increment = 4096; + } + + /* Write a null byte to every block. This is racy; we currently + lack a better option. Compare-and-swap against a file mapping + might additional local races, but requires interposition of a + signal handler to catch SIGBUS. */ + for (offset += (len - 1) % increment; len > 0; offset += increment) { - len -= f.f_bsize; + len -= increment; if (offset < st.st_size) { diff --git a/sysdeps/posix/posix_fallocate64.c b/sysdeps/posix/posix_fallocate64.c index b845df7a80..ee32679a05 100644 --- a/sysdeps/posix/posix_fallocate64.c +++ b/sysdeps/posix/posix_fallocate64.c @@ -18,26 +18,36 @@ #include #include #include +#include +#include #include #include -/* Reserve storage for the data of the file associated with FD. */ +/* Reserve storage for the data of the file associated with FD. This + emulation is far from perfect, but the kernel cannot do not much + better for network file systems, either. */ int __posix_fallocate64_l64 (int fd, __off64_t offset, __off64_t len) { struct stat64 st; - struct statfs64 f; - /* `off64_t' is a signed type. Therefore we can determine whether - OFFSET + LEN is too large if it is a negative value. */ if (offset < 0 || len < 0) return EINVAL; - if (offset + len < 0) + + /* Perform overflow check. The outer cast relies on a GCC + extension. */ + if ((__off64_t) ((uint64_t) offset) + ((uint64_t) len) < 0) return EFBIG; - /* First thing we have to make sure is that this is really a regular - file. */ + /* pwrite64 below will not do the right thing in O_APPEND mode. */ + { + int flags = __fcntl (fd, F_GETFL, 0); + if (flags < 0 || (flags & O_APPEND) != 0) + return EBADF; + } + + /* We have to make sure that this is really a regular file. */ if (__fxstat64 (_STAT_VER, fd, &st) != 0) return EBADF; if (S_ISFIFO (st.st_mode)) @@ -47,6 +57,8 @@ __posix_fallocate64_l64 (int fd, __off64_t offset, __off64_t len) if (len == 0) { + /* This is racy, but there is no good way to satisfy a + zero-length allocation request. */ if (st.st_size < offset) { int ret = __ftruncate64 (fd, offset); @@ -58,19 +70,36 @@ __posix_fallocate64_l64 (int fd, __off64_t offset, __off64_t len) return 0; } - /* We have to know the block size of the filesystem to get at least some - sort of performance. */ - if (__fstatfs64 (fd, &f) != 0) - return errno; - - /* Try to play safe. */ - if (f.f_bsize == 0) - f.f_bsize = 512; - - /* Write something to every block. */ - for (offset += (len - 1) % f.f_bsize; len > 0; offset += f.f_bsize) + /* Minimize data transfer for network file systems, by issuing + single-byte write requests spaced by the file system block size. + (Most local file systems have fallocate support, so this fallback + code is not used there.) */ + + unsigned increment; + { + struct statfs64 f; + + if (__fstatfs64 (fd, &f) != 0) + return errno; + if (f.f_bsize == 0) + increment = 512; + else if (f.f_bsize < 4096) + increment = f.f_bsize; + else + /* NFS clients do not propagate the block size of the underlying + storage and may report a much larger value which would still + leave holes after the loop below, so we cap the increment at + 4096. */ + increment = 4096; + } + + /* Write a null byte to every block. This is racy; we currently + lack a better option. Compare-and-swap against a file mapping + might address local races, but requires interposition of a signal + handler to catch SIGBUS. */ + for (offset += (len - 1) % increment; len > 0; offset += increment) { - len -= f.f_bsize; + len -= increment; if (offset < st.st_size) { -- cgit 1.4.1