Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'vfs-6.10.rw' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs

Pull vfs rw iterator updates from Christian Brauner:
"The core fs signalfd, userfaultfd, and timerfd subsystems did still
use f_op->read() instead of f_op->read_iter(). Convert them over since
we should aim to get rid of f_op->read() at some point.

Aside from that io_uring and others want to mark files as FMODE_NOWAIT
so they can make use of per-IO nonblocking hints to enable more
efficient IO. Converting those users to f_op->read_iter() allows them
to be marked with FMODE_NOWAIT"

* tag 'vfs-6.10.rw' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
signalfd: convert to ->read_iter()
userfaultfd: convert to ->read_iter()
timerfd: convert to ->read_iter()
new helper: copy_to_iter_full()

+93 -50
+28 -16
fs/signalfd.c
··· 68 68 /* 69 69 * Copied from copy_siginfo_to_user() in kernel/signal.c 70 70 */ 71 - static int signalfd_copyinfo(struct signalfd_siginfo __user *uinfo, 72 - kernel_siginfo_t const *kinfo) 71 + static int signalfd_copyinfo(struct iov_iter *to, kernel_siginfo_t const *kinfo) 73 72 { 74 73 struct signalfd_siginfo new; 75 74 ··· 145 146 break; 146 147 } 147 148 148 - if (copy_to_user(uinfo, &new, sizeof(struct signalfd_siginfo))) 149 + if (!copy_to_iter_full(&new, sizeof(struct signalfd_siginfo), to)) 149 150 return -EFAULT; 150 151 151 - return sizeof(*uinfo); 152 + return sizeof(struct signalfd_siginfo); 152 153 } 153 154 154 155 static ssize_t signalfd_dequeue(struct signalfd_ctx *ctx, kernel_siginfo_t *info, ··· 198 199 * error code. The "count" parameter must be at least the size of a 199 200 * "struct signalfd_siginfo". 200 201 */ 201 - static ssize_t signalfd_read(struct file *file, char __user *buf, size_t count, 202 - loff_t *ppos) 202 + static ssize_t signalfd_read_iter(struct kiocb *iocb, struct iov_iter *to) 203 203 { 204 + struct file *file = iocb->ki_filp; 204 205 struct signalfd_ctx *ctx = file->private_data; 205 - struct signalfd_siginfo __user *siginfo; 206 - int nonblock = file->f_flags & O_NONBLOCK; 206 + size_t count = iov_iter_count(to); 207 207 ssize_t ret, total = 0; 208 208 kernel_siginfo_t info; 209 + bool nonblock; 209 210 210 211 count /= sizeof(struct signalfd_siginfo); 211 212 if (!count) 212 213 return -EINVAL; 213 214 214 - siginfo = (struct signalfd_siginfo __user *) buf; 215 + nonblock = file->f_flags & O_NONBLOCK || iocb->ki_flags & IOCB_NOWAIT; 215 216 do { 216 217 ret = signalfd_dequeue(ctx, &info, nonblock); 217 218 if (unlikely(ret <= 0)) 218 219 break; 219 - ret = signalfd_copyinfo(siginfo, &info); 220 + ret = signalfd_copyinfo(to, &info); 220 221 if (ret < 0) 221 222 break; 222 - siginfo++; 223 223 total += ret; 224 224 nonblock = 1; 225 225 } while (--count); ··· 244 246 #endif 245 247 .release = signalfd_release, 246 248 
.poll = signalfd_poll, 247 - .read = signalfd_read, 249 + .read_iter = signalfd_read_iter, 248 250 .llseek = noop_llseek, 249 251 }; 250 252 ··· 263 265 signotset(mask); 264 266 265 267 if (ufd == -1) { 268 + struct file *file; 269 + 266 270 ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); 267 271 if (!ctx) 268 272 return -ENOMEM; 269 273 270 274 ctx->sigmask = *mask; 271 275 276 + ufd = get_unused_fd_flags(flags & O_CLOEXEC); 277 + if (ufd < 0) { 278 + kfree(ctx); 279 + return ufd; 280 + } 281 + 282 + file = anon_inode_getfile("[signalfd]", &signalfd_fops, ctx, 283 + O_RDWR | (flags & O_NONBLOCK)); 284 + if (IS_ERR(file)) { 285 + put_unused_fd(ufd); 286 + kfree(ctx); 287 + return ufd; 288 + } 289 + file->f_mode |= FMODE_NOWAIT; 290 + 272 291 /* 273 292 * When we call this, the initialization must be complete, since 274 293 * anon_inode_getfd() will install the fd. 275 294 */ 276 - ufd = anon_inode_getfd("[signalfd]", &signalfd_fops, ctx, 277 - O_RDWR | (flags & (O_CLOEXEC | O_NONBLOCK))); 278 - if (ufd < 0) 279 - kfree(ctx); 295 + fd_install(ufd, file); 280 296 } else { 281 297 struct fd f = fdget(ufd); 282 298 if (!f.file)
+26 -10
fs/timerfd.c
··· 262 262 return events; 263 263 } 264 264 265 - static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count, 266 - loff_t *ppos) 265 + static ssize_t timerfd_read_iter(struct kiocb *iocb, struct iov_iter *to) 267 266 { 267 + struct file *file = iocb->ki_filp; 268 268 struct timerfd_ctx *ctx = file->private_data; 269 269 ssize_t res; 270 270 u64 ticks = 0; 271 271 272 - if (count < sizeof(ticks)) 272 + if (iov_iter_count(to) < sizeof(ticks)) 273 273 return -EINVAL; 274 + 274 275 spin_lock_irq(&ctx->wqh.lock); 275 - if (file->f_flags & O_NONBLOCK) 276 + if (file->f_flags & O_NONBLOCK || iocb->ki_flags & IOCB_NOWAIT) 276 277 res = -EAGAIN; 277 278 else 278 279 res = wait_event_interruptible_locked_irq(ctx->wqh, ctx->ticks); ··· 313 312 ctx->ticks = 0; 314 313 } 315 314 spin_unlock_irq(&ctx->wqh.lock); 316 - if (ticks) 317 - res = put_user(ticks, (u64 __user *) buf) ? -EFAULT: sizeof(ticks); 315 + if (ticks) { 316 + res = copy_to_iter(&ticks, sizeof(ticks), to); 317 + if (!res) 318 + res = -EFAULT; 319 + } 318 320 return res; 319 321 } 320 322 ··· 388 384 static const struct file_operations timerfd_fops = { 389 385 .release = timerfd_release, 390 386 .poll = timerfd_poll, 391 - .read = timerfd_read, 387 + .read_iter = timerfd_read_iter, 392 388 .llseek = noop_llseek, 393 389 .show_fdinfo = timerfd_show, 394 390 .unlocked_ioctl = timerfd_ioctl, ··· 411 407 { 412 408 int ufd; 413 409 struct timerfd_ctx *ctx; 410 + struct file *file; 414 411 415 412 /* Check the TFD_* constants for consistency. 
*/ 416 413 BUILD_BUG_ON(TFD_CLOEXEC != O_CLOEXEC); ··· 448 443 449 444 ctx->moffs = ktime_mono_to_real(0); 450 445 451 - ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx, 452 - O_RDWR | (flags & TFD_SHARED_FCNTL_FLAGS)); 453 - if (ufd < 0) 446 + ufd = get_unused_fd_flags(flags & TFD_SHARED_FCNTL_FLAGS); 447 + if (ufd < 0) { 454 448 kfree(ctx); 449 + return ufd; 450 + } 455 451 452 + file = anon_inode_getfile("[timerfd]", &timerfd_fops, ctx, 453 + O_RDWR | (flags & TFD_SHARED_FCNTL_FLAGS)); 454 + if (IS_ERR(file)) { 455 + put_unused_fd(ufd); 456 + kfree(ctx); 457 + return PTR_ERR(file); 458 + } 459 + 460 + file->f_mode |= FMODE_NOWAIT; 461 + fd_install(ufd, file); 456 462 return ufd; 457 463 } 458 464
+28 -16
fs/userfaultfd.c
··· 31 31 #include <linux/hugetlb.h> 32 32 #include <linux/swapops.h> 33 33 #include <linux/miscdevice.h> 34 + #include <linux/uio.h> 34 35 35 36 static int sysctl_unprivileged_userfaultfd __read_mostly; 36 37 ··· 283 282 /* 284 283 * Verify the pagetables are still not ok after having reigstered into 285 284 * the fault_pending_wqh to avoid userland having to UFFDIO_WAKE any 286 - * userfault that has already been resolved, if userfaultfd_read and 285 + * userfault that has already been resolved, if userfaultfd_read_iter and 287 286 * UFFDIO_COPY|ZEROPAGE are being run simultaneously on two different 288 287 * threads. 289 288 */ ··· 1182 1181 return ret; 1183 1182 } 1184 1183 1185 - static ssize_t userfaultfd_read(struct file *file, char __user *buf, 1186 - size_t count, loff_t *ppos) 1184 + static ssize_t userfaultfd_read_iter(struct kiocb *iocb, struct iov_iter *to) 1187 1185 { 1186 + struct file *file = iocb->ki_filp; 1188 1187 struct userfaultfd_ctx *ctx = file->private_data; 1189 1188 ssize_t _ret, ret = 0; 1190 1189 struct uffd_msg msg; 1191 - int no_wait = file->f_flags & O_NONBLOCK; 1192 1190 struct inode *inode = file_inode(file); 1191 + bool no_wait; 1193 1192 1194 1193 if (!userfaultfd_is_initialized(ctx)) 1195 1194 return -EINVAL; 1196 1195 1196 + no_wait = file->f_flags & O_NONBLOCK || iocb->ki_flags & IOCB_NOWAIT; 1197 1197 for (;;) { 1198 - if (count < sizeof(msg)) 1198 + if (iov_iter_count(to) < sizeof(msg)) 1199 1199 return ret ? ret : -EINVAL; 1200 1200 _ret = userfaultfd_ctx_read(ctx, no_wait, &msg, inode); 1201 1201 if (_ret < 0) 1202 1202 return ret ? ret : _ret; 1203 - if (copy_to_user((__u64 __user *) buf, &msg, sizeof(msg))) 1203 + _ret = !copy_to_iter_full(&msg, sizeof(msg), to); 1204 + if (_ret) 1204 1205 return ret ? 
ret : -EFAULT; 1205 1206 ret += sizeof(msg); 1206 - buf += sizeof(msg); 1207 - count -= sizeof(msg); 1208 1207 /* 1209 1208 * Allow to read more than one fault at time but only 1210 1209 * block if waiting for the very first one. 1211 1210 */ 1212 - no_wait = O_NONBLOCK; 1211 + no_wait = true; 1213 1212 } 1214 1213 } 1215 1214 ··· 2177 2176 #endif 2178 2177 .release = userfaultfd_release, 2179 2178 .poll = userfaultfd_poll, 2180 - .read = userfaultfd_read, 2179 + .read_iter = userfaultfd_read_iter, 2181 2180 .unlocked_ioctl = userfaultfd_ioctl, 2182 2181 .compat_ioctl = compat_ptr_ioctl, 2183 2182 .llseek = noop_llseek, ··· 2197 2196 static int new_userfaultfd(int flags) 2198 2197 { 2199 2198 struct userfaultfd_ctx *ctx; 2199 + struct file *file; 2200 2200 int fd; 2201 2201 2202 2202 BUG_ON(!current->mm); ··· 2221 2219 init_rwsem(&ctx->map_changing_lock); 2222 2220 atomic_set(&ctx->mmap_changing, 0); 2223 2221 ctx->mm = current->mm; 2224 - /* prevent the mm struct to be freed */ 2225 - mmgrab(ctx->mm); 2222 + 2223 + fd = get_unused_fd_flags(flags & UFFD_SHARED_FCNTL_FLAGS); 2224 + if (fd < 0) 2225 + goto err_out; 2226 2226 2227 2227 /* Create a new inode so that the LSM can block the creation. */ 2228 - fd = anon_inode_create_getfd("[userfaultfd]", &userfaultfd_fops, ctx, 2228 + file = anon_inode_create_getfile("[userfaultfd]", &userfaultfd_fops, ctx, 2229 2229 O_RDONLY | (flags & UFFD_SHARED_FCNTL_FLAGS), NULL); 2230 - if (fd < 0) { 2231 - mmdrop(ctx->mm); 2232 - kmem_cache_free(userfaultfd_ctx_cachep, ctx); 2230 + if (IS_ERR(file)) { 2231 + put_unused_fd(fd); 2232 + fd = PTR_ERR(file); 2233 + goto err_out; 2233 2234 } 2235 + /* prevent the mm struct to be freed */ 2236 + mmgrab(ctx->mm); 2237 + file->f_mode |= FMODE_NOWAIT; 2238 + fd_install(fd, file); 2239 + return fd; 2240 + err_out: 2241 + kmem_cache_free(userfaultfd_ctx_cachep, ctx); 2234 2242 return fd; 2235 2243 } 2236 2244
+10
include/linux/uio.h
··· 206 206 } 207 207 208 208 static __always_inline __must_check 209 + bool copy_to_iter_full(const void *addr, size_t bytes, struct iov_iter *i) 210 + { 211 + size_t copied = copy_to_iter(addr, bytes, i); 212 + if (likely(copied == bytes)) 213 + return true; 214 + iov_iter_revert(i, copied); 215 + return false; 216 + } 217 + 218 + static __always_inline __must_check 209 219 bool copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i) 210 220 { 211 221 size_t copied = copy_from_iter(addr, bytes, i);
+1 -8
include/net/udp.h
··· 379 379 static inline int copy_linear_skb(struct sk_buff *skb, int len, int off, 380 380 struct iov_iter *to) 381 381 { 382 - int n; 383 - 384 - n = copy_to_iter(skb->data + off, len, to); 385 - if (n == len) 386 - return 0; 387 - 388 - iov_iter_revert(to, n); 389 - return -EFAULT; 382 + return copy_to_iter_full(skb->data + off, len, to) ? 0 : -EFAULT; 390 383 } 391 384 392 385 /*