Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

Merge branch 'work.aio-1' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs

Pull aio updates from Al Viro:
"Majority of AIO stuff this cycle. aio-fsync and aio-poll, mostly.

The only thing I'm holding back for a day or so is Adam's aio ioprio -
his last-minute fixup is trivial (missing stub in !CONFIG_BLOCK case),
but let it sit in -next for decency sake..."

* 'work.aio-1' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs: (46 commits)
aio: sanitize the limit checking in io_submit(2)
aio: fold do_io_submit() into callers
aio: shift copyin of iocb into io_submit_one()
aio_read_events_ring(): make a bit more readable
aio: all callers of aio_{read,write,fsync,poll} treat 0 and -EIOCBQUEUED the same way
aio: take list removal to (some) callers of aio_complete()
aio: add missing break for the IOCB_CMD_FDSYNC case
random: convert to ->poll_mask
timerfd: convert to ->poll_mask
eventfd: switch to ->poll_mask
pipe: convert to ->poll_mask
crypto: af_alg: convert to ->poll_mask
net/rxrpc: convert to ->poll_mask
net/iucv: convert to ->poll_mask
net/phonet: convert to ->poll_mask
net/nfc: convert to ->poll_mask
net/caif: convert to ->poll_mask
net/bluetooth: convert to ->poll_mask
net/sctp: convert to ->poll_mask
net/tipc: convert to ->poll_mask
...

+891 -637
+7 -2
Documentation/filesystems/Locking
···
 	ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
 	int (*iterate) (struct file *, struct dir_context *);
 	int (*iterate_shared) (struct file *, struct dir_context *);
-	unsigned int (*poll) (struct file *, struct poll_table_struct *);
+	__poll_t (*poll) (struct file *, struct poll_table_struct *);
+	struct wait_queue_head * (*get_poll_head)(struct file *, __poll_t);
+	__poll_t (*poll_mask) (struct file *, __poll_t);
 	long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
 	long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
 	int (*mmap) (struct file *, struct vm_area_struct *);
···
 };
 
 locking rules:
-	All may block.
+	All except for ->poll_mask may block.
 
 ->llseek() locking has moved from llseek to the individual llseek
 implementations.  If your fs is not using generic_file_llseek, you
···
 ->setlease operations should call generic_setlease() before or after setting
 the lease within the individual filesystem to record the result of the
 operation
+
+->poll_mask can be called with or without the waitqueue lock for the waitqueue
+returned from ->get_poll_head.
 
 --------------------------- dquot_operations -------------------------------
 prototypes:
+14 -1
Documentation/filesystems/vfs.txt
···
 	ssize_t (*read_iter) (struct kiocb *, struct iov_iter *);
 	ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
 	int (*iterate) (struct file *, struct dir_context *);
-	unsigned int (*poll) (struct file *, struct poll_table_struct *);
+	__poll_t (*poll) (struct file *, struct poll_table_struct *);
+	struct wait_queue_head * (*get_poll_head)(struct file *, __poll_t);
+	__poll_t (*poll_mask) (struct file *, __poll_t);
 	long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
 	long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
 	int (*mmap) (struct file *, struct vm_area_struct *);
···
   poll: called by the VFS when a process wants to check if there is
 	activity on this file and (optionally) go to sleep until there
 	is activity. Called by the select(2) and poll(2) system calls
+
+  get_poll_head: Returns the struct wait_queue_head that callers can
+  wait on.  Callers need to check the returned events using ->poll_mask
+  once woken.  Can return NULL to indicate polling is not supported,
+  or any error code using the ERR_PTR convention to indicate that a
+  grave error occurred and ->poll_mask shall not be called.
+
+  poll_mask: return the mask of EPOLL* values describing the file descriptor
+  state.  Called either before going to sleep on the waitqueue returned by
+  get_poll_head, or after it has been woken.  If ->get_poll_head and
+  ->poll_mask are implemented, ->poll does not need to be implemented.
 
   unlocked_ioctl: called by the ioctl(2) system call.
 
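The split documented above is easiest to see on a minimal driver. Below is a sketch, with invented foo_* names that are not part of the patch, of what the two new operations look like for a device with a single waitqueue; it mirrors the eventfd and timerfd conversions further down in this series:

#include <linux/fs.h>
#include <linux/module.h>
#include <linux/poll.h>
#include <linux/wait.h>

static DECLARE_WAIT_QUEUE_HEAD(foo_wait);
static bool foo_data_ready;	/* set by the device's irq path (not shown) */

/* Step 1: return the waitqueue for the caller to sleep on.  No
 * poll_wait() call here - the VFS registers itself. */
static struct wait_queue_head *foo_get_poll_head(struct file *file,
		__poll_t events)
{
	return &foo_wait;
}

/* Step 2: report current readiness.  Must not block, since this can
 * be called from the wakeup path with the waitqueue lock held. */
static __poll_t foo_poll_mask(struct file *file, __poll_t events)
{
	return foo_data_ready ? EPOLLIN | EPOLLRDNORM : 0;
}

static const struct file_operations foo_fops = {
	.owner		= THIS_MODULE,
	.get_poll_head	= foo_get_poll_head,
	.poll_mask	= foo_poll_mask,
	/* no .poll needed once both new ops are present */
};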
+1
arch/x86/entry/syscalls/syscall_32.tbl
···
 382	i386	pkey_free		sys_pkey_free			__ia32_sys_pkey_free
 383	i386	statx			sys_statx			__ia32_sys_statx
 384	i386	arch_prctl		sys_arch_prctl			__ia32_compat_sys_arch_prctl
+385	i386	io_pgetevents		sys_io_pgetevents		__ia32_compat_sys_io_pgetevents
+1
arch/x86/entry/syscalls/syscall_64.tbl
···
 330	common	pkey_alloc		__x64_sys_pkey_alloc
 331	common	pkey_free		__x64_sys_pkey_free
 332	common	statx			__x64_sys_statx
+333	common	io_pgetevents		__x64_sys_io_pgetevents
 
 #
 # x32-specific system call numbers start at 512 to avoid cache impact
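With the table entries in place, userspace can reach the new syscall. A hedged usage sketch follows: it assumes kernel headers new enough to define __NR_io_pgetevents, mirrors the kernel's struct __aio_sigset layout by hand, and elides error handling. The semantics are ppoll(2)-style - the signal mask is installed atomically for the duration of the wait:

#include <linux/aio_abi.h>
#include <signal.h>
#include <time.h>
#include <sys/syscall.h>
#include <unistd.h>

struct aio_sigset {			/* mirrors the kernel's __aio_sigset */
	const sigset_t *sigmask;
	size_t sigsetsize;
};

static long wait_for_aio_events(aio_context_t ctx, struct io_event *events,
				long nr)
{
	struct timespec ts = { .tv_sec = 1 };
	sigset_t mask;
	struct aio_sigset sig;

	sigfillset(&mask);
	sigdelset(&mask, SIGUSR1);	/* only SIGUSR1 may interrupt the wait */
	sig.sigmask = &mask;
	/* the kernel's sigset size (_NSIG/8 == 8 on x86), not glibc's
	 * much larger sizeof(sigset_t) - anything else gets -EINVAL */
	sig.sigsetsize = 8;

	/* nr 385 on i386, 333 on x86-64, per the table entries above */
	return syscall(__NR_io_pgetevents, ctx, 1L, nr, events, &ts, &sig);
}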
+3 -11
crypto/af_alg.c
··· 347 347 .sendpage = sock_no_sendpage, 348 348 .sendmsg = sock_no_sendmsg, 349 349 .recvmsg = sock_no_recvmsg, 350 - .poll = sock_no_poll, 351 350 352 351 .bind = alg_bind, 353 352 .release = af_alg_release, ··· 1060 1061 } 1061 1062 EXPORT_SYMBOL_GPL(af_alg_async_cb); 1062 1063 1063 - /** 1064 - * af_alg_poll - poll system call handler 1065 - */ 1066 - __poll_t af_alg_poll(struct file *file, struct socket *sock, 1067 - poll_table *wait) 1064 + __poll_t af_alg_poll_mask(struct socket *sock, __poll_t events) 1068 1065 { 1069 1066 struct sock *sk = sock->sk; 1070 1067 struct alg_sock *ask = alg_sk(sk); 1071 1068 struct af_alg_ctx *ctx = ask->private; 1072 - __poll_t mask; 1073 - 1074 - sock_poll_wait(file, sk_sleep(sk), wait); 1075 - mask = 0; 1069 + __poll_t mask = 0; 1076 1070 1077 1071 if (!ctx->more || ctx->used) 1078 1072 mask |= EPOLLIN | EPOLLRDNORM; ··· 1075 1083 1076 1084 return mask; 1077 1085 } 1078 - EXPORT_SYMBOL_GPL(af_alg_poll); 1086 + EXPORT_SYMBOL_GPL(af_alg_poll_mask); 1079 1087 1080 1088 /** 1081 1089 * af_alg_alloc_areq - allocate struct af_alg_async_req
+2 -2
crypto/algif_aead.c
··· 375 375 .sendmsg = aead_sendmsg, 376 376 .sendpage = af_alg_sendpage, 377 377 .recvmsg = aead_recvmsg, 378 - .poll = af_alg_poll, 378 + .poll_mask = af_alg_poll_mask, 379 379 }; 380 380 381 381 static int aead_check_key(struct socket *sock) ··· 471 471 .sendmsg = aead_sendmsg_nokey, 472 472 .sendpage = aead_sendpage_nokey, 473 473 .recvmsg = aead_recvmsg_nokey, 474 - .poll = af_alg_poll, 474 + .poll_mask = af_alg_poll_mask, 475 475 }; 476 476 477 477 static void *aead_bind(const char *name, u32 type, u32 mask)
-2
crypto/algif_hash.c
··· 288 288 .mmap = sock_no_mmap, 289 289 .bind = sock_no_bind, 290 290 .setsockopt = sock_no_setsockopt, 291 - .poll = sock_no_poll, 292 291 293 292 .release = af_alg_release, 294 293 .sendmsg = hash_sendmsg, ··· 395 396 .mmap = sock_no_mmap, 396 397 .bind = sock_no_bind, 397 398 .setsockopt = sock_no_setsockopt, 398 - .poll = sock_no_poll, 399 399 400 400 .release = af_alg_release, 401 401 .sendmsg = hash_sendmsg_nokey,
-1
crypto/algif_rng.c
··· 106 106 .bind = sock_no_bind, 107 107 .accept = sock_no_accept, 108 108 .setsockopt = sock_no_setsockopt, 109 - .poll = sock_no_poll, 110 109 .sendmsg = sock_no_sendmsg, 111 110 .sendpage = sock_no_sendpage, 112 111
+2 -2
crypto/algif_skcipher.c
··· 205 205 .sendmsg = skcipher_sendmsg, 206 206 .sendpage = af_alg_sendpage, 207 207 .recvmsg = skcipher_recvmsg, 208 - .poll = af_alg_poll, 208 + .poll_mask = af_alg_poll_mask, 209 209 }; 210 210 211 211 static int skcipher_check_key(struct socket *sock) ··· 301 301 .sendmsg = skcipher_sendmsg_nokey, 302 302 .sendpage = skcipher_sendpage_nokey, 303 303 .recvmsg = skcipher_recvmsg_nokey, 304 - .poll = af_alg_poll, 304 + .poll_mask = af_alg_poll_mask, 305 305 }; 306 306 307 307 static void *skcipher_bind(const char *name, u32 type, u32 mask)
+17 -14
drivers/char/random.c
··· 402 402 /* 403 403 * Static global variables 404 404 */ 405 - static DECLARE_WAIT_QUEUE_HEAD(random_read_wait); 406 - static DECLARE_WAIT_QUEUE_HEAD(random_write_wait); 405 + static DECLARE_WAIT_QUEUE_HEAD(random_wait); 407 406 static struct fasync_struct *fasync; 408 407 409 408 static DEFINE_SPINLOCK(random_ready_list_lock); ··· 721 722 722 723 /* should we wake readers? */ 723 724 if (entropy_bits >= random_read_wakeup_bits && 724 - wq_has_sleeper(&random_read_wait)) { 725 - wake_up_interruptible(&random_read_wait); 725 + wq_has_sleeper(&random_wait)) { 726 + wake_up_interruptible_poll(&random_wait, POLLIN); 726 727 kill_fasync(&fasync, SIGIO, POLL_IN); 727 728 } 728 729 /* If the input pool is getting full, send some ··· 1396 1397 trace_debit_entropy(r->name, 8 * ibytes); 1397 1398 if (ibytes && 1398 1399 (r->entropy_count >> ENTROPY_SHIFT) < random_write_wakeup_bits) { 1399 - wake_up_interruptible(&random_write_wait); 1400 + wake_up_interruptible_poll(&random_wait, POLLOUT); 1400 1401 kill_fasync(&fasync, SIGIO, POLL_OUT); 1401 1402 } 1402 1403 ··· 1838 1839 if (nonblock) 1839 1840 return -EAGAIN; 1840 1841 1841 - wait_event_interruptible(random_read_wait, 1842 + wait_event_interruptible(random_wait, 1842 1843 ENTROPY_BITS(&input_pool) >= 1843 1844 random_read_wakeup_bits); 1844 1845 if (signal_pending(current)) ··· 1875 1876 return ret; 1876 1877 } 1877 1878 1878 - static __poll_t 1879 - random_poll(struct file *file, poll_table * wait) 1879 + static struct wait_queue_head * 1880 + random_get_poll_head(struct file *file, __poll_t events) 1880 1881 { 1881 - __poll_t mask; 1882 + return &random_wait; 1883 + } 1882 1884 1883 - poll_wait(file, &random_read_wait, wait); 1884 - poll_wait(file, &random_write_wait, wait); 1885 - mask = 0; 1885 + static __poll_t 1886 + random_poll_mask(struct file *file, __poll_t events) 1887 + { 1888 + __poll_t mask = 0; 1889 + 1886 1890 if (ENTROPY_BITS(&input_pool) >= random_read_wakeup_bits) 1887 1891 mask |= EPOLLIN | EPOLLRDNORM; 1888 1892 if (ENTROPY_BITS(&input_pool) < random_write_wakeup_bits) ··· 1992 1990 const struct file_operations random_fops = { 1993 1991 .read = random_read, 1994 1992 .write = random_write, 1995 - .poll = random_poll, 1993 + .get_poll_head = random_get_poll_head, 1994 + .poll_mask = random_poll_mask, 1996 1995 .unlocked_ioctl = random_ioctl, 1997 1996 .fasync = random_fasync, 1998 1997 .llseek = noop_llseek, ··· 2326 2323 * We'll be woken up again once below random_write_wakeup_thresh, 2327 2324 * or when the calling thread is about to terminate. 2328 2325 */ 2329 - wait_event_interruptible(random_write_wait, kthread_should_stop() || 2326 + wait_event_interruptible(random_wait, kthread_should_stop() || 2330 2327 ENTROPY_BITS(&input_pool) <= random_write_wakeup_bits); 2331 2328 mix_pool_bytes(poolp, buffer, count); 2332 2329 credit_entropy_bits(poolp, entropy);
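Merging the two waitqueues into one is safe because the wakeups are now keyed. A minimal sketch of the mechanism the patch relies on (demo_* names are invented for illustration):

#include <linux/poll.h>
#include <linux/wait.h>

static DECLARE_WAIT_QUEUE_HEAD(demo_wait);	/* one queue, both directions */

static void demo_data_arrived(void)
{
	/*
	 * The second argument travels to the waiters as the wakeup key.
	 * Key-aware wait entries (epoll's callback, aio_poll_wake() in
	 * fs/aio.c below) use key_to_poll() to skip wakeups that do not
	 * intersect their interest mask, so a waiter registered for
	 * POLLOUT is not disturbed here.  Plain wait_event() sleepers
	 * ignore the key, wake, recheck their condition and sleep again.
	 */
	wake_up_interruptible_poll(&demo_wait, POLLIN);
}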
+1 -2
drivers/isdn/mISDN/socket.c
··· 588 588 .getname = data_sock_getname, 589 589 .sendmsg = mISDN_sock_sendmsg, 590 590 .recvmsg = mISDN_sock_recvmsg, 591 - .poll = datagram_poll, 591 + .poll_mask = datagram_poll_mask, 592 592 .listen = sock_no_listen, 593 593 .shutdown = sock_no_shutdown, 594 594 .setsockopt = data_sock_setsockopt, ··· 745 745 .getname = sock_no_getname, 746 746 .sendmsg = sock_no_sendmsg, 747 747 .recvmsg = sock_no_recvmsg, 748 - .poll = sock_no_poll, 749 748 .listen = sock_no_listen, 750 749 .shutdown = sock_no_shutdown, 751 750 .setsockopt = sock_no_setsockopt,
+1 -1
drivers/net/ppp/pppoe.c
··· 1107 1107 .socketpair = sock_no_socketpair, 1108 1108 .accept = sock_no_accept, 1109 1109 .getname = pppoe_getname, 1110 - .poll = datagram_poll, 1110 + .poll_mask = datagram_poll_mask, 1111 1111 .listen = sock_no_listen, 1112 1112 .shutdown = sock_no_shutdown, 1113 1113 .setsockopt = sock_no_setsockopt,
-1
drivers/net/ppp/pptp.c
··· 624 624 .socketpair = sock_no_socketpair, 625 625 .accept = sock_no_accept, 626 626 .getname = pptp_getname, 627 - .poll = sock_no_poll, 628 627 .listen = sock_no_listen, 629 628 .shutdown = sock_no_shutdown, 630 629 .setsockopt = sock_no_setsockopt,
+2 -2
drivers/staging/comedi/drivers/serial2002.c
··· 113 113 long elapsed; 114 114 __poll_t mask; 115 115 116 - mask = f->f_op->poll(f, &table.pt); 116 + mask = vfs_poll(f, &table.pt); 117 117 if (mask & (EPOLLRDNORM | EPOLLRDBAND | EPOLLIN | 118 118 EPOLLHUP | EPOLLERR)) { 119 119 break; ··· 136 136 137 137 result = -1; 138 138 if (!IS_ERR(f)) { 139 - if (f->f_op->poll) { 139 + if (file_can_poll(f)) { 140 140 serial2002_tty_read_poll_wait(f, timeout); 141 141 142 142 if (kernel_read(f, &ch, 1, &pos) == 1)
+1 -1
drivers/staging/ipx/af_ipx.c
··· 1965 1965 .socketpair = sock_no_socketpair, 1966 1966 .accept = sock_no_accept, 1967 1967 .getname = ipx_getname, 1968 - .poll = datagram_poll, 1968 + .poll_mask = datagram_poll_mask, 1969 1969 .ioctl = ipx_ioctl, 1970 1970 #ifdef CONFIG_COMPAT 1971 1971 .compat_ioctl = ipx_compat_ioctl,
+1 -1
drivers/vfio/virqfd.c
··· 166 166 init_waitqueue_func_entry(&virqfd->wait, virqfd_wakeup); 167 167 init_poll_funcptr(&virqfd->pt, virqfd_ptable_queue_proc); 168 168 169 - events = irqfd.file->f_op->poll(irqfd.file, &virqfd->pt); 169 + events = vfs_poll(irqfd.file, &virqfd->pt); 170 170 171 171 /* 172 172 * Check if there was an event already pending on the eventfd
+1 -1
drivers/vhost/vhost.c
··· 208 208 if (poll->wqh) 209 209 return 0; 210 210 211 - mask = file->f_op->poll(file, &poll->table); 211 + mask = vfs_poll(file, &poll->table); 212 212 if (mask) 213 213 vhost_poll_wakeup(&poll->wait, 0, 0, poll_to_key(mask)); 214 214 if (mask & EPOLLERR) {
+532 -279
fs/aio.c
··· 5 5 * Implements an efficient asynchronous io interface. 6 6 * 7 7 * Copyright 2000, 2001, 2002 Red Hat, Inc. All Rights Reserved. 8 + * Copyright 2018 Christoph Hellwig. 8 9 * 9 10 * See ../COPYING for licensing terms. 10 11 */ ··· 46 45 #include <linux/uaccess.h> 47 46 48 47 #include "internal.h" 48 + 49 + #define KIOCB_KEY 0 49 50 50 51 #define AIO_RING_MAGIC 0xa10a10a1 51 52 #define AIO_RING_COMPAT_FEATURES 1 ··· 159 156 unsigned id; 160 157 }; 161 158 162 - /* 163 - * We use ki_cancel == KIOCB_CANCELLED to indicate that a kiocb has been either 164 - * cancelled or completed (this makes a certain amount of sense because 165 - * successful cancellation - io_cancel() - does deliver the completion to 166 - * userspace). 167 - * 168 - * And since most things don't implement kiocb cancellation and we'd really like 169 - * kiocb completion to be lockless when possible, we use ki_cancel to 170 - * synchronize cancellation and completion - we only set it to KIOCB_CANCELLED 171 - * with xchg() or cmpxchg(), see batch_complete_aio() and kiocb_cancel(). 172 - */ 173 - #define KIOCB_CANCELLED ((void *) (~0ULL)) 159 + struct fsync_iocb { 160 + struct work_struct work; 161 + struct file *file; 162 + bool datasync; 163 + }; 164 + 165 + struct poll_iocb { 166 + struct file *file; 167 + __poll_t events; 168 + struct wait_queue_head *head; 169 + 170 + union { 171 + struct wait_queue_entry wait; 172 + struct work_struct work; 173 + }; 174 + }; 174 175 175 176 struct aio_kiocb { 176 - struct kiocb common; 177 + union { 178 + struct kiocb rw; 179 + struct fsync_iocb fsync; 180 + struct poll_iocb poll; 181 + }; 177 182 178 183 struct kioctx *ki_ctx; 179 184 kiocb_cancel_fn *ki_cancel; ··· 275 264 276 265 kiocb_cachep = KMEM_CACHE(aio_kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC); 277 266 kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC); 278 - 279 - pr_debug("sizeof(struct page) = %zu\n", sizeof(struct page)); 280 - 281 267 return 0; 282 268 } 283 269 __initcall(aio_setup); ··· 560 552 561 553 void kiocb_set_cancel_fn(struct kiocb *iocb, kiocb_cancel_fn *cancel) 562 554 { 563 - struct aio_kiocb *req = container_of(iocb, struct aio_kiocb, common); 555 + struct aio_kiocb *req = container_of(iocb, struct aio_kiocb, rw); 564 556 struct kioctx *ctx = req->ki_ctx; 565 557 unsigned long flags; 566 558 559 + if (WARN_ON_ONCE(!list_empty(&req->ki_list))) 560 + return; 561 + 567 562 spin_lock_irqsave(&ctx->ctx_lock, flags); 568 - 569 - if (!req->ki_list.next) 570 - list_add(&req->ki_list, &ctx->active_reqs); 571 - 563 + list_add_tail(&req->ki_list, &ctx->active_reqs); 572 564 req->ki_cancel = cancel; 573 - 574 565 spin_unlock_irqrestore(&ctx->ctx_lock, flags); 575 566 } 576 567 EXPORT_SYMBOL(kiocb_set_cancel_fn); 577 - 578 - static int kiocb_cancel(struct aio_kiocb *kiocb) 579 - { 580 - kiocb_cancel_fn *old, *cancel; 581 - 582 - /* 583 - * Don't want to set kiocb->ki_cancel = KIOCB_CANCELLED unless it 584 - * actually has a cancel function, hence the cmpxchg() 585 - */ 586 - 587 - cancel = READ_ONCE(kiocb->ki_cancel); 588 - do { 589 - if (!cancel || cancel == KIOCB_CANCELLED) 590 - return -EINVAL; 591 - 592 - old = cancel; 593 - cancel = cmpxchg(&kiocb->ki_cancel, old, KIOCB_CANCELLED); 594 - } while (cancel != old); 595 - 596 - return cancel(&kiocb->common); 597 - } 598 568 599 569 /* 600 570 * free_ioctx() should be RCU delayed to synchronize against the RCU ··· 620 634 while (!list_empty(&ctx->active_reqs)) { 621 635 req = list_first_entry(&ctx->active_reqs, 622 636 struct aio_kiocb, ki_list); 623 - 
kiocb_cancel(req); 637 + req->ki_cancel(&req->rw); 624 638 list_del_init(&req->ki_list); 625 639 } 626 640 ··· 1027 1041 goto out_put; 1028 1042 1029 1043 percpu_ref_get(&ctx->reqs); 1030 - 1044 + INIT_LIST_HEAD(&req->ki_list); 1031 1045 req->ki_ctx = ctx; 1032 1046 return req; 1033 1047 out_put: 1034 1048 put_reqs_available(ctx, 1); 1035 1049 return NULL; 1036 - } 1037 - 1038 - static void kiocb_free(struct aio_kiocb *req) 1039 - { 1040 - if (req->common.ki_filp) 1041 - fput(req->common.ki_filp); 1042 - if (req->ki_eventfd != NULL) 1043 - eventfd_ctx_put(req->ki_eventfd); 1044 - kmem_cache_free(kiocb_cachep, req); 1045 1050 } 1046 1051 1047 1052 static struct kioctx *lookup_ioctx(unsigned long ctx_id) ··· 1065 1088 /* aio_complete 1066 1089 * Called when the io request on the given iocb is complete. 1067 1090 */ 1068 - static void aio_complete(struct kiocb *kiocb, long res, long res2) 1091 + static void aio_complete(struct aio_kiocb *iocb, long res, long res2) 1069 1092 { 1070 - struct aio_kiocb *iocb = container_of(kiocb, struct aio_kiocb, common); 1071 1093 struct kioctx *ctx = iocb->ki_ctx; 1072 1094 struct aio_ring *ring; 1073 1095 struct io_event *ev_page, *event; 1074 1096 unsigned tail, pos, head; 1075 1097 unsigned long flags; 1076 - 1077 - if (kiocb->ki_flags & IOCB_WRITE) { 1078 - struct file *file = kiocb->ki_filp; 1079 - 1080 - /* 1081 - * Tell lockdep we inherited freeze protection from submission 1082 - * thread. 1083 - */ 1084 - if (S_ISREG(file_inode(file)->i_mode)) 1085 - __sb_writers_acquired(file_inode(file)->i_sb, SB_FREEZE_WRITE); 1086 - file_end_write(file); 1087 - } 1088 - 1089 - /* 1090 - * Special case handling for sync iocbs: 1091 - * - events go directly into the iocb for fast handling 1092 - * - the sync task with the iocb in its stack holds the single iocb 1093 - * ref, no other paths have a way to get another ref 1094 - * - the sync task helpfully left a reference to itself in the iocb 1095 - */ 1096 - BUG_ON(is_sync_kiocb(kiocb)); 1097 - 1098 - if (iocb->ki_list.next) { 1099 - unsigned long flags; 1100 - 1101 - spin_lock_irqsave(&ctx->ctx_lock, flags); 1102 - list_del(&iocb->ki_list); 1103 - spin_unlock_irqrestore(&ctx->ctx_lock, flags); 1104 - } 1105 1098 1106 1099 /* 1107 1100 * Add a completion event to the ring buffer. Must be done holding ··· 1126 1179 * eventfd. The eventfd_signal() function is safe to be called 1127 1180 * from IRQ context. 1128 1181 */ 1129 - if (iocb->ki_eventfd != NULL) 1182 + if (iocb->ki_eventfd) { 1130 1183 eventfd_signal(iocb->ki_eventfd, 1); 1184 + eventfd_ctx_put(iocb->ki_eventfd); 1185 + } 1131 1186 1132 - /* everything turned out well, dispose of the aiocb. 
*/ 1133 - kiocb_free(iocb); 1187 + kmem_cache_free(kiocb_cachep, iocb); 1134 1188 1135 1189 /* 1136 1190 * We have to order our ring_info tail store above and test ··· 1197 1249 if (head == tail) 1198 1250 break; 1199 1251 1200 - avail = min(avail, nr - ret); 1201 - avail = min_t(long, avail, AIO_EVENTS_PER_PAGE - 1202 - ((head + AIO_EVENTS_OFFSET) % AIO_EVENTS_PER_PAGE)); 1203 - 1204 1252 pos = head + AIO_EVENTS_OFFSET; 1205 1253 page = ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]; 1206 1254 pos %= AIO_EVENTS_PER_PAGE; 1255 + 1256 + avail = min(avail, nr - ret); 1257 + avail = min_t(long, avail, AIO_EVENTS_PER_PAGE - pos); 1207 1258 1208 1259 ev = kmap(page); 1209 1260 copy_ret = copy_to_user(event + ret, ev + pos, ··· 1274 1327 wait_event_interruptible_hrtimeout(ctx->wait, 1275 1328 aio_read_events(ctx, min_nr, nr, event, &ret), 1276 1329 until); 1277 - 1278 - if (!ret && signal_pending(current)) 1279 - ret = -EINTR; 1280 - 1281 1330 return ret; 1282 1331 } 1283 1332 ··· 1389 1446 return -EINVAL; 1390 1447 } 1391 1448 1449 + static void aio_remove_iocb(struct aio_kiocb *iocb) 1450 + { 1451 + struct kioctx *ctx = iocb->ki_ctx; 1452 + unsigned long flags; 1453 + 1454 + spin_lock_irqsave(&ctx->ctx_lock, flags); 1455 + list_del(&iocb->ki_list); 1456 + spin_unlock_irqrestore(&ctx->ctx_lock, flags); 1457 + } 1458 + 1459 + static void aio_complete_rw(struct kiocb *kiocb, long res, long res2) 1460 + { 1461 + struct aio_kiocb *iocb = container_of(kiocb, struct aio_kiocb, rw); 1462 + 1463 + if (!list_empty_careful(&iocb->ki_list)) 1464 + aio_remove_iocb(iocb); 1465 + 1466 + if (kiocb->ki_flags & IOCB_WRITE) { 1467 + struct inode *inode = file_inode(kiocb->ki_filp); 1468 + 1469 + /* 1470 + * Tell lockdep we inherited freeze protection from submission 1471 + * thread. 
1472 + */ 1473 + if (S_ISREG(inode->i_mode)) 1474 + __sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE); 1475 + file_end_write(kiocb->ki_filp); 1476 + } 1477 + 1478 + fput(kiocb->ki_filp); 1479 + aio_complete(iocb, res, res2); 1480 + } 1481 + 1482 + static int aio_prep_rw(struct kiocb *req, struct iocb *iocb) 1483 + { 1484 + int ret; 1485 + 1486 + req->ki_filp = fget(iocb->aio_fildes); 1487 + if (unlikely(!req->ki_filp)) 1488 + return -EBADF; 1489 + req->ki_complete = aio_complete_rw; 1490 + req->ki_pos = iocb->aio_offset; 1491 + req->ki_flags = iocb_flags(req->ki_filp); 1492 + if (iocb->aio_flags & IOCB_FLAG_RESFD) 1493 + req->ki_flags |= IOCB_EVENTFD; 1494 + req->ki_hint = file_write_hint(req->ki_filp); 1495 + ret = kiocb_set_rw_flags(req, iocb->aio_rw_flags); 1496 + if (unlikely(ret)) 1497 + fput(req->ki_filp); 1498 + return ret; 1499 + } 1500 + 1392 1501 static int aio_setup_rw(int rw, struct iocb *iocb, struct iovec **iovec, 1393 1502 bool vectored, bool compat, struct iov_iter *iter) 1394 1503 { ··· 1460 1465 return import_iovec(rw, buf, len, UIO_FASTIOV, iovec, iter); 1461 1466 } 1462 1467 1463 - static inline ssize_t aio_ret(struct kiocb *req, ssize_t ret) 1468 + static inline void aio_rw_done(struct kiocb *req, ssize_t ret) 1464 1469 { 1465 1470 switch (ret) { 1466 1471 case -EIOCBQUEUED: 1467 - return ret; 1472 + break; 1468 1473 case -ERESTARTSYS: 1469 1474 case -ERESTARTNOINTR: 1470 1475 case -ERESTARTNOHAND: ··· 1476 1481 ret = -EINTR; 1477 1482 /*FALLTHRU*/ 1478 1483 default: 1479 - aio_complete(req, ret, 0); 1480 - return 0; 1484 + aio_complete_rw(req, ret, 0); 1481 1485 } 1482 1486 } 1483 1487 1484 1488 static ssize_t aio_read(struct kiocb *req, struct iocb *iocb, bool vectored, 1485 1489 bool compat) 1486 1490 { 1487 - struct file *file = req->ki_filp; 1488 1491 struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs; 1489 1492 struct iov_iter iter; 1493 + struct file *file; 1490 1494 ssize_t ret; 1491 1495 1496 + ret = aio_prep_rw(req, iocb); 1497 + if (ret) 1498 + return ret; 1499 + file = req->ki_filp; 1500 + 1501 + ret = -EBADF; 1492 1502 if (unlikely(!(file->f_mode & FMODE_READ))) 1493 - return -EBADF; 1503 + goto out_fput; 1504 + ret = -EINVAL; 1494 1505 if (unlikely(!file->f_op->read_iter)) 1495 - return -EINVAL; 1506 + goto out_fput; 1496 1507 1497 1508 ret = aio_setup_rw(READ, iocb, &iovec, vectored, compat, &iter); 1498 1509 if (ret) 1499 - return ret; 1510 + goto out_fput; 1500 1511 ret = rw_verify_area(READ, file, &req->ki_pos, iov_iter_count(&iter)); 1501 1512 if (!ret) 1502 - ret = aio_ret(req, call_read_iter(file, req, &iter)); 1513 + aio_rw_done(req, call_read_iter(file, req, &iter)); 1503 1514 kfree(iovec); 1515 + out_fput: 1516 + if (unlikely(ret)) 1517 + fput(file); 1504 1518 return ret; 1505 1519 } 1506 1520 1507 1521 static ssize_t aio_write(struct kiocb *req, struct iocb *iocb, bool vectored, 1508 1522 bool compat) 1509 1523 { 1510 - struct file *file = req->ki_filp; 1511 1524 struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs; 1512 1525 struct iov_iter iter; 1513 - ssize_t ret; 1514 - 1515 - if (unlikely(!(file->f_mode & FMODE_WRITE))) 1516 - return -EBADF; 1517 - if (unlikely(!file->f_op->write_iter)) 1518 - return -EINVAL; 1519 - 1520 - ret = aio_setup_rw(WRITE, iocb, &iovec, vectored, compat, &iter); 1521 - if (ret) 1522 - return ret; 1523 - ret = rw_verify_area(WRITE, file, &req->ki_pos, iov_iter_count(&iter)); 1524 - if (!ret) { 1525 - req->ki_flags |= IOCB_WRITE; 1526 - file_start_write(file); 1527 - ret = aio_ret(req, 
call_write_iter(file, req, &iter)); 1528 - /* 1529 - * We release freeze protection in aio_complete(). Fool lockdep 1530 - * by telling it the lock got released so that it doesn't 1531 - * complain about held lock when we return to userspace. 1532 - */ 1533 - if (S_ISREG(file_inode(file)->i_mode)) 1534 - __sb_writers_release(file_inode(file)->i_sb, SB_FREEZE_WRITE); 1535 - } 1536 - kfree(iovec); 1537 - return ret; 1538 - } 1539 - 1540 - static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, 1541 - struct iocb *iocb, bool compat) 1542 - { 1543 - struct aio_kiocb *req; 1544 1526 struct file *file; 1545 1527 ssize_t ret; 1546 1528 1529 + ret = aio_prep_rw(req, iocb); 1530 + if (ret) 1531 + return ret; 1532 + file = req->ki_filp; 1533 + 1534 + ret = -EBADF; 1535 + if (unlikely(!(file->f_mode & FMODE_WRITE))) 1536 + goto out_fput; 1537 + ret = -EINVAL; 1538 + if (unlikely(!file->f_op->write_iter)) 1539 + goto out_fput; 1540 + 1541 + ret = aio_setup_rw(WRITE, iocb, &iovec, vectored, compat, &iter); 1542 + if (ret) 1543 + goto out_fput; 1544 + ret = rw_verify_area(WRITE, file, &req->ki_pos, iov_iter_count(&iter)); 1545 + if (!ret) { 1546 + /* 1547 + * Open-code file_start_write here to grab freeze protection, 1548 + * which will be released by another thread in 1549 + * aio_complete_rw(). Fool lockdep by telling it the lock got 1550 + * released so that it doesn't complain about the held lock when 1551 + * we return to userspace. 1552 + */ 1553 + if (S_ISREG(file_inode(file)->i_mode)) { 1554 + __sb_start_write(file_inode(file)->i_sb, SB_FREEZE_WRITE, true); 1555 + __sb_writers_release(file_inode(file)->i_sb, SB_FREEZE_WRITE); 1556 + } 1557 + req->ki_flags |= IOCB_WRITE; 1558 + aio_rw_done(req, call_write_iter(file, req, &iter)); 1559 + } 1560 + kfree(iovec); 1561 + out_fput: 1562 + if (unlikely(ret)) 1563 + fput(file); 1564 + return ret; 1565 + } 1566 + 1567 + static void aio_fsync_work(struct work_struct *work) 1568 + { 1569 + struct fsync_iocb *req = container_of(work, struct fsync_iocb, work); 1570 + int ret; 1571 + 1572 + ret = vfs_fsync(req->file, req->datasync); 1573 + fput(req->file); 1574 + aio_complete(container_of(req, struct aio_kiocb, fsync), ret, 0); 1575 + } 1576 + 1577 + static int aio_fsync(struct fsync_iocb *req, struct iocb *iocb, bool datasync) 1578 + { 1579 + if (unlikely(iocb->aio_buf || iocb->aio_offset || iocb->aio_nbytes || 1580 + iocb->aio_rw_flags)) 1581 + return -EINVAL; 1582 + req->file = fget(iocb->aio_fildes); 1583 + if (unlikely(!req->file)) 1584 + return -EBADF; 1585 + if (unlikely(!req->file->f_op->fsync)) { 1586 + fput(req->file); 1587 + return -EINVAL; 1588 + } 1589 + 1590 + req->datasync = datasync; 1591 + INIT_WORK(&req->work, aio_fsync_work); 1592 + schedule_work(&req->work); 1593 + return 0; 1594 + } 1595 + 1596 + /* need to use list_del_init so we can check if item was present */ 1597 + static inline bool __aio_poll_remove(struct poll_iocb *req) 1598 + { 1599 + if (list_empty(&req->wait.entry)) 1600 + return false; 1601 + list_del_init(&req->wait.entry); 1602 + return true; 1603 + } 1604 + 1605 + static inline void __aio_poll_complete(struct aio_kiocb *iocb, __poll_t mask) 1606 + { 1607 + fput(iocb->poll.file); 1608 + aio_complete(iocb, mangle_poll(mask), 0); 1609 + } 1610 + 1611 + static void aio_poll_work(struct work_struct *work) 1612 + { 1613 + struct aio_kiocb *iocb = container_of(work, struct aio_kiocb, poll.work); 1614 + 1615 + if (!list_empty_careful(&iocb->ki_list)) 1616 + aio_remove_iocb(iocb); 1617 + __aio_poll_complete(iocb, 
iocb->poll.events); 1618 + } 1619 + 1620 + static int aio_poll_cancel(struct kiocb *iocb) 1621 + { 1622 + struct aio_kiocb *aiocb = container_of(iocb, struct aio_kiocb, rw); 1623 + struct poll_iocb *req = &aiocb->poll; 1624 + struct wait_queue_head *head = req->head; 1625 + bool found = false; 1626 + 1627 + spin_lock(&head->lock); 1628 + found = __aio_poll_remove(req); 1629 + spin_unlock(&head->lock); 1630 + 1631 + if (found) { 1632 + req->events = 0; 1633 + INIT_WORK(&req->work, aio_poll_work); 1634 + schedule_work(&req->work); 1635 + } 1636 + return 0; 1637 + } 1638 + 1639 + static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync, 1640 + void *key) 1641 + { 1642 + struct poll_iocb *req = container_of(wait, struct poll_iocb, wait); 1643 + struct aio_kiocb *iocb = container_of(req, struct aio_kiocb, poll); 1644 + struct file *file = req->file; 1645 + __poll_t mask = key_to_poll(key); 1646 + 1647 + assert_spin_locked(&req->head->lock); 1648 + 1649 + /* for instances that support it check for an event match first: */ 1650 + if (mask && !(mask & req->events)) 1651 + return 0; 1652 + 1653 + mask = file->f_op->poll_mask(file, req->events); 1654 + if (!mask) 1655 + return 0; 1656 + 1657 + __aio_poll_remove(req); 1658 + 1659 + /* 1660 + * Try completing without a context switch if we can acquire ctx_lock 1661 + * without spinning. Otherwise we need to defer to a workqueue to 1662 + * avoid a deadlock due to the lock order. 1663 + */ 1664 + if (spin_trylock(&iocb->ki_ctx->ctx_lock)) { 1665 + list_del_init(&iocb->ki_list); 1666 + spin_unlock(&iocb->ki_ctx->ctx_lock); 1667 + 1668 + __aio_poll_complete(iocb, mask); 1669 + } else { 1670 + req->events = mask; 1671 + INIT_WORK(&req->work, aio_poll_work); 1672 + schedule_work(&req->work); 1673 + } 1674 + 1675 + return 1; 1676 + } 1677 + 1678 + static ssize_t aio_poll(struct aio_kiocb *aiocb, struct iocb *iocb) 1679 + { 1680 + struct kioctx *ctx = aiocb->ki_ctx; 1681 + struct poll_iocb *req = &aiocb->poll; 1682 + __poll_t mask; 1683 + 1684 + /* reject any unknown events outside the normal event mask. 
*/ 1685 + if ((u16)iocb->aio_buf != iocb->aio_buf) 1686 + return -EINVAL; 1687 + /* reject fields that are not defined for poll */ 1688 + if (iocb->aio_offset || iocb->aio_nbytes || iocb->aio_rw_flags) 1689 + return -EINVAL; 1690 + 1691 + req->events = demangle_poll(iocb->aio_buf) | EPOLLERR | EPOLLHUP; 1692 + req->file = fget(iocb->aio_fildes); 1693 + if (unlikely(!req->file)) 1694 + return -EBADF; 1695 + if (!file_has_poll_mask(req->file)) 1696 + goto out_fail; 1697 + 1698 + req->head = req->file->f_op->get_poll_head(req->file, req->events); 1699 + if (!req->head) 1700 + goto out_fail; 1701 + if (IS_ERR(req->head)) { 1702 + mask = EPOLLERR; 1703 + goto done; 1704 + } 1705 + 1706 + init_waitqueue_func_entry(&req->wait, aio_poll_wake); 1707 + aiocb->ki_cancel = aio_poll_cancel; 1708 + 1709 + spin_lock_irq(&ctx->ctx_lock); 1710 + spin_lock(&req->head->lock); 1711 + mask = req->file->f_op->poll_mask(req->file, req->events); 1712 + if (!mask) { 1713 + __add_wait_queue(req->head, &req->wait); 1714 + list_add_tail(&aiocb->ki_list, &ctx->active_reqs); 1715 + } 1716 + spin_unlock(&req->head->lock); 1717 + spin_unlock_irq(&ctx->ctx_lock); 1718 + done: 1719 + if (mask) 1720 + __aio_poll_complete(aiocb, mask); 1721 + return 0; 1722 + out_fail: 1723 + fput(req->file); 1724 + return -EINVAL; /* same as no support for IOCB_CMD_POLL */ 1725 + } 1726 + 1727 + static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, 1728 + bool compat) 1729 + { 1730 + struct aio_kiocb *req; 1731 + struct iocb iocb; 1732 + ssize_t ret; 1733 + 1734 + if (unlikely(copy_from_user(&iocb, user_iocb, sizeof(iocb)))) 1735 + return -EFAULT; 1736 + 1547 1737 /* enforce forwards compatibility on users */ 1548 - if (unlikely(iocb->aio_reserved2)) { 1738 + if (unlikely(iocb.aio_reserved2)) { 1549 1739 pr_debug("EINVAL: reserve field set\n"); 1550 1740 return -EINVAL; 1551 1741 } 1552 1742 1553 1743 /* prevent overflows */ 1554 1744 if (unlikely( 1555 - (iocb->aio_buf != (unsigned long)iocb->aio_buf) || 1556 - (iocb->aio_nbytes != (size_t)iocb->aio_nbytes) || 1557 - ((ssize_t)iocb->aio_nbytes < 0) 1745 + (iocb.aio_buf != (unsigned long)iocb.aio_buf) || 1746 + (iocb.aio_nbytes != (size_t)iocb.aio_nbytes) || 1747 + ((ssize_t)iocb.aio_nbytes < 0) 1558 1748 )) { 1559 1749 pr_debug("EINVAL: overflow check\n"); 1560 1750 return -EINVAL; ··· 1749 1569 if (unlikely(!req)) 1750 1570 return -EAGAIN; 1751 1571 1752 - req->common.ki_filp = file = fget(iocb->aio_fildes); 1753 - if (unlikely(!req->common.ki_filp)) { 1754 - ret = -EBADF; 1755 - goto out_put_req; 1756 - } 1757 - req->common.ki_pos = iocb->aio_offset; 1758 - req->common.ki_complete = aio_complete; 1759 - req->common.ki_flags = iocb_flags(req->common.ki_filp); 1760 - req->common.ki_hint = file_write_hint(file); 1761 - 1762 - if (iocb->aio_flags & IOCB_FLAG_RESFD) { 1572 + if (iocb.aio_flags & IOCB_FLAG_RESFD) { 1763 1573 /* 1764 1574 * If the IOCB_FLAG_RESFD flag of aio_flags is set, get an 1765 1575 * instance of the file* now. The file descriptor must be 1766 1576 * an eventfd() fd, and will be signaled for each completed 1767 1577 * event using the eventfd_signal() function. 
1768 1578 */ 1769 - req->ki_eventfd = eventfd_ctx_fdget((int) iocb->aio_resfd); 1579 + req->ki_eventfd = eventfd_ctx_fdget((int) iocb.aio_resfd); 1770 1580 if (IS_ERR(req->ki_eventfd)) { 1771 1581 ret = PTR_ERR(req->ki_eventfd); 1772 1582 req->ki_eventfd = NULL; 1773 1583 goto out_put_req; 1774 1584 } 1775 - 1776 - req->common.ki_flags |= IOCB_EVENTFD; 1777 - } 1778 - 1779 - ret = kiocb_set_rw_flags(&req->common, iocb->aio_rw_flags); 1780 - if (unlikely(ret)) { 1781 - pr_debug("EINVAL: aio_rw_flags\n"); 1782 - goto out_put_req; 1783 1585 } 1784 1586 1785 1587 ret = put_user(KIOCB_KEY, &user_iocb->aio_key); ··· 1771 1609 } 1772 1610 1773 1611 req->ki_user_iocb = user_iocb; 1774 - req->ki_user_data = iocb->aio_data; 1612 + req->ki_user_data = iocb.aio_data; 1775 1613 1776 - get_file(file); 1777 - switch (iocb->aio_lio_opcode) { 1614 + switch (iocb.aio_lio_opcode) { 1778 1615 case IOCB_CMD_PREAD: 1779 - ret = aio_read(&req->common, iocb, false, compat); 1616 + ret = aio_read(&req->rw, &iocb, false, compat); 1780 1617 break; 1781 1618 case IOCB_CMD_PWRITE: 1782 - ret = aio_write(&req->common, iocb, false, compat); 1619 + ret = aio_write(&req->rw, &iocb, false, compat); 1783 1620 break; 1784 1621 case IOCB_CMD_PREADV: 1785 - ret = aio_read(&req->common, iocb, true, compat); 1622 + ret = aio_read(&req->rw, &iocb, true, compat); 1786 1623 break; 1787 1624 case IOCB_CMD_PWRITEV: 1788 - ret = aio_write(&req->common, iocb, true, compat); 1625 + ret = aio_write(&req->rw, &iocb, true, compat); 1626 + break; 1627 + case IOCB_CMD_FSYNC: 1628 + ret = aio_fsync(&req->fsync, &iocb, false); 1629 + break; 1630 + case IOCB_CMD_FDSYNC: 1631 + ret = aio_fsync(&req->fsync, &iocb, true); 1632 + break; 1633 + case IOCB_CMD_POLL: 1634 + ret = aio_poll(req, &iocb); 1789 1635 break; 1790 1636 default: 1791 - pr_debug("invalid aio operation %d\n", iocb->aio_lio_opcode); 1637 + pr_debug("invalid aio operation %d\n", iocb.aio_lio_opcode); 1792 1638 ret = -EINVAL; 1793 1639 break; 1794 1640 } 1795 - fput(file); 1796 1641 1797 - if (ret && ret != -EIOCBQUEUED) 1642 + /* 1643 + * If ret is 0, we'd either done aio_complete() ourselves or have 1644 + * arranged for that to be done asynchronously. Anything non-zero 1645 + * means that we need to destroy req ourselves. 1646 + */ 1647 + if (ret) 1798 1648 goto out_put_req; 1799 1649 return 0; 1800 1650 out_put_req: 1801 1651 put_reqs_available(ctx, 1); 1802 1652 percpu_ref_put(&ctx->reqs); 1803 - kiocb_free(req); 1653 + if (req->ki_eventfd) 1654 + eventfd_ctx_put(req->ki_eventfd); 1655 + kmem_cache_free(kiocb_cachep, req); 1804 1656 return ret; 1805 - } 1806 - 1807 - static long do_io_submit(aio_context_t ctx_id, long nr, 1808 - struct iocb __user *__user *iocbpp, bool compat) 1809 - { 1810 - struct kioctx *ctx; 1811 - long ret = 0; 1812 - int i = 0; 1813 - struct blk_plug plug; 1814 - 1815 - if (unlikely(nr < 0)) 1816 - return -EINVAL; 1817 - 1818 - if (unlikely(nr > LONG_MAX/sizeof(*iocbpp))) 1819 - nr = LONG_MAX/sizeof(*iocbpp); 1820 - 1821 - if (unlikely(!access_ok(VERIFY_READ, iocbpp, (nr*sizeof(*iocbpp))))) 1822 - return -EFAULT; 1823 - 1824 - ctx = lookup_ioctx(ctx_id); 1825 - if (unlikely(!ctx)) { 1826 - pr_debug("EINVAL: invalid context id\n"); 1827 - return -EINVAL; 1828 - } 1829 - 1830 - blk_start_plug(&plug); 1831 - 1832 - /* 1833 - * AKPM: should this return a partial result if some of the IOs were 1834 - * successfully submitted? 
1835 - */ 1836 - for (i=0; i<nr; i++) { 1837 - struct iocb __user *user_iocb; 1838 - struct iocb tmp; 1839 - 1840 - if (unlikely(__get_user(user_iocb, iocbpp + i))) { 1841 - ret = -EFAULT; 1842 - break; 1843 - } 1844 - 1845 - if (unlikely(copy_from_user(&tmp, user_iocb, sizeof(tmp)))) { 1846 - ret = -EFAULT; 1847 - break; 1848 - } 1849 - 1850 - ret = io_submit_one(ctx, user_iocb, &tmp, compat); 1851 - if (ret) 1852 - break; 1853 - } 1854 - blk_finish_plug(&plug); 1855 - 1856 - percpu_ref_put(&ctx->users); 1857 - return i ? i : ret; 1858 1657 } 1859 1658 1860 1659 /* sys_io_submit: ··· 1833 1710 SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr, 1834 1711 struct iocb __user * __user *, iocbpp) 1835 1712 { 1836 - return do_io_submit(ctx_id, nr, iocbpp, 0); 1837 - } 1838 - 1839 - #ifdef CONFIG_COMPAT 1840 - static inline long 1841 - copy_iocb(long nr, u32 __user *ptr32, struct iocb __user * __user *ptr64) 1842 - { 1843 - compat_uptr_t uptr; 1844 - int i; 1845 - 1846 - for (i = 0; i < nr; ++i) { 1847 - if (get_user(uptr, ptr32 + i)) 1848 - return -EFAULT; 1849 - if (put_user(compat_ptr(uptr), ptr64 + i)) 1850 - return -EFAULT; 1851 - } 1852 - return 0; 1853 - } 1854 - 1855 - #define MAX_AIO_SUBMITS (PAGE_SIZE/sizeof(struct iocb *)) 1856 - 1857 - COMPAT_SYSCALL_DEFINE3(io_submit, compat_aio_context_t, ctx_id, 1858 - int, nr, u32 __user *, iocb) 1859 - { 1860 - struct iocb __user * __user *iocb64; 1861 - long ret; 1713 + struct kioctx *ctx; 1714 + long ret = 0; 1715 + int i = 0; 1716 + struct blk_plug plug; 1862 1717 1863 1718 if (unlikely(nr < 0)) 1864 1719 return -EINVAL; 1865 1720 1866 - if (nr > MAX_AIO_SUBMITS) 1867 - nr = MAX_AIO_SUBMITS; 1721 + ctx = lookup_ioctx(ctx_id); 1722 + if (unlikely(!ctx)) { 1723 + pr_debug("EINVAL: invalid context id\n"); 1724 + return -EINVAL; 1725 + } 1868 1726 1869 - iocb64 = compat_alloc_user_space(nr * sizeof(*iocb64)); 1870 - ret = copy_iocb(nr, iocb, iocb64); 1871 - if (!ret) 1872 - ret = do_io_submit(ctx_id, nr, iocb64, 1); 1873 - return ret; 1727 + if (nr > ctx->nr_events) 1728 + nr = ctx->nr_events; 1729 + 1730 + blk_start_plug(&plug); 1731 + for (i = 0; i < nr; i++) { 1732 + struct iocb __user *user_iocb; 1733 + 1734 + if (unlikely(get_user(user_iocb, iocbpp + i))) { 1735 + ret = -EFAULT; 1736 + break; 1737 + } 1738 + 1739 + ret = io_submit_one(ctx, user_iocb, false); 1740 + if (ret) 1741 + break; 1742 + } 1743 + blk_finish_plug(&plug); 1744 + 1745 + percpu_ref_put(&ctx->users); 1746 + return i ? i : ret; 1747 + } 1748 + 1749 + #ifdef CONFIG_COMPAT 1750 + COMPAT_SYSCALL_DEFINE3(io_submit, compat_aio_context_t, ctx_id, 1751 + int, nr, compat_uptr_t __user *, iocbpp) 1752 + { 1753 + struct kioctx *ctx; 1754 + long ret = 0; 1755 + int i = 0; 1756 + struct blk_plug plug; 1757 + 1758 + if (unlikely(nr < 0)) 1759 + return -EINVAL; 1760 + 1761 + ctx = lookup_ioctx(ctx_id); 1762 + if (unlikely(!ctx)) { 1763 + pr_debug("EINVAL: invalid context id\n"); 1764 + return -EINVAL; 1765 + } 1766 + 1767 + if (nr > ctx->nr_events) 1768 + nr = ctx->nr_events; 1769 + 1770 + blk_start_plug(&plug); 1771 + for (i = 0; i < nr; i++) { 1772 + compat_uptr_t user_iocb; 1773 + 1774 + if (unlikely(get_user(user_iocb, iocbpp + i))) { 1775 + ret = -EFAULT; 1776 + break; 1777 + } 1778 + 1779 + ret = io_submit_one(ctx, compat_ptr(user_iocb), true); 1780 + if (ret) 1781 + break; 1782 + } 1783 + blk_finish_plug(&plug); 1784 + 1785 + percpu_ref_put(&ctx->users); 1786 + return i ? i : ret; 1874 1787 } 1875 1788 #endif 1876 1789 ··· 1914 1755 * Finds a given iocb for cancellation. 
1915 1756 */ 1916 1757 static struct aio_kiocb * 1917 - lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb, u32 key) 1758 + lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb) 1918 1759 { 1919 1760 struct aio_kiocb *kiocb; 1920 1761 1921 1762 assert_spin_locked(&ctx->ctx_lock); 1922 - 1923 - if (key != KIOCB_KEY) 1924 - return NULL; 1925 1763 1926 1764 /* TODO: use a hash or array, this sucks. */ 1927 1765 list_for_each_entry(kiocb, &ctx->active_reqs, ki_list) { ··· 1943 1787 { 1944 1788 struct kioctx *ctx; 1945 1789 struct aio_kiocb *kiocb; 1790 + int ret = -EINVAL; 1946 1791 u32 key; 1947 - int ret; 1948 1792 1949 - ret = get_user(key, &iocb->aio_key); 1950 - if (unlikely(ret)) 1793 + if (unlikely(get_user(key, &iocb->aio_key))) 1951 1794 return -EFAULT; 1795 + if (unlikely(key != KIOCB_KEY)) 1796 + return -EINVAL; 1952 1797 1953 1798 ctx = lookup_ioctx(ctx_id); 1954 1799 if (unlikely(!ctx)) 1955 1800 return -EINVAL; 1956 1801 1957 1802 spin_lock_irq(&ctx->ctx_lock); 1958 - 1959 - kiocb = lookup_kiocb(ctx, iocb, key); 1960 - if (kiocb) 1961 - ret = kiocb_cancel(kiocb); 1962 - else 1963 - ret = -EINVAL; 1964 - 1803 + kiocb = lookup_kiocb(ctx, iocb); 1804 + if (kiocb) { 1805 + ret = kiocb->ki_cancel(&kiocb->rw); 1806 + list_del_init(&kiocb->ki_list); 1807 + } 1965 1808 spin_unlock_irq(&ctx->ctx_lock); 1966 1809 1967 1810 if (!ret) { ··· 2015 1860 struct timespec __user *, timeout) 2016 1861 { 2017 1862 struct timespec64 ts; 1863 + int ret; 2018 1864 2019 - if (timeout) { 2020 - if (unlikely(get_timespec64(&ts, timeout))) 1865 + if (timeout && unlikely(get_timespec64(&ts, timeout))) 1866 + return -EFAULT; 1867 + 1868 + ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &ts : NULL); 1869 + if (!ret && signal_pending(current)) 1870 + ret = -EINTR; 1871 + return ret; 1872 + } 1873 + 1874 + SYSCALL_DEFINE6(io_pgetevents, 1875 + aio_context_t, ctx_id, 1876 + long, min_nr, 1877 + long, nr, 1878 + struct io_event __user *, events, 1879 + struct timespec __user *, timeout, 1880 + const struct __aio_sigset __user *, usig) 1881 + { 1882 + struct __aio_sigset ksig = { NULL, }; 1883 + sigset_t ksigmask, sigsaved; 1884 + struct timespec64 ts; 1885 + int ret; 1886 + 1887 + if (timeout && unlikely(get_timespec64(&ts, timeout))) 1888 + return -EFAULT; 1889 + 1890 + if (usig && copy_from_user(&ksig, usig, sizeof(ksig))) 1891 + return -EFAULT; 1892 + 1893 + if (ksig.sigmask) { 1894 + if (ksig.sigsetsize != sizeof(sigset_t)) 1895 + return -EINVAL; 1896 + if (copy_from_user(&ksigmask, ksig.sigmask, sizeof(ksigmask))) 2021 1897 return -EFAULT; 1898 + sigdelsetmask(&ksigmask, sigmask(SIGKILL) | sigmask(SIGSTOP)); 1899 + sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); 2022 1900 } 2023 1901 2024 - return do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &ts : NULL); 1902 + ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? 
&ts : NULL); 1903 + if (signal_pending(current)) { 1904 + if (ksig.sigmask) { 1905 + current->saved_sigmask = sigsaved; 1906 + set_restore_sigmask(); 1907 + } 1908 + 1909 + if (!ret) 1910 + ret = -ERESTARTNOHAND; 1911 + } else { 1912 + if (ksig.sigmask) 1913 + sigprocmask(SIG_SETMASK, &sigsaved, NULL); 1914 + } 1915 + 1916 + return ret; 2025 1917 } 2026 1918 2027 1919 #ifdef CONFIG_COMPAT ··· 2079 1877 struct compat_timespec __user *, timeout) 2080 1878 { 2081 1879 struct timespec64 t; 1880 + int ret; 2082 1881 2083 - if (timeout) { 2084 - if (compat_get_timespec64(&t, timeout)) 1882 + if (timeout && compat_get_timespec64(&t, timeout)) 1883 + return -EFAULT; 1884 + 1885 + ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &t : NULL); 1886 + if (!ret && signal_pending(current)) 1887 + ret = -EINTR; 1888 + return ret; 1889 + } 1890 + 1891 + 1892 + struct __compat_aio_sigset { 1893 + compat_sigset_t __user *sigmask; 1894 + compat_size_t sigsetsize; 1895 + }; 1896 + 1897 + COMPAT_SYSCALL_DEFINE6(io_pgetevents, 1898 + compat_aio_context_t, ctx_id, 1899 + compat_long_t, min_nr, 1900 + compat_long_t, nr, 1901 + struct io_event __user *, events, 1902 + struct compat_timespec __user *, timeout, 1903 + const struct __compat_aio_sigset __user *, usig) 1904 + { 1905 + struct __compat_aio_sigset ksig = { NULL, }; 1906 + sigset_t ksigmask, sigsaved; 1907 + struct timespec64 t; 1908 + int ret; 1909 + 1910 + if (timeout && compat_get_timespec64(&t, timeout)) 1911 + return -EFAULT; 1912 + 1913 + if (usig && copy_from_user(&ksig, usig, sizeof(ksig))) 1914 + return -EFAULT; 1915 + 1916 + if (ksig.sigmask) { 1917 + if (ksig.sigsetsize != sizeof(compat_sigset_t)) 1918 + return -EINVAL; 1919 + if (get_compat_sigset(&ksigmask, ksig.sigmask)) 2085 1920 return -EFAULT; 2086 - 1921 + sigdelsetmask(&ksigmask, sigmask(SIGKILL) | sigmask(SIGSTOP)); 1922 + sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); 2087 1923 } 2088 1924 2089 - return do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &t : NULL); 1925 + ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &t : NULL); 1926 + if (signal_pending(current)) { 1927 + if (ksig.sigmask) { 1928 + current->saved_sigmask = sigsaved; 1929 + set_restore_sigmask(); 1930 + } 1931 + if (!ret) 1932 + ret = -ERESTARTNOHAND; 1933 + } else { 1934 + if (ksig.sigmask) 1935 + sigprocmask(SIG_SETMASK, &sigsaved, NULL); 1936 + } 1937 + 1938 + return ret; 2090 1939 } 2091 1940 #endif
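The visible result of the fs/aio.c surgery above is a new opcode. A hedged userspace sketch (raw syscalls, error handling elided): the interest mask is passed in aio_buf and must fit in 16 bits, the completion event's res field carries the mangled EPOLL* result, and files that were not converted to ->poll_mask fail the submission with -EINVAL, exactly as the out_fail path above arranges:

#include <linux/aio_abi.h>
#include <poll.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static long submit_oneshot_poll(aio_context_t ctx, int fd)
{
	struct iocb cb;
	struct iocb *cbs[1] = { &cb };

	memset(&cb, 0, sizeof(cb));
	cb.aio_fildes = fd;
	cb.aio_lio_opcode = IOCB_CMD_POLL;
	cb.aio_buf = POLLIN;	/* events of interest, 16-bit POLL* mask */

	/* completes once fd is readable (EPOLLERR/EPOLLHUP always added) */
	return syscall(SYS_io_submit, ctx, 1L, cbs);
}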
+11 -4
fs/eventfd.c
··· 101 101 return 0; 102 102 } 103 103 104 - static __poll_t eventfd_poll(struct file *file, poll_table *wait) 104 + static struct wait_queue_head * 105 + eventfd_get_poll_head(struct file *file, __poll_t events) 106 + { 107 + struct eventfd_ctx *ctx = file->private_data; 108 + 109 + return &ctx->wqh; 110 + } 111 + 112 + static __poll_t eventfd_poll_mask(struct file *file, __poll_t eventmask) 105 113 { 106 114 struct eventfd_ctx *ctx = file->private_data; 107 115 __poll_t events = 0; 108 116 u64 count; 109 - 110 - poll_wait(file, &ctx->wqh, wait); 111 117 112 118 /* 113 119 * All writes to ctx->count occur within ctx->wqh.lock. This read ··· 311 305 .show_fdinfo = eventfd_show_fdinfo, 312 306 #endif 313 307 .release = eventfd_release, 314 - .poll = eventfd_poll, 308 + .get_poll_head = eventfd_get_poll_head, 309 + .poll_mask = eventfd_poll_mask, 315 310 .read = eventfd_read, 316 311 .write = eventfd_write, 317 312 .llseek = noop_llseek,
+2 -3
fs/eventpoll.c
··· 884 884 885 885 pt->_key = epi->event.events; 886 886 if (!is_file_epoll(epi->ffd.file)) 887 - return epi->ffd.file->f_op->poll(epi->ffd.file, pt) & 888 - epi->event.events; 887 + return vfs_poll(epi->ffd.file, pt) & epi->event.events; 889 888 890 889 ep = epi->ffd.file->private_data; 891 890 poll_wait(epi->ffd.file, &ep->poll_wait, pt); ··· 2024 2025 2025 2026 /* The target file descriptor must support poll */ 2026 2027 error = -EPERM; 2027 - if (!tf.file->f_op->poll) 2028 + if (!file_can_poll(tf.file)) 2028 2029 goto error_tgt_fput; 2029 2030 2030 2031 /* Check if EPOLLWAKEUP is allowed */
+13 -9
fs/pipe.c
··· 509 509 } 510 510 } 511 511 512 - /* No kernel lock held - fine */ 513 - static __poll_t 514 - pipe_poll(struct file *filp, poll_table *wait) 512 + static struct wait_queue_head * 513 + pipe_get_poll_head(struct file *filp, __poll_t events) 515 514 { 516 - __poll_t mask; 517 515 struct pipe_inode_info *pipe = filp->private_data; 518 - int nrbufs; 519 516 520 - poll_wait(filp, &pipe->wait, wait); 517 + return &pipe->wait; 518 + } 519 + 520 + /* No kernel lock held - fine */ 521 + static __poll_t pipe_poll_mask(struct file *filp, __poll_t events) 522 + { 523 + struct pipe_inode_info *pipe = filp->private_data; 524 + int nrbufs = pipe->nrbufs; 525 + __poll_t mask = 0; 521 526 522 527 /* Reading only -- no need for acquiring the semaphore. */ 523 - nrbufs = pipe->nrbufs; 524 - mask = 0; 525 528 if (filp->f_mode & FMODE_READ) { 526 529 mask = (nrbufs > 0) ? EPOLLIN | EPOLLRDNORM : 0; 527 530 if (!pipe->writers && filp->f_version != pipe->w_counter) ··· 1023 1020 .llseek = no_llseek, 1024 1021 .read_iter = pipe_read, 1025 1022 .write_iter = pipe_write, 1026 - .poll = pipe_poll, 1023 + .get_poll_head = pipe_get_poll_head, 1024 + .poll_mask = pipe_poll_mask, 1027 1025 .unlocked_ioctl = pipe_ioctl, 1028 1026 .release = pipe_release, 1029 1027 .fasync = pipe_fasync,
+48 -35
fs/select.c
··· 34 34 35 35 #include <linux/uaccess.h> 36 36 37 + __poll_t vfs_poll(struct file *file, struct poll_table_struct *pt) 38 + { 39 + if (file->f_op->poll) { 40 + return file->f_op->poll(file, pt); 41 + } else if (file_has_poll_mask(file)) { 42 + unsigned int events = poll_requested_events(pt); 43 + struct wait_queue_head *head; 44 + 45 + if (pt && pt->_qproc) { 46 + head = file->f_op->get_poll_head(file, events); 47 + if (!head) 48 + return DEFAULT_POLLMASK; 49 + if (IS_ERR(head)) 50 + return EPOLLERR; 51 + pt->_qproc(file, head, pt); 52 + } 53 + 54 + return file->f_op->poll_mask(file, events); 55 + } else { 56 + return DEFAULT_POLLMASK; 57 + } 58 + } 59 + EXPORT_SYMBOL_GPL(vfs_poll); 37 60 38 61 /* 39 62 * Estimate expected accuracy in ns from a timeval. ··· 256 233 add_wait_queue(wait_address, &entry->wait); 257 234 } 258 235 259 - int poll_schedule_timeout(struct poll_wqueues *pwq, int state, 236 + static int poll_schedule_timeout(struct poll_wqueues *pwq, int state, 260 237 ktime_t *expires, unsigned long slack) 261 238 { 262 239 int rc = -EINTR; ··· 281 258 282 259 return rc; 283 260 } 284 - EXPORT_SYMBOL(poll_schedule_timeout); 285 261 286 262 /** 287 263 * poll_select_set_timeout - helper function to setup the timeout value ··· 525 503 continue; 526 504 f = fdget(i); 527 505 if (f.file) { 528 - const struct file_operations *f_op; 529 - f_op = f.file->f_op; 530 - mask = DEFAULT_POLLMASK; 531 - if (f_op->poll) { 532 - wait_key_set(wait, in, out, 533 - bit, busy_flag); 534 - mask = (*f_op->poll)(f.file, wait); 535 - } 506 + wait_key_set(wait, in, out, bit, 507 + busy_flag); 508 + mask = vfs_poll(f.file, wait); 509 + 536 510 fdput(f); 537 511 if ((mask & POLLIN_SET) && (in & bit)) { 538 512 res_in |= bit; ··· 831 813 bool *can_busy_poll, 832 814 __poll_t busy_flag) 833 815 { 834 - __poll_t mask; 835 - int fd; 816 + int fd = pollfd->fd; 817 + __poll_t mask = 0, filter; 818 + struct fd f; 836 819 837 - mask = 0; 838 - fd = pollfd->fd; 839 - if (fd >= 0) { 840 - struct fd f = fdget(fd); 841 - mask = EPOLLNVAL; 842 - if (f.file) { 843 - /* userland u16 ->events contains POLL... bitmap */ 844 - __poll_t filter = demangle_poll(pollfd->events) | 845 - EPOLLERR | EPOLLHUP; 846 - mask = DEFAULT_POLLMASK; 847 - if (f.file->f_op->poll) { 848 - pwait->_key = filter; 849 - pwait->_key |= busy_flag; 850 - mask = f.file->f_op->poll(f.file, pwait); 851 - if (mask & busy_flag) 852 - *can_busy_poll = true; 853 - } 854 - /* Mask out unneeded events. */ 855 - mask &= filter; 856 - fdput(f); 857 - } 858 - } 820 + if (fd < 0) 821 + goto out; 822 + mask = EPOLLNVAL; 823 + f = fdget(fd); 824 + if (!f.file) 825 + goto out; 826 + 827 + /* userland u16 ->events contains POLL... bitmap */ 828 + filter = demangle_poll(pollfd->events) | EPOLLERR | EPOLLHUP; 829 + pwait->_key = filter | busy_flag; 830 + mask = vfs_poll(f.file, pwait); 831 + if (mask & busy_flag) 832 + *can_busy_poll = true; 833 + mask &= filter; /* Mask out unneeded events. */ 834 + fdput(f); 835 + 836 + out: 859 837 /* ... and so does ->revents */ 860 838 pollfd->revents = mangle_poll(mask); 861 - 862 839 return mask; 863 840 } 864 841
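vfs_poll() becomes the single entry point for in-kernel pollers, which is what the serial2002, vhost, virqfd and eventpoll conversions above rely on. A short sketch of the calling convention (demo_* name invented): passing a NULL table, or one with _qproc cleared, skips the get_poll_head() registration step and just samples the current readiness:

#include <linux/poll.h>

static bool demo_fd_readable_now(struct file *file)
{
	if (!file_can_poll(file))	/* neither ->poll nor the new pair */
		return false;
	/* NULL table: no waitqueue registration, just the event mask */
	return vfs_poll(file, NULL) & (EPOLLIN | EPOLLRDNORM);
}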
+11 -11
fs/timerfd.c
··· 226 226 kfree_rcu(ctx, rcu); 227 227 return 0; 228 228 } 229 - 230 - static __poll_t timerfd_poll(struct file *file, poll_table *wait) 229 + 230 + static struct wait_queue_head *timerfd_get_poll_head(struct file *file, 231 + __poll_t eventmask) 231 232 { 232 233 struct timerfd_ctx *ctx = file->private_data; 233 - __poll_t events = 0; 234 - unsigned long flags; 235 234 236 - poll_wait(file, &ctx->wqh, wait); 235 + return &ctx->wqh; 236 + } 237 237 238 - spin_lock_irqsave(&ctx->wqh.lock, flags); 239 - if (ctx->ticks) 240 - events |= EPOLLIN; 241 - spin_unlock_irqrestore(&ctx->wqh.lock, flags); 238 + static __poll_t timerfd_poll_mask(struct file *file, __poll_t eventmask) 239 + { 240 + struct timerfd_ctx *ctx = file->private_data; 242 241 243 - return events; 242 + return ctx->ticks ? EPOLLIN : 0; 244 243 } 245 244 246 245 static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count, ··· 363 364 364 365 static const struct file_operations timerfd_fops = { 365 366 .release = timerfd_release, 366 - .poll = timerfd_poll, 367 + .get_poll_head = timerfd_get_poll_head, 368 + .poll_mask = timerfd_poll_mask, 367 369 .read = timerfd_read, 368 370 .llseek = noop_llseek, 369 371 .show_fdinfo = timerfd_show,
+1 -2
include/crypto/if_alg.h
··· 245 245 int offset, size_t size, int flags); 246 246 void af_alg_free_resources(struct af_alg_async_req *areq); 247 247 void af_alg_async_cb(struct crypto_async_request *_req, int err); 248 - __poll_t af_alg_poll(struct file *file, struct socket *sock, 249 - poll_table *wait); 248 + __poll_t af_alg_poll_mask(struct socket *sock, __poll_t events); 250 249 struct af_alg_async_req *af_alg_alloc_areq(struct sock *sk, 251 250 unsigned int areqlen); 252 251 int af_alg_get_rsgl(struct sock *sk, struct msghdr *msg, int flags,
-2
include/linux/aio.h
··· 8 8 struct kiocb; 9 9 struct mm_struct; 10 10 11 - #define KIOCB_KEY 0 12 - 13 11 typedef int (kiocb_cancel_fn)(struct kiocb *); 14 12 15 13 /* prototypes */
+7
include/linux/compat.h
··· 330 330 struct compat_rusage __user *); 331 331 332 332 struct compat_siginfo; 333 + struct __compat_aio_sigset; 333 334 334 335 struct compat_dirent { 335 336 u32 d_ino; ··· 554 553 compat_long_t nr, 555 554 struct io_event __user *events, 556 555 struct compat_timespec __user *timeout); 556 + asmlinkage long compat_sys_io_pgetevents(compat_aio_context_t ctx_id, 557 + compat_long_t min_nr, 558 + compat_long_t nr, 559 + struct io_event __user *events, 560 + struct compat_timespec __user *timeout, 561 + const struct __compat_aio_sigset __user *usig); 557 562 558 563 /* fs/cookies.c */ 559 564 asmlinkage long compat_sys_lookup_dcookie(u32, u32, char __user *, compat_size_t);
+2
include/linux/fs.h
··· 1711 1711 int (*iterate) (struct file *, struct dir_context *); 1712 1712 int (*iterate_shared) (struct file *, struct dir_context *); 1713 1713 __poll_t (*poll) (struct file *, struct poll_table_struct *); 1714 + struct wait_queue_head * (*get_poll_head)(struct file *, __poll_t); 1715 + __poll_t (*poll_mask) (struct file *, __poll_t); 1714 1716 long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long); 1715 1717 long (*compat_ioctl) (struct file *, unsigned int, unsigned long); 1716 1718 int (*mmap) (struct file *, struct vm_area_struct *);
+1
include/linux/net.h
··· 147 147 int (*getname) (struct socket *sock, 148 148 struct sockaddr *addr, 149 149 int peer); 150 + __poll_t (*poll_mask) (struct socket *sock, __poll_t events); 150 151 __poll_t (*poll) (struct file *file, struct socket *sock, 151 152 struct poll_table_struct *wait); 152 153 int (*ioctl) (struct socket *sock, unsigned int cmd,
+12 -2
include/linux/poll.h
··· 74 74 pt->_key = ~(__poll_t)0; /* all events enabled */ 75 75 } 76 76 77 + static inline bool file_has_poll_mask(struct file *file) 78 + { 79 + return file->f_op->get_poll_head && file->f_op->poll_mask; 80 + } 81 + 82 + static inline bool file_can_poll(struct file *file) 83 + { 84 + return file->f_op->poll || file_has_poll_mask(file); 85 + } 86 + 87 + __poll_t vfs_poll(struct file *file, struct poll_table_struct *pt); 88 + 77 89 struct poll_table_entry { 78 90 struct file *filp; 79 91 __poll_t key; ··· 108 96 109 97 extern void poll_initwait(struct poll_wqueues *pwq); 110 98 extern void poll_freewait(struct poll_wqueues *pwq); 111 - extern int poll_schedule_timeout(struct poll_wqueues *pwq, int state, 112 - ktime_t *expires, unsigned long slack); 113 99 extern u64 select_estimate_accuracy(struct timespec64 *tv); 114 100 115 101 #define MAX_INT64_SECONDS (((s64)(~((u64)0)>>1)/HZ)-1)
+1 -2
include/linux/skbuff.h
··· 3250 3250 int *peeked, int *off, int *err); 3251 3251 struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, int noblock, 3252 3252 int *err); 3253 - __poll_t datagram_poll(struct file *file, struct socket *sock, 3254 - struct poll_table_struct *wait); 3253 + __poll_t datagram_poll_mask(struct socket *sock, __poll_t events); 3255 3254 int skb_copy_datagram_iter(const struct sk_buff *from, int offset, 3256 3255 struct iov_iter *to, int size); 3257 3256 static inline int skb_copy_datagram_msg(const struct sk_buff *from, int offset,
+6
include/linux/syscalls.h
··· 290 290 long nr, 291 291 struct io_event __user *events, 292 292 struct timespec __user *timeout); 293 + asmlinkage long sys_io_pgetevents(aio_context_t ctx_id, 294 + long min_nr, 295 + long nr, 296 + struct io_event __user *events, 297 + struct timespec __user *timeout, 298 + const struct __aio_sigset *sig); 293 299 294 300 /* fs/xattr.c */ 295 301 asmlinkage long sys_setxattr(const char __user *path, const char __user *name,
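
io_pgetevents() is to io_getevents(2) what ppoll(2) is to poll(2): the given signal mask is installed atomically for the duration of the wait, closing the race between unblocking a signal and going to sleep. A hypothetical userspace sketch using raw syscalls; the sigset struct here mirrors the uapi definition added in the aio_abi.h hunk further down, and __NR_io_pgetevents is assumed to be visible from the updated unistd.h:

#include <linux/aio_abi.h>
#include <signal.h>
#include <stddef.h>
#include <sys/syscall.h>
#include <unistd.h>

/* userspace mirror of the new uapi struct __aio_sigset */
struct my_aio_sigset {
	const sigset_t *sigmask;
	size_t sigsetsize;
};

static long getevents_interruptible(aio_context_t ctx, struct io_event *evs,
				    long nr)
{
	sigset_t during_wait;
	struct my_aio_sigset ss = {
		.sigmask = &during_wait,
		.sigsetsize = 8, /* kernel sigset size (_NSIG/8), not sizeof(sigset_t) */
	};

	sigemptyset(&during_wait);	/* every signal deliverable while sleeping */
	return syscall(__NR_io_pgetevents, ctx, 1, nr, evs, NULL, &ss);
}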
+1 -1
include/net/bluetooth/bluetooth.h
··· 271 271 int flags); 272 272 int bt_sock_stream_recvmsg(struct socket *sock, struct msghdr *msg, 273 273 size_t len, int flags); 274 - __poll_t bt_sock_poll(struct file *file, struct socket *sock, poll_table *wait); 274 + __poll_t bt_sock_poll_mask(struct socket *sock, __poll_t events); 275 275 int bt_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); 276 276 int bt_sock_wait_state(struct sock *sk, int state, unsigned long timeo); 277 277 int bt_sock_wait_ready(struct sock *sk, unsigned long flags);
+15
include/net/busy_poll.h
··· 121 121 #endif 122 122 } 123 123 124 + static inline void sock_poll_busy_loop(struct socket *sock, __poll_t events) 125 + { 126 + if (sk_can_busy_loop(sock->sk) && 127 + events && (events & POLL_BUSY_LOOP)) { 128 + /* once, only if requested by syscall */ 129 + sk_busy_loop(sock->sk, 1); 130 + } 131 + } 132 + 133 + /* if this socket can poll_ll, tell the system call */ 134 + static inline __poll_t sock_poll_busy_flag(struct socket *sock) 135 + { 136 + return sk_can_busy_loop(sock->sk) ? POLL_BUSY_LOOP : 0; 137 + } 138 + 124 139 /* used in the NIC receive handler to mark the skb */ 125 140 static inline void skb_mark_napi_id(struct sk_buff *skb, 126 141 struct napi_struct *napi)
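
Both helpers are no-ops unless sk_can_busy_loop() allows it, which requires the socket to have opted in via sk_ll_usec or the net.core.busy_poll/busy_read sysctls. A hypothetical userspace snippet that opts a socket in, so that poll(2)/select(2) actually pass POLL_BUSY_LOOP down to sock_poll_busy_loop():

#include <stdio.h>
#include <sys/socket.h>

static void enable_busy_poll(int fd)
{
	int usec = 50;	/* spin on the NIC receive queue for up to 50us */

	/* raising this above net.core.busy_read may require CAP_NET_ADMIN */
	if (setsockopt(fd, SOL_SOCKET, SO_BUSY_POLL, &usec, sizeof(usec)) < 0)
		perror("setsockopt(SO_BUSY_POLL)");
}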
-2
include/net/iucv/af_iucv.h
··· 153 153 atomic_t autobind_name; 154 154 }; 155 155 156 - __poll_t iucv_sock_poll(struct file *file, struct socket *sock, 157 - poll_table *wait); 158 156 void iucv_sock_link(struct iucv_sock_list *l, struct sock *s); 159 157 void iucv_sock_unlink(struct iucv_sock_list *l, struct sock *s); 160 158 void iucv_accept_enqueue(struct sock *parent, struct sock *sk);
+1 -2
include/net/sctp/sctp.h
··· 109 109 int sctp_inet_listen(struct socket *sock, int backlog); 110 110 void sctp_write_space(struct sock *sk); 111 111 void sctp_data_ready(struct sock *sk); 112 - __poll_t sctp_poll(struct file *file, struct socket *sock, 113 - poll_table *wait); 112 + __poll_t sctp_poll_mask(struct socket *sock, __poll_t events); 114 113 void sctp_sock_rfree(struct sk_buff *skb); 115 114 void sctp_copy_sock(struct sock *newsk, struct sock *sk, 116 115 struct sctp_association *asoc);
-2
include/net/sock.h
··· 1591 1591 int sock_no_socketpair(struct socket *, struct socket *); 1592 1592 int sock_no_accept(struct socket *, struct socket *, int, bool); 1593 1593 int sock_no_getname(struct socket *, struct sockaddr *, int); 1594 - __poll_t sock_no_poll(struct file *, struct socket *, 1595 - struct poll_table_struct *); 1596 1594 int sock_no_ioctl(struct socket *, unsigned int, unsigned long); 1597 1595 int sock_no_listen(struct socket *, int); 1598 1596 int sock_no_shutdown(struct socket *, int);
+1 -2
include/net/tcp.h
··· 388 388 void tcp_close(struct sock *sk, long timeout); 389 389 void tcp_init_sock(struct sock *sk); 390 390 void tcp_init_transfer(struct sock *sk, int bpf_op); 391 - __poll_t tcp_poll(struct file *file, struct socket *sock, 392 - struct poll_table_struct *wait); 391 + __poll_t tcp_poll_mask(struct socket *sock, __poll_t events); 393 392 int tcp_getsockopt(struct sock *sk, int level, int optname, 394 393 char __user *optval, int __user *optlen); 395 394 int tcp_setsockopt(struct sock *sk, int level, int optname,
+1 -1
include/net/udp.h
··· 276 276 int udp_pre_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len); 277 277 int __udp_disconnect(struct sock *sk, int flags); 278 278 int udp_disconnect(struct sock *sk, int flags); 279 - __poll_t udp_poll(struct file *file, struct socket *sock, poll_table *wait); 279 + __poll_t udp_poll_mask(struct socket *sock, __poll_t events); 280 280 struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, 281 281 netdev_features_t features, 282 282 bool is_ipv6);
+3 -1
include/uapi/asm-generic/unistd.h
··· 732 732 __SYSCALL(__NR_pkey_free, sys_pkey_free) 733 733 #define __NR_statx 291 734 734 __SYSCALL(__NR_statx, sys_statx) 735 + #define __NR_io_pgetevents 292 736 + __SC_COMP(__NR_io_pgetevents, sys_io_pgetevents, compat_sys_io_pgetevents) 735 737 736 738 #undef __NR_syscalls 737 - #define __NR_syscalls 292 739 + #define __NR_syscalls 293 738 740 739 741 /* 740 742 * 32 bit systems traditionally used different
+8 -4
include/uapi/linux/aio_abi.h
··· 29 29 30 30 #include <linux/types.h> 31 31 #include <linux/fs.h> 32 + #include <linux/signal.h> 32 33 #include <asm/byteorder.h> 33 34 34 35 typedef __kernel_ulong_t aio_context_t; ··· 39 38 IOCB_CMD_PWRITE = 1, 40 39 IOCB_CMD_FSYNC = 2, 41 40 IOCB_CMD_FDSYNC = 3, 42 - /* These two are experimental. 43 - * IOCB_CMD_PREADX = 4, 44 - * IOCB_CMD_POLL = 5, 45 - */ 41 + /* 4 was the experimental IOCB_CMD_PREADX */ 42 + IOCB_CMD_POLL = 5, 46 43 IOCB_CMD_NOOP = 6, 47 44 IOCB_CMD_PREADV = 7, 48 45 IOCB_CMD_PWRITEV = 8, ··· 106 107 107 108 #undef IFBIG 108 109 #undef IFLITTLE 110 + 111 + struct __aio_sigset { 112 + sigset_t __user *sigmask; 113 + size_t sigsetsize; 114 + }; 109 115 110 116 #endif /* __LINUX__AIO_ABI_H */ 111 117
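
With IOCB_CMD_POLL no longer reserved, one aio context can multiplex fd readiness with reads, writes and syncs. A hypothetical sketch using raw syscalls (no libaio wrapper assumed); as implemented in this series the EPOLL* mask to wait for is carried in the iocb's aio_buf field, the poll is one-shot, and the resulting mask comes back in the completion's res field:

#include <linux/aio_abi.h>
#include <string.h>
#include <sys/epoll.h>
#include <sys/syscall.h>
#include <unistd.h>

/* submit a one-shot readability poll on fd; ctx comes from io_setup(2) */
static long submit_poll_in(aio_context_t ctx, int fd)
{
	static struct iocb cb;		/* must stay live until completion */
	struct iocb *list[1] = { &cb };

	memset(&cb, 0, sizeof(cb));
	cb.aio_fildes = fd;
	cb.aio_lio_opcode = IOCB_CMD_POLL;
	cb.aio_buf = EPOLLIN;		/* events to wait for */

	return syscall(__NR_io_submit, ctx, 1, list);
}

The completion can then be reaped with io_getevents(2) or, race-free against signals, the new io_pgetevents().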
-4
include/uapi/linux/types.h
··· 49 49 #define __aligned_be64 __be64 __attribute__((aligned(8))) 50 50 #define __aligned_le64 __le64 __attribute__((aligned(8))) 51 51 52 - #ifdef __CHECK_POLL 53 52 typedef unsigned __bitwise __poll_t; 54 - #else 55 - typedef unsigned __poll_t; 56 - #endif 57 53 58 54 #endif /* __ASSEMBLY__ */ 59 55 #endif /* _UAPI_LINUX_TYPES_H */
+2
kernel/sys_ni.c
··· 43 43 COND_SYSCALL_COMPAT(io_submit); 44 44 COND_SYSCALL(io_cancel); 45 45 COND_SYSCALL(io_getevents); 46 + COND_SYSCALL(io_pgetevents); 46 47 COND_SYSCALL_COMPAT(io_getevents); 48 + COND_SYSCALL_COMPAT(io_pgetevents); 47 49 48 50 /* fs/xattr.c */ 49 51
+1 -1
mm/memcontrol.c
··· 3849 3849 if (ret) 3850 3850 goto out_put_css; 3851 3851 3852 - efile.file->f_op->poll(efile.file, &event->pt); 3852 + vfs_poll(efile.file, &event->pt); 3853 3853 3854 3854 spin_lock(&memcg->event_list_lock); 3855 3855 list_add(&event->list, &memcg->event_list);
+4 -14
net/9p/trans_fd.c
··· 231 231 static __poll_t 232 232 p9_fd_poll(struct p9_client *client, struct poll_table_struct *pt, int *err) 233 233 { 234 - __poll_t ret, n; 234 + __poll_t ret; 235 235 struct p9_trans_fd *ts = NULL; 236 236 237 237 if (client && client->status == Connected) ··· 243 243 return EPOLLERR; 244 244 } 245 245 246 - if (!ts->rd->f_op->poll) 247 - ret = DEFAULT_POLLMASK; 248 - else 249 - ret = ts->rd->f_op->poll(ts->rd, pt); 250 - 251 - if (ts->rd != ts->wr) { 252 - if (!ts->wr->f_op->poll) 253 - n = DEFAULT_POLLMASK; 254 - else 255 - n = ts->wr->f_op->poll(ts->wr, pt); 256 - ret = (ret & ~EPOLLOUT) | (n & ~EPOLLIN); 257 - } 258 - 246 + ret = vfs_poll(ts->rd, pt); 247 + if (ts->rd != ts->wr) 248 + ret = (ret & ~EPOLLOUT) | (vfs_poll(ts->wr, pt) & ~EPOLLIN); 259 249 return ret; 260 250 } 261 251
+1 -1
net/appletalk/ddp.c
··· 1869 1869 .socketpair = sock_no_socketpair, 1870 1870 .accept = sock_no_accept, 1871 1871 .getname = atalk_getname, 1872 - .poll = datagram_poll, 1872 + .poll_mask = datagram_poll_mask, 1873 1873 .ioctl = atalk_ioctl, 1874 1874 #ifdef CONFIG_COMPAT 1875 1875 .compat_ioctl = atalk_compat_ioctl,
+3 -8
net/atm/common.c
··· 648 648 return error; 649 649 } 650 650 651 - __poll_t vcc_poll(struct file *file, struct socket *sock, poll_table *wait) 651 + __poll_t vcc_poll_mask(struct socket *sock, __poll_t events) 652 652 { 653 653 struct sock *sk = sock->sk; 654 - struct atm_vcc *vcc; 655 - __poll_t mask; 656 - 657 - sock_poll_wait(file, sk_sleep(sk), wait); 658 - mask = 0; 659 - 660 - vcc = ATM_SD(sock); 654 + struct atm_vcc *vcc = ATM_SD(sock); 655 + __poll_t mask = 0; 661 656 662 657 /* exceptional events */ 663 658 if (sk->sk_err)
+1 -1
net/atm/common.h
··· 17 17 int vcc_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, 18 18 int flags); 19 19 int vcc_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len); 20 - __poll_t vcc_poll(struct file *file, struct socket *sock, poll_table *wait); 20 + __poll_t vcc_poll_mask(struct socket *sock, __poll_t events); 21 21 int vcc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); 22 22 int vcc_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); 23 23 int vcc_setsockopt(struct socket *sock, int level, int optname,
+1 -1
net/atm/pvc.c
··· 113 113 .socketpair = sock_no_socketpair, 114 114 .accept = sock_no_accept, 115 115 .getname = pvc_getname, 116 - .poll = vcc_poll, 116 + .poll_mask = vcc_poll_mask, 117 117 .ioctl = vcc_ioctl, 118 118 #ifdef CONFIG_COMPAT 119 119 .compat_ioctl = vcc_compat_ioctl,
+1 -1
net/atm/svc.c
··· 636 636 .socketpair = sock_no_socketpair, 637 637 .accept = svc_accept, 638 638 .getname = svc_getname, 639 - .poll = vcc_poll, 639 + .poll_mask = vcc_poll_mask, 640 640 .ioctl = svc_ioctl, 641 641 #ifdef CONFIG_COMPAT 642 642 .compat_ioctl = svc_compat_ioctl,
+1 -1
net/ax25/af_ax25.c
··· 1941 1941 .socketpair = sock_no_socketpair, 1942 1942 .accept = ax25_accept, 1943 1943 .getname = ax25_getname, 1944 - .poll = datagram_poll, 1944 + .poll_mask = datagram_poll_mask, 1945 1945 .ioctl = ax25_ioctl, 1946 1946 .listen = ax25_listen, 1947 1947 .shutdown = ax25_shutdown,
+2 -5
net/bluetooth/af_bluetooth.c
··· 437 437 return 0; 438 438 } 439 439 440 - __poll_t bt_sock_poll(struct file *file, struct socket *sock, 441 - poll_table *wait) 440 + __poll_t bt_sock_poll_mask(struct socket *sock, __poll_t events) 442 441 { 443 442 struct sock *sk = sock->sk; 444 443 __poll_t mask = 0; 445 444 446 445 BT_DBG("sock %p, sk %p", sock, sk); 447 - 448 - poll_wait(file, sk_sleep(sk), wait); 449 446 450 447 if (sk->sk_state == BT_LISTEN) 451 448 return bt_accept_poll(sk); ··· 475 478 476 479 return mask; 477 480 } 478 - EXPORT_SYMBOL(bt_sock_poll); 481 + EXPORT_SYMBOL(bt_sock_poll_mask); 479 482 480 483 int bt_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) 481 484 {
-1
net/bluetooth/bnep/sock.c
··· 175 175 .getname = sock_no_getname, 176 176 .sendmsg = sock_no_sendmsg, 177 177 .recvmsg = sock_no_recvmsg, 178 - .poll = sock_no_poll, 179 178 .listen = sock_no_listen, 180 179 .shutdown = sock_no_shutdown, 181 180 .setsockopt = sock_no_setsockopt,
-1
net/bluetooth/cmtp/sock.c
··· 178 178 .getname = sock_no_getname, 179 179 .sendmsg = sock_no_sendmsg, 180 180 .recvmsg = sock_no_recvmsg, 181 - .poll = sock_no_poll, 182 181 .listen = sock_no_listen, 183 182 .shutdown = sock_no_shutdown, 184 183 .setsockopt = sock_no_setsockopt,
+1 -1
net/bluetooth/hci_sock.c
··· 1975 1975 .sendmsg = hci_sock_sendmsg, 1976 1976 .recvmsg = hci_sock_recvmsg, 1977 1977 .ioctl = hci_sock_ioctl, 1978 - .poll = datagram_poll, 1978 + .poll_mask = datagram_poll_mask, 1979 1979 .listen = sock_no_listen, 1980 1980 .shutdown = sock_no_shutdown, 1981 1981 .setsockopt = hci_sock_setsockopt,
-1
net/bluetooth/hidp/sock.c
··· 208 208 .getname = sock_no_getname, 209 209 .sendmsg = sock_no_sendmsg, 210 210 .recvmsg = sock_no_recvmsg, 211 - .poll = sock_no_poll, 212 211 .listen = sock_no_listen, 213 212 .shutdown = sock_no_shutdown, 214 213 .setsockopt = sock_no_setsockopt,
+1 -1
net/bluetooth/l2cap_sock.c
··· 1653 1653 .getname = l2cap_sock_getname, 1654 1654 .sendmsg = l2cap_sock_sendmsg, 1655 1655 .recvmsg = l2cap_sock_recvmsg, 1656 - .poll = bt_sock_poll, 1656 + .poll_mask = bt_sock_poll_mask, 1657 1657 .ioctl = bt_sock_ioctl, 1658 1658 .mmap = sock_no_mmap, 1659 1659 .socketpair = sock_no_socketpair,
+1 -1
net/bluetooth/rfcomm/sock.c
··· 1049 1049 .setsockopt = rfcomm_sock_setsockopt, 1050 1050 .getsockopt = rfcomm_sock_getsockopt, 1051 1051 .ioctl = rfcomm_sock_ioctl, 1052 - .poll = bt_sock_poll, 1052 + .poll_mask = bt_sock_poll_mask, 1053 1053 .socketpair = sock_no_socketpair, 1054 1054 .mmap = sock_no_mmap 1055 1055 };
+1 -1
net/bluetooth/sco.c
··· 1197 1197 .getname = sco_sock_getname, 1198 1198 .sendmsg = sco_sock_sendmsg, 1199 1199 .recvmsg = sco_sock_recvmsg, 1200 - .poll = bt_sock_poll, 1200 + .poll_mask = bt_sock_poll_mask, 1201 1201 .ioctl = bt_sock_ioctl, 1202 1202 .mmap = sock_no_mmap, 1203 1203 .socketpair = sock_no_socketpair,
+4 -8
net/caif/caif_socket.c
··· 934 934 } 935 935 936 936 /* Copied from af_unix.c:unix_poll(), added CAIF tx_flow handling */ 937 - static __poll_t caif_poll(struct file *file, 938 - struct socket *sock, poll_table *wait) 937 + static __poll_t caif_poll_mask(struct socket *sock, __poll_t events) 939 938 { 940 939 struct sock *sk = sock->sk; 941 - __poll_t mask; 942 940 struct caifsock *cf_sk = container_of(sk, struct caifsock, sk); 943 - 944 - sock_poll_wait(file, sk_sleep(sk), wait); 945 - mask = 0; 941 + __poll_t mask = 0; 946 942 947 943 /* exceptional events? */ 948 944 if (sk->sk_err) ··· 972 976 .socketpair = sock_no_socketpair, 973 977 .accept = sock_no_accept, 974 978 .getname = sock_no_getname, 975 - .poll = caif_poll, 979 + .poll_mask = caif_poll_mask, 976 980 .ioctl = sock_no_ioctl, 977 981 .listen = sock_no_listen, 978 982 .shutdown = sock_no_shutdown, ··· 993 997 .socketpair = sock_no_socketpair, 994 998 .accept = sock_no_accept, 995 999 .getname = sock_no_getname, 996 - .poll = caif_poll, 1000 + .poll_mask = caif_poll_mask, 997 1001 .ioctl = sock_no_ioctl, 998 1002 .listen = sock_no_listen, 999 1003 .shutdown = sock_no_shutdown,
+1 -1
net/can/bcm.c
··· 1657 1657 .socketpair = sock_no_socketpair, 1658 1658 .accept = sock_no_accept, 1659 1659 .getname = sock_no_getname, 1660 - .poll = datagram_poll, 1660 + .poll_mask = datagram_poll_mask, 1661 1661 .ioctl = can_ioctl, /* use can_ioctl() from af_can.c */ 1662 1662 .listen = sock_no_listen, 1663 1663 .shutdown = sock_no_shutdown,
+1 -1
net/can/raw.c
··· 843 843 .socketpair = sock_no_socketpair, 844 844 .accept = sock_no_accept, 845 845 .getname = raw_getname, 846 - .poll = datagram_poll, 846 + .poll_mask = datagram_poll_mask, 847 847 .ioctl = can_ioctl, /* use can_ioctl() from af_can.c */ 848 848 .listen = sock_no_listen, 849 849 .shutdown = sock_no_shutdown,
+4 -9
net/core/datagram.c
··· 819 819 
820 820 /**
821 821 * datagram_poll - generic datagram poll
822 - * @file: file struct
823 822 * @sock: socket
824 - * @wait: poll table
823 + * @events: events to wait for
825 824 *
826 825 * Datagram poll: Again totally generic. This also handles
827 826 * sequenced packet sockets providing the socket receive queue
··· 830 831 * and you use a different write policy from sock_writeable()
831 832 * then please supply your own write_space callback.
832 833 */
833 - __poll_t datagram_poll(struct file *file, struct socket *sock,
834 - poll_table *wait)
834 + __poll_t datagram_poll_mask(struct socket *sock, __poll_t events)
835 835 {
836 836 struct sock *sk = sock->sk;
837 - __poll_t mask;
838 - 
839 - sock_poll_wait(file, sk_sleep(sk), wait);
840 - mask = 0;
837 + __poll_t mask = 0;
841 838 
842 839 /* exceptional events? */
843 840 if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
··· 866 871 
867 872 return mask;
868 873 }
869 - EXPORT_SYMBOL(datagram_poll);
874 + EXPORT_SYMBOL(datagram_poll_mask);
-6
net/core/sock.c
··· 2567 2567 } 2568 2568 EXPORT_SYMBOL(sock_no_getname); 2569 2569 2570 - __poll_t sock_no_poll(struct file *file, struct socket *sock, poll_table *pt) 2571 - { 2572 - return 0; 2573 - } 2574 - EXPORT_SYMBOL(sock_no_poll); 2575 - 2576 2570 int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) 2577 2571 { 2578 2572 return -EOPNOTSUPP;
+1 -2
net/dccp/dccp.h
··· 316 316 int flags, int *addr_len); 317 317 void dccp_shutdown(struct sock *sk, int how); 318 318 int inet_dccp_listen(struct socket *sock, int backlog); 319 - __poll_t dccp_poll(struct file *file, struct socket *sock, 320 - poll_table *wait); 319 + __poll_t dccp_poll_mask(struct socket *sock, __poll_t events); 321 320 int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len); 322 321 void dccp_req_err(struct sock *sk, u64 seq); 323 322
+1 -1
net/dccp/ipv4.c
··· 984 984 .accept = inet_accept, 985 985 .getname = inet_getname, 986 986 /* FIXME: work on tcp_poll to rename it to inet_csk_poll */ 987 - .poll = dccp_poll, 987 + .poll_mask = dccp_poll_mask, 988 988 .ioctl = inet_ioctl, 989 989 /* FIXME: work on inet_listen to rename it to sock_common_listen */ 990 990 .listen = inet_dccp_listen,
+1 -1
net/dccp/ipv6.c
··· 1070 1070 .socketpair = sock_no_socketpair, 1071 1071 .accept = inet_accept, 1072 1072 .getname = inet6_getname, 1073 - .poll = dccp_poll, 1073 + .poll_mask = dccp_poll_mask, 1074 1074 .ioctl = inet6_ioctl, 1075 1075 .listen = inet_dccp_listen, 1076 1076 .shutdown = inet_shutdown,
+2 -11
net/dccp/proto.c
··· 312 312 313 313 EXPORT_SYMBOL_GPL(dccp_disconnect); 314 314 315 - /* 316 - * Wait for a DCCP event. 317 - * 318 - * Note that we don't need to lock the socket, as the upper poll layers 319 - * take care of normal races (between the test and the event) and we don't 320 - * go look at any of the socket buffers directly. 321 - */ 322 - __poll_t dccp_poll(struct file *file, struct socket *sock, 323 - poll_table *wait) 315 + __poll_t dccp_poll_mask(struct socket *sock, __poll_t events) 324 316 { 325 317 __poll_t mask; 326 318 struct sock *sk = sock->sk; 327 319 328 - sock_poll_wait(file, sk_sleep(sk), wait); 329 320 if (sk->sk_state == DCCP_LISTEN) 330 321 return inet_csk_listen_poll(sk); 331 322 ··· 358 367 return mask; 359 368 } 360 369 361 - EXPORT_SYMBOL_GPL(dccp_poll); 370 + EXPORT_SYMBOL_GPL(dccp_poll_mask); 362 371 363 372 int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg) 364 373 {
+3 -3
net/decnet/af_decnet.c
··· 1207 1207 } 1208 1208 1209 1209 1210 - static __poll_t dn_poll(struct file *file, struct socket *sock, poll_table *wait) 1210 + static __poll_t dn_poll_mask(struct socket *sock, __poll_t events) 1211 1211 { 1212 1212 struct sock *sk = sock->sk; 1213 1213 struct dn_scp *scp = DN_SK(sk); 1214 - __poll_t mask = datagram_poll(file, sock, wait); 1214 + __poll_t mask = datagram_poll_mask(sock, events); 1215 1215 1216 1216 if (!skb_queue_empty(&scp->other_receive_queue)) 1217 1217 mask |= EPOLLRDBAND; ··· 2331 2331 .socketpair = sock_no_socketpair, 2332 2332 .accept = dn_accept, 2333 2333 .getname = dn_getname, 2334 - .poll = dn_poll, 2334 + .poll_mask = dn_poll_mask, 2335 2335 .ioctl = dn_ioctl, 2336 2336 .listen = dn_listen, 2337 2337 .shutdown = dn_shutdown,
+2 -2
net/ieee802154/socket.c
··· 423 423 .socketpair = sock_no_socketpair, 424 424 .accept = sock_no_accept, 425 425 .getname = sock_no_getname, 426 - .poll = datagram_poll, 426 + .poll_mask = datagram_poll_mask, 427 427 .ioctl = ieee802154_sock_ioctl, 428 428 .listen = sock_no_listen, 429 429 .shutdown = sock_no_shutdown, ··· 969 969 .socketpair = sock_no_socketpair, 970 970 .accept = sock_no_accept, 971 971 .getname = sock_no_getname, 972 - .poll = datagram_poll, 972 + .poll_mask = datagram_poll_mask, 973 973 .ioctl = ieee802154_sock_ioctl, 974 974 .listen = sock_no_listen, 975 975 .shutdown = sock_no_shutdown,
+4 -4
net/ipv4/af_inet.c
··· 986 986 .socketpair = sock_no_socketpair, 987 987 .accept = inet_accept, 988 988 .getname = inet_getname, 989 - .poll = tcp_poll, 989 + .poll_mask = tcp_poll_mask, 990 990 .ioctl = inet_ioctl, 991 991 .listen = inet_listen, 992 992 .shutdown = inet_shutdown, ··· 1018 1018 .socketpair = sock_no_socketpair, 1019 1019 .accept = sock_no_accept, 1020 1020 .getname = inet_getname, 1021 - .poll = udp_poll, 1021 + .poll_mask = udp_poll_mask, 1022 1022 .ioctl = inet_ioctl, 1023 1023 .listen = sock_no_listen, 1024 1024 .shutdown = inet_shutdown, ··· 1039 1039 1040 1040 /* 1041 1041 * For SOCK_RAW sockets; should be the same as inet_dgram_ops but without 1042 - * udp_poll 1042 + * udp_poll_mask 1043 1043 */ 1044 1044 static const struct proto_ops inet_sockraw_ops = { 1045 1045 .family = PF_INET, ··· 1050 1050 .socketpair = sock_no_socketpair, 1051 1051 .accept = sock_no_accept, 1052 1052 .getname = inet_getname, 1053 - .poll = datagram_poll, 1053 + .poll_mask = datagram_poll_mask, 1054 1054 .ioctl = inet_ioctl, 1055 1055 .listen = sock_no_listen, 1056 1056 .shutdown = inet_shutdown,
+6 -17
net/ipv4/tcp.c
··· 494 494 } 495 495 496 496 /* 497 - * Wait for a TCP event. 498 - * 499 - * Note that we don't need to lock the socket, as the upper poll layers 500 - * take care of normal races (between the test and the event) and we don't 501 - * go look at any of the socket buffers directly. 497 + * Socket is not locked. We are protected from async events by poll logic and 498 + * correct handling of state changes made by other threads is impossible in 499 + * any case. 502 500 */ 503 - __poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait) 501 + __poll_t tcp_poll_mask(struct socket *sock, __poll_t events) 504 502 { 505 - __poll_t mask; 506 503 struct sock *sk = sock->sk; 507 504 const struct tcp_sock *tp = tcp_sk(sk); 505 + __poll_t mask = 0; 508 506 int state; 509 - 510 - sock_poll_wait(file, sk_sleep(sk), wait); 511 507 512 508 state = inet_sk_state_load(sk); 513 509 if (state == TCP_LISTEN) 514 510 return inet_csk_listen_poll(sk); 515 - 516 - /* Socket is not locked. We are protected from async events 517 - * by poll logic and correct handling of state changes 518 - * made by other threads is impossible in any case. 519 - */ 520 - 521 - mask = 0; 522 511 523 512 /* 524 513 * EPOLLHUP is certainly not done right. But poll() doesn't ··· 589 600 590 601 return mask; 591 602 } 592 - EXPORT_SYMBOL(tcp_poll); 603 + EXPORT_SYMBOL(tcp_poll_mask); 593 604 594 605 int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg) 595 606 {
+5 -5
net/ipv4/udp.c
··· 2501 2501 * udp_poll - wait for a UDP event. 2502 2502 * @file - file struct 2503 2503 * @sock - socket 2504 - * @wait - poll table 2504 + * @events - events to wait for 2505 2505 * 2506 2506 * This is same as datagram poll, except for the special case of 2507 2507 * blocking sockets. If application is using a blocking fd ··· 2510 2510 * but then block when reading it. Add special case code 2511 2511 * to work around these arguably broken applications. 2512 2512 */ 2513 - __poll_t udp_poll(struct file *file, struct socket *sock, poll_table *wait) 2513 + __poll_t udp_poll_mask(struct socket *sock, __poll_t events) 2514 2514 { 2515 - __poll_t mask = datagram_poll(file, sock, wait); 2515 + __poll_t mask = datagram_poll_mask(sock, events); 2516 2516 struct sock *sk = sock->sk; 2517 2517 2518 2518 if (!skb_queue_empty(&udp_sk(sk)->reader_queue)) 2519 2519 mask |= EPOLLIN | EPOLLRDNORM; 2520 2520 2521 2521 /* Check for false positives due to checksum errors */ 2522 - if ((mask & EPOLLRDNORM) && !(file->f_flags & O_NONBLOCK) && 2522 + if ((mask & EPOLLRDNORM) && !(sock->file->f_flags & O_NONBLOCK) && 2523 2523 !(sk->sk_shutdown & RCV_SHUTDOWN) && first_packet_length(sk) == -1) 2524 2524 mask &= ~(EPOLLIN | EPOLLRDNORM); 2525 2525 2526 2526 return mask; 2527 2527 2528 2528 } 2529 - EXPORT_SYMBOL(udp_poll); 2529 + EXPORT_SYMBOL(udp_poll_mask); 2530 2530 2531 2531 int udp_abort(struct sock *sk, int err) 2532 2532 {
+2 -2
net/ipv6/af_inet6.c
··· 571 571 .socketpair = sock_no_socketpair, /* a do nothing */ 572 572 .accept = inet_accept, /* ok */ 573 573 .getname = inet6_getname, 574 - .poll = tcp_poll, /* ok */ 574 + .poll_mask = tcp_poll_mask, /* ok */ 575 575 .ioctl = inet6_ioctl, /* must change */ 576 576 .listen = inet_listen, /* ok */ 577 577 .shutdown = inet_shutdown, /* ok */ ··· 601 601 .socketpair = sock_no_socketpair, /* a do nothing */ 602 602 .accept = sock_no_accept, /* a do nothing */ 603 603 .getname = inet6_getname, 604 - .poll = udp_poll, /* ok */ 604 + .poll_mask = udp_poll_mask, /* ok */ 605 605 .ioctl = inet6_ioctl, /* must change */ 606 606 .listen = sock_no_listen, /* ok */ 607 607 .shutdown = inet_shutdown, /* ok */
+2 -2
net/ipv6/raw.c
··· 1334 1334 } 1335 1335 #endif /* CONFIG_PROC_FS */ 1336 1336 1337 - /* Same as inet6_dgram_ops, sans udp_poll. */ 1337 + /* Same as inet6_dgram_ops, sans udp_poll_mask. */ 1338 1338 const struct proto_ops inet6_sockraw_ops = { 1339 1339 .family = PF_INET6, 1340 1340 .owner = THIS_MODULE, ··· 1344 1344 .socketpair = sock_no_socketpair, /* a do nothing */ 1345 1345 .accept = sock_no_accept, /* a do nothing */ 1346 1346 .getname = inet6_getname, 1347 - .poll = datagram_poll, /* ok */ 1347 + .poll_mask = datagram_poll_mask, /* ok */ 1348 1348 .ioctl = inet6_ioctl, /* must change */ 1349 1349 .listen = sock_no_listen, /* ok */ 1350 1350 .shutdown = inet_shutdown, /* ok */
+2 -5
net/iucv/af_iucv.c
··· 1488 1488 return 0; 1489 1489 } 1490 1490 1491 - __poll_t iucv_sock_poll(struct file *file, struct socket *sock, 1492 - poll_table *wait) 1491 + static __poll_t iucv_sock_poll_mask(struct socket *sock, __poll_t events) 1493 1492 { 1494 1493 struct sock *sk = sock->sk; 1495 1494 __poll_t mask = 0; 1496 - 1497 - sock_poll_wait(file, sk_sleep(sk), wait); 1498 1495 1499 1496 if (sk->sk_state == IUCV_LISTEN) 1500 1497 return iucv_accept_poll(sk); ··· 2385 2388 .getname = iucv_sock_getname, 2386 2389 .sendmsg = iucv_sock_sendmsg, 2387 2390 .recvmsg = iucv_sock_recvmsg, 2388 - .poll = iucv_sock_poll, 2391 + .poll_mask = iucv_sock_poll_mask, 2389 2392 .ioctl = sock_no_ioctl, 2390 2393 .mmap = sock_no_mmap, 2391 2394 .socketpair = sock_no_socketpair,
+5 -5
net/kcm/kcmsock.c
··· 1336 1336 struct list_head *head; 1337 1337 int index = 0; 1338 1338 1339 - /* For SOCK_SEQPACKET sock type, datagram_poll checks the sk_state, so 1340 - * we set sk_state, otherwise epoll_wait always returns right away with 1341 - * EPOLLHUP 1339 + /* For SOCK_SEQPACKET sock type, datagram_poll_mask checks the sk_state, 1340 + * so we set sk_state, otherwise epoll_wait always returns right away 1341 + * with EPOLLHUP 1342 1342 */ 1343 1343 kcm->sk.sk_state = TCP_ESTABLISHED; 1344 1344 ··· 1903 1903 .socketpair = sock_no_socketpair, 1904 1904 .accept = sock_no_accept, 1905 1905 .getname = sock_no_getname, 1906 - .poll = datagram_poll, 1906 + .poll_mask = datagram_poll_mask, 1907 1907 .ioctl = kcm_ioctl, 1908 1908 .listen = sock_no_listen, 1909 1909 .shutdown = sock_no_shutdown, ··· 1924 1924 .socketpair = sock_no_socketpair, 1925 1925 .accept = sock_no_accept, 1926 1926 .getname = sock_no_getname, 1927 - .poll = datagram_poll, 1927 + .poll_mask = datagram_poll_mask, 1928 1928 .ioctl = kcm_ioctl, 1929 1929 .listen = sock_no_listen, 1930 1930 .shutdown = sock_no_shutdown,
+1 -1
net/key/af_key.c
··· 3751 3751 3752 3752 /* Now the operations that really occur. */ 3753 3753 .release = pfkey_release, 3754 - .poll = datagram_poll, 3754 + .poll_mask = datagram_poll_mask, 3755 3755 .sendmsg = pfkey_sendmsg, 3756 3756 .recvmsg = pfkey_recvmsg, 3757 3757 };
+1 -1
net/l2tp/l2tp_ip.c
··· 613 613 .socketpair = sock_no_socketpair, 614 614 .accept = sock_no_accept, 615 615 .getname = l2tp_ip_getname, 616 - .poll = datagram_poll, 616 + .poll_mask = datagram_poll_mask, 617 617 .ioctl = inet_ioctl, 618 618 .listen = sock_no_listen, 619 619 .shutdown = inet_shutdown,
+1 -1
net/l2tp/l2tp_ip6.c
··· 754 754 .socketpair = sock_no_socketpair, 755 755 .accept = sock_no_accept, 756 756 .getname = l2tp_ip6_getname, 757 - .poll = datagram_poll, 757 + .poll_mask = datagram_poll_mask, 758 758 .ioctl = inet6_ioctl, 759 759 .listen = sock_no_listen, 760 760 .shutdown = inet_shutdown,
+1 -1
net/l2tp/l2tp_ppp.c
··· 1788 1788 .socketpair = sock_no_socketpair, 1789 1789 .accept = sock_no_accept, 1790 1790 .getname = pppol2tp_getname, 1791 - .poll = datagram_poll, 1791 + .poll_mask = datagram_poll_mask, 1792 1792 .listen = sock_no_listen, 1793 1793 .shutdown = sock_no_shutdown, 1794 1794 .setsockopt = pppol2tp_setsockopt,
+1 -1
net/llc/af_llc.c
··· 1192 1192 .socketpair = sock_no_socketpair, 1193 1193 .accept = llc_ui_accept, 1194 1194 .getname = llc_ui_getname, 1195 - .poll = datagram_poll, 1195 + .poll_mask = datagram_poll_mask, 1196 1196 .ioctl = llc_ui_ioctl, 1197 1197 .listen = llc_ui_listen, 1198 1198 .shutdown = llc_ui_shutdown,
+1 -1
net/netlink/af_netlink.c
··· 2658 2658 .socketpair = sock_no_socketpair, 2659 2659 .accept = sock_no_accept, 2660 2660 .getname = netlink_getname, 2661 - .poll = datagram_poll, 2661 + .poll_mask = datagram_poll_mask, 2662 2662 .ioctl = netlink_ioctl, 2663 2663 .listen = sock_no_listen, 2664 2664 .shutdown = sock_no_shutdown,
+1 -1
net/netrom/af_netrom.c
··· 1355 1355 .socketpair = sock_no_socketpair, 1356 1356 .accept = nr_accept, 1357 1357 .getname = nr_getname, 1358 - .poll = datagram_poll, 1358 + .poll_mask = datagram_poll_mask, 1359 1359 .ioctl = nr_ioctl, 1360 1360 .listen = nr_listen, 1361 1361 .shutdown = sock_no_shutdown,
+3 -6
net/nfc/llcp_sock.c
··· 548 548 return 0; 549 549 } 550 550 551 - static __poll_t llcp_sock_poll(struct file *file, struct socket *sock, 552 - poll_table *wait) 551 + static __poll_t llcp_sock_poll_mask(struct socket *sock, __poll_t events) 553 552 { 554 553 struct sock *sk = sock->sk; 555 554 __poll_t mask = 0; 556 555 557 556 pr_debug("%p\n", sk); 558 - 559 - sock_poll_wait(file, sk_sleep(sk), wait); 560 557 561 558 if (sk->sk_state == LLCP_LISTEN) 562 559 return llcp_accept_poll(sk); ··· 896 899 .socketpair = sock_no_socketpair, 897 900 .accept = llcp_sock_accept, 898 901 .getname = llcp_sock_getname, 899 - .poll = llcp_sock_poll, 902 + .poll_mask = llcp_sock_poll_mask, 900 903 .ioctl = sock_no_ioctl, 901 904 .listen = llcp_sock_listen, 902 905 .shutdown = sock_no_shutdown, ··· 916 919 .socketpair = sock_no_socketpair, 917 920 .accept = sock_no_accept, 918 921 .getname = llcp_sock_getname, 919 - .poll = llcp_sock_poll, 922 + .poll_mask = llcp_sock_poll_mask, 920 923 .ioctl = sock_no_ioctl, 921 924 .listen = sock_no_listen, 922 925 .shutdown = sock_no_shutdown,
+2 -2
net/nfc/rawsock.c
··· 284 284 .socketpair = sock_no_socketpair, 285 285 .accept = sock_no_accept, 286 286 .getname = sock_no_getname, 287 - .poll = datagram_poll, 287 + .poll_mask = datagram_poll_mask, 288 288 .ioctl = sock_no_ioctl, 289 289 .listen = sock_no_listen, 290 290 .shutdown = sock_no_shutdown, ··· 304 304 .socketpair = sock_no_socketpair, 305 305 .accept = sock_no_accept, 306 306 .getname = sock_no_getname, 307 - .poll = datagram_poll, 307 + .poll_mask = datagram_poll_mask, 308 308 .ioctl = sock_no_ioctl, 309 309 .listen = sock_no_listen, 310 310 .shutdown = sock_no_shutdown,
+4 -5
net/packet/af_packet.c
··· 4110 4110 return 0; 4111 4111 } 4112 4112 4113 - static __poll_t packet_poll(struct file *file, struct socket *sock, 4114 - poll_table *wait) 4113 + static __poll_t packet_poll_mask(struct socket *sock, __poll_t events) 4115 4114 { 4116 4115 struct sock *sk = sock->sk; 4117 4116 struct packet_sock *po = pkt_sk(sk); 4118 - __poll_t mask = datagram_poll(file, sock, wait); 4117 + __poll_t mask = datagram_poll_mask(sock, events); 4119 4118 4120 4119 spin_lock_bh(&sk->sk_receive_queue.lock); 4121 4120 if (po->rx_ring.pg_vec) { ··· 4456 4457 .socketpair = sock_no_socketpair, 4457 4458 .accept = sock_no_accept, 4458 4459 .getname = packet_getname_spkt, 4459 - .poll = datagram_poll, 4460 + .poll_mask = datagram_poll_mask, 4460 4461 .ioctl = packet_ioctl, 4461 4462 .listen = sock_no_listen, 4462 4463 .shutdown = sock_no_shutdown, ··· 4477 4478 .socketpair = sock_no_socketpair, 4478 4479 .accept = sock_no_accept, 4479 4480 .getname = packet_getname, 4480 - .poll = packet_poll, 4481 + .poll_mask = packet_poll_mask, 4481 4482 .ioctl = packet_ioctl, 4482 4483 .listen = sock_no_listen, 4483 4484 .shutdown = sock_no_shutdown,
+3 -6
net/phonet/socket.c
··· 340 340 return sizeof(struct sockaddr_pn); 341 341 } 342 342 343 - static __poll_t pn_socket_poll(struct file *file, struct socket *sock, 344 - poll_table *wait) 343 + static __poll_t pn_socket_poll_mask(struct socket *sock, __poll_t events) 345 344 { 346 345 struct sock *sk = sock->sk; 347 346 struct pep_sock *pn = pep_sk(sk); 348 347 __poll_t mask = 0; 349 - 350 - poll_wait(file, sk_sleep(sk), wait); 351 348 352 349 if (sk->sk_state == TCP_CLOSE) 353 350 return EPOLLERR; ··· 445 448 .socketpair = sock_no_socketpair, 446 449 .accept = sock_no_accept, 447 450 .getname = pn_socket_getname, 448 - .poll = datagram_poll, 451 + .poll_mask = datagram_poll_mask, 449 452 .ioctl = pn_socket_ioctl, 450 453 .listen = sock_no_listen, 451 454 .shutdown = sock_no_shutdown, ··· 470 473 .socketpair = sock_no_socketpair, 471 474 .accept = pn_socket_accept, 472 475 .getname = pn_socket_getname, 473 - .poll = pn_socket_poll, 476 + .poll_mask = pn_socket_poll_mask, 474 477 .ioctl = pn_socket_ioctl, 475 478 .listen = pn_socket_listen, 476 479 .shutdown = sock_no_shutdown,
+1 -1
net/qrtr/qrtr.c
··· 1023 1023 .recvmsg = qrtr_recvmsg, 1024 1024 .getname = qrtr_getname, 1025 1025 .ioctl = qrtr_ioctl, 1026 - .poll = datagram_poll, 1026 + .poll_mask = datagram_poll_mask, 1027 1027 .shutdown = sock_no_shutdown, 1028 1028 .setsockopt = sock_no_setsockopt, 1029 1029 .getsockopt = sock_no_getsockopt,
+1 -1
net/rose/af_rose.c
··· 1470 1470 .socketpair = sock_no_socketpair, 1471 1471 .accept = rose_accept, 1472 1472 .getname = rose_getname, 1473 - .poll = datagram_poll, 1473 + .poll_mask = datagram_poll_mask, 1474 1474 .ioctl = rose_ioctl, 1475 1475 .listen = rose_listen, 1476 1476 .shutdown = sock_no_shutdown,
+3 -7
net/rxrpc/af_rxrpc.c
··· 734 734 /* 735 735 * permit an RxRPC socket to be polled 736 736 */ 737 - static __poll_t rxrpc_poll(struct file *file, struct socket *sock, 738 - poll_table *wait) 737 + static __poll_t rxrpc_poll_mask(struct socket *sock, __poll_t events) 739 738 { 740 739 struct sock *sk = sock->sk; 741 740 struct rxrpc_sock *rx = rxrpc_sk(sk); 742 - __poll_t mask; 743 - 744 - sock_poll_wait(file, sk_sleep(sk), wait); 745 - mask = 0; 741 + __poll_t mask = 0; 746 742 747 743 /* the socket is readable if there are any messages waiting on the Rx 748 744 * queue */ ··· 945 949 .socketpair = sock_no_socketpair, 946 950 .accept = sock_no_accept, 947 951 .getname = sock_no_getname, 948 - .poll = rxrpc_poll, 952 + .poll_mask = rxrpc_poll_mask, 949 953 .ioctl = sock_no_ioctl, 950 954 .listen = rxrpc_listen, 951 955 .shutdown = rxrpc_shutdown,
+1 -1
net/sctp/ipv6.c
··· 1010 1010 .socketpair = sock_no_socketpair, 1011 1011 .accept = inet_accept, 1012 1012 .getname = sctp_getname, 1013 - .poll = sctp_poll, 1013 + .poll_mask = sctp_poll_mask, 1014 1014 .ioctl = inet6_ioctl, 1015 1015 .listen = sctp_inet_listen, 1016 1016 .shutdown = inet_shutdown,
+1 -1
net/sctp/protocol.c
··· 1016 1016 .socketpair = sock_no_socketpair, 1017 1017 .accept = inet_accept, 1018 1018 .getname = inet_getname, /* Semantics are different. */ 1019 - .poll = sctp_poll, 1019 + .poll_mask = sctp_poll_mask, 1020 1020 .ioctl = inet_ioctl, 1021 1021 .listen = sctp_inet_listen, 1022 1022 .shutdown = inet_shutdown, /* Looks harmless. */
+1 -3
net/sctp/socket.c
··· 7722 7722 * here, again, by modeling the current TCP/UDP code. We don't have 7723 7723 * a good way to test with it yet. 7724 7724 */ 7725 - __poll_t sctp_poll(struct file *file, struct socket *sock, poll_table *wait) 7725 + __poll_t sctp_poll_mask(struct socket *sock, __poll_t events) 7726 7726 { 7727 7727 struct sock *sk = sock->sk; 7728 7728 struct sctp_sock *sp = sctp_sk(sk); 7729 7729 __poll_t mask; 7730 - 7731 - poll_wait(file, sk_sleep(sk), wait); 7732 7730 7733 7731 sock_rps_record_flow(sk); 7734 7732
+42 -17
net/socket.c
··· 117 117 static int sock_mmap(struct file *file, struct vm_area_struct *vma); 118 118 119 119 static int sock_close(struct inode *inode, struct file *file); 120 - static __poll_t sock_poll(struct file *file, 121 - struct poll_table_struct *wait); 120 + static struct wait_queue_head *sock_get_poll_head(struct file *file, 121 + __poll_t events); 122 + static __poll_t sock_poll_mask(struct file *file, __poll_t); 123 + static __poll_t sock_poll(struct file *file, struct poll_table_struct *wait); 122 124 static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg); 123 125 #ifdef CONFIG_COMPAT 124 126 static long compat_sock_ioctl(struct file *file, ··· 143 141 .llseek = no_llseek, 144 142 .read_iter = sock_read_iter, 145 143 .write_iter = sock_write_iter, 144 + .get_poll_head = sock_get_poll_head, 145 + .poll_mask = sock_poll_mask, 146 146 .poll = sock_poll, 147 147 .unlocked_ioctl = sock_ioctl, 148 148 #ifdef CONFIG_COMPAT ··· 1118 1114 } 1119 1115 EXPORT_SYMBOL(sock_create_lite); 1120 1116 1117 + static struct wait_queue_head *sock_get_poll_head(struct file *file, 1118 + __poll_t events) 1119 + { 1120 + struct socket *sock = file->private_data; 1121 + 1122 + if (!sock->ops->poll_mask) 1123 + return NULL; 1124 + sock_poll_busy_loop(sock, events); 1125 + return sk_sleep(sock->sk); 1126 + } 1127 + 1128 + static __poll_t sock_poll_mask(struct file *file, __poll_t events) 1129 + { 1130 + struct socket *sock = file->private_data; 1131 + 1132 + /* 1133 + * We need to be sure we are in sync with the socket flags modification. 1134 + * 1135 + * This memory barrier is paired in the wq_has_sleeper. 1136 + */ 1137 + smp_mb(); 1138 + 1139 + /* this socket can poll_ll so tell the system call */ 1140 + return sock->ops->poll_mask(sock, events) | 1141 + (sk_can_busy_loop(sock->sk) ? POLL_BUSY_LOOP : 0); 1142 + } 1143 + 1121 1144 /* No kernel lock held - perfect */ 1122 1145 static __poll_t sock_poll(struct file *file, poll_table *wait) 1123 1146 { 1124 - __poll_t busy_flag = 0; 1125 - struct socket *sock; 1147 + struct socket *sock = file->private_data; 1148 + __poll_t events = poll_requested_events(wait), mask = 0; 1126 1149 1127 - /* 1128 - * We can't return errors to poll, so it's either yes or no. 1129 - */ 1130 - sock = file->private_data; 1131 - 1132 - if (sk_can_busy_loop(sock->sk)) { 1133 - /* this socket can poll_ll so tell the system call */ 1134 - busy_flag = POLL_BUSY_LOOP; 1135 - 1136 - /* once, only if requested by syscall */ 1137 - if (wait && (wait->_key & POLL_BUSY_LOOP)) 1138 - sk_busy_loop(sock->sk, 1); 1150 + if (sock->ops->poll) { 1151 + sock_poll_busy_loop(sock, events); 1152 + mask = sock->ops->poll(file, sock, wait); 1153 + } else if (sock->ops->poll_mask) { 1154 + sock_poll_wait(file, sock_get_poll_head(file, events), wait); 1155 + mask = sock->ops->poll_mask(sock, events); 1139 1156 } 1140 1157 1141 - return busy_flag | sock->ops->poll(file, sock, wait); 1158 + return mask | sock_poll_busy_flag(sock); 1142 1159 } 1143 1160 1144 1161 static int sock_mmap(struct file *file, struct vm_area_struct *vma)
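
The smp_mb() in sock_poll_mask() is one half of the usual poll/wakeup ordering protocol; the other half sits in the socket wakeup callbacks behind wq_has_sleeper(). A loose sketch of the waker side, modeled on sock_def_readable() (which is not part of this diff), to show what the barrier pairs with:

/*
 * Either the waker observes the poller on the waitqueue and wakes it,
 * or the poller's ->poll_mask() observes the state the waker published.
 */
static void example_data_ready(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	wq = rcu_dereference(sk->sk_wq);
	if (skwq_has_sleeper(wq))	/* implies the pairing barrier */
		wake_up_interruptible_poll(&wq->wait, EPOLLIN | EPOLLRDNORM);
	rcu_read_unlock();
}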
+5 -9
net/tipc/socket.c
··· 692 692 }
693 693 
694 694 /**
695 - * tipc_poll - read and possibly block on pollmask
695 + * tipc_poll_mask - read pollmask
696 696 * @file: file structure associated with the socket
697 697 * @sock: socket for which to calculate the poll bits
698 - * @wait: ???
699 698 *
700 699 * Returns pollmask value
701 700 *
··· 708 709 * imply that the operation will succeed, merely that it should be performed
709 710 * and will not block.
710 711 */
711 - static __poll_t tipc_poll(struct file *file, struct socket *sock,
712 - poll_table *wait)
712 + static __poll_t tipc_poll_mask(struct socket *sock, __poll_t events)
713 713 {
714 714 struct sock *sk = sock->sk;
715 715 struct tipc_sock *tsk = tipc_sk(sk);
716 716 __poll_t revents = 0;
717 - 
718 - sock_poll_wait(file, sk_sleep(sk), wait);
719 717 
720 718 if (sk->sk_shutdown & RCV_SHUTDOWN)
721 719 revents |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
··· 3024 3028 .socketpair = tipc_socketpair,
3025 3029 .accept = sock_no_accept,
3026 3030 .getname = tipc_getname,
3027 - .poll = tipc_poll,
3031 + .poll_mask = tipc_poll_mask,
3028 3032 .ioctl = tipc_ioctl,
3029 3033 .listen = sock_no_listen,
3030 3034 .shutdown = tipc_shutdown,
··· 3045 3049 .socketpair = tipc_socketpair,
3046 3050 .accept = tipc_accept,
3047 3051 .getname = tipc_getname,
3048 - .poll = tipc_poll,
3052 + .poll_mask = tipc_poll_mask,
3049 3053 .ioctl = tipc_ioctl,
3050 3054 .listen = tipc_listen,
3051 3055 .shutdown = tipc_shutdown,
··· 3066 3070 .socketpair = tipc_socketpair,
3067 3071 .accept = tipc_accept,
3068 3072 .getname = tipc_getname,
3069 - .poll = tipc_poll,
3073 + .poll_mask = tipc_poll_mask,
3070 3074 .ioctl = tipc_ioctl,
3071 3075 .listen = tipc_listen,
3072 3076 .shutdown = tipc_shutdown,
+11 -19
net/unix/af_unix.c
··· 638 638 static int unix_socketpair(struct socket *, struct socket *); 639 639 static int unix_accept(struct socket *, struct socket *, int, bool); 640 640 static int unix_getname(struct socket *, struct sockaddr *, int); 641 - static __poll_t unix_poll(struct file *, struct socket *, poll_table *); 642 - static __poll_t unix_dgram_poll(struct file *, struct socket *, 643 - poll_table *); 641 + static __poll_t unix_poll_mask(struct socket *, __poll_t); 642 + static __poll_t unix_dgram_poll_mask(struct socket *, __poll_t); 644 643 static int unix_ioctl(struct socket *, unsigned int, unsigned long); 645 644 static int unix_shutdown(struct socket *, int); 646 645 static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t); ··· 680 681 .socketpair = unix_socketpair, 681 682 .accept = unix_accept, 682 683 .getname = unix_getname, 683 - .poll = unix_poll, 684 + .poll_mask = unix_poll_mask, 684 685 .ioctl = unix_ioctl, 685 686 .listen = unix_listen, 686 687 .shutdown = unix_shutdown, ··· 703 704 .socketpair = unix_socketpair, 704 705 .accept = sock_no_accept, 705 706 .getname = unix_getname, 706 - .poll = unix_dgram_poll, 707 + .poll_mask = unix_dgram_poll_mask, 707 708 .ioctl = unix_ioctl, 708 709 .listen = sock_no_listen, 709 710 .shutdown = unix_shutdown, ··· 725 726 .socketpair = unix_socketpair, 726 727 .accept = unix_accept, 727 728 .getname = unix_getname, 728 - .poll = unix_dgram_poll, 729 + .poll_mask = unix_dgram_poll_mask, 729 730 .ioctl = unix_ioctl, 730 731 .listen = unix_listen, 731 732 .shutdown = unix_shutdown, ··· 2629 2630 return err; 2630 2631 } 2631 2632 2632 - static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait) 2633 + static __poll_t unix_poll_mask(struct socket *sock, __poll_t events) 2633 2634 { 2634 2635 struct sock *sk = sock->sk; 2635 - __poll_t mask; 2636 - 2637 - sock_poll_wait(file, sk_sleep(sk), wait); 2638 - mask = 0; 2636 + __poll_t mask = 0; 2639 2637 2640 2638 /* exceptional events? */ 2641 2639 if (sk->sk_err) ··· 2661 2665 return mask; 2662 2666 } 2663 2667 2664 - static __poll_t unix_dgram_poll(struct file *file, struct socket *sock, 2665 - poll_table *wait) 2668 + static __poll_t unix_dgram_poll_mask(struct socket *sock, __poll_t events) 2666 2669 { 2667 2670 struct sock *sk = sock->sk, *other; 2668 - unsigned int writable; 2669 - __poll_t mask; 2670 - 2671 - sock_poll_wait(file, sk_sleep(sk), wait); 2672 - mask = 0; 2671 + int writable; 2672 + __poll_t mask = 0; 2673 2673 2674 2674 /* exceptional events? */ 2675 2675 if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) ··· 2691 2699 } 2692 2700 2693 2701 /* No write status requested, avoid expensive OUT tests. */ 2694 - if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT))) 2702 + if (!(events & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT))) 2695 2703 return mask; 2696 2704 2697 2705 writable = unix_writable(sk);
+6 -13
net/vmw_vsock/af_vsock.c
··· 850 850 return err; 851 851 } 852 852 853 - static __poll_t vsock_poll(struct file *file, struct socket *sock, 854 - poll_table *wait) 853 + static __poll_t vsock_poll_mask(struct socket *sock, __poll_t events) 855 854 { 856 - struct sock *sk; 857 - __poll_t mask; 858 - struct vsock_sock *vsk; 859 - 860 - sk = sock->sk; 861 - vsk = vsock_sk(sk); 862 - 863 - poll_wait(file, sk_sleep(sk), wait); 864 - mask = 0; 855 + struct sock *sk = sock->sk; 856 + struct vsock_sock *vsk = vsock_sk(sk); 857 + __poll_t mask = 0; 865 858 866 859 if (sk->sk_err) 867 860 /* Signify that there has been an error on this socket. */ ··· 1084 1091 .socketpair = sock_no_socketpair, 1085 1092 .accept = sock_no_accept, 1086 1093 .getname = vsock_getname, 1087 - .poll = vsock_poll, 1094 + .poll_mask = vsock_poll_mask, 1088 1095 .ioctl = sock_no_ioctl, 1089 1096 .listen = sock_no_listen, 1090 1097 .shutdown = vsock_shutdown, ··· 1842 1849 .socketpair = sock_no_socketpair, 1843 1850 .accept = vsock_accept, 1844 1851 .getname = vsock_getname, 1845 - .poll = vsock_poll, 1852 + .poll_mask = vsock_poll_mask, 1846 1853 .ioctl = sock_no_ioctl, 1847 1854 .listen = vsock_listen, 1848 1855 .shutdown = vsock_shutdown,
+1 -1
net/x25/af_x25.c
··· 1750 1750 .socketpair = sock_no_socketpair, 1751 1751 .accept = x25_accept, 1752 1752 .getname = x25_getname, 1753 - .poll = datagram_poll, 1753 + .poll_mask = datagram_poll_mask, 1754 1754 .ioctl = x25_ioctl, 1755 1755 #ifdef CONFIG_COMPAT 1756 1756 .compat_ioctl = compat_x25_ioctl,
+1 -1
virt/kvm/eventfd.c
··· 397 397 * Check if there was an event already pending on the eventfd 398 398 * before we registered, and trigger it as if we didn't miss it. 399 399 */ 400 - events = f.file->f_op->poll(f.file, &irqfd->pt); 400 + events = vfs_poll(f.file, &irqfd->pt); 401 401 402 402 if (events & EPOLLIN) 403 403 schedule_work(&irqfd->inject);