Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

eventfd - allow atomic read and waitqueue remove

KVM needs a way to atomically remove itself from the eventfd ->poll()
wait queue head, in order to correctly handle its IRQfd deassign
operation.

This patch introduces such an API, plus a way to read an eventfd from its
context.

Signed-off-by: Davide Libenzi <davidel@xmailserver.org>
Signed-off-by: Avi Kivity <avi@redhat.com>

authored by

Davide Libenzi and committed by
Marcelo Tosatti
cb289d62 a6085fba

+90 -15
+74 -15
fs/eventfd.c
··· 135 135 return events; 136 136 } 137 137 138 - static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count, 139 - loff_t *ppos) 138 + static void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt) 140 139 { 141 - struct eventfd_ctx *ctx = file->private_data; 140 + *cnt = (ctx->flags & EFD_SEMAPHORE) ? 1 : ctx->count; 141 + ctx->count -= *cnt; 142 + } 143 + 144 + /** 145 + * eventfd_ctx_remove_wait_queue - Read the current counter and removes wait queue. 146 + * @ctx: [in] Pointer to eventfd context. 147 + * @wait: [in] Wait queue to be removed. 148 + * @cnt: [out] Pointer to the 64bit conter value. 149 + * 150 + * Returns zero if successful, or the following error codes: 151 + * 152 + * -EAGAIN : The operation would have blocked. 153 + * 154 + * This is used to atomically remove a wait queue entry from the eventfd wait 155 + * queue head, and read/reset the counter value. 156 + */ 157 + int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_t *wait, 158 + __u64 *cnt) 159 + { 160 + unsigned long flags; 161 + 162 + spin_lock_irqsave(&ctx->wqh.lock, flags); 163 + eventfd_ctx_do_read(ctx, cnt); 164 + __remove_wait_queue(&ctx->wqh, wait); 165 + if (*cnt != 0 && waitqueue_active(&ctx->wqh)) 166 + wake_up_locked_poll(&ctx->wqh, POLLOUT); 167 + spin_unlock_irqrestore(&ctx->wqh.lock, flags); 168 + 169 + return *cnt != 0 ? 0 : -EAGAIN; 170 + } 171 + EXPORT_SYMBOL_GPL(eventfd_ctx_remove_wait_queue); 172 + 173 + /** 174 + * eventfd_ctx_read - Reads the eventfd counter or wait if it is zero. 175 + * @ctx: [in] Pointer to eventfd context. 176 + * @no_wait: [in] Different from zero if the operation should not block. 177 + * @cnt: [out] Pointer to the 64bit conter value. 178 + * 179 + * Returns zero if successful, or the following error codes: 180 + * 181 + * -EAGAIN : The operation would have blocked but @no_wait was nonzero. 182 + * -ERESTARTSYS : A signal interrupted the wait operation. 
183 + * 184 + * If @no_wait is zero, the function might sleep until the eventfd internal 185 + * counter becomes greater than zero. 186 + */ 187 + ssize_t eventfd_ctx_read(struct eventfd_ctx *ctx, int no_wait, __u64 *cnt) 188 + { 142 189 ssize_t res; 143 - __u64 ucnt = 0; 144 190 DECLARE_WAITQUEUE(wait, current); 145 191 146 - if (count < sizeof(ucnt)) 147 - return -EINVAL; 148 192 spin_lock_irq(&ctx->wqh.lock); 193 + *cnt = 0; 149 194 res = -EAGAIN; 150 195 if (ctx->count > 0) 151 - res = sizeof(ucnt); 152 - else if (!(file->f_flags & O_NONBLOCK)) { 196 + res = 0; 197 + else if (!no_wait) { 153 198 __add_wait_queue(&ctx->wqh, &wait); 154 - for (res = 0;;) { 199 + for (;;) { 155 200 set_current_state(TASK_INTERRUPTIBLE); 156 201 if (ctx->count > 0) { 157 - res = sizeof(ucnt); 202 + res = 0; 158 203 break; 159 204 } 160 205 if (signal_pending(current)) { ··· 213 168 __remove_wait_queue(&ctx->wqh, &wait); 214 169 __set_current_state(TASK_RUNNING); 215 170 } 216 - if (likely(res > 0)) { 217 - ucnt = (ctx->flags & EFD_SEMAPHORE) ? 1 : ctx->count; 218 - ctx->count -= ucnt; 171 + if (likely(res == 0)) { 172 + eventfd_ctx_do_read(ctx, cnt); 219 173 if (waitqueue_active(&ctx->wqh)) 220 174 wake_up_locked_poll(&ctx->wqh, POLLOUT); 221 175 } 222 176 spin_unlock_irq(&ctx->wqh.lock); 223 - if (res > 0 && put_user(ucnt, (__u64 __user *) buf)) 224 - return -EFAULT; 225 177 226 178 return res; 179 + } 180 + EXPORT_SYMBOL_GPL(eventfd_ctx_read); 181 + 182 + static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count, 183 + loff_t *ppos) 184 + { 185 + struct eventfd_ctx *ctx = file->private_data; 186 + ssize_t res; 187 + __u64 cnt; 188 + 189 + if (count < sizeof(cnt)) 190 + return -EINVAL; 191 + res = eventfd_ctx_read(ctx, file->f_flags & O_NONBLOCK, &cnt); 192 + if (res < 0) 193 + return res; 194 + 195 + return put_user(cnt, (__u64 __user *) buf) ? 
-EFAULT : sizeof(cnt); 227 196 } 228 197 229 198 static ssize_t eventfd_write(struct file *file, const char __user *buf, size_t count,
+16
include/linux/eventfd.h
··· 10 10 11 11 #include <linux/fcntl.h> 12 12 #include <linux/file.h> 13 + #include <linux/wait.h> 13 14 14 15 /* 15 16 * CAREFUL: Check include/asm-generic/fcntl.h when defining ··· 35 34 struct eventfd_ctx *eventfd_ctx_fdget(int fd); 36 35 struct eventfd_ctx *eventfd_ctx_fileget(struct file *file); 37 36 int eventfd_signal(struct eventfd_ctx *ctx, int n); 37 + ssize_t eventfd_ctx_read(struct eventfd_ctx *ctx, int no_wait, __u64 *cnt); 38 + int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_t *wait, 39 + __u64 *cnt); 38 40 39 41 #else /* CONFIG_EVENTFD */ 40 42 ··· 63 59 static inline void eventfd_ctx_put(struct eventfd_ctx *ctx) 64 60 { 65 61 62 + } 63 + 64 + static inline ssize_t eventfd_ctx_read(struct eventfd_ctx *ctx, int no_wait, 65 + __u64 *cnt) 66 + { 67 + return -ENOSYS; 68 + } 69 + 70 + static inline int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, 71 + wait_queue_t *wait, __u64 *cnt) 72 + { 73 + return -ENOSYS; 66 74 } 67 75 68 76 #endif