Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

um: Rewrite the sigio workaround based on epoll and tgkill

The existing sigio workaround implementation removes FDs from the
poll set when events are triggered, requiring users to re-add them via
add_sigio_fd() after processing. This introduces a potential race
condition between the FD removal in write_sigio_thread() and the
next_poll update in __add_sigio_fd(), and is inefficient due to the
frequent FD removal and re-addition. Rewrite the implementation based
on epoll and tgkill for improved efficiency and reliability.

Signed-off-by: Tiwei Bie <tiwei.btw@antgroup.com>
Link: https://patch.msgid.link/20250315161910.4082396-2-tiwei.btw@antgroup.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>

Authored by Tiwei Bie and committed by Johannes Berg
33c9da5d 69f52573

+48 -317
+1 -1
arch/um/drivers/random.c
··· 79 79 if (err < 0) 80 80 goto err_out_cleanup_hw; 81 81 82 - sigio_broken(random_fd); 82 + sigio_broken(); 83 83 hwrng.name = RNG_MODULE_NAME; 84 84 hwrng.read = rng_dev_read; 85 85
+1 -1
arch/um/drivers/rtc_user.c
··· 39 39 } 40 40 41 41 /* apparently timerfd won't send SIGIO, use workaround */ 42 - sigio_broken(uml_rtc_irq_fds[0]); 42 + sigio_broken(); 43 43 err = add_sigio_fd(uml_rtc_irq_fds[0]); 44 44 if (err < 0) { 45 45 close(uml_rtc_irq_fds[0]);
+1 -1
arch/um/include/shared/os.h
··· 314 314 extern int add_sigio_fd(int fd); 315 315 extern int ignore_sigio_fd(int fd); 316 316 extern void maybe_sigio_broken(int fd); 317 - extern void sigio_broken(int fd); 317 + extern void sigio_broken(void); 318 318 /* 319 319 * unlocked versions for IRQ controller code. 320 320 *
-1
arch/um/include/shared/sigio.h
··· 6 6 #ifndef __SIGIO_H__ 7 7 #define __SIGIO_H__ 8 8 9 - extern int write_sigio_irq(int fd); 10 9 extern void sigio_lock(void); 11 10 extern void sigio_unlock(void); 12 11
-26
arch/um/kernel/sigio.c
··· 8 8 #include <os.h> 9 9 #include <sigio.h> 10 10 11 - /* Protected by sigio_lock() called from write_sigio_workaround */ 12 - static int sigio_irq_fd = -1; 13 - 14 - static irqreturn_t sigio_interrupt(int irq, void *data) 15 - { 16 - char c; 17 - 18 - os_read_file(sigio_irq_fd, &c, sizeof(c)); 19 - return IRQ_HANDLED; 20 - } 21 - 22 - int write_sigio_irq(int fd) 23 - { 24 - int err; 25 - 26 - err = um_request_irq(SIGIO_WRITE_IRQ, fd, IRQ_READ, sigio_interrupt, 27 - 0, "write sigio", NULL); 28 - if (err < 0) { 29 - printk(KERN_ERR "write_sigio_irq : um_request_irq failed, " 30 - "err = %d\n", err); 31 - return -1; 32 - } 33 - sigio_irq_fd = fd; 34 - return 0; 35 - } 36 - 37 11 /* These are called from os-Linux/sigio.c to protect its pollfds arrays. */ 38 12 static DEFINE_MUTEX(sigio_mutex); 39 13
+45 -287
arch/um/os-Linux/sigio.c
··· 11 11 #include <sched.h> 12 12 #include <signal.h> 13 13 #include <string.h> 14 + #include <sys/epoll.h> 14 15 #include <kern_util.h> 15 16 #include <init.h> 16 17 #include <os.h> ··· 24 23 */ 25 24 static struct os_helper_thread *write_sigio_td; 26 25 27 - /* 28 - * These arrays are initialized before the sigio thread is started, and 29 - * the descriptors closed after it is killed. So, it can't see them change. 30 - * On the UML side, they are changed under the sigio_lock. 31 - */ 32 - #define SIGIO_FDS_INIT {-1, -1} 26 + static int epollfd = -1; 33 27 34 - static int write_sigio_fds[2] = SIGIO_FDS_INIT; 35 - static int sigio_private[2] = SIGIO_FDS_INIT; 28 + #define MAX_EPOLL_EVENTS 64 36 29 37 - struct pollfds { 38 - struct pollfd *poll; 39 - int size; 40 - int used; 41 - }; 42 - 43 - /* 44 - * Protected by sigio_lock(). Used by the sigio thread, but the UML thread 45 - * synchronizes with it. 46 - */ 47 - static struct pollfds current_poll; 48 - static struct pollfds next_poll; 49 - static struct pollfds all_sigio_fds; 30 + static struct epoll_event epoll_events[MAX_EPOLL_EVENTS]; 50 31 51 32 static void *write_sigio_thread(void *unused) 52 33 { 53 - struct pollfds *fds, tmp; 54 - struct pollfd *p; 55 - int i, n, respond_fd; 56 - char c; 34 + int pid = getpid(); 35 + int r; 57 36 58 37 os_fix_helper_thread_signals(); 59 38 60 - fds = &current_poll; 61 39 while (1) { 62 - n = poll(fds->poll, fds->used, -1); 63 - if (n < 0) { 40 + r = epoll_wait(epollfd, epoll_events, MAX_EPOLL_EVENTS, -1); 41 + if (r < 0) { 64 42 if (errno == EINTR) 65 43 continue; 66 - printk(UM_KERN_ERR "write_sigio_thread : poll returned " 67 - "%d, errno = %d\n", n, errno); 44 + printk(UM_KERN_ERR "%s: epoll_wait failed, errno = %d\n", 45 + __func__, errno); 68 46 } 69 - for (i = 0; i < fds->used; i++) { 70 - p = &fds->poll[i]; 71 - if (p->revents == 0) 72 - continue; 73 - if (p->fd == sigio_private[1]) { 74 - CATCH_EINTR(n = read(sigio_private[1], &c, 75 - sizeof(c))); 76 - if (n != 
sizeof(c)) 77 - printk(UM_KERN_ERR 78 - "write_sigio_thread : " 79 - "read on socket failed, " 80 - "err = %d\n", errno); 81 - tmp = current_poll; 82 - current_poll = next_poll; 83 - next_poll = tmp; 84 - respond_fd = sigio_private[1]; 85 - } 86 - else { 87 - respond_fd = write_sigio_fds[1]; 88 - fds->used--; 89 - memmove(&fds->poll[i], &fds->poll[i + 1], 90 - (fds->used - i) * sizeof(*fds->poll)); 91 - } 92 47 93 - CATCH_EINTR(n = write(respond_fd, &c, sizeof(c))); 94 - if (n != sizeof(c)) 95 - printk(UM_KERN_ERR "write_sigio_thread : " 96 - "write on socket failed, err = %d\n", 97 - errno); 98 - } 48 + CATCH_EINTR(r = tgkill(pid, pid, SIGIO)); 49 + if (r < 0) 50 + printk(UM_KERN_ERR "%s: tgkill failed, errno = %d\n", 51 + __func__, errno); 99 52 } 100 53 101 54 return NULL; 102 55 } 103 56 104 - static int need_poll(struct pollfds *polls, int n) 105 - { 106 - struct pollfd *new; 107 - 108 - if (n <= polls->size) 109 - return 0; 110 - 111 - new = uml_kmalloc(n * sizeof(struct pollfd), UM_GFP_ATOMIC); 112 - if (new == NULL) { 113 - printk(UM_KERN_ERR "need_poll : failed to allocate new " 114 - "pollfds\n"); 115 - return -ENOMEM; 116 - } 117 - 118 - memcpy(new, polls->poll, polls->used * sizeof(struct pollfd)); 119 - kfree(polls->poll); 120 - 121 - polls->poll = new; 122 - polls->size = n; 123 - return 0; 124 - } 125 - 126 - /* 127 - * Must be called with sigio_lock held, because it's needed by the marked 128 - * critical section. 
129 - */ 130 - static void update_thread(void) 131 - { 132 - unsigned long flags; 133 - int n; 134 - char c; 135 - 136 - flags = um_set_signals_trace(0); 137 - CATCH_EINTR(n = write(sigio_private[0], &c, sizeof(c))); 138 - if (n != sizeof(c)) { 139 - printk(UM_KERN_ERR "update_thread : write failed, err = %d\n", 140 - errno); 141 - goto fail; 142 - } 143 - 144 - CATCH_EINTR(n = read(sigio_private[0], &c, sizeof(c))); 145 - if (n != sizeof(c)) { 146 - printk(UM_KERN_ERR "update_thread : read failed, err = %d\n", 147 - errno); 148 - goto fail; 149 - } 150 - 151 - um_set_signals_trace(flags); 152 - return; 153 - fail: 154 - /* Critical section start */ 155 - if (write_sigio_td) { 156 - os_kill_helper_thread(write_sigio_td); 157 - write_sigio_td = NULL; 158 - } 159 - close(sigio_private[0]); 160 - close(sigio_private[1]); 161 - close(write_sigio_fds[0]); 162 - close(write_sigio_fds[1]); 163 - /* Critical section end */ 164 - um_set_signals_trace(flags); 165 - } 166 - 167 57 int __add_sigio_fd(int fd) 168 58 { 169 - struct pollfd *p; 170 - int err, i, n; 59 + struct epoll_event event = { 60 + .data.fd = fd, 61 + .events = EPOLLIN | EPOLLET, 62 + }; 63 + int r; 171 64 172 - for (i = 0; i < all_sigio_fds.used; i++) { 173 - if (all_sigio_fds.poll[i].fd == fd) 174 - break; 175 - } 176 - if (i == all_sigio_fds.used) 177 - return -ENOSPC; 178 - 179 - p = &all_sigio_fds.poll[i]; 180 - 181 - for (i = 0; i < current_poll.used; i++) { 182 - if (current_poll.poll[i].fd == fd) 183 - return 0; 184 - } 185 - 186 - n = current_poll.used; 187 - err = need_poll(&next_poll, n + 1); 188 - if (err) 189 - return err; 190 - 191 - memcpy(next_poll.poll, current_poll.poll, 192 - current_poll.used * sizeof(struct pollfd)); 193 - next_poll.poll[n] = *p; 194 - next_poll.used = n + 1; 195 - update_thread(); 196 - 197 - return 0; 65 + CATCH_EINTR(r = epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &event)); 66 + return r < 0 ? 
-errno : 0; 198 67 } 199 - 200 68 201 69 int add_sigio_fd(int fd) 202 70 { ··· 80 210 81 211 int __ignore_sigio_fd(int fd) 82 212 { 83 - struct pollfd *p; 84 - int err, i, n = 0; 213 + struct epoll_event event; 214 + int r; 85 215 86 - /* 87 - * This is called from exitcalls elsewhere in UML - if 88 - * sigio_cleanup has already run, then update_thread will hang 89 - * or fail because the thread is no longer running. 90 - */ 91 - if (!write_sigio_td) 92 - return -EIO; 93 - 94 - for (i = 0; i < current_poll.used; i++) { 95 - if (current_poll.poll[i].fd == fd) 96 - break; 97 - } 98 - if (i == current_poll.used) 99 - return -ENOENT; 100 - 101 - err = need_poll(&next_poll, current_poll.used - 1); 102 - if (err) 103 - return err; 104 - 105 - for (i = 0; i < current_poll.used; i++) { 106 - p = &current_poll.poll[i]; 107 - if (p->fd != fd) 108 - next_poll.poll[n++] = *p; 109 - } 110 - next_poll.used = current_poll.used - 1; 111 - 112 - update_thread(); 113 - 114 - return 0; 216 + CATCH_EINTR(r = epoll_ctl(epollfd, EPOLL_CTL_DEL, fd, &event)); 217 + return r < 0 ? -errno : 0; 115 218 } 116 219 117 220 int ignore_sigio_fd(int fd) ··· 98 255 return err; 99 256 } 100 257 101 - static struct pollfd *setup_initial_poll(int fd) 102 - { 103 - struct pollfd *p; 104 - 105 - p = uml_kmalloc(sizeof(struct pollfd), UM_GFP_KERNEL); 106 - if (p == NULL) { 107 - printk(UM_KERN_ERR "setup_initial_poll : failed to allocate " 108 - "poll\n"); 109 - return NULL; 110 - } 111 - *p = ((struct pollfd) { .fd = fd, 112 - .events = POLLIN, 113 - .revents = 0 }); 114 - return p; 115 - } 116 - 117 258 static void write_sigio_workaround(void) 118 259 { 119 - struct pollfd *p; 120 260 int err; 121 - int l_write_sigio_fds[2]; 122 - int l_sigio_private[2]; 123 - struct os_helper_thread *l_write_sigio_td; 124 - 125 - /* We call this *tons* of times - and most ones we must just fail. 
*/ 126 - sigio_lock(); 127 - l_write_sigio_td = write_sigio_td; 128 - sigio_unlock(); 129 - 130 - if (l_write_sigio_td) 131 - return; 132 - 133 - err = os_pipe(l_write_sigio_fds, 1, 1); 134 - if (err < 0) { 135 - printk(UM_KERN_ERR "write_sigio_workaround - os_pipe 1 failed, " 136 - "err = %d\n", -err); 137 - return; 138 - } 139 - err = os_pipe(l_sigio_private, 1, 1); 140 - if (err < 0) { 141 - printk(UM_KERN_ERR "write_sigio_workaround - os_pipe 2 failed, " 142 - "err = %d\n", -err); 143 - goto out_close1; 144 - } 145 - 146 - p = setup_initial_poll(l_sigio_private[1]); 147 - if (!p) 148 - goto out_close2; 149 261 150 262 sigio_lock(); 151 - 152 - /* 153 - * Did we race? Don't try to optimize this, please, it's not so likely 154 - * to happen, and no more than once at the boot. 155 - */ 156 263 if (write_sigio_td) 157 - goto out_free; 264 + goto out; 158 265 159 - current_poll = ((struct pollfds) { .poll = p, 160 - .used = 1, 161 - .size = 1 }); 162 - 163 - if (write_sigio_irq(l_write_sigio_fds[0])) 164 - goto out_clear_poll; 165 - 166 - memcpy(write_sigio_fds, l_write_sigio_fds, sizeof(l_write_sigio_fds)); 167 - memcpy(sigio_private, l_sigio_private, sizeof(l_sigio_private)); 168 - 169 - err = os_run_helper_thread(&write_sigio_td, write_sigio_thread, NULL); 170 - if (err < 0) 171 - goto out_clear; 172 - 173 - sigio_unlock(); 174 - return; 175 - 176 - out_clear: 177 - write_sigio_td = NULL; 178 - write_sigio_fds[0] = -1; 179 - write_sigio_fds[1] = -1; 180 - sigio_private[0] = -1; 181 - sigio_private[1] = -1; 182 - out_clear_poll: 183 - current_poll = ((struct pollfds) { .poll = NULL, 184 - .size = 0, 185 - .used = 0 }); 186 - out_free: 187 - sigio_unlock(); 188 - kfree(p); 189 - out_close2: 190 - close(l_sigio_private[0]); 191 - close(l_sigio_private[1]); 192 - out_close1: 193 - close(l_write_sigio_fds[0]); 194 - close(l_write_sigio_fds[1]); 195 - } 196 - 197 - void sigio_broken(int fd) 198 - { 199 - int err; 200 - 201 - write_sigio_workaround(); 202 - 203 - 
sigio_lock(); 204 - err = need_poll(&all_sigio_fds, all_sigio_fds.used + 1); 205 - if (err) { 206 - printk(UM_KERN_ERR "maybe_sigio_broken - failed to add pollfd " 207 - "for descriptor %d\n", fd); 266 + epollfd = epoll_create(MAX_EPOLL_EVENTS); 267 + if (epollfd < 0) { 268 + printk(UM_KERN_ERR "%s: epoll_create failed, errno = %d\n", 269 + __func__, errno); 208 270 goto out; 209 271 } 210 272 211 - all_sigio_fds.poll[all_sigio_fds.used++] = 212 - ((struct pollfd) { .fd = fd, 213 - .events = POLLIN, 214 - .revents = 0 }); 273 + err = os_run_helper_thread(&write_sigio_td, write_sigio_thread, NULL); 274 + if (err < 0) { 275 + printk(UM_KERN_ERR "%s: os_run_helper_thread failed, errno = %d\n", 276 + __func__, -err); 277 + close(epollfd); 278 + epollfd = -1; 279 + goto out; 280 + } 281 + 215 282 out: 216 283 sigio_unlock(); 284 + } 285 + 286 + void sigio_broken(void) 287 + { 288 + write_sigio_workaround(); 217 289 } 218 290 219 291 /* Changed during early boot */ ··· 142 384 if (pty_output_sigio) 143 385 return; 144 386 145 - sigio_broken(fd); 387 + sigio_broken(); 146 388 } 147 389 148 390 static void sigio_cleanup(void)