Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

epoll: Add a flag, EPOLLWAKEUP, to prevent suspend while epoll events are ready

When an epoll_event that has the EPOLLWAKEUP flag set is ready, a
wakeup_source will be active to prevent suspend. This can be used to
handle wakeup events from a driver that supports poll, e.g. input, if
that driver wakes up the waitqueue passed to epoll before allowing
suspend.

Signed-off-by: Arve Hjønnevåg <arve@android.com>
Reviewed-by: NeilBrown <neilb@suse.de>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>

authored by

Arve Hjønnevåg and committed by
Rafael J. Wysocki
4d7e30d9 b86ff982

+103 -4
+87 -3
fs/eventpoll.c
··· 33 33 #include <linux/bitops.h> 34 34 #include <linux/mutex.h> 35 35 #include <linux/anon_inodes.h> 36 + #include <linux/device.h> 36 37 #include <asm/uaccess.h> 37 38 #include <asm/io.h> 38 39 #include <asm/mman.h> ··· 88 87 */ 89 88 90 89 /* Epoll private bits inside the event mask */ 91 - #define EP_PRIVATE_BITS (EPOLLONESHOT | EPOLLET) 90 + #define EP_PRIVATE_BITS (EPOLLWAKEUP | EPOLLONESHOT | EPOLLET) 92 91 93 92 /* Maximum number of nesting allowed inside epoll sets */ 94 93 #define EP_MAX_NESTS 4 ··· 155 154 /* List header used to link this item to the "struct file" items list */ 156 155 struct list_head fllink; 157 156 157 + /* wakeup_source used when EPOLLWAKEUP is set */ 158 + struct wakeup_source *ws; 159 + 158 160 /* The structure that describe the interested events and the source fd */ 159 161 struct epoll_event event; 160 162 }; ··· 197 193 * holding ->lock. 198 194 */ 199 195 struct epitem *ovflist; 196 + 197 + /* wakeup_source used when ep_scan_ready_list is running */ 198 + struct wakeup_source *ws; 200 199 201 200 /* The user that created the eventpoll descriptor */ 202 201 struct user_struct *user; ··· 595 588 * queued into ->ovflist but the "txlist" might already 596 589 * contain them, and the list_splice() below takes care of them. 597 590 */ 598 - if (!ep_is_linked(&epi->rdllink)) 591 + if (!ep_is_linked(&epi->rdllink)) { 599 592 list_add_tail(&epi->rdllink, &ep->rdllist); 593 + __pm_stay_awake(epi->ws); 594 + } 600 595 } 601 596 /* 602 597 * We need to set back ep->ovflist to EP_UNACTIVE_PTR, so that after ··· 611 602 * Quickly re-inject items left on "txlist". 
612 603 */ 613 604 list_splice(&txlist, &ep->rdllist); 605 + __pm_relax(ep->ws); 614 606 615 607 if (!list_empty(&ep->rdllist)) { 616 608 /* ··· 666 656 list_del_init(&epi->rdllink); 667 657 spin_unlock_irqrestore(&ep->lock, flags); 668 658 659 + wakeup_source_unregister(epi->ws); 660 + 669 661 /* At this point it is safe to free the eventpoll item */ 670 662 kmem_cache_free(epi_cache, epi); 671 663 ··· 718 706 mutex_unlock(&epmutex); 719 707 mutex_destroy(&ep->mtx); 720 708 free_uid(ep->user); 709 + wakeup_source_unregister(ep->ws); 721 710 kfree(ep); 722 711 } 723 712 ··· 750 737 * callback, but it's not actually ready, as far as 751 738 * caller requested events goes. We can remove it here. 752 739 */ 740 + __pm_relax(epi->ws); 753 741 list_del_init(&epi->rdllink); 754 742 } 755 743 } ··· 941 927 if (epi->next == EP_UNACTIVE_PTR) { 942 928 epi->next = ep->ovflist; 943 929 ep->ovflist = epi; 930 + if (epi->ws) { 931 + /* 932 + * Activate ep->ws since epi->ws may get 933 + * deactivated at any time. 
934 + */ 935 + __pm_stay_awake(ep->ws); 936 + } 937 + 944 938 } 945 939 goto out_unlock; 946 940 } 947 941 948 942 /* If this file is already in the ready list we exit soon */ 949 - if (!ep_is_linked(&epi->rdllink)) 943 + if (!ep_is_linked(&epi->rdllink)) { 950 944 list_add_tail(&epi->rdllink, &ep->rdllist); 945 + __pm_stay_awake(epi->ws); 946 + } 951 947 952 948 /* 953 949 * Wake up ( if active ) both the eventpoll wait list and the ->poll() ··· 1115 1091 return error; 1116 1092 } 1117 1093 1094 + static int ep_create_wakeup_source(struct epitem *epi) 1095 + { 1096 + const char *name; 1097 + 1098 + if (!epi->ep->ws) { 1099 + epi->ep->ws = wakeup_source_register("eventpoll"); 1100 + if (!epi->ep->ws) 1101 + return -ENOMEM; 1102 + } 1103 + 1104 + name = epi->ffd.file->f_path.dentry->d_name.name; 1105 + epi->ws = wakeup_source_register(name); 1106 + if (!epi->ws) 1107 + return -ENOMEM; 1108 + 1109 + return 0; 1110 + } 1111 + 1112 + static void ep_destroy_wakeup_source(struct epitem *epi) 1113 + { 1114 + wakeup_source_unregister(epi->ws); 1115 + epi->ws = NULL; 1116 + } 1117 + 1118 1118 /* 1119 1119 * Must be called with "mtx" held. 
1120 1120 */ ··· 1166 1118 epi->event = *event; 1167 1119 epi->nwait = 0; 1168 1120 epi->next = EP_UNACTIVE_PTR; 1121 + if (epi->event.events & EPOLLWAKEUP) { 1122 + error = ep_create_wakeup_source(epi); 1123 + if (error) 1124 + goto error_create_wakeup_source; 1125 + } else { 1126 + epi->ws = NULL; 1127 + } 1169 1128 1170 1129 /* Initialize the poll table using the queue callback */ 1171 1130 epq.epi = epi; ··· 1219 1164 /* If the file is already "ready" we drop it inside the ready list */ 1220 1165 if ((revents & event->events) && !ep_is_linked(&epi->rdllink)) { 1221 1166 list_add_tail(&epi->rdllink, &ep->rdllist); 1167 + __pm_stay_awake(epi->ws); 1222 1168 1223 1169 /* Notify waiting tasks that events are available */ 1224 1170 if (waitqueue_active(&ep->wq)) ··· 1260 1204 list_del_init(&epi->rdllink); 1261 1205 spin_unlock_irqrestore(&ep->lock, flags); 1262 1206 1207 + wakeup_source_unregister(epi->ws); 1208 + 1209 + error_create_wakeup_source: 1263 1210 kmem_cache_free(epi_cache, epi); 1264 1211 1265 1212 return error; ··· 1288 1229 epi->event.events = event->events; 1289 1230 pt._key = event->events; 1290 1231 epi->event.data = event->data; /* protected by mtx */ 1232 + if (epi->event.events & EPOLLWAKEUP) { 1233 + if (!epi->ws) 1234 + ep_create_wakeup_source(epi); 1235 + } else if (epi->ws) { 1236 + ep_destroy_wakeup_source(epi); 1237 + } 1291 1238 1292 1239 /* 1293 1240 * Get current event bits. 
We can safely use the file* here because ··· 1309 1244 spin_lock_irq(&ep->lock); 1310 1245 if (!ep_is_linked(&epi->rdllink)) { 1311 1246 list_add_tail(&epi->rdllink, &ep->rdllist); 1247 + __pm_stay_awake(epi->ws); 1312 1248 1313 1249 /* Notify waiting tasks that events are available */ 1314 1250 if (waitqueue_active(&ep->wq)) ··· 1348 1282 !list_empty(head) && eventcnt < esed->maxevents;) { 1349 1283 epi = list_first_entry(head, struct epitem, rdllink); 1350 1284 1285 + /* 1286 + * Activate ep->ws before deactivating epi->ws to prevent 1287 + * triggering auto-suspend here (in case we reactive epi->ws 1288 + * below). 1289 + * 1290 + * This could be rearranged to delay the deactivation of epi->ws 1291 + * instead, but then epi->ws would temporarily be out of sync 1292 + * with ep_is_linked(). 1293 + */ 1294 + if (epi->ws && epi->ws->active) 1295 + __pm_stay_awake(ep->ws); 1296 + __pm_relax(epi->ws); 1351 1297 list_del_init(&epi->rdllink); 1352 1298 1353 1299 pt._key = epi->event.events; ··· 1376 1298 if (__put_user(revents, &uevent->events) || 1377 1299 __put_user(epi->event.data, &uevent->data)) { 1378 1300 list_add(&epi->rdllink, head); 1301 + __pm_stay_awake(epi->ws); 1379 1302 return eventcnt ? eventcnt : -EFAULT; 1380 1303 } 1381 1304 eventcnt++; ··· 1396 1317 * poll callback will queue them in ep->ovflist. 1397 1318 */ 1398 1319 list_add_tail(&epi->rdllink, &ep->rdllist); 1320 + __pm_stay_awake(epi->ws); 1399 1321 } 1400 1322 } 1401 1323 } ··· 1707 1627 /* The target file descriptor must support poll */ 1708 1628 error = -EPERM; 1709 1629 if (!tfile->f_op || !tfile->f_op->poll) 1630 + goto error_tgt_fput; 1631 + 1632 + /* Check if EPOLLWAKEUP is allowed */ 1633 + if ((epds.events & EPOLLWAKEUP) && !capable(CAP_EPOLLWAKEUP)) 1710 1634 goto error_tgt_fput; 1711 1635 1712 1636 /*
+4 -1
include/linux/capability.h
··· 360 360 361 361 #define CAP_WAKE_ALARM 35 362 362 363 + /* Allow preventing system suspends while epoll events are pending */ 363 364 364 - #define CAP_LAST_CAP CAP_WAKE_ALARM 365 + #define CAP_EPOLLWAKEUP 36 366 + 367 + #define CAP_LAST_CAP CAP_EPOLLWAKEUP 365 368 366 369 #define cap_valid(x) ((x) >= 0 && (x) <= CAP_LAST_CAP) 367 370
+12
include/linux/eventpoll.h
··· 26 26 #define EPOLL_CTL_DEL 2 27 27 #define EPOLL_CTL_MOD 3 28 28 29 + /* 30 + * Request the handling of system wakeup events so as to prevent system suspends 31 + * from happening while those events are being processed. 32 + * 33 + * Assuming neither EPOLLET nor EPOLLONESHOT is set, system suspends will not be 34 + * re-allowed until epoll_wait is called again after consuming the wakeup 35 + * event(s). 36 + * 37 + * Requires CAP_EPOLLWAKEUP 38 + */ 39 + #define EPOLLWAKEUP (1 << 29) 40 + 29 41 /* Set the One Shot behaviour for the target file descriptor */ 30 42 #define EPOLLONESHOT (1 << 30) 31 43