Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

fanotify: Use interruptible wait when waiting for permission events

When waiting for a response to fanotify permission events, we currently
use uninterruptible waits. That keeps the code simple; however, it can
cause lots of processes to end up in uninterruptible sleep, with a hard
reboot being the only way out if the fanotify listener process stops
responding (e.g. due to a bug in its implementation). Uninterruptible
sleep also makes system hibernation fail if the listener gets frozen
before the process generating the fanotify permission event.

Fix these problems by using an interruptible sleep when waiting for a
response to a fanotify event. This is slightly tricky though - we have to
detect when the event has already been reported to userspace, as in that
case we must not free the event. Instead, we push the responsibility for
freeing the event to the process that will write the response to the
event.

Reported-by: Orion Poplawski <orion@nwra.com>
Reported-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Jan Kara <jack@suse.cz>

Jan Kara fabf7f29 40873284

+42 -5
+32 -3
fs/notify/fanotify/fanotify.c
··· 77 77 return 0; 78 78 } 79 79 80 + /* 81 + * Wait for response to permission event. The function also takes care of 82 + * freeing the permission event (or offloads that in case the wait is canceled 83 + * by a signal). The function returns 0 in case access got allowed by userspace, 84 + * -EPERM in case userspace disallowed the access, and -ERESTARTSYS in case 85 + * the wait got interrupted by a signal. 86 + */ 80 87 static int fanotify_get_response(struct fsnotify_group *group, 81 88 struct fanotify_perm_event *event, 82 89 struct fsnotify_iter_info *iter_info) ··· 92 85 93 86 pr_debug("%s: group=%p event=%p\n", __func__, group, event); 94 87 95 - wait_event(group->fanotify_data.access_waitq, 96 - event->state == FAN_EVENT_ANSWERED); 88 + ret = wait_event_interruptible(group->fanotify_data.access_waitq, 89 + event->state == FAN_EVENT_ANSWERED); 90 + /* Signal pending? */ 91 + if (ret < 0) { 92 + spin_lock(&group->notification_lock); 93 + /* Event reported to userspace and no answer yet? */ 94 + if (event->state == FAN_EVENT_REPORTED) { 95 + /* Event will get freed once userspace answers to it */ 96 + event->state = FAN_EVENT_CANCELED; 97 + spin_unlock(&group->notification_lock); 98 + return ret; 99 + } 100 + /* Event not yet reported? Just remove it. */ 101 + if (event->state == FAN_EVENT_INIT) 102 + fsnotify_remove_queued_event(group, &event->fae.fse); 103 + /* 104 + * Event may be also answered in case signal delivery raced 105 + * with wakeup. In that case we have nothing to do besides 106 + * freeing the event and reporting error. 
107 + */ 108 + spin_unlock(&group->notification_lock); 109 + goto out; 110 + } 97 111 98 112 /* userspace responded, convert to something usable */ 99 113 switch (event->response & ~FAN_AUDIT) { ··· 132 104 133 105 pr_debug("%s: group=%p event=%p about to return ret=%d\n", __func__, 134 106 group, event, ret); 107 + out: 108 + fsnotify_destroy_event(group, &event->fae.fse); 135 109 136 110 return ret; 137 111 } ··· 436 406 } else if (fanotify_is_perm_event(mask)) { 437 407 ret = fanotify_get_response(group, FANOTIFY_PE(fsn_event), 438 408 iter_info); 439 - fsnotify_destroy_event(group, fsn_event); 440 409 } 441 410 finish: 442 411 if (fanotify_is_perm_event(mask))
+2 -1
fs/notify/fanotify/fanotify.h
··· 12 12 enum { 13 13 FAN_EVENT_INIT, 14 14 FAN_EVENT_REPORTED, 15 - FAN_EVENT_ANSWERED 15 + FAN_EVENT_ANSWERED, 16 + FAN_EVENT_CANCELED, 16 17 }; 17 18 18 19 /*
+8 -1
fs/notify/fanotify/fanotify_user.c
··· 147 147 unsigned int response) 148 148 __releases(&group->notification_lock) 149 149 { 150 + bool destroy = false; 151 + 150 152 assert_spin_locked(&group->notification_lock); 151 153 event->response = response; 152 - event->state = FAN_EVENT_ANSWERED; 154 + if (event->state == FAN_EVENT_CANCELED) 155 + destroy = true; 156 + else 157 + event->state = FAN_EVENT_ANSWERED; 153 158 spin_unlock(&group->notification_lock); 159 + if (destroy) 160 + fsnotify_destroy_event(group, &event->fae.fse); 154 161 } 155 162 156 163 static int process_access_response(struct fsnotify_group *group,