inotify: clean up inotify_read and fix locking problems

If userspace supplies an invalid pointer to a read() of an inotify
instance, the inotify device's event list mutex is unlocked twice.
This imbalance effectively leaves the data structure
unprotected, and we can trigger oopses by accessing the inotify
instance from different tasks concurrently.
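
For illustration, the old control flow around the userspace copy looked
roughly like this (a simplified sketch of the old inotify_read(); see the
patch below). A failed copy_to_user() breaks out of the loop with the
mutex already dropped, then falls through to the unconditional unlock
after the loop:

        /* ev_mutex is held on entry, taken in the wait loop above */
        while (1) {
                ...
                /*
                 * Must perform the copy_to_user outside the mutex in order
                 * to avoid a lock order reversal with mmap_sem.
                 */
                mutex_unlock(&dev->ev_mutex);

                if (copy_to_user(buf, &kevent->event, event_size)) {
                        ret = -EFAULT;
                        break;  /* exits with ev_mutex NOT held */
                }
                ...
                mutex_lock(&dev->ev_mutex);
        }
        mutex_unlock(&dev->ev_mutex);   /* second unlock on the -EFAULT path */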

The best fix (contributed largely by Linus) is a total rewrite
of the function in question:

On Thu, Jan 22, 2009 at 7:05 AM, Linus Torvalds wrote:
> The thing to notice is that:
>
> - locking is done in just one place, and there is no question about it
> not having an unlock.
>
> - that whole double-while(1)-loop thing is gone.
>
> - use multiple functions to make nesting and error handling sane
>
> - do error testing after doing the things you always need to do, ie do
> this:
>
> mutex_lock(..)
> ret = function_call();
> mutex_unlock(..)
>
> .. test ret here ..
>
> instead of doing conditional exits with unlocking or freeing.
>
> So if the code is written in this way, it may still be buggy, but at least
> it's not buggy because of subtle "forgot to unlock" or "forgot to free"
> issues.
>
> This _always_ unlocks if it locked, and it always frees if it got a
> non-error kevent.
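
Concretely, the hot loop of the rewritten inotify_read() below follows
exactly that shape. get_one_event() hands back NULL (nothing queued),
ERR_PTR(-EINVAL) (the next event does not fit in the remaining buffer),
or an event that has already been removed from the list; the unlock is
unconditional, and all error tests happen afterwards:

        mutex_lock(&dev->ev_mutex);
        kevent = get_one_event(dev, count);
        mutex_unlock(&dev->ev_mutex);

        if (kevent) {
                ret = PTR_ERR(kevent);
                if (IS_ERR(kevent))
                        break;
                ret = copy_event_to_user(kevent, buf);
                free_kevent(kevent);
                if (ret < 0)
                        break;
                buf += ret;
                count -= ret;
                continue;
        }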

Cc: John McCutchan <ttb@tentacle.dhs.org>
Cc: Robert Love <rlove@google.com>
Cc: <stable@kernel.org>
Signed-off-by: Vegard Nossum <vegard.nossum@gmail.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

 fs/notify/inotify/inotify_user.c
 1 file changed, 74 insertions(+), 61 deletions(-)
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
···
         return ret;
 }
 
+/*
+ * Get an inotify_kernel_event if one exists and is small
+ * enough to fit in "count". Return an error pointer if
+ * not large enough.
+ *
+ * Called with the device ev_mutex held.
+ */
+static struct inotify_kernel_event *get_one_event(struct inotify_device *dev,
+                                                  size_t count)
+{
+        size_t event_size = sizeof(struct inotify_event);
+        struct inotify_kernel_event *kevent;
+
+        if (list_empty(&dev->events))
+                return NULL;
+
+        kevent = inotify_dev_get_event(dev);
+        if (kevent->name)
+                event_size += kevent->event.len;
+
+        if (event_size > count)
+                return ERR_PTR(-EINVAL);
+
+        remove_kevent(dev, kevent);
+        return kevent;
+}
+
+/*
+ * Copy an event to user space, returning how much we copied.
+ *
+ * We already checked that the event size is smaller than the
+ * buffer we had in "get_one_event()" above.
+ */
+static ssize_t copy_event_to_user(struct inotify_kernel_event *kevent,
+                                  char __user *buf)
+{
+        size_t event_size = sizeof(struct inotify_event);
+
+        if (copy_to_user(buf, &kevent->event, event_size))
+                return -EFAULT;
+
+        if (kevent->name) {
+                buf += event_size;
+
+                if (copy_to_user(buf, kevent->name, kevent->event.len))
+                        return -EFAULT;
+
+                event_size += kevent->event.len;
+        }
+        return event_size;
+}
+
 static ssize_t inotify_read(struct file *file, char __user *buf,
                             size_t count, loff_t *pos)
 {
-        size_t event_size = sizeof (struct inotify_event);
         struct inotify_device *dev;
         char __user *start;
         int ret;
···
         dev = file->private_data;
 
         while (1) {
+                struct inotify_kernel_event *kevent;
 
                 prepare_to_wait(&dev->wq, &wait, TASK_INTERRUPTIBLE);
 
                 mutex_lock(&dev->ev_mutex);
-                if (!list_empty(&dev->events)) {
-                        ret = 0;
-                        break;
-                }
+                kevent = get_one_event(dev, count);
                 mutex_unlock(&dev->ev_mutex);
 
-                if (file->f_flags & O_NONBLOCK) {
-                        ret = -EAGAIN;
-                        break;
+                if (kevent) {
+                        ret = PTR_ERR(kevent);
+                        if (IS_ERR(kevent))
+                                break;
+                        ret = copy_event_to_user(kevent, buf);
+                        free_kevent(kevent);
+                        if (ret < 0)
+                                break;
+                        buf += ret;
+                        count -= ret;
+                        continue;
                 }
 
-                if (signal_pending(current)) {
-                        ret = -EINTR;
+                ret = -EAGAIN;
+                if (file->f_flags & O_NONBLOCK)
+                        break;
+                ret = -EINTR;
+                if (signal_pending(current))
                         break;
-                }
+
+                if (start != buf)
+                        break;
 
                 schedule();
         }
 
         finish_wait(&dev->wq, &wait);
-        if (ret)
-                return ret;
-
-        while (1) {
-                struct inotify_kernel_event *kevent;
-
+        if (start != buf && ret != -EFAULT)
                 ret = buf - start;
-                if (list_empty(&dev->events))
-                        break;
-
-                kevent = inotify_dev_get_event(dev);
-                if (event_size + kevent->event.len > count) {
-                        if (ret == 0 && count > 0) {
-                                /*
-                                 * could not get a single event because we
-                                 * didn't have enough buffer space.
-                                 */
-                                ret = -EINVAL;
-                        }
-                        break;
-                }
-                remove_kevent(dev, kevent);
-
-                /*
-                 * Must perform the copy_to_user outside the mutex in order
-                 * to avoid a lock order reversal with mmap_sem.
-                 */
-                mutex_unlock(&dev->ev_mutex);
-
-                if (copy_to_user(buf, &kevent->event, event_size)) {
-                        ret = -EFAULT;
-                        break;
-                }
-                buf += event_size;
-                count -= event_size;
-
-                if (kevent->name) {
-                        if (copy_to_user(buf, kevent->name, kevent->event.len)){
-                                ret = -EFAULT;
-                                break;
-                        }
-                        buf += kevent->event.len;
-                        count -= kevent->event.len;
-                }
-
-                free_kevent(kevent);
-
-                mutex_lock(&dev->ev_mutex);
-        }
-        mutex_unlock(&dev->ev_mutex);
-
         return ret;
 }
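
For context, a minimal userspace consumer (hypothetical; not part of this
patch) shows the record layout that copy_event_to_user() produces: a fixed
struct inotify_event header followed by event.len bytes of name, packed
back to back in the read() buffer:

#include <sys/inotify.h>
#include <limits.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        /* Room for at least one event carrying a maximal file name. */
        char buf[sizeof(struct inotify_event) + NAME_MAX + 1]
                __attribute__((aligned(__alignof__(struct inotify_event))));
        int fd = inotify_init();

        if (fd < 0 || inotify_add_watch(fd, "/tmp", IN_CREATE | IN_DELETE) < 0)
                return 1;

        for (;;) {
                ssize_t len = read(fd, buf, sizeof(buf));
                char *p = buf;

                if (len <= 0)
                        break;
                /* Each record is the fixed header plus ev->len name bytes,
                 * exactly as copy_event_to_user() laid them out. */
                while (p < buf + len) {
                        struct inotify_event *ev = (struct inotify_event *)p;

                        printf("wd=%d mask=%#x name=%s\n", ev->wd,
                               (unsigned)ev->mask, ev->len ? ev->name : "");
                        p += sizeof(struct inotify_event) + ev->len;
                }
        }
        return 0;
}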