Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

super: wait until we passed kill super

Recent rework moved block device closing out of sb->put_super() and into
sb->kill_sb() to avoid deadlocks as s_umount is held in put_super() and
blkdev_put() can end up taking s_umount again.

That means we need to move the removal of the superblock from @fs_supers
out of generic_shutdown_super() and into deactivate_locked_super() to
ensure that concurrent mounters don't fail to open block devices that
are still in use because blkdev_put() in sb->kill_sb() hasn't been
called yet.

We can now do this as we can make iterators through @fs_supers and
@super_blocks wait without holding s_umount. Concurrent mounts will wait
until a dying superblock is fully dead, i.e. until sb->kill_sb() has been
called and SB_DEAD has been set. Concurrent iterators can already discard
any SB_DYING superblock.

Reviewed-by: Jan Kara <jack@suse.cz>
Message-Id: <20230818-vfs-super-fixes-v3-v3-4-9f0b1876e46b@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>

+65 -7
+64 -7
fs/super.c
··· 153 153 } 154 154 155 155 /* wake waiters */ 156 - #define SUPER_WAKE_FLAGS (SB_BORN | SB_DYING) 156 + #define SUPER_WAKE_FLAGS (SB_BORN | SB_DYING | SB_DEAD) 157 157 static void super_wake(struct super_block *sb, unsigned int flag) 158 158 { 159 159 WARN_ON_ONCE((flag & ~SUPER_WAKE_FLAGS)); ··· 461 461 list_lru_destroy(&s->s_dentry_lru); 462 462 list_lru_destroy(&s->s_inode_lru); 463 463 464 + /* 465 + * Remove it from @fs_supers so it isn't found by new 466 + * sget{_fc}() walkers anymore. Any concurrent mounter still 467 + * managing to grab a temporary reference is guaranteed to 468 + * already see SB_DYING and will wait until we notify them about 469 + * SB_DEAD. 470 + */ 471 + spin_lock(&sb_lock); 472 + hlist_del_init(&s->s_instances); 473 + spin_unlock(&sb_lock); 474 + 475 + /* 476 + * Let concurrent mounts know that this thing is really dead. 477 + * We don't need @sb->s_umount here as every concurrent caller 478 + * will see SB_DYING and either discard the superblock or wait 479 + * for SB_DEAD. 480 + */ 481 + super_wake(s, SB_DEAD); 482 + 464 483 put_filesystem(fs); 465 484 put_super(s); 466 485 } else { ··· 534 515 super_unlock_excl(s); 535 516 put_super(s); 536 517 return 0; 518 + } 519 + 520 + static inline bool wait_dead(struct super_block *sb) 521 + { 522 + unsigned int flags; 523 + 524 + /* 525 + * Pairs with memory barrier in super_wake() and ensures 526 + * that we see SB_DEAD after we're woken. 527 + */ 528 + flags = smp_load_acquire(&sb->s_flags); 529 + return flags & SB_DEAD; 530 + } 531 + 532 + /** 533 + * grab_super_dead - acquire an active reference to a superblock 534 + * @sb: superblock to acquire 535 + * 536 + * Acquire a temporary reference on a superblock and try to trade it for 537 + * an active reference. This is used in sget{_fc}() to wait for a 538 + * superblock to either become SB_BORN or for it to pass through 539 + * sb->kill() and be marked as SB_DEAD. 
540 + * 541 + * Return: This returns true if an active reference could be acquired, 542 + * false if not. 543 + */ 544 + static bool grab_super_dead(struct super_block *sb) 545 + { 546 + 547 + sb->s_count++; 548 + if (grab_super(sb)) { 549 + put_super(sb); 550 + lockdep_assert_held(&sb->s_umount); 551 + return true; 552 + } 553 + wait_var_event(&sb->s_flags, wait_dead(sb)); 554 + put_super(sb); 555 + lockdep_assert_not_held(&sb->s_umount); 556 + return false; 537 557 } 538 558 539 559 /* ··· 701 643 spin_unlock(&sb->s_inode_list_lock); 702 644 } 703 645 } 704 - spin_lock(&sb_lock); 705 - /* should be initialized for __put_super_and_need_restart() */ 706 - hlist_del_init(&sb->s_instances); 707 - spin_unlock(&sb_lock); 708 646 /* 709 647 * Broadcast to everyone that grabbed a temporary reference to this 710 648 * superblock before we removed it from @fs_supers that the superblock 711 649 * is dying. Every walker of @fs_supers outside of sget{_fc}() will now 712 650 * discard this superblock and treat it as dead. 651 + * 652 + * We leave the superblock on @fs_supers so it can be found by 653 + * sget{_fc}() until we passed sb->kill_sb(). 713 654 */ 714 655 super_wake(sb, SB_DYING); 715 656 super_unlock_excl(sb); ··· 803 746 destroy_unused_super(s); 804 747 return ERR_PTR(-EBUSY); 805 748 } 806 - if (!grab_super(old)) 749 + if (!grab_super_dead(old)) 807 750 goto retry; 808 751 destroy_unused_super(s); 809 752 return old; ··· 847 790 destroy_unused_super(s); 848 791 return ERR_PTR(-EBUSY); 849 792 } 850 - if (!grab_super(old)) 793 + if (!grab_super_dead(old)) 851 794 goto retry; 852 795 destroy_unused_super(s); 853 796 return old;
+1
include/linux/fs.h
··· 1095 1095 #define SB_LAZYTIME BIT(25) /* Update the on-disk [acm]times lazily */ 1096 1096 1097 1097 /* These sb flags are internal to the kernel */ 1098 + #define SB_DEAD BIT(21) 1098 1099 #define SB_DYING BIT(24) 1099 1100 #define SB_SUBMOUNT BIT(26) 1100 1101 #define SB_FORCE BIT(27)