Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'vfs-6.16-rc1.super' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs

Pull vfs freezing updates from Christian Brauner:
"This contains various filesystem freezing related work for this cycle:

- Allow the power subsystem to support filesystem freeze for suspend
and hibernate.

Now all the pieces are in place to actually allow the power
subsystem to freeze/thaw filesystems during suspend/resume.
Filesystems are only frozen and thawed if the power subsystem does
actually own the freeze.

If the filesystem is already frozen by the time we've frozen all
userspace processes, we don't care to freeze it again. That's
userspace's job once the process resumes. We only actually freeze
filesystems if we absolutely have to and we ignore other failures
to freeze.

We could bubble up errors and fail suspend/resume if the error
isn't EBUSY (aka it's already frozen) but I don't think that this
is worth it. Filesystem freezing during suspend/resume is
best-effort. If the user has 500 ext4 filesystems mounted and 4
fail to freeze for whatever reason then we simply skip them.

What we have now is already a big improvement and let's see how we
fare with it before making our lives even harder (and uglier) than
we have to.

- Allow efivars to support freeze and thaw

Allow efivarfs to participate in resyncing variable state during
system hibernation and suspend. Add freeze/thaw support.

This is a pretty straightforward implementation. We simply add
regular freeze/thaw support for both userspace and the kernel.
efivars is the first pseudofilesystem that adds support for
filesystem freezing and thawing.

The simplicity comes from the fact that we simply always resync
variable state after efivarfs has been frozen. It doesn't matter
whether that's because of suspend, userspace initiated freeze or
hibernation. Efivars is simple enough that it doesn't matter that
we walk all dentries. There are no directories and there aren't
insane amounts of entries and both freeze/thaw are already
heavy-handed operations. If userspace initiated a freeze/thaw cycle
they would need CAP_SYS_ADMIN in the initial user namespace (as
that's where efivarfs is mounted) so it can't be triggered by
random userspace. IOW, we really really don't care"

* tag 'vfs-6.16-rc1.super' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
f2fs: fix freezing filesystem during resize
kernfs: add warning about implementing freeze/thaw
efivarfs: support freeze/thaw
power: freeze filesystems during suspend/resume
libfs: export find_next_child()
super: add filesystem freezing helpers for suspend and hibernate
gfs2: pass through holder from the VFS for freeze/thaw
super: use common iterator (Part 2)
super: use a common iterator (Part 1)
super: skip dying superblocks early
super: simplify user_get_super()
super: remove pointless s_root checks
fs: allow all writers to be frozen
locking/percpu-rwsem: add freezable alternative to down_read

+417 -276
-1
fs/efivarfs/internal.h
··· 17 17 struct efivarfs_mount_opts mount_opts; 18 18 struct super_block *sb; 19 19 struct notifier_block nb; 20 - struct notifier_block pm_nb; 21 20 }; 22 21 23 22 struct efi_variable {
+50 -143
fs/efivarfs/super.c
··· 21 21 #include <linux/namei.h> 22 22 23 23 #include "internal.h" 24 + #include "../internal.h" 24 25 25 26 static int efivarfs_ops_notifier(struct notifier_block *nb, unsigned long event, 26 27 void *data) ··· 121 120 122 121 return 0; 123 122 } 123 + 124 + static int efivarfs_freeze_fs(struct super_block *sb); 125 + static int efivarfs_unfreeze_fs(struct super_block *sb); 126 + 124 127 static const struct super_operations efivarfs_ops = { 125 128 .statfs = efivarfs_statfs, 126 129 .drop_inode = generic_delete_inode, 127 130 .alloc_inode = efivarfs_alloc_inode, 128 131 .free_inode = efivarfs_free_inode, 129 132 .show_options = efivarfs_show_options, 133 + .freeze_fs = efivarfs_freeze_fs, 134 + .unfreeze_fs = efivarfs_unfreeze_fs, 130 135 }; 131 136 132 137 /* ··· 372 365 if (err) 373 366 return err; 374 367 375 - register_pm_notifier(&sfi->pm_nb); 376 - 377 368 return efivar_init(efivarfs_callback, sb, true); 378 369 } 379 370 ··· 395 390 .parse_param = efivarfs_parse_param, 396 391 .reconfigure = efivarfs_reconfigure, 397 392 }; 398 - 399 - struct efivarfs_ctx { 400 - struct dir_context ctx; 401 - struct super_block *sb; 402 - struct dentry *dentry; 403 - }; 404 - 405 - static bool efivarfs_actor(struct dir_context *ctx, const char *name, int len, 406 - loff_t offset, u64 ino, unsigned mode) 407 - { 408 - unsigned long size; 409 - struct efivarfs_ctx *ectx = container_of(ctx, struct efivarfs_ctx, ctx); 410 - struct dentry *dentry = try_lookup_noperm(&QSTR_LEN(name, len), 411 - ectx->sb->s_root); 412 - struct inode *inode; 413 - struct efivar_entry *entry; 414 - int err; 415 - 416 - if (IS_ERR_OR_NULL(dentry)) 417 - return true; 418 - 419 - inode = d_inode(dentry); 420 - entry = efivar_entry(inode); 421 - 422 - err = efivar_entry_size(entry, &size); 423 - size += sizeof(__u32); /* attributes */ 424 - if (err) 425 - size = 0; 426 - 427 - inode_lock_nested(inode, I_MUTEX_CHILD); 428 - i_size_write(inode, size); 429 - inode_unlock(inode); 430 - 431 - if (!size) { 
432 - ectx->dentry = dentry; 433 - return false; 434 - } 435 - 436 - dput(dentry); 437 - 438 - return true; 439 - } 440 393 441 394 static int efivarfs_check_missing(efi_char16_t *name16, efi_guid_t vendor, 442 395 unsigned long name_size, void *data) ··· 432 469 return err; 433 470 } 434 471 435 - static void efivarfs_deactivate_super_work(struct work_struct *work) 436 - { 437 - struct super_block *s = container_of(work, struct super_block, 438 - destroy_work); 439 - /* 440 - * note: here s->destroy_work is free for reuse (which 441 - * will happen in deactivate_super) 442 - */ 443 - deactivate_super(s); 444 - } 445 - 446 472 static struct file_system_type efivarfs_type; 447 473 448 - static int efivarfs_pm_notify(struct notifier_block *nb, unsigned long action, 449 - void *ptr) 474 + static int efivarfs_freeze_fs(struct super_block *sb) 450 475 { 451 - struct efivarfs_fs_info *sfi = container_of(nb, struct efivarfs_fs_info, 452 - pm_nb); 453 - struct path path; 454 - struct efivarfs_ctx ectx = { 455 - .ctx = { 456 - .actor = efivarfs_actor, 457 - }, 458 - .sb = sfi->sb, 459 - }; 460 - struct file *file; 461 - struct super_block *s = sfi->sb; 462 - static bool rescan_done = true; 476 + /* Nothing for us to do. */ 477 + return 0; 478 + } 463 479 464 - if (action == PM_HIBERNATION_PREPARE) { 465 - rescan_done = false; 466 - return NOTIFY_OK; 467 - } else if (action != PM_POST_HIBERNATION) { 468 - return NOTIFY_DONE; 469 - } 480 + static int efivarfs_unfreeze_fs(struct super_block *sb) 481 + { 482 + struct dentry *child = NULL; 470 483 471 - if (rescan_done) 472 - return NOTIFY_DONE; 473 - 474 - /* ensure single superblock is alive and pin it */ 475 - if (!atomic_inc_not_zero(&s->s_active)) 476 - return NOTIFY_DONE; 477 - 484 + /* 485 + * Unconditionally resync the variable state on a thaw request. 486 + * Given the size of efivarfs it really doesn't matter to simply 487 + * iterate through all of the entries and resync. 
Freeze/thaw 488 + * requests are rare enough for that to not matter and the 489 + * number of entries is pretty low too. So we really don't care. 490 + */ 478 491 pr_info("efivarfs: resyncing variable state\n"); 492 + for (;;) { 493 + int err; 494 + unsigned long size = 0; 495 + struct inode *inode; 496 + struct efivar_entry *entry; 479 497 480 - path.dentry = sfi->sb->s_root; 498 + child = find_next_child(sb->s_root, child); 499 + if (!child) 500 + break; 481 501 482 - /* 483 - * do not add SB_KERNMOUNT which a single superblock could 484 - * expose to userspace and which also causes MNT_INTERNAL, see 485 - * below 486 - */ 487 - path.mnt = vfs_kern_mount(&efivarfs_type, 0, 488 - efivarfs_type.name, NULL); 489 - if (IS_ERR(path.mnt)) { 490 - pr_err("efivarfs: internal mount failed\n"); 491 - /* 492 - * We may be the last pinner of the superblock but 493 - * calling efivarfs_kill_sb from within the notifier 494 - * here would deadlock trying to unregister it 495 - */ 496 - INIT_WORK(&s->destroy_work, efivarfs_deactivate_super_work); 497 - schedule_work(&s->destroy_work); 498 - return PTR_ERR(path.mnt); 502 + inode = d_inode(child); 503 + entry = efivar_entry(inode); 504 + 505 + err = efivar_entry_size(entry, &size); 506 + if (err) 507 + size = 0; 508 + else 509 + size += sizeof(__u32); 510 + 511 + inode_lock(inode); 512 + i_size_write(inode, size); 513 + inode_unlock(inode); 514 + 515 + /* The variable doesn't exist anymore, delete it. 
*/ 516 + if (!size) { 517 + pr_info("efivarfs: removing variable %pd\n", child); 518 + simple_recursive_removal(child, NULL); 519 + } 499 520 } 500 521 501 - /* path.mnt now has pin on superblock, so this must be above one */ 502 - atomic_dec(&s->s_active); 503 - 504 - file = kernel_file_open(&path, O_RDONLY | O_DIRECTORY | O_NOATIME, 505 - current_cred()); 506 - /* 507 - * safe even if last put because no MNT_INTERNAL means this 508 - * will do delayed deactivate_super and not deadlock 509 - */ 510 - mntput(path.mnt); 511 - if (IS_ERR(file)) 512 - return NOTIFY_DONE; 513 - 514 - rescan_done = true; 515 - 516 - /* 517 - * First loop over the directory and verify each entry exists, 518 - * removing it if it doesn't 519 - */ 520 - file->f_pos = 2; /* skip . and .. */ 521 - do { 522 - ectx.dentry = NULL; 523 - iterate_dir(file, &ectx.ctx); 524 - if (ectx.dentry) { 525 - pr_info("efivarfs: removing variable %pd\n", 526 - ectx.dentry); 527 - simple_recursive_removal(ectx.dentry, NULL); 528 - dput(ectx.dentry); 529 - } 530 - } while (ectx.dentry); 531 - fput(file); 532 - 533 - /* 534 - * then loop over variables, creating them if there's no matching 535 - * dentry 536 - */ 537 - efivar_init(efivarfs_check_missing, sfi->sb, false); 538 - 539 - return NOTIFY_OK; 522 + efivar_init(efivarfs_check_missing, sb, false); 523 + pr_info("efivarfs: finished resyncing variable state\n"); 524 + return 0; 540 525 } 541 526 542 527 static int efivarfs_init_fs_context(struct fs_context *fc) ··· 504 593 fc->s_fs_info = sfi; 505 594 fc->ops = &efivarfs_context_ops; 506 595 507 - sfi->pm_nb.notifier_call = efivarfs_pm_notify; 508 - sfi->pm_nb.priority = 0; 509 - 510 596 return 0; 511 597 } 512 598 ··· 513 605 514 606 blocking_notifier_chain_unregister(&efivar_ops_nh, &sfi->nb); 515 607 kill_litter_super(sb); 516 - unregister_pm_notifier(&sfi->pm_nb); 517 608 518 609 kfree(sfi); 519 610 }
+3 -3
fs/f2fs/gc.c
··· 2271 2271 if (err) 2272 2272 return err; 2273 2273 2274 - err = freeze_super(sbi->sb, FREEZE_HOLDER_USERSPACE); 2274 + err = freeze_super(sbi->sb, FREEZE_HOLDER_KERNEL, NULL); 2275 2275 if (err) 2276 2276 return err; 2277 2277 2278 2278 if (f2fs_readonly(sbi->sb)) { 2279 - err = thaw_super(sbi->sb, FREEZE_HOLDER_USERSPACE); 2279 + err = thaw_super(sbi->sb, FREEZE_HOLDER_KERNEL, NULL); 2280 2280 if (err) 2281 2281 return err; 2282 2282 return -EROFS; ··· 2333 2333 out_err: 2334 2334 f2fs_up_write(&sbi->cp_global_sem); 2335 2335 f2fs_up_write(&sbi->gc_lock); 2336 - thaw_super(sbi->sb, FREEZE_HOLDER_USERSPACE); 2336 + thaw_super(sbi->sb, FREEZE_HOLDER_KERNEL, NULL); 2337 2337 return err; 2338 2338 }
+15 -9
fs/gfs2/super.c
··· 674 674 return sdp->sd_log_error; 675 675 } 676 676 677 - static int gfs2_do_thaw(struct gfs2_sbd *sdp) 677 + static int gfs2_do_thaw(struct gfs2_sbd *sdp, enum freeze_holder who, const void *freeze_owner) 678 678 { 679 679 struct super_block *sb = sdp->sd_vfs; 680 680 int error; ··· 682 682 error = gfs2_freeze_lock_shared(sdp); 683 683 if (error) 684 684 goto fail; 685 - error = thaw_super(sb, FREEZE_HOLDER_USERSPACE); 685 + error = thaw_super(sb, who, freeze_owner); 686 686 if (!error) 687 687 return 0; 688 688 ··· 703 703 if (test_bit(SDF_FROZEN, &sdp->sd_flags)) 704 704 goto freeze_failed; 705 705 706 - error = freeze_super(sb, FREEZE_HOLDER_USERSPACE); 706 + error = freeze_super(sb, FREEZE_HOLDER_USERSPACE, NULL); 707 707 if (error) 708 708 goto freeze_failed; 709 709 710 710 gfs2_freeze_unlock(sdp); 711 711 set_bit(SDF_FROZEN, &sdp->sd_flags); 712 712 713 - error = gfs2_do_thaw(sdp); 713 + error = gfs2_do_thaw(sdp, FREEZE_HOLDER_USERSPACE, NULL); 714 714 if (error) 715 715 goto out; 716 716 ··· 728 728 /** 729 729 * gfs2_freeze_super - prevent further writes to the filesystem 730 730 * @sb: the VFS structure for the filesystem 731 + * @who: freeze flags 732 + * @freeze_owner: owner of the freeze 731 733 * 732 734 */ 733 735 734 - static int gfs2_freeze_super(struct super_block *sb, enum freeze_holder who) 736 + static int gfs2_freeze_super(struct super_block *sb, enum freeze_holder who, 737 + const void *freeze_owner) 735 738 { 736 739 struct gfs2_sbd *sdp = sb->s_fs_info; 737 740 int error; ··· 747 744 } 748 745 749 746 for (;;) { 750 - error = freeze_super(sb, FREEZE_HOLDER_USERSPACE); 747 + error = freeze_super(sb, who, freeze_owner); 751 748 if (error) { 752 749 fs_info(sdp, "GFS2: couldn't freeze filesystem: %d\n", 753 750 error); ··· 761 758 break; 762 759 } 763 760 764 - error = gfs2_do_thaw(sdp); 761 + error = gfs2_do_thaw(sdp, who, freeze_owner); 765 762 if (error) 766 763 goto out; 767 764 ··· 799 796 /** 800 797 * gfs2_thaw_super - reallow 
writes to the filesystem 801 798 * @sb: the VFS structure for the filesystem 799 + * @who: freeze flags 800 + * @freeze_owner: owner of the freeze 802 801 * 803 802 */ 804 803 805 - static int gfs2_thaw_super(struct super_block *sb, enum freeze_holder who) 804 + static int gfs2_thaw_super(struct super_block *sb, enum freeze_holder who, 805 + const void *freeze_owner) 806 806 { 807 807 struct gfs2_sbd *sdp = sb->s_fs_info; 808 808 int error; ··· 820 814 atomic_inc(&sb->s_active); 821 815 gfs2_freeze_unlock(sdp); 822 816 823 - error = gfs2_do_thaw(sdp); 817 + error = gfs2_do_thaw(sdp, who, freeze_owner); 824 818 825 819 if (!error) { 826 820 clear_bit(SDF_FREEZE_INITIATOR, &sdp->sd_flags);
+2 -2
fs/gfs2/sys.c
··· 174 174 175 175 switch (n) { 176 176 case 0: 177 - error = thaw_super(sdp->sd_vfs, FREEZE_HOLDER_USERSPACE); 177 + error = thaw_super(sdp->sd_vfs, FREEZE_HOLDER_USERSPACE, NULL); 178 178 break; 179 179 case 1: 180 - error = freeze_super(sdp->sd_vfs, FREEZE_HOLDER_USERSPACE); 180 + error = freeze_super(sdp->sd_vfs, FREEZE_HOLDER_USERSPACE, NULL); 181 181 break; 182 182 default: 183 183 return -EINVAL;
+1
fs/internal.h
··· 344 344 void file_f_owner_release(struct file *file); 345 345 bool file_seek_cur_needs_f_lock(struct file *file); 346 346 int statmount_mnt_idmap(struct mnt_idmap *idmap, struct seq_file *seq, bool uid_map); 347 + struct dentry *find_next_child(struct dentry *parent, struct dentry *prev); 347 348 int anon_inode_getattr(struct mnt_idmap *idmap, const struct path *path, 348 349 struct kstat *stat, u32 request_mask, 349 350 unsigned int query_flags);
+4 -4
fs/ioctl.c
··· 396 396 397 397 /* Freeze */ 398 398 if (sb->s_op->freeze_super) 399 - return sb->s_op->freeze_super(sb, FREEZE_HOLDER_USERSPACE); 400 - return freeze_super(sb, FREEZE_HOLDER_USERSPACE); 399 + return sb->s_op->freeze_super(sb, FREEZE_HOLDER_USERSPACE, NULL); 400 + return freeze_super(sb, FREEZE_HOLDER_USERSPACE, NULL); 401 401 } 402 402 403 403 static int ioctl_fsthaw(struct file *filp) ··· 409 409 410 410 /* Thaw */ 411 411 if (sb->s_op->thaw_super) 412 - return sb->s_op->thaw_super(sb, FREEZE_HOLDER_USERSPACE); 413 - return thaw_super(sb, FREEZE_HOLDER_USERSPACE); 412 + return sb->s_op->thaw_super(sb, FREEZE_HOLDER_USERSPACE, NULL); 413 + return thaw_super(sb, FREEZE_HOLDER_USERSPACE, NULL); 414 414 } 415 415 416 416 static int ioctl_file_dedupe_range(struct file *file,
+15
fs/kernfs/mount.c
··· 62 62 63 63 .show_options = kernfs_sop_show_options, 64 64 .show_path = kernfs_sop_show_path, 65 + 66 + /* 67 + * sysfs is built on top of kernfs and sysfs provides the power 68 + * management infrastructure to support suspend/hibernate by 69 + * writing to various files in /sys/power/. As filesystems may 70 + * be automatically frozen during suspend/hibernate implementing 71 + * freeze/thaw support for kernfs generically will cause 72 + * deadlocks as the suspending/hibernation initiating task will 73 + * hold a VFS lock that it will then wait upon to be released. 74 + * If freeze/thaw for kernfs is needed talk to the VFS. 75 + */ 76 + .freeze_fs = NULL, 77 + .unfreeze_fs = NULL, 78 + .freeze_super = NULL, 79 + .thaw_super = NULL, 65 80 }; 66 81 67 82 static int kernfs_encode_fh(struct inode *inode, __u32 *fh, int *max_len,
+2 -1
fs/libfs.c
··· 583 583 .fsync = noop_fsync, 584 584 }; 585 585 586 - static struct dentry *find_next_child(struct dentry *parent, struct dentry *prev) 586 + struct dentry *find_next_child(struct dentry *parent, struct dentry *prev) 587 587 { 588 588 struct dentry *child = NULL, *d; 589 589 ··· 603 603 dput(prev); 604 604 return child; 605 605 } 606 + EXPORT_SYMBOL(find_next_child); 606 607 607 608 void simple_recursive_removal(struct dentry *dentry, 608 609 void (*callback)(struct dentry *))
+226 -92
fs/super.c
··· 39 39 #include <uapi/linux/mount.h> 40 40 #include "internal.h" 41 41 42 - static int thaw_super_locked(struct super_block *sb, enum freeze_holder who); 42 + static int thaw_super_locked(struct super_block *sb, enum freeze_holder who, 43 + const void *freeze_owner); 43 44 44 45 static LIST_HEAD(super_blocks); 45 46 static DEFINE_SPINLOCK(sb_lock); ··· 888 887 } 889 888 EXPORT_SYMBOL(drop_super_exclusive); 890 889 891 - static void __iterate_supers(void (*f)(struct super_block *)) 890 + enum super_iter_flags_t { 891 + SUPER_ITER_EXCL = (1U << 0), 892 + SUPER_ITER_UNLOCKED = (1U << 1), 893 + SUPER_ITER_REVERSE = (1U << 2), 894 + }; 895 + 896 + static inline struct super_block *first_super(enum super_iter_flags_t flags) 897 + { 898 + if (flags & SUPER_ITER_REVERSE) 899 + return list_last_entry(&super_blocks, struct super_block, s_list); 900 + return list_first_entry(&super_blocks, struct super_block, s_list); 901 + } 902 + 903 + static inline struct super_block *next_super(struct super_block *sb, 904 + enum super_iter_flags_t flags) 905 + { 906 + if (flags & SUPER_ITER_REVERSE) 907 + return list_prev_entry(sb, s_list); 908 + return list_next_entry(sb, s_list); 909 + } 910 + 911 + static void __iterate_supers(void (*f)(struct super_block *, void *), void *arg, 912 + enum super_iter_flags_t flags) 892 913 { 893 914 struct super_block *sb, *p = NULL; 915 + bool excl = flags & SUPER_ITER_EXCL; 894 916 895 - spin_lock(&sb_lock); 896 - list_for_each_entry(sb, &super_blocks, s_list) { 917 + guard(spinlock)(&sb_lock); 918 + 919 + for (sb = first_super(flags); 920 + !list_entry_is_head(sb, &super_blocks, s_list); 921 + sb = next_super(sb, flags)) { 897 922 if (super_flags(sb, SB_DYING)) 898 923 continue; 899 924 sb->s_count++; 900 925 spin_unlock(&sb_lock); 901 926 902 - f(sb); 903 - 904 - spin_lock(&sb_lock); 905 - if (p) 906 - __put_super(p); 907 - p = sb; 908 - } 909 - if (p) 910 - __put_super(p); 911 - spin_unlock(&sb_lock); 912 - } 913 - /** 914 - * iterate_supers - 
call function for all active superblocks 915 - * @f: function to call 916 - * @arg: argument to pass to it 917 - * 918 - * Scans the superblock list and calls given function, passing it 919 - * locked superblock and given argument. 920 - */ 921 - void iterate_supers(void (*f)(struct super_block *, void *), void *arg) 922 - { 923 - struct super_block *sb, *p = NULL; 924 - 925 - spin_lock(&sb_lock); 926 - list_for_each_entry(sb, &super_blocks, s_list) { 927 - bool locked; 928 - 929 - sb->s_count++; 930 - spin_unlock(&sb_lock); 931 - 932 - locked = super_lock_shared(sb); 933 - if (locked) { 934 - if (sb->s_root) 935 - f(sb, arg); 936 - super_unlock_shared(sb); 927 + if (flags & SUPER_ITER_UNLOCKED) { 928 + f(sb, arg); 929 + } else if (super_lock(sb, excl)) { 930 + f(sb, arg); 931 + super_unlock(sb, excl); 937 932 } 938 933 939 934 spin_lock(&sb_lock); ··· 939 942 } 940 943 if (p) 941 944 __put_super(p); 942 - spin_unlock(&sb_lock); 945 + } 946 + 947 + void iterate_supers(void (*f)(struct super_block *, void *), void *arg) 948 + { 949 + __iterate_supers(f, arg, 0); 943 950 } 944 951 945 952 /** ··· 964 963 hlist_for_each_entry(sb, &type->fs_supers, s_instances) { 965 964 bool locked; 966 965 966 + if (super_flags(sb, SB_DYING)) 967 + continue; 968 + 967 969 sb->s_count++; 968 970 spin_unlock(&sb_lock); 969 971 970 972 locked = super_lock_shared(sb); 971 - if (locked) { 972 - if (sb->s_root) 973 - f(sb, arg); 974 - super_unlock_shared(sb); 975 - } 973 + if (locked) 974 + f(sb, arg); 976 975 977 976 spin_lock(&sb_lock); 978 977 if (p) ··· 992 991 993 992 spin_lock(&sb_lock); 994 993 list_for_each_entry(sb, &super_blocks, s_list) { 995 - if (sb->s_dev == dev) { 996 - bool locked; 994 + bool locked; 997 995 998 - sb->s_count++; 999 - spin_unlock(&sb_lock); 1000 - /* still alive? 
*/ 1001 - locked = super_lock(sb, excl); 1002 - if (locked) { 1003 - if (sb->s_root) 1004 - return sb; 1005 - super_unlock(sb, excl); 1006 - } 1007 - /* nope, got unmounted */ 1008 - spin_lock(&sb_lock); 1009 - __put_super(sb); 1010 - break; 1011 - } 996 + if (sb->s_dev != dev) 997 + continue; 998 + 999 + sb->s_count++; 1000 + spin_unlock(&sb_lock); 1001 + 1002 + locked = super_lock(sb, excl); 1003 + if (locked) 1004 + return sb; 1005 + 1006 + spin_lock(&sb_lock); 1007 + __put_super(sb); 1008 + break; 1012 1009 } 1013 1010 spin_unlock(&sb_lock); 1014 1011 return NULL; ··· 1110 1111 return retval; 1111 1112 } 1112 1113 1113 - static void do_emergency_remount_callback(struct super_block *sb) 1114 + static void do_emergency_remount_callback(struct super_block *sb, void *unused) 1114 1115 { 1115 - bool locked = super_lock_excl(sb); 1116 - 1117 - if (locked && sb->s_root && sb->s_bdev && !sb_rdonly(sb)) { 1116 + if (sb->s_bdev && !sb_rdonly(sb)) { 1118 1117 struct fs_context *fc; 1119 1118 1120 1119 fc = fs_context_for_reconfigure(sb->s_root, ··· 1123 1126 put_fs_context(fc); 1124 1127 } 1125 1128 } 1126 - if (locked) 1127 - super_unlock_excl(sb); 1128 1129 } 1129 1130 1130 1131 static void do_emergency_remount(struct work_struct *work) 1131 1132 { 1132 - __iterate_supers(do_emergency_remount_callback); 1133 + __iterate_supers(do_emergency_remount_callback, NULL, 1134 + SUPER_ITER_EXCL | SUPER_ITER_REVERSE); 1133 1135 kfree(work); 1134 1136 printk("Emergency Remount complete\n"); 1135 1137 } ··· 1144 1148 } 1145 1149 } 1146 1150 1147 - static void do_thaw_all_callback(struct super_block *sb) 1151 + static void do_thaw_all_callback(struct super_block *sb, void *unused) 1148 1152 { 1149 - bool locked = super_lock_excl(sb); 1150 - 1151 - if (locked && sb->s_root) { 1152 - if (IS_ENABLED(CONFIG_BLOCK)) 1153 - while (sb->s_bdev && !bdev_thaw(sb->s_bdev)) 1154 - pr_warn("Emergency Thaw on %pg\n", sb->s_bdev); 1155 - thaw_super_locked(sb, FREEZE_HOLDER_USERSPACE); 1156 - 
return; 1157 - } 1158 - if (locked) 1159 - super_unlock_excl(sb); 1153 + if (IS_ENABLED(CONFIG_BLOCK)) 1154 + while (sb->s_bdev && !bdev_thaw(sb->s_bdev)) 1155 + pr_warn("Emergency Thaw on %pg\n", sb->s_bdev); 1156 + thaw_super_locked(sb, FREEZE_HOLDER_USERSPACE, NULL); 1157 + return; 1160 1158 } 1161 1159 1162 1160 static void do_thaw_all(struct work_struct *work) 1163 1161 { 1164 - __iterate_supers(do_thaw_all_callback); 1162 + __iterate_supers(do_thaw_all_callback, NULL, SUPER_ITER_EXCL); 1165 1163 kfree(work); 1166 1164 printk(KERN_WARNING "Emergency Thaw complete\n"); 1167 1165 } ··· 1174 1184 INIT_WORK(work, do_thaw_all); 1175 1185 schedule_work(work); 1176 1186 } 1187 + } 1188 + 1189 + static inline bool get_active_super(struct super_block *sb) 1190 + { 1191 + bool active = false; 1192 + 1193 + if (super_lock_excl(sb)) { 1194 + active = atomic_inc_not_zero(&sb->s_active); 1195 + super_unlock_excl(sb); 1196 + } 1197 + return active; 1198 + } 1199 + 1200 + static const char *filesystems_freeze_ptr = "filesystems_freeze"; 1201 + 1202 + static void filesystems_freeze_callback(struct super_block *sb, void *unused) 1203 + { 1204 + if (!sb->s_op->freeze_fs && !sb->s_op->freeze_super) 1205 + return; 1206 + 1207 + if (!get_active_super(sb)) 1208 + return; 1209 + 1210 + if (sb->s_op->freeze_super) 1211 + sb->s_op->freeze_super(sb, FREEZE_EXCL | FREEZE_HOLDER_KERNEL, 1212 + filesystems_freeze_ptr); 1213 + else 1214 + freeze_super(sb, FREEZE_EXCL | FREEZE_HOLDER_KERNEL, 1215 + filesystems_freeze_ptr); 1216 + 1217 + deactivate_super(sb); 1218 + } 1219 + 1220 + void filesystems_freeze(void) 1221 + { 1222 + __iterate_supers(filesystems_freeze_callback, NULL, 1223 + SUPER_ITER_UNLOCKED | SUPER_ITER_REVERSE); 1224 + } 1225 + 1226 + static void filesystems_thaw_callback(struct super_block *sb, void *unused) 1227 + { 1228 + if (!sb->s_op->freeze_fs && !sb->s_op->freeze_super) 1229 + return; 1230 + 1231 + if (!get_active_super(sb)) 1232 + return; 1233 + 1234 + if 
(sb->s_op->thaw_super) 1235 + sb->s_op->thaw_super(sb, FREEZE_EXCL | FREEZE_HOLDER_KERNEL, 1236 + filesystems_freeze_ptr); 1237 + else 1238 + thaw_super(sb, FREEZE_EXCL | FREEZE_HOLDER_KERNEL, 1239 + filesystems_freeze_ptr); 1240 + 1241 + deactivate_super(sb); 1242 + } 1243 + 1244 + void filesystems_thaw(void) 1245 + { 1246 + __iterate_supers(filesystems_thaw_callback, NULL, SUPER_ITER_UNLOCKED); 1177 1247 } 1178 1248 1179 1249 static DEFINE_IDA(unnamed_dev_ida); ··· 1529 1479 1530 1480 if (sb->s_op->freeze_super) 1531 1481 error = sb->s_op->freeze_super(sb, 1532 - FREEZE_MAY_NEST | FREEZE_HOLDER_USERSPACE); 1482 + FREEZE_MAY_NEST | FREEZE_HOLDER_USERSPACE, NULL); 1533 1483 else 1534 1484 error = freeze_super(sb, 1535 - FREEZE_MAY_NEST | FREEZE_HOLDER_USERSPACE); 1485 + FREEZE_MAY_NEST | FREEZE_HOLDER_USERSPACE, NULL); 1536 1486 if (!error) 1537 1487 error = sync_blockdev(bdev); 1538 1488 deactivate_super(sb); ··· 1578 1528 1579 1529 if (sb->s_op->thaw_super) 1580 1530 error = sb->s_op->thaw_super(sb, 1581 - FREEZE_MAY_NEST | FREEZE_HOLDER_USERSPACE); 1531 + FREEZE_MAY_NEST | FREEZE_HOLDER_USERSPACE, NULL); 1582 1532 else 1583 1533 error = thaw_super(sb, 1584 - FREEZE_MAY_NEST | FREEZE_HOLDER_USERSPACE); 1534 + FREEZE_MAY_NEST | FREEZE_HOLDER_USERSPACE, NULL); 1585 1535 deactivate_super(sb); 1586 1536 return error; 1587 1537 } ··· 1953 1903 } 1954 1904 1955 1905 #define FREEZE_HOLDERS (FREEZE_HOLDER_KERNEL | FREEZE_HOLDER_USERSPACE) 1956 - #define FREEZE_FLAGS (FREEZE_HOLDERS | FREEZE_MAY_NEST) 1906 + #define FREEZE_FLAGS (FREEZE_HOLDERS | FREEZE_MAY_NEST | FREEZE_EXCL) 1957 1907 1958 1908 static inline int freeze_inc(struct super_block *sb, enum freeze_holder who) 1959 1909 { ··· 1979 1929 return sb->s_writers.freeze_kcount + sb->s_writers.freeze_ucount; 1980 1930 } 1981 1931 1982 - static inline bool may_freeze(struct super_block *sb, enum freeze_holder who) 1932 + static inline bool may_freeze(struct super_block *sb, enum freeze_holder who, 1933 + const void 
*freeze_owner) 1983 1934 { 1935 + lockdep_assert_held(&sb->s_umount); 1936 + 1984 1937 WARN_ON_ONCE((who & ~FREEZE_FLAGS)); 1985 1938 WARN_ON_ONCE(hweight32(who & FREEZE_HOLDERS) > 1); 1939 + 1940 + if (who & FREEZE_EXCL) { 1941 + if (WARN_ON_ONCE(!(who & FREEZE_HOLDER_KERNEL))) 1942 + return false; 1943 + if (WARN_ON_ONCE(who & ~(FREEZE_EXCL | FREEZE_HOLDER_KERNEL))) 1944 + return false; 1945 + if (WARN_ON_ONCE(!freeze_owner)) 1946 + return false; 1947 + /* This freeze already has a specific owner. */ 1948 + if (sb->s_writers.freeze_owner) 1949 + return false; 1950 + /* 1951 + * This is already frozen multiple times so we're just 1952 + * going to take a reference count and mark the freeze as 1953 + * being owned by the caller. 1954 + */ 1955 + if (sb->s_writers.freeze_kcount + sb->s_writers.freeze_ucount) 1956 + sb->s_writers.freeze_owner = freeze_owner; 1957 + return true; 1958 + } 1986 1959 1987 1960 if (who & FREEZE_HOLDER_KERNEL) 1988 1961 return (who & FREEZE_MAY_NEST) || ··· 2016 1943 return false; 2017 1944 } 2018 1945 1946 + static inline bool may_unfreeze(struct super_block *sb, enum freeze_holder who, 1947 + const void *freeze_owner) 1948 + { 1949 + lockdep_assert_held(&sb->s_umount); 1950 + 1951 + WARN_ON_ONCE((who & ~FREEZE_FLAGS)); 1952 + WARN_ON_ONCE(hweight32(who & FREEZE_HOLDERS) > 1); 1953 + 1954 + if (who & FREEZE_EXCL) { 1955 + if (WARN_ON_ONCE(!(who & FREEZE_HOLDER_KERNEL))) 1956 + return false; 1957 + if (WARN_ON_ONCE(who & ~(FREEZE_EXCL | FREEZE_HOLDER_KERNEL))) 1958 + return false; 1959 + if (WARN_ON_ONCE(!freeze_owner)) 1960 + return false; 1961 + if (WARN_ON_ONCE(sb->s_writers.freeze_kcount == 0)) 1962 + return false; 1963 + /* This isn't exclusively frozen. */ 1964 + if (!sb->s_writers.freeze_owner) 1965 + return false; 1966 + /* This isn't exclusively frozen by us. 
*/ 1967 + if (sb->s_writers.freeze_owner != freeze_owner) 1968 + return false; 1969 + /* 1970 + * This is still frozen multiple times so we're just 1971 + * going to drop our reference count and undo our 1972 + * exclusive freeze. 1973 + */ 1974 + if ((sb->s_writers.freeze_kcount + sb->s_writers.freeze_ucount) > 1) 1975 + sb->s_writers.freeze_owner = NULL; 1976 + return true; 1977 + } 1978 + 1979 + if (who & FREEZE_HOLDER_KERNEL) { 1980 + /* 1981 + * Someone's trying to steal the reference belonging to 1982 + * @sb->s_writers.freeze_owner. 1983 + */ 1984 + if (sb->s_writers.freeze_kcount == 1 && 1985 + sb->s_writers.freeze_owner) 1986 + return false; 1987 + return sb->s_writers.freeze_kcount > 0; 1988 + } 1989 + 1990 + if (who & FREEZE_HOLDER_USERSPACE) 1991 + return sb->s_writers.freeze_ucount > 0; 1992 + 1993 + return false; 1994 + } 1995 + 2019 1996 /** 2020 1997 * freeze_super - lock the filesystem and force it into a consistent state 2021 1998 * @sb: the super to lock 2022 1999 * @who: context that wants to freeze 2000 + * @freeze_owner: owner of the freeze 2023 2001 * 2024 2002 * Syncs the super to make sure the filesystem is consistent and calls the fs's 2025 2003 * freeze_fs. Subsequent calls to this without first thawing the fs may return ··· 2122 1998 * Return: If the freeze was successful zero is returned. If the freeze 2123 1999 * failed a negative error code is returned. 2124 2000 */ 2125 - int freeze_super(struct super_block *sb, enum freeze_holder who) 2001 + int freeze_super(struct super_block *sb, enum freeze_holder who, const void *freeze_owner) 2126 2002 { 2127 2003 int ret; 2128 2004 ··· 2134 2010 2135 2011 retry: 2136 2012 if (sb->s_writers.frozen == SB_FREEZE_COMPLETE) { 2137 - if (may_freeze(sb, who)) 2013 + if (may_freeze(sb, who, freeze_owner)) 2138 2014 ret = !!WARN_ON_ONCE(freeze_inc(sb, who) == 1); 2139 2015 else 2140 2016 ret = -EBUSY; ··· 2156 2032 if (sb_rdonly(sb)) { 2157 2033 /* Nothing to do really... 
*/ 2158 2034 WARN_ON_ONCE(freeze_inc(sb, who) > 1); 2035 + sb->s_writers.freeze_owner = freeze_owner; 2159 2036 sb->s_writers.frozen = SB_FREEZE_COMPLETE; 2160 2037 wake_up_var(&sb->s_writers.frozen); 2161 2038 super_unlock_excl(sb); ··· 2204 2079 * when frozen is set to SB_FREEZE_COMPLETE, and for thaw_super(). 2205 2080 */ 2206 2081 WARN_ON_ONCE(freeze_inc(sb, who) > 1); 2082 + sb->s_writers.freeze_owner = freeze_owner; 2207 2083 sb->s_writers.frozen = SB_FREEZE_COMPLETE; 2208 2084 wake_up_var(&sb->s_writers.frozen); 2209 2085 lockdep_sb_freeze_release(sb); ··· 2219 2093 * removes that state without releasing the other state or unlocking the 2220 2094 * filesystem. 2221 2095 */ 2222 - static int thaw_super_locked(struct super_block *sb, enum freeze_holder who) 2096 + static int thaw_super_locked(struct super_block *sb, enum freeze_holder who, 2097 + const void *freeze_owner) 2223 2098 { 2224 2099 int error = -EINVAL; 2225 2100 2226 2101 if (sb->s_writers.frozen != SB_FREEZE_COMPLETE) 2102 + goto out_unlock; 2103 + 2104 + if (!may_unfreeze(sb, who, freeze_owner)) 2227 2105 goto out_unlock; 2228 2106 2229 2107 /* ··· 2239 2109 2240 2110 if (sb_rdonly(sb)) { 2241 2111 sb->s_writers.frozen = SB_UNFROZEN; 2112 + sb->s_writers.freeze_owner = NULL; 2242 2113 wake_up_var(&sb->s_writers.frozen); 2243 2114 goto out_deactivate; 2244 2115 } ··· 2257 2126 } 2258 2127 2259 2128 sb->s_writers.frozen = SB_UNFROZEN; 2129 + sb->s_writers.freeze_owner = NULL; 2260 2130 wake_up_var(&sb->s_writers.frozen); 2261 2131 sb_freeze_unlock(sb, SB_FREEZE_FS); 2262 2132 out_deactivate: ··· 2273 2141 * thaw_super -- unlock filesystem 2274 2142 * @sb: the super to thaw 2275 2143 * @who: context that wants to freeze 2144 + * @freeze_owner: owner of the freeze 2276 2145 * 2277 2146 * Unlocks the filesystem and marks it writeable again after freeze_super() 2278 2147 * if there are no remaining freezes on the filesystem. 
··· 2287 2154 * have been frozen through the block layer via multiple block devices. 2288 2155 * The filesystem remains frozen until all block devices are unfrozen. 2289 2156 */ 2290 - int thaw_super(struct super_block *sb, enum freeze_holder who) 2157 + int thaw_super(struct super_block *sb, enum freeze_holder who, 2158 + const void *freeze_owner) 2291 2159 { 2292 2160 if (!super_lock_excl(sb)) { 2293 2161 WARN_ON_ONCE("Dying superblock while thawing!"); 2294 2162 return -EINVAL; 2295 2163 } 2296 - return thaw_super_locked(sb, who); 2164 + return thaw_super_locked(sb, who, freeze_owner); 2297 2165 } 2298 2166 EXPORT_SYMBOL(thaw_super); 2299 2167
+2 -2
fs/xfs/scrub/fscounters.c
··· 123 123 { 124 124 int error; 125 125 126 - error = freeze_super(sc->mp->m_super, FREEZE_HOLDER_KERNEL); 126 + error = freeze_super(sc->mp->m_super, FREEZE_HOLDER_KERNEL, NULL); 127 127 trace_xchk_fsfreeze(sc, error); 128 128 return error; 129 129 } ··· 135 135 int error; 136 136 137 137 /* This should always succeed, we have a kernel freeze */ 138 - error = thaw_super(sc->mp->m_super, FREEZE_HOLDER_KERNEL); 138 + error = thaw_super(sc->mp->m_super, FREEZE_HOLDER_KERNEL, NULL); 139 139 trace_xchk_fsthaw(sc, error); 140 140 return error; 141 141 }
+3 -3
fs/xfs/xfs_notify_failure.c
··· 127 127 struct super_block *sb = mp->m_super; 128 128 int error; 129 129 130 - error = freeze_super(sb, FREEZE_HOLDER_KERNEL); 130 + error = freeze_super(sb, FREEZE_HOLDER_KERNEL, NULL); 131 131 if (error) 132 132 xfs_emerg(mp, "already frozen by kernel, err=%d", error); 133 133 ··· 143 143 int error; 144 144 145 145 if (kernel_frozen) { 146 - error = thaw_super(sb, FREEZE_HOLDER_KERNEL); 146 + error = thaw_super(sb, FREEZE_HOLDER_KERNEL, NULL); 147 147 if (error) 148 148 xfs_emerg(mp, "still frozen after notify failure, err=%d", 149 149 error); ··· 153 153 * Also thaw userspace call anyway because the device is about to be 154 154 * removed immediately. 155 155 */ 156 - thaw_super(sb, FREEZE_HOLDER_USERSPACE); 156 + thaw_super(sb, FREEZE_HOLDER_USERSPACE, NULL); 157 157 } 158 158 159 159 static int
+13 -6
include/linux/fs.h
··· 1316 1316 unsigned short frozen; /* Is sb frozen? */ 1317 1317 int freeze_kcount; /* How many kernel freeze requests? */ 1318 1318 int freeze_ucount; /* How many userspace freeze requests? */ 1319 + const void *freeze_owner; /* Owner of the freeze */ 1319 1320 struct percpu_rw_semaphore rw_sem[SB_FREEZE_LEVELS]; 1320 1321 }; 1321 1322 ··· 1790 1789 1791 1790 static inline void __sb_start_write(struct super_block *sb, int level) 1792 1791 { 1793 - percpu_down_read(sb->s_writers.rw_sem + level - 1); 1792 + percpu_down_read_freezable(sb->s_writers.rw_sem + level - 1, true); 1794 1793 } 1795 1794 1796 1795 static inline bool __sb_start_write_trylock(struct super_block *sb, int level) ··· 2289 2288 * @FREEZE_HOLDER_KERNEL: kernel wants to freeze or thaw filesystem 2290 2289 * @FREEZE_HOLDER_USERSPACE: userspace wants to freeze or thaw filesystem 2291 2290 * @FREEZE_MAY_NEST: whether nesting freeze and thaw requests is allowed 2291 + * @FREEZE_EXCL: a freeze that can only be undone by the owner 2292 2292 * 2293 2293 * Indicate who the owner of the freeze or thaw request is and whether 2294 2294 * the freeze needs to be exclusive or can nest. 
··· 2303 2301 FREEZE_HOLDER_KERNEL = (1U << 0), 2304 2302 FREEZE_HOLDER_USERSPACE = (1U << 1), 2305 2303 FREEZE_MAY_NEST = (1U << 2), 2304 + FREEZE_EXCL = (1U << 3), 2306 2305 }; 2307 2306 2308 2307 struct super_operations { ··· 2317 2314 void (*evict_inode) (struct inode *); 2318 2315 void (*put_super) (struct super_block *); 2319 2316 int (*sync_fs)(struct super_block *sb, int wait); 2320 - int (*freeze_super) (struct super_block *, enum freeze_holder who); 2317 + int (*freeze_super) (struct super_block *, enum freeze_holder who, const void *owner); 2321 2318 int (*freeze_fs) (struct super_block *); 2322 - int (*thaw_super) (struct super_block *, enum freeze_holder who); 2319 + int (*thaw_super) (struct super_block *, enum freeze_holder who, const void *owner); 2323 2320 int (*unfreeze_fs) (struct super_block *); 2324 2321 int (*statfs) (struct dentry *, struct kstatfs *); 2325 2322 int (*remount_fs) (struct super_block *, int *, char *); ··· 2729 2726 extern int vfs_statfs(const struct path *, struct kstatfs *); 2730 2727 extern int user_statfs(const char __user *, struct kstatfs *); 2731 2728 extern int fd_statfs(int, struct kstatfs *); 2732 - int freeze_super(struct super_block *super, enum freeze_holder who); 2733 - int thaw_super(struct super_block *super, enum freeze_holder who); 2729 + int freeze_super(struct super_block *super, enum freeze_holder who, 2730 + const void *freeze_owner); 2731 + int thaw_super(struct super_block *super, enum freeze_holder who, 2732 + const void *freeze_owner); 2734 2733 extern __printf(2, 3) 2735 2734 int super_setup_bdi_name(struct super_block *sb, char *fmt, ...); 2736 2735 extern int super_setup_bdi(struct super_block *sb); ··· 3541 3536 extern struct file_system_type *get_fs_type(const char *name); 3542 3537 extern void drop_super(struct super_block *sb); 3543 3538 extern void drop_super_exclusive(struct super_block *sb); 3544 - extern void iterate_supers(void (*)(struct super_block *, void *), void *); 3539 + extern void 
iterate_supers(void (*f)(struct super_block *, void *), void *arg); 3545 3540 extern void iterate_supers_type(struct file_system_type *, 3546 3541 void (*)(struct super_block *, void *), void *); 3542 + void filesystems_freeze(void); 3543 + void filesystems_thaw(void); 3547 3544 3548 3545 extern int dcache_dir_open(struct inode *, struct file *); 3549 3546 extern int dcache_dir_close(struct inode *, struct file *);
+16 -4
include/linux/percpu-rwsem.h
··· 43 43 #define DEFINE_STATIC_PERCPU_RWSEM(name) \ 44 44 __DEFINE_PERCPU_RWSEM(name, static) 45 45 46 - extern bool __percpu_down_read(struct percpu_rw_semaphore *, bool); 46 + extern bool __percpu_down_read(struct percpu_rw_semaphore *, bool, bool); 47 47 48 - static inline void percpu_down_read(struct percpu_rw_semaphore *sem) 48 + static inline void percpu_down_read_internal(struct percpu_rw_semaphore *sem, 49 + bool freezable) 49 50 { 50 51 might_sleep(); 51 52 ··· 64 63 if (likely(rcu_sync_is_idle(&sem->rss))) 65 64 this_cpu_inc(*sem->read_count); 66 65 else 67 - __percpu_down_read(sem, false); /* Unconditional memory barrier */ 66 + __percpu_down_read(sem, false, freezable); /* Unconditional memory barrier */ 68 67 /* 69 68 * The preempt_enable() prevents the compiler from 70 69 * bleeding the critical section out. 71 70 */ 72 71 preempt_enable(); 72 + } 73 + 74 + static inline void percpu_down_read(struct percpu_rw_semaphore *sem) 75 + { 76 + percpu_down_read_internal(sem, false); 77 + } 78 + 79 + static inline void percpu_down_read_freezable(struct percpu_rw_semaphore *sem, 80 + bool freeze) 81 + { 82 + percpu_down_read_internal(sem, freeze); 73 83 } 74 84 75 85 static inline bool percpu_down_read_trylock(struct percpu_rw_semaphore *sem) ··· 94 82 if (likely(rcu_sync_is_idle(&sem->rss))) 95 83 this_cpu_inc(*sem->read_count); 96 84 else 97 - ret = __percpu_down_read(sem, true); /* Unconditional memory barrier */ 85 + ret = __percpu_down_read(sem, true, false); /* Unconditional memory barrier */ 98 86 preempt_enable(); 99 87 /* 100 88 * The barrier() from preempt_enable() prevents the compiler from
+8 -5
kernel/locking/percpu-rwsem.c
··· 138 138 return !reader; /* wake (readers until) 1 writer */ 139 139 } 140 140 141 - static void percpu_rwsem_wait(struct percpu_rw_semaphore *sem, bool reader) 141 + static void percpu_rwsem_wait(struct percpu_rw_semaphore *sem, bool reader, 142 + bool freeze) 142 143 { 143 144 DEFINE_WAIT_FUNC(wq_entry, percpu_rwsem_wake_function); 144 145 bool wait; ··· 157 156 spin_unlock_irq(&sem->waiters.lock); 158 157 159 158 while (wait) { 160 - set_current_state(TASK_UNINTERRUPTIBLE); 159 + set_current_state(TASK_UNINTERRUPTIBLE | 160 + (freeze ? TASK_FREEZABLE : 0)); 161 161 if (!smp_load_acquire(&wq_entry.private)) 162 162 break; 163 163 schedule(); ··· 166 164 __set_current_state(TASK_RUNNING); 167 165 } 168 166 169 - bool __sched __percpu_down_read(struct percpu_rw_semaphore *sem, bool try) 167 + bool __sched __percpu_down_read(struct percpu_rw_semaphore *sem, bool try, 168 + bool freeze) 170 169 { 171 170 if (__percpu_down_read_trylock(sem)) 172 171 return true; ··· 177 174 178 175 trace_contention_begin(sem, LCB_F_PERCPU | LCB_F_READ); 179 176 preempt_enable(); 180 - percpu_rwsem_wait(sem, /* .reader = */ true); 177 + percpu_rwsem_wait(sem, /* .reader = */ true, freeze); 181 178 preempt_disable(); 182 179 trace_contention_end(sem, 0); 183 180 ··· 240 237 */ 241 238 if (!__percpu_down_write_trylock(sem)) { 242 239 trace_contention_begin(sem, LCB_F_PERCPU | LCB_F_WRITE); 243 - percpu_rwsem_wait(sem, /* .reader = */ false); 240 + percpu_rwsem_wait(sem, /* .reader = */ false, false); 244 241 contended = true; 245 242 } 246 243
+15 -1
kernel/power/hibernate.c
··· 778 778 goto Restore; 779 779 780 780 ksys_sync_helper(); 781 + if (filesystem_freeze_enabled) 782 + filesystems_freeze(); 781 783 782 784 error = freeze_processes(); 783 785 if (error) ··· 848 846 /* Don't bother checking whether freezer_test_done is true */ 849 847 freezer_test_done = false; 850 848 Exit: 849 + filesystems_thaw(); 851 850 pm_notifier_call_chain(PM_POST_HIBERNATION); 852 851 Restore: 853 852 pm_restore_console(); ··· 884 881 error = pm_notifier_call_chain_robust(PM_HIBERNATION_PREPARE, PM_POST_HIBERNATION); 885 882 if (error) 886 883 goto restore; 884 + 885 + if (filesystem_freeze_enabled) 886 + filesystems_freeze(); 887 887 888 888 error = freeze_processes(); 889 889 if (error) ··· 947 941 thaw_processes(); 948 942 949 943 exit: 944 + filesystems_thaw(); 950 945 pm_notifier_call_chain(PM_POST_HIBERNATION); 951 946 952 947 restore: ··· 1036 1029 if (error) 1037 1030 goto Restore; 1038 1031 1032 + if (filesystem_freeze_enabled) 1033 + filesystems_freeze(); 1034 + 1039 1035 pm_pr_dbg("Preparing processes for hibernation restore.\n"); 1040 1036 error = freeze_processes(); 1041 - if (error) 1037 + if (error) { 1038 + filesystems_thaw(); 1042 1039 goto Close_Finish; 1040 + } 1043 1041 1044 1042 error = freeze_kernel_threads(); 1045 1043 if (error) { 1046 1044 thaw_processes(); 1045 + filesystems_thaw(); 1047 1046 goto Close_Finish; 1048 1047 } 1049 1048 1050 1049 error = load_image_and_restore(); 1051 1050 thaw_processes(); 1051 + filesystems_thaw(); 1052 1052 Finish: 1053 1053 pm_notifier_call_chain(PM_POST_RESTORE); 1054 1054 Restore:
+31
kernel/power/main.c
··· 962 962 963 963 #endif /* CONFIG_FREEZER*/ 964 964 965 + #if defined(CONFIG_SUSPEND) || defined(CONFIG_HIBERNATION) 966 + bool filesystem_freeze_enabled = false; 967 + 968 + static ssize_t freeze_filesystems_show(struct kobject *kobj, 969 + struct kobj_attribute *attr, char *buf) 970 + { 971 + return sysfs_emit(buf, "%d\n", filesystem_freeze_enabled); 972 + } 973 + 974 + static ssize_t freeze_filesystems_store(struct kobject *kobj, 975 + struct kobj_attribute *attr, 976 + const char *buf, size_t n) 977 + { 978 + unsigned long val; 979 + 980 + if (kstrtoul(buf, 10, &val)) 981 + return -EINVAL; 982 + 983 + if (val > 1) 984 + return -EINVAL; 985 + 986 + filesystem_freeze_enabled = !!val; 987 + return n; 988 + } 989 + 990 + power_attr(freeze_filesystems); 991 + #endif /* CONFIG_SUSPEND || CONFIG_HIBERNATION */ 992 + 965 993 static struct attribute * g[] = { 966 994 &state_attr.attr, 967 995 #ifdef CONFIG_PM_TRACE ··· 1019 991 #endif 1020 992 #ifdef CONFIG_FREEZER 1021 993 &pm_freeze_timeout_attr.attr, 994 + #endif 995 + #if defined(CONFIG_SUSPEND) || defined(CONFIG_HIBERNATION) 996 + &freeze_filesystems_attr.attr, 1022 997 #endif 1023 998 NULL, 1024 999 };
+4
kernel/power/power.h
··· 18 18 unsigned long size; 19 19 } __aligned(PAGE_SIZE); 20 20 21 + #if defined(CONFIG_SUSPEND) || defined(CONFIG_HIBERNATION) 22 + extern bool filesystem_freeze_enabled; 23 + #endif 24 + 21 25 #ifdef CONFIG_HIBERNATION 22 26 /* kernel/power/snapshot.c */ 23 27 extern void __init hibernate_reserved_size_init(void);
+7
kernel/power/suspend.c
··· 30 30 #include <trace/events/power.h> 31 31 #include <linux/compiler.h> 32 32 #include <linux/moduleparam.h> 33 + #include <linux/fs.h> 33 34 34 35 #include "power.h" 35 36 ··· 375 374 if (error) 376 375 goto Restore; 377 376 377 + if (filesystem_freeze_enabled) 378 + filesystems_freeze(); 378 379 trace_suspend_resume(TPS("freeze_processes"), 0, true); 379 380 error = suspend_freeze_processes(); 380 381 trace_suspend_resume(TPS("freeze_processes"), 0, false); ··· 553 550 static void suspend_finish(void) 554 551 { 555 552 suspend_thaw_processes(); 553 + filesystems_thaw(); 556 554 pm_notifier_call_chain(PM_POST_SUSPEND); 557 555 pm_restore_console(); 558 556 } ··· 592 588 ksys_sync_helper(); 593 589 trace_suspend_resume(TPS("sync_filesystems"), 0, false); 594 590 } 591 + if (filesystem_freeze_enabled) 592 + filesystems_freeze(); 595 593 596 594 pm_pr_dbg("Preparing system for sleep (%s)\n", mem_sleep_labels[state]); 597 595 pm_suspend_clear_flags(); ··· 615 609 pm_pr_dbg("Finishing wakeup.\n"); 616 610 suspend_finish(); 617 611 Unlock: 612 + filesystems_thaw(); 618 613 mutex_unlock(&system_transition_mutex); 619 614 return error; 620 615 }