Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

quota: Fix slow quotaoff

Eric has reported that commit dabc8b207566 ("quota: fix dqput() to
follow the guarantees dquot_srcu should provide") heavily increases
runtime of generic/270 xfstest for ext4 in nojournal mode. The reason
for this is that ext4 in nojournal mode leaves dquots dirty until the last
dqput() and thus the cleanup done in quota_release_workfn() has to write
them all. Due to the way quota_release_workfn() is written this results
in synchronize_srcu() call for each dirty dquot which makes the dquot
cleanup when turning quotas off extremely slow.

To be able to avoid synchronize_srcu() for each dirty dquot we need to
rework how we track dquots to be cleaned up. Instead of keeping the last
dquot reference while it is on releasing_dquots list, we drop it right
away and mark the dquot with new DQ_RELEASING_B bit instead. This way we
can remove dquot from releasing_dquots list when new reference to
it is acquired and thus there's no need to call synchronize_srcu() each
time we drop dq_list_lock.

References: https://lore.kernel.org/all/ZRytn6CxFK2oECUt@debian-BULLSEYE-live-builder-AMD64
Reported-by: Eric Whitney <enwlinux@gmail.com>
Fixes: dabc8b207566 ("quota: fix dqput() to follow the guarantees dquot_srcu should provide")
CC: stable@vger.kernel.org
Signed-off-by: Jan Kara <jack@suse.cz>

Jan Kara 869b6ea1 8a749fd1

+43 -29
+39 -27
fs/quota/dquot.c
··· 233 233 * All dquots are placed to the end of inuse_list when first created, and this 234 234 * list is used for invalidate operation, which must look at every dquot. 235 235 * 236 - * When the last reference of a dquot will be dropped, the dquot will be 237 - * added to releasing_dquots. We'd then queue work item which would call 236 + * When the last reference of a dquot is dropped, the dquot is added to 237 + * releasing_dquots. We'll then queue work item which will call 238 238 * synchronize_srcu() and after that perform the final cleanup of all the 239 - * dquots on the list. Both releasing_dquots and free_dquots use the 240 - * dq_free list_head in the dquot struct. When a dquot is removed from 241 - * releasing_dquots, a reference count is always subtracted, and if 242 - * dq_count == 0 at that point, the dquot will be added to the free_dquots. 239 + * dquots on the list. Each cleaned up dquot is moved to free_dquots list. 240 + * Both releasing_dquots and free_dquots use the dq_free list_head in the dquot 241 + * struct. 243 242 * 244 - * Unused dquots (dq_count == 0) are added to the free_dquots list when freed, 245 - * and this list is searched whenever we need an available dquot. Dquots are 246 - * removed from the list as soon as they are used again, and 247 - * dqstats.free_dquots gives the number of dquots on the list. When 248 - * dquot is invalidated it's completely released from memory. 243 + * Unused and cleaned up dquots are in the free_dquots list and this list is 244 + * searched whenever we need an available dquot. Dquots are removed from the 245 + * list as soon as they are used again and dqstats.free_dquots gives the number 246 + * of dquots on the list. When dquot is invalidated it's completely released 247 + * from memory. 
249 248 * 250 249 * Dirty dquots are added to the dqi_dirty_list of quota_info when mark 251 250 * dirtied, and this list is searched when writing dirty dquots back to ··· 320 321 static inline void put_releasing_dquots(struct dquot *dquot) 321 322 { 322 323 list_add_tail(&dquot->dq_free, &releasing_dquots); 324 + set_bit(DQ_RELEASING_B, &dquot->dq_flags); 323 325 } 324 326 325 327 static inline void remove_free_dquot(struct dquot *dquot) ··· 328 328 if (list_empty(&dquot->dq_free)) 329 329 return; 330 330 list_del_init(&dquot->dq_free); 331 - if (!atomic_read(&dquot->dq_count)) 331 + if (!test_bit(DQ_RELEASING_B, &dquot->dq_flags)) 332 332 dqstats_dec(DQST_FREE_DQUOTS); 333 + else 334 + clear_bit(DQ_RELEASING_B, &dquot->dq_flags); 333 335 } 334 336 335 337 static inline void put_inuse(struct dquot *dquot) ··· 583 581 continue; 584 582 /* Wait for dquot users */ 585 583 if (atomic_read(&dquot->dq_count)) { 586 - /* dquot in releasing_dquots, flush and retry */ 587 - if (!list_empty(&dquot->dq_free)) { 588 - spin_unlock(&dq_list_lock); 589 - goto restart; 590 - } 591 - 592 584 atomic_inc(&dquot->dq_count); 593 585 spin_unlock(&dq_list_lock); 594 586 /* ··· 599 603 /* At this moment dquot() need not exist (it could be 600 604 * reclaimed by prune_dqcache(). Hence we must 601 605 * restart. */ 606 + goto restart; 607 + } 608 + /* 609 + * The last user already dropped its reference but dquot didn't 610 + * get fully cleaned up yet. Restart the scan which flushes the 611 + * work cleaning up released dquots. 
612 + */ 613 + if (test_bit(DQ_RELEASING_B, &dquot->dq_flags)) { 614 + spin_unlock(&dq_list_lock); 602 615 goto restart; 603 616 } 604 617 /* ··· 701 696 dq_dirty); 702 697 703 698 WARN_ON(!dquot_active(dquot)); 699 + /* If the dquot is releasing we should not touch it */ 700 + if (test_bit(DQ_RELEASING_B, &dquot->dq_flags)) { 701 + spin_unlock(&dq_list_lock); 702 + flush_delayed_work(&quota_release_work); 703 + spin_lock(&dq_list_lock); 704 + continue; 705 + } 704 706 705 707 /* Now we have active dquot from which someone is 706 708 * holding reference so we can safely just increase ··· 821 809 /* Exchange the list head to avoid livelock. */ 822 810 list_replace_init(&releasing_dquots, &rls_head); 823 811 spin_unlock(&dq_list_lock); 812 + synchronize_srcu(&dquot_srcu); 824 813 825 814 restart: 826 - synchronize_srcu(&dquot_srcu); 827 815 spin_lock(&dq_list_lock); 828 816 while (!list_empty(&rls_head)) { 829 817 dquot = list_first_entry(&rls_head, struct dquot, dq_free); 830 - /* Dquot got used again? */ 831 - if (atomic_read(&dquot->dq_count) > 1) { 832 - remove_free_dquot(dquot); 833 - atomic_dec(&dquot->dq_count); 834 - continue; 835 - } 818 + WARN_ON_ONCE(atomic_read(&dquot->dq_count)); 819 + /* 820 + * Note that DQ_RELEASING_B protects us from racing with 821 + * invalidate_dquots() calls so we are safe to work with the 822 + * dquot even after we drop dq_list_lock. 
823 + */ 836 824 if (dquot_dirty(dquot)) { 837 825 spin_unlock(&dq_list_lock); 838 826 /* Commit dquot before releasing */ ··· 846 834 } 847 835 /* Dquot is inactive and clean, now move it to free list */ 848 836 remove_free_dquot(dquot); 849 - atomic_dec(&dquot->dq_count); 850 837 put_dquot_last(dquot); 851 838 } 852 839 spin_unlock(&dq_list_lock); ··· 886 875 BUG_ON(!list_empty(&dquot->dq_free)); 887 876 #endif 888 877 put_releasing_dquots(dquot); 878 + atomic_dec(&dquot->dq_count); 889 879 spin_unlock(&dq_list_lock); 890 880 queue_delayed_work(system_unbound_wq, &quota_release_work, 1); 891 881 } ··· 975 963 dqstats_inc(DQST_LOOKUPS); 976 964 } 977 965 /* Wait for dq_lock - after this we know that either dquot_release() is 978 - * already finished or it will be canceled due to dq_count > 1 test */ 966 + * already finished or it will be canceled due to dq_count > 0 test */ 979 967 wait_on_dquot(dquot); 980 968 /* Read the dquot / allocate space in quota file */ 981 969 if (!dquot_active(dquot)) {
+3 -1
include/linux/quota.h
··· 285 285 #define DQ_FAKE_B 3 /* no limits only usage */ 286 286 #define DQ_READ_B 4 /* dquot was read into memory */ 287 287 #define DQ_ACTIVE_B 5 /* dquot is active (dquot_release not called) */ 288 - #define DQ_LASTSET_B 6 /* Following 6 bits (see QIF_) are reserved\ 288 + #define DQ_RELEASING_B 6 /* dquot is in releasing_dquots list waiting 289 + * to be cleaned up */ 290 + #define DQ_LASTSET_B 7 /* Following 6 bits (see QIF_) are reserved\ 289 291 * for the mask of entries set via SETQUOTA\ 290 292 * quotactl. They are set under dq_data_lock\ 291 293 * and the quota format handling dquot can\
+1 -1
include/linux/quotaops.h
··· 57 57 { 58 58 if (test_bit(DQ_MOD_B, &dquot->dq_flags)) 59 59 return true; 60 - if (atomic_read(&dquot->dq_count) > 1) 60 + if (atomic_read(&dquot->dq_count) > 0) 61 61 return true; 62 62 return false; 63 63 }