// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2018 HUAWEI, Inc.
 *             http://www.huawei.com/
 * Created by Gao Xiang <gaoxiang25@huawei.com>
 */
#include "internal.h"
#include <linux/pagevec.h>

/*
 * Reuse a page from the given pool if one is available, otherwise
 * allocate a fresh page with the requested gfp flags.
 */
struct page *erofs_allocpage(struct list_head *pool, gfp_t gfp)
{
	struct page *page;

	if (!list_empty(pool)) {
		page = lru_to_page(pool);
		DBG_BUGON(page_ref_count(page) != 1);
		list_del(&page->lru);
	} else {
		page = alloc_page(gfp);
	}
	return page;
}

#if (EROFS_PCPUBUF_NR_PAGES > 0)
static struct {
	u8 data[PAGE_SIZE * EROFS_PCPUBUF_NR_PAGES];
} ____cacheline_aligned_in_smp erofs_pcpubuf[NR_CPUS];

/*
 * Return the pagenr'th page of the current CPU's static buffer with
 * preemption disabled; the caller must re-enable preemption once it
 * is done with the buffer.
 */
void *erofs_get_pcpubuf(unsigned int pagenr)
{
	preempt_disable();
	return &erofs_pcpubuf[smp_processor_id()].data[pagenr * PAGE_SIZE];
}
#endif

#ifdef CONFIG_EROFS_FS_ZIP
/* global shrink count (for all mounted EROFS instances) */
static atomic_long_t erofs_global_shrink_cnt;

/* grab a reference; returns -1 if the workgroup is already being freed */
static int erofs_workgroup_get(struct erofs_workgroup *grp)
{
	int o;

repeat:
	o = erofs_wait_on_workgroup_freezed(grp);
	if (o <= 0)
		return -1;

	if (atomic_cmpxchg(&grp->refcount, o, o + 1) != o)
		goto repeat;

	/* decrease the global shrink count; paired with erofs_workgroup_put */
	if (o == 1)
		atomic_long_dec(&erofs_global_shrink_cnt);
	return 0;
}

struct erofs_workgroup *erofs_find_workgroup(struct super_block *sb,
					     pgoff_t index)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);
	struct erofs_workgroup *grp;

repeat:
	rcu_read_lock();
	grp = xa_load(&sbi->managed_pslots, index);
	if (grp) {
		if (erofs_workgroup_get(grp)) {
			/* prefer to relax the RCU read side before retrying */
			rcu_read_unlock();
			goto repeat;
		}

		DBG_BUGON(index != grp->index);
	}
	rcu_read_unlock();
	return grp;
}

struct erofs_workgroup *erofs_insert_workgroup(struct super_block *sb,
					       struct erofs_workgroup *grp)
{
	struct erofs_sb_info *const sbi = EROFS_SB(sb);
	struct erofs_workgroup *pre;

	/*
	 * Bump up the reference count before making this workgroup
	 * visible to others in the XArray, since lookups are not
	 * serialized by xa_lock and could otherwise hit a use-after-free.
	 */
	atomic_inc(&grp->refcount);

repeat:
	xa_lock(&sbi->managed_pslots);
	pre = __xa_cmpxchg(&sbi->managed_pslots, grp->index,
			   NULL, grp, GFP_NOFS);
	if (pre) {
		if (xa_is_err(pre)) {
			pre = ERR_PTR(xa_err(pre));
		} else if (erofs_workgroup_get(pre)) {
			/* try to legitimize the current in-tree one */
			xa_unlock(&sbi->managed_pslots);
			cond_resched();
			goto repeat;
		}
		atomic_dec(&grp->refcount);
		grp = pre;
	}
	xa_unlock(&sbi->managed_pslots);
	return grp;
}

static void __erofs_workgroup_free(struct erofs_workgroup *grp)
{
	atomic_long_dec(&erofs_global_shrink_cnt);
	erofs_workgroup_free_rcu(grp);
}

/*
 * Drop a reference.  Once only the XArray reference is left, the workgroup
 * becomes reclaimable and the global shrink count is bumped; the workgroup
 * is freed when the last reference goes away.
 */
int erofs_workgroup_put(struct erofs_workgroup *grp)
{
	int count = atomic_dec_return(&grp->refcount);

	if (count == 1)
		atomic_long_inc(&erofs_global_shrink_cnt);
	else if (!count)
		__erofs_workgroup_free(grp);
	return count;
}

static void erofs_workgroup_unfreeze_final(struct erofs_workgroup *grp)
{
	erofs_workgroup_unfreeze(grp, 0);
	__erofs_workgroup_free(grp);
}

static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
					   struct erofs_workgroup *grp)
{
	/*
	 * If managed cache is on, the refcount of a workgroup
	 * itself could be < 0 (frozen).
	 * In other words, there is no guarantee that all refcounts are > 0.
	 */
	if (!erofs_workgroup_try_to_freeze(grp, 1))
		return false;

	/*
	 * Note that all cached pages should be detached before the
	 * workgroup is deleted from the XArray.  Otherwise some cached
	 * pages could still be attached to the orphaned old workgroup
	 * when the new one becomes available in the tree.
	 */
	if (erofs_try_to_free_all_cached_pages(sbi, grp)) {
		erofs_workgroup_unfreeze(grp, 1);
		return false;
	}

	/*
	 * It's impossible to fail after the workgroup is frozen;
	 * however, in order to catch unexpected race conditions,
	 * add a DBG_BUGON to observe this in advance.
	 */
	DBG_BUGON(xa_erase(&sbi->managed_pslots, grp->index) != grp);

	/*
	 * If managed cache is on, the last refcount should indicate
	 * the related workstation.
	 */
	erofs_workgroup_unfreeze_final(grp);
	return true;
}

static unsigned long erofs_shrink_workstation(struct erofs_sb_info *sbi,
					      unsigned long nr_shrink)
{
	struct erofs_workgroup *grp;
	unsigned int freed = 0;
	unsigned long index;

	xa_for_each(&sbi->managed_pslots, index, grp) {
		/* try to shrink each valid workgroup */
		if (!erofs_try_to_release_workgroup(sbi, grp))
			continue;

		++freed;
		if (!--nr_shrink)
			break;
	}
	return freed;
}

/* protected by 'erofs_sb_list_lock' */
static unsigned int shrinker_run_no;

/* protects the mounted 'erofs_sb_list' */
static DEFINE_SPINLOCK(erofs_sb_list_lock);
static LIST_HEAD(erofs_sb_list);

void erofs_shrinker_register(struct super_block *sb)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);

	mutex_init(&sbi->umount_mutex);

	spin_lock(&erofs_sb_list_lock);
	list_add(&sbi->list, &erofs_sb_list);
	spin_unlock(&erofs_sb_list_lock);
}

void erofs_shrinker_unregister(struct super_block *sb)
{
	struct erofs_sb_info *const sbi = EROFS_SB(sb);

	mutex_lock(&sbi->umount_mutex);
	/* clean up all remaining workgroups in memory */
	erofs_shrink_workstation(sbi, ~0UL);

	spin_lock(&erofs_sb_list_lock);
	list_del(&sbi->list);
	spin_unlock(&erofs_sb_list_lock);
	mutex_unlock(&sbi->umount_mutex);
}

static unsigned long erofs_shrink_count(struct shrinker *shrink,
					struct shrink_control *sc)
{
	return atomic_long_read(&erofs_global_shrink_cnt);
}

static unsigned long erofs_shrink_scan(struct shrinker *shrink,
				       struct shrink_control *sc)
{
	struct erofs_sb_info *sbi;
	struct list_head *p;

	unsigned long nr = sc->nr_to_scan;
	unsigned int run_no;
	unsigned long freed = 0;

	spin_lock(&erofs_sb_list_lock);
	do {
		run_no = ++shrinker_run_no;
	} while (run_no == 0);

	/* Iterate over all mounted superblocks and try to shrink them */
	p = erofs_sb_list.next;
	while (p != &erofs_sb_list) {
		sbi = list_entry(p, struct erofs_sb_info, list);

		/*
		 * We move the ones we do to the end of the list, so we stop
		 * when we see one we have already done.
		 */
		if (sbi->shrinker_run_no == run_no)
			break;

		if (!mutex_trylock(&sbi->umount_mutex)) {
			p = p->next;
			continue;
		}

		spin_unlock(&erofs_sb_list_lock);
		sbi->shrinker_run_no = run_no;

		freed += erofs_shrink_workstation(sbi, nr - freed);

		spin_lock(&erofs_sb_list_lock);
		/* Get the next list element before we move this one */
		p = p->next;

		/*
		 * Move this one to the end of the list to provide some
		 * fairness.
		 */
		list_move_tail(&sbi->list, &erofs_sb_list);
		mutex_unlock(&sbi->umount_mutex);

		if (freed >= nr)
			break;
	}
	spin_unlock(&erofs_sb_list_lock);
	return freed;
}

static struct shrinker erofs_shrinker_info = {
	.scan_objects = erofs_shrink_scan,
	.count_objects = erofs_shrink_count,
	.seeks = DEFAULT_SEEKS,
};

int __init erofs_init_shrinker(void)
{
	return register_shrinker(&erofs_shrinker_info);
}

void erofs_exit_shrinker(void)
{
	unregister_shrinker(&erofs_shrinker_info);
}
#endif	/* CONFIG_EROFS_FS_ZIP */
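
/*
 * A minimal usage sketch of how a caller such as the decompression
 * frontend would typically pair the workgroup helpers above.  The names
 * alloc_new_workgroup() and free_workgroup() are hypothetical placeholders
 * for caller-side code, not functions defined in this file.
 *
 *	struct erofs_workgroup *grp, *newgrp;
 *
 *	grp = erofs_find_workgroup(sb, index);
 *	if (!grp) {
 *		newgrp = alloc_new_workgroup(index);
 *		grp = erofs_insert_workgroup(sb, newgrp);
 *		if (grp != newgrp)
 *			free_workgroup(newgrp);
 *		if (IS_ERR(grp))
 *			return PTR_ERR(grp);
 *	}
 *
 * Both erofs_find_workgroup() and erofs_insert_workgroup() return with a
 * reference held, so the caller drops it with erofs_workgroup_put(grp)
 * once it is done with the workgroup.
 */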