// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2018 HUAWEI, Inc.
 *             http://www.huawei.com/
 * Created by Gao Xiang <gaoxiang25@huawei.com>
 */
#include "internal.h"
#include <linux/pagevec.h>

struct page *erofs_allocpage(struct list_head *pool, gfp_t gfp)
{
	struct page *page;

	if (!list_empty(pool)) {
		page = lru_to_page(pool);
		DBG_BUGON(page_ref_count(page) != 1);
		list_del(&page->lru);
	} else {
		page = alloc_page(gfp);
	}
	return page;
}

#if (EROFS_PCPUBUF_NR_PAGES > 0)
static struct {
	u8 data[PAGE_SIZE * EROFS_PCPUBUF_NR_PAGES];
} ____cacheline_aligned_in_smp erofs_pcpubuf[NR_CPUS];

void *erofs_get_pcpubuf(unsigned int pagenr)
{
	preempt_disable();
	return &erofs_pcpubuf[smp_processor_id()].data[pagenr * PAGE_SIZE];
}
#endif
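/*
 * Note: erofs_get_pcpubuf() returns with preemption disabled so that the
 * caller cannot migrate away from the CPU owning the buffer; callers must
 * not sleep while using it and are expected to re-enable preemption once
 * done (presumably via the matching put helper declared in internal.h).
 */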
#ifdef CONFIG_EROFS_FS_ZIP
/* global shrink count (for all mounted EROFS instances) */
static atomic_long_t erofs_global_shrink_cnt;

#define __erofs_workgroup_get(grp)	atomic_inc(&(grp)->refcount)
#define __erofs_workgroup_put(grp)	atomic_dec(&(grp)->refcount)

static int erofs_workgroup_get(struct erofs_workgroup *grp)
{
	int o;

repeat:
	o = erofs_wait_on_workgroup_freezed(grp);
	if (o <= 0)
		return -1;

	if (atomic_cmpxchg(&grp->refcount, o, o + 1) != o)
		goto repeat;

	/* decrease the global shrink count bumped by erofs_workgroup_put */
	if (o == 1)
		atomic_long_dec(&erofs_global_shrink_cnt);
	return 0;
}

struct erofs_workgroup *erofs_find_workgroup(struct super_block *sb,
					     pgoff_t index)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);
	struct erofs_workgroup *grp;

repeat:
	rcu_read_lock();
	grp = radix_tree_lookup(&sbi->workstn_tree, index);
	if (grp) {
		if (erofs_workgroup_get(grp)) {
			/* prefer to relax rcu read side */
			rcu_read_unlock();
			goto repeat;
		}

		DBG_BUGON(index != grp->index);
	}
	rcu_read_unlock();
	return grp;
}

int erofs_register_workgroup(struct super_block *sb,
			     struct erofs_workgroup *grp)
{
	struct erofs_sb_info *sbi;
	int err;

	/* grp shouldn't be broken or used before */
	if (atomic_read(&grp->refcount) != 1) {
		DBG_BUGON(1);
		return -EINVAL;
	}

	err = radix_tree_preload(GFP_NOFS);
	if (err)
		return err;

	sbi = EROFS_SB(sb);
	xa_lock(&sbi->workstn_tree);

	/*
	 * Bump up the reference count before making this workgroup
	 * visible to other users in order to avoid potential UAF with
	 * lookups that are not serialized by workstn_lock.
	 */
	__erofs_workgroup_get(grp);

	err = radix_tree_insert(&sbi->workstn_tree, grp->index, grp);
	if (err)
		/*
		 * it's safe to decrease here since the workgroup isn't
		 * visible and its refcount >= 2 (it cannot be frozen).
		 */
		__erofs_workgroup_put(grp);

	xa_unlock(&sbi->workstn_tree);
	radix_tree_preload_end();
	return err;
}

static void __erofs_workgroup_free(struct erofs_workgroup *grp)
{
	atomic_long_dec(&erofs_global_shrink_cnt);
	erofs_workgroup_free_rcu(grp);
}

int erofs_workgroup_put(struct erofs_workgroup *grp)
{
	int count = atomic_dec_return(&grp->refcount);

	if (count == 1)
		atomic_long_inc(&erofs_global_shrink_cnt);
	else if (!count)
		__erofs_workgroup_free(grp);
	return count;
}
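/*
 * Reference-count states as used by the helpers above:
 *  - refcount > 1:  the workgroup has active users besides the
 *                   workstation tree itself;
 *  - refcount == 1: only the tree still holds a reference, so the group
 *                   is accounted in erofs_global_shrink_cnt and becomes
 *                   a candidate for the shrinker;
 *  - refcount < 0:  the group has been frozen by
 *                   erofs_workgroup_try_to_freeze() while it is torn
 *                   down; concurrent erofs_workgroup_get() callers back
 *                   off and retry their lookup.
 */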
static void erofs_workgroup_unfreeze_final(struct erofs_workgroup *grp)
{
	erofs_workgroup_unfreeze(grp, 0);
	__erofs_workgroup_free(grp);
}

static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
					   struct erofs_workgroup *grp)
{
	/*
	 * If the managed cache is on, the refcount of a workgroup
	 * itself could be < 0 (frozen). In other words, there is
	 * no guarantee that all refcounts are > 0.
	 */
	if (!erofs_workgroup_try_to_freeze(grp, 1))
		return false;

	/*
	 * Note that all cached pages should be detached before the
	 * workgroup is deleted from the radix tree. Otherwise some
	 * cached pages could still be attached to the orphaned old
	 * workgroup when the new one becomes available in the tree.
	 */
	if (erofs_try_to_free_all_cached_pages(sbi, grp)) {
		erofs_workgroup_unfreeze(grp, 1);
		return false;
	}

	/*
	 * Deleting from the radix tree cannot fail after the workgroup
	 * is frozen; however, in order to catch race conditions, add a
	 * DBG_BUGON to observe this in advance.
	 */
	DBG_BUGON(radix_tree_delete(&sbi->workstn_tree, grp->index) != grp);

	/*
	 * If the managed cache is on, the last refcount should indicate
	 * the related workstation.
	 */
	erofs_workgroup_unfreeze_final(grp);
	return true;
}

static unsigned long erofs_shrink_workstation(struct erofs_sb_info *sbi,
					      unsigned long nr_shrink)
{
	pgoff_t first_index = 0;
	void *batch[PAGEVEC_SIZE];
	unsigned int freed = 0;

	int i, found;
repeat:
	xa_lock(&sbi->workstn_tree);

	found = radix_tree_gang_lookup(&sbi->workstn_tree,
				       batch, first_index, PAGEVEC_SIZE);

	for (i = 0; i < found; ++i) {
		struct erofs_workgroup *grp = batch[i];

		first_index = grp->index + 1;

		/* try to shrink each valid workgroup */
		if (!erofs_try_to_release_workgroup(sbi, grp))
			continue;

		++freed;
		if (!--nr_shrink)
			break;
	}
	xa_unlock(&sbi->workstn_tree);

	if (i && nr_shrink)
		goto repeat;
	return freed;
}
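/*
 * erofs_shrink_workstation() has two callers in this file:
 * erofs_shrink_scan() below, which asks each mounted filesystem to
 * release up to sc->nr_to_scan workgroups under memory pressure, and
 * erofs_shrinker_unregister(), which passes ~0UL at umount time to drop
 * every remaining workgroup of that superblock.
 */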
/* protected by 'erofs_sb_list_lock' */
static unsigned int shrinker_run_no;

/* protects the mounted 'erofs_sb_list' */
static DEFINE_SPINLOCK(erofs_sb_list_lock);
static LIST_HEAD(erofs_sb_list);

void erofs_shrinker_register(struct super_block *sb)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);

	mutex_init(&sbi->umount_mutex);

	spin_lock(&erofs_sb_list_lock);
	list_add(&sbi->list, &erofs_sb_list);
	spin_unlock(&erofs_sb_list_lock);
}

void erofs_shrinker_unregister(struct super_block *sb)
{
	struct erofs_sb_info *const sbi = EROFS_SB(sb);

	mutex_lock(&sbi->umount_mutex);
	/* clean up all remaining workgroups in memory */
	erofs_shrink_workstation(sbi, ~0UL);

	spin_lock(&erofs_sb_list_lock);
	list_del(&sbi->list);
	spin_unlock(&erofs_sb_list_lock);
	mutex_unlock(&sbi->umount_mutex);
}

static unsigned long erofs_shrink_count(struct shrinker *shrink,
					struct shrink_control *sc)
{
	return atomic_long_read(&erofs_global_shrink_cnt);
}

static unsigned long erofs_shrink_scan(struct shrinker *shrink,
				       struct shrink_control *sc)
{
	struct erofs_sb_info *sbi;
	struct list_head *p;

	unsigned long nr = sc->nr_to_scan;
	unsigned int run_no;
	unsigned long freed = 0;

	spin_lock(&erofs_sb_list_lock);
	do {
		run_no = ++shrinker_run_no;
	} while (run_no == 0);

	/* Iterate over all mounted superblocks and try to shrink them */
	p = erofs_sb_list.next;
	while (p != &erofs_sb_list) {
		sbi = list_entry(p, struct erofs_sb_info, list);

		/*
		 * We move the ones we do to the end of the list, so we stop
		 * when we see one we have already done.
		 */
		if (sbi->shrinker_run_no == run_no)
			break;

		if (!mutex_trylock(&sbi->umount_mutex)) {
			p = p->next;
			continue;
		}

		spin_unlock(&erofs_sb_list_lock);
		sbi->shrinker_run_no = run_no;

		freed += erofs_shrink_workstation(sbi, nr);

		spin_lock(&erofs_sb_list_lock);
		/* Get the next list element before we move this one */
		p = p->next;

		/*
		 * Move this one to the end of the list to provide some
		 * fairness.
		 */
		list_move_tail(&sbi->list, &erofs_sb_list);
		mutex_unlock(&sbi->umount_mutex);

		if (freed >= nr)
			break;
	}
	spin_unlock(&erofs_sb_list_lock);
	return freed;
}

static struct shrinker erofs_shrinker_info = {
	.scan_objects = erofs_shrink_scan,
	.count_objects = erofs_shrink_count,
	.seeks = DEFAULT_SEEKS,
};

int __init erofs_init_shrinker(void)
{
	return register_shrinker(&erofs_shrinker_info);
}

void erofs_exit_shrinker(void)
{
	unregister_shrinker(&erofs_shrinker_info);
}
#endif	/* !CONFIG_EROFS_FS_ZIP */