/* imported from linux v5.3 — drivers/staging/erofs/utils.c (353 lines, 8.3 kB) */
// SPDX-License-Identifier: GPL-2.0
/*
 * linux/drivers/staging/erofs/utils.c
 *
 * Copyright (C) 2018 HUAWEI, Inc.
 * http://www.huawei.com/
 * Created by Gao Xiang <gaoxiang25@huawei.com>
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file COPYING in the main directory of the Linux
 * distribution for more details.
 */

#include "internal.h"
#include <linux/pagevec.h>

/*
 * Grab a single page, preferring the caller-supplied page pool over the
 * page allocator.  The fallback allocation uses __GFP_NOFAIL, so this
 * never returns NULL.
 */
struct page *erofs_allocpage(struct list_head *pool, gfp_t gfp)
{
	struct page *page;

	if (!list_empty(pool)) {
		page = lru_to_page(pool);
		list_del(&page->lru);
	} else {
		page = alloc_pages(gfp | __GFP_NOFAIL, 0);
	}
	return page;
}

#if (EROFS_PCPUBUF_NR_PAGES > 0)
/* statically reserved per-CPU scratch buffers (EROFS_PCPUBUF_NR_PAGES each) */
static struct {
	u8 data[PAGE_SIZE * EROFS_PCPUBUF_NR_PAGES];
} ____cacheline_aligned_in_smp erofs_pcpubuf[NR_CPUS];

/*
 * Return the @pagenr'th page of the current CPU's scratch buffer.
 * NOTE: disables preemption and returns with it still disabled, so the
 * caller is responsible for re-enabling preemption when done.
 */
void *erofs_get_pcpubuf(unsigned int pagenr)
{
	preempt_disable();
	return &erofs_pcpubuf[smp_processor_id()].data[pagenr * PAGE_SIZE];
}
#endif

/* global shrink count (for all mounted EROFS instances) */
static atomic_long_t erofs_global_shrink_cnt;

#ifdef CONFIG_EROFS_FS_ZIP
#define __erofs_workgroup_get(grp)	atomic_inc(&(grp)->refcount)
#define __erofs_workgroup_put(grp)	atomic_dec(&(grp)->refcount)

/*
 * Try to take a reference on @grp.  Returns 0 on success and -1 if the
 * workgroup is being frozen/torn down (refcount observed <= 0).  The
 * cmpxchg retry loop makes the increment safe against concurrent
 * freezers without holding any lock.
 */
static int erofs_workgroup_get(struct erofs_workgroup *grp)
{
	int o;

repeat:
	o = erofs_wait_on_workgroup_freezed(grp);
	if (unlikely(o <= 0))
		return -1;

	/* someone else changed the refcount meanwhile — re-read and retry */
	if (unlikely(atomic_cmpxchg(&grp->refcount, o, o + 1) != o))
		goto repeat;

	/* decrease refcount paired by erofs_workgroup_put */
	if (unlikely(o == 1))
		atomic_long_dec(&erofs_global_shrink_cnt);
	return 0;
}

/*
 * Look up the workgroup at @index in the per-sb workstation radix tree
 * under RCU, take a reference on it and return it (NULL if absent).
 * The pointer tag stored in the tree is returned through @tag.  If the
 * reference grab races with a freeze, the RCU read side is dropped and
 * the whole lookup is retried.
 */
struct erofs_workgroup *erofs_find_workgroup(struct super_block *sb,
					     pgoff_t index, bool *tag)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);
	struct erofs_workgroup *grp;

repeat:
	rcu_read_lock();
	grp = radix_tree_lookup(&sbi->workstn_tree, index);
	if (grp) {
		*tag = xa_pointer_tag(grp);
		grp = xa_untag_pointer(grp);

		if (erofs_workgroup_get(grp)) {
			/* prefer to relax rcu read side */
			rcu_read_unlock();
			goto repeat;
		}

		DBG_BUGON(index != grp->index);
	}
	rcu_read_unlock();
	return grp;
}

/*
 * Insert a freshly created workgroup (refcount must be exactly 1) into
 * the workstation radix tree, tagged with @tag.  Returns 0 on success
 * or a negative errno (e.g. -EEXIST from radix_tree_insert, -EINVAL if
 * the refcount precondition does not hold).
 */
int erofs_register_workgroup(struct super_block *sb,
			     struct erofs_workgroup *grp,
			     bool tag)
{
	struct erofs_sb_info *sbi;
	int err;

	/* grp shouldn't be broken or used before */
	if (unlikely(atomic_read(&grp->refcount) != 1)) {
		DBG_BUGON(1);
		return -EINVAL;
	}

	err = radix_tree_preload(GFP_NOFS);
	if (err)
		return err;

	sbi = EROFS_SB(sb);
	erofs_workstn_lock(sbi);

	grp = xa_tag_pointer(grp, tag);

	/*
	 * Bump up reference count before making this workgroup
	 * visible to other users in order to avoid potential UAF
	 * without serialized by erofs_workstn_lock.
	 */
	__erofs_workgroup_get(grp);

	err = radix_tree_insert(&sbi->workstn_tree,
				grp->index, grp);
	if (unlikely(err))
		/*
		 * it's safe to decrease since the workgroup isn't visible
		 * and refcount >= 2 (cannot be freezed).
		 */
		__erofs_workgroup_put(grp);

	erofs_workstn_unlock(sbi);
	radix_tree_preload_end();
	return err;
}

/* release the workgroup via RCU and account it out of the shrink count */
static void __erofs_workgroup_free(struct erofs_workgroup *grp)
{
	atomic_long_dec(&erofs_global_shrink_cnt);
	erofs_workgroup_free_rcu(grp);
}

/*
 * Drop a reference on @grp.  When only the tree's own reference remains
 * (count == 1) the workgroup becomes shrinkable, so the global shrink
 * counter is bumped; when the count hits 0 the workgroup is freed.
 * Returns the post-decrement reference count.
 */
int erofs_workgroup_put(struct erofs_workgroup *grp)
{
	int count = atomic_dec_return(&grp->refcount);

	if (count == 1)
		atomic_long_inc(&erofs_global_shrink_cnt);
	else if (!count)
		__erofs_workgroup_free(grp);
	return count;
}

#ifdef EROFS_FS_HAS_MANAGED_CACHE
/* for cache-managed case, customized reclaim paths exist */
static void erofs_workgroup_unfreeze_final(struct erofs_workgroup *grp)
{
	/* unfreeze to refcount 0, then free — the workgroup is now dead */
	erofs_workgroup_unfreeze(grp, 0);
	__erofs_workgroup_free(grp);
}

/*
 * Attempt to reclaim one workgroup: freeze it, drop its cached pages,
 * unlink it from the radix tree and free it.  Returns true on success,
 * false if the workgroup is still in use (freeze failed or cached pages
 * could not all be released).  Called with the workstation lock held.
 */
static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
					   struct erofs_workgroup *grp,
					   bool cleanup)
{
	/*
	 * for managed cache enabled, the refcount of workgroups
	 * themselves could be < 0 (freezed). So there is no guarantee
	 * that all refcount > 0 if managed cache is enabled.
	 */
	if (!erofs_workgroup_try_to_freeze(grp, 1))
		return false;

	/*
	 * note that all cached pages should be unlinked
	 * before delete it from the radix tree.
	 * Otherwise some cached pages of an orphan old workgroup
	 * could be still linked after the new one is available.
	 */
	if (erofs_try_to_free_all_cached_pages(sbi, grp)) {
		erofs_workgroup_unfreeze(grp, 1);
		return false;
	}

	/*
	 * it is impossible to fail after the workgroup is freezed,
	 * however in order to avoid some race conditions, add a
	 * DBG_BUGON to observe this in advance.
	 */
	DBG_BUGON(xa_untag_pointer(radix_tree_delete(&sbi->workstn_tree,
						     grp->index)) != grp);

	/*
	 * if managed cache is enable, the last refcount
	 * should indicate the related workstation.
	 */
	erofs_workgroup_unfreeze_final(grp);
	return true;
}

#else
/* for nocache case, no customized reclaim path at all */
static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
					   struct erofs_workgroup *grp,
					   bool cleanup)
{
	int cnt = atomic_read(&grp->refcount);

	DBG_BUGON(cnt <= 0);
	DBG_BUGON(cleanup && cnt != 1);

	/* still referenced by someone else — not reclaimable now */
	if (cnt > 1)
		return false;

	DBG_BUGON(xa_untag_pointer(radix_tree_delete(&sbi->workstn_tree,
						     grp->index)) != grp);

	/* (rarely) could be grabbed again when freeing */
	erofs_workgroup_put(grp);
	return true;
}

#endif

/*
 * Walk the workstation radix tree in batches of PAGEVEC_SIZE and try to
 * release up to @nr_shrink workgroups.  Returns the number actually
 * freed.  Takes and drops the workstation lock around each batch so the
 * lock is not held across the whole scan.
 */
unsigned long erofs_shrink_workstation(struct erofs_sb_info *sbi,
				       unsigned long nr_shrink,
				       bool cleanup)
{
	pgoff_t first_index = 0;
	void *batch[PAGEVEC_SIZE];
	unsigned int freed = 0;

	int i, found;
repeat:
	erofs_workstn_lock(sbi);

	found = radix_tree_gang_lookup(&sbi->workstn_tree,
				       batch, first_index, PAGEVEC_SIZE);

	for (i = 0; i < found; ++i) {
		struct erofs_workgroup *grp = xa_untag_pointer(batch[i]);

		/* resume the next scan after this entry */
		first_index = grp->index + 1;

		/* try to shrink each valid workgroup */
		if (!erofs_try_to_release_workgroup(sbi, grp, cleanup))
			continue;

		++freed;
		if (unlikely(!--nr_shrink))
			break;
	}
	erofs_workstn_unlock(sbi);

	/* keep going while the last batch was non-empty and quota remains */
	if (i && nr_shrink)
		goto repeat;
	return freed;
}

#endif

/* protected by 'erofs_sb_list_lock' */
static unsigned int shrinker_run_no;

/* protects the mounted 'erofs_sb_list' */
static DEFINE_SPINLOCK(erofs_sb_list_lock);
static LIST_HEAD(erofs_sb_list);

/* add a newly mounted superblock to the global shrinkable list */
void erofs_register_super(struct super_block *sb)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);

	mutex_init(&sbi->umount_mutex);

	spin_lock(&erofs_sb_list_lock);
	list_add(&sbi->list, &erofs_sb_list);
	spin_unlock(&erofs_sb_list_lock);
}

/* remove a superblock from the global shrinkable list at umount */
void erofs_unregister_super(struct super_block *sb)
{
	spin_lock(&erofs_sb_list_lock);
	list_del(&EROFS_SB(sb)->list);
	spin_unlock(&erofs_sb_list_lock);
}

/* shrinker callback: report how many objects are currently reclaimable */
static unsigned long erofs_shrink_count(struct shrinker *shrink,
					struct shrink_control *sc)
{
	return atomic_long_read(&erofs_global_shrink_cnt);
}

/*
 * Shrinker callback: scan every mounted EROFS instance and reclaim idle
 * workgroups, up to sc->nr_to_scan in total.  Superblocks whose umount
 * mutex cannot be trylocked (i.e. currently unmounting) are skipped.
 */
static unsigned long erofs_shrink_scan(struct shrinker *shrink,
				       struct shrink_control *sc)
{
	struct erofs_sb_info *sbi;
	struct list_head *p;

	unsigned long nr = sc->nr_to_scan;
	unsigned int run_no;
	unsigned long freed = 0;

	spin_lock(&erofs_sb_list_lock);
	/* pick a fresh non-zero run id; 0 is skipped on counter wrap */
	do
		run_no = ++shrinker_run_no;
	while (run_no == 0);

	/* Iterate over all mounted superblocks and try to shrink them */
	p = erofs_sb_list.next;
	while (p != &erofs_sb_list) {
		sbi = list_entry(p, struct erofs_sb_info, list);

		/*
		 * We move the ones we do to the end of the list, so we stop
		 * when we see one we have already done.
		 */
		if (sbi->shrinker_run_no == run_no)
			break;

		if (!mutex_trylock(&sbi->umount_mutex)) {
			p = p->next;
			continue;
		}

		/* drop the list lock while shrinking this instance */
		spin_unlock(&erofs_sb_list_lock);
		sbi->shrinker_run_no = run_no;

#ifdef CONFIG_EROFS_FS_ZIP
		freed += erofs_shrink_workstation(sbi, nr, false);
#endif

		spin_lock(&erofs_sb_list_lock);
		/* Get the next list element before we move this one */
		p = p->next;

		/*
		 * Move this one to the end of the list to provide some
		 * fairness.
		 */
		list_move_tail(&sbi->list, &erofs_sb_list);
		mutex_unlock(&sbi->umount_mutex);

		if (freed >= nr)
			break;
	}
	spin_unlock(&erofs_sb_list_lock);
	return freed;
}

struct shrinker erofs_shrinker_info = {
	.scan_objects = erofs_shrink_scan,
	.count_objects = erofs_shrink_count,
	.seeks = DEFAULT_SEEKS,
};