// SPDX-License-Identifier: GPL-2.0
/*
 * linux/drivers/staging/erofs/utils.c
 *
 * Copyright (C) 2018 HUAWEI, Inc.
 *             http://www.huawei.com/
 * Created by Gao Xiang <gaoxiang25@huawei.com>
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file COPYING in the main directory of the Linux
 * distribution for more details.
 */

#include "internal.h"
#include <linux/pagevec.h>

struct page *erofs_allocpage(struct list_head *pool, gfp_t gfp)
{
	struct page *page;

	if (!list_empty(pool)) {
		page = lru_to_page(pool);
		list_del(&page->lru);
	} else {
		page = alloc_pages(gfp | __GFP_NOFAIL, 0);
	}
	return page;
}

/* global shrink count (for all mounted EROFS instances) */
static atomic_long_t erofs_global_shrink_cnt;

#ifdef CONFIG_EROFS_FS_ZIP
#define __erofs_workgroup_get(grp)	atomic_inc(&(grp)->refcount)
#define __erofs_workgroup_put(grp)	atomic_dec(&(grp)->refcount)

static int erofs_workgroup_get(struct erofs_workgroup *grp)
{
	int o;

repeat:
	o = erofs_wait_on_workgroup_freezed(grp);
	if (unlikely(o <= 0))
		return -1;

	if (unlikely(atomic_cmpxchg(&grp->refcount, o, o + 1) != o))
		goto repeat;

	/* decrease the global shrink count bumped by erofs_workgroup_put */
	if (unlikely(o == 1))
		atomic_long_dec(&erofs_global_shrink_cnt);
	return 0;
}

struct erofs_workgroup *erofs_find_workgroup(struct super_block *sb,
					     pgoff_t index, bool *tag)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);
	struct erofs_workgroup *grp;

repeat:
	rcu_read_lock();
	grp = radix_tree_lookup(&sbi->workstn_tree, index);
	if (grp) {
		*tag = xa_pointer_tag(grp);
		grp = xa_untag_pointer(grp);

		if (erofs_workgroup_get(grp)) {
			/* prefer to relax the rcu read side */
			rcu_read_unlock();
			goto repeat;
		}

		DBG_BUGON(index != grp->index);
	}
	rcu_read_unlock();
	return grp;
}

int erofs_register_workgroup(struct super_block *sb,
			     struct erofs_workgroup *grp,
			     bool tag)
{
	struct erofs_sb_info *sbi;
	int err;

	/* grp shouldn't be broken or used before */
	if (unlikely(atomic_read(&grp->refcount) != 1)) {
		DBG_BUGON(1);
		return -EINVAL;
	}

	err = radix_tree_preload(GFP_NOFS);
	if (err)
		return err;

	sbi = EROFS_SB(sb);
	erofs_workstn_lock(sbi);

	grp = xa_tag_pointer(grp, tag);

	/*
	 * Bump up the reference count before making this workgroup
	 * visible to other users in order to avoid a potential UAF,
	 * since lookups are not serialized by erofs_workstn_lock.
	 */
	__erofs_workgroup_get(grp);

	err = radix_tree_insert(&sbi->workstn_tree,
				grp->index, grp);
	if (unlikely(err))
		/*
		 * it's safe to decrease here since the workgroup isn't
		 * visible and its refcount is >= 2 (cannot be frozen).
		 */
		__erofs_workgroup_put(grp);

	erofs_workstn_unlock(sbi);
	radix_tree_preload_end();
	return err;
}
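
/*
 * Illustrative usage sketch (helper names here are hypothetical): a caller
 * is expected to look up an existing workgroup first and only register a
 * freshly initialized one (refcount == 1) on a miss, roughly
 *
 *	grp = erofs_find_workgroup(sb, index, &tag);
 *	if (!grp) {
 *		grp = new_workgroup(index);
 *		if (erofs_register_workgroup(sb, grp, tag))
 *			free_workgroup_and_retry_lookup(grp);
 *	}
 *
 * erofs_find_workgroup() returns with an extra reference held, which the
 * caller drops later via erofs_workgroup_put().
 */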

static void __erofs_workgroup_free(struct erofs_workgroup *grp)
{
	atomic_long_dec(&erofs_global_shrink_cnt);
	erofs_workgroup_free_rcu(grp);
}

int erofs_workgroup_put(struct erofs_workgroup *grp)
{
	int count = atomic_dec_return(&grp->refcount);

	if (count == 1)
		atomic_long_inc(&erofs_global_shrink_cnt);
	else if (!count)
		__erofs_workgroup_free(grp);
	return count;
}

#ifdef EROFS_FS_HAS_MANAGED_CACHE
/* for the cache-managed case, customized reclaim paths exist */
static void erofs_workgroup_unfreeze_final(struct erofs_workgroup *grp)
{
	erofs_workgroup_unfreeze(grp, 0);
	__erofs_workgroup_free(grp);
}

static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
					   struct erofs_workgroup *grp,
					   bool cleanup)
{
	/*
	 * If managed cache is enabled, the refcount of a workgroup
	 * itself could be < 0 (frozen), so there is no guarantee
	 * that all refcounts are > 0 here.
	 */
	if (!erofs_workgroup_try_to_freeze(grp, 1))
		return false;

	/*
	 * Note that all cached pages should be unlinked before the
	 * workgroup is deleted from the radix tree.  Otherwise some
	 * cached pages of an orphaned old workgroup could still be
	 * linked after the new one becomes available.
	 */
	if (erofs_try_to_free_all_cached_pages(sbi, grp)) {
		erofs_workgroup_unfreeze(grp, 1);
		return false;
	}

	/*
	 * It is impossible for the deletion to fail after the workgroup
	 * is frozen; however, in order to catch unexpected race
	 * conditions early, add a DBG_BUGON to observe this.
	 */
	DBG_BUGON(xa_untag_pointer(radix_tree_delete(&sbi->workstn_tree,
						     grp->index)) != grp);

	/*
	 * If managed cache is enabled, the last refcount
	 * should indicate the related workstation.
	 */
	erofs_workgroup_unfreeze_final(grp);
	return true;
}

#else
/* for the nocache case, there is no customized reclaim path at all */
static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
					   struct erofs_workgroup *grp,
					   bool cleanup)
{
	int cnt = atomic_read(&grp->refcount);

	DBG_BUGON(cnt <= 0);
	DBG_BUGON(cleanup && cnt != 1);

	if (cnt > 1)
		return false;

	DBG_BUGON(xa_untag_pointer(radix_tree_delete(&sbi->workstn_tree,
						     grp->index)) != grp);

	/* (rarely) the workgroup could be grabbed again when freeing */
	erofs_workgroup_put(grp);
	return true;
}

#endif
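
/*
 * erofs_shrink_workstation() below walks the workstn radix tree in index
 * order and tries to release up to nr_shrink workgroups.  Judging from the
 * DBG_BUGON(cleanup && cnt != 1) check above, cleanup == true is presumably
 * meant for a teardown path where every remaining workgroup is expected to
 * be unused, while the memory shrinker path passes cleanup == false.
 */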

unsigned long erofs_shrink_workstation(struct erofs_sb_info *sbi,
				       unsigned long nr_shrink,
				       bool cleanup)
{
	pgoff_t first_index = 0;
	void *batch[PAGEVEC_SIZE];
	unsigned int freed = 0;

	int i, found;
repeat:
	erofs_workstn_lock(sbi);

	found = radix_tree_gang_lookup(&sbi->workstn_tree,
				       batch, first_index, PAGEVEC_SIZE);

	for (i = 0; i < found; ++i) {
		struct erofs_workgroup *grp = xa_untag_pointer(batch[i]);

		first_index = grp->index + 1;

		/* try to shrink each valid workgroup */
		if (!erofs_try_to_release_workgroup(sbi, grp, cleanup))
			continue;

		++freed;
		if (unlikely(!--nr_shrink))
			break;
	}
	erofs_workstn_unlock(sbi);

	if (i && nr_shrink)
		goto repeat;
	return freed;
}

#endif

/* protected by 'erofs_sb_list_lock' */
static unsigned int shrinker_run_no;

/* protects the mounted 'erofs_sb_list' */
static DEFINE_SPINLOCK(erofs_sb_list_lock);
static LIST_HEAD(erofs_sb_list);

void erofs_register_super(struct super_block *sb)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);

	mutex_init(&sbi->umount_mutex);

	spin_lock(&erofs_sb_list_lock);
	list_add(&sbi->list, &erofs_sb_list);
	spin_unlock(&erofs_sb_list_lock);
}

void erofs_unregister_super(struct super_block *sb)
{
	spin_lock(&erofs_sb_list_lock);
	list_del(&EROFS_SB(sb)->list);
	spin_unlock(&erofs_sb_list_lock);
}

static unsigned long erofs_shrink_count(struct shrinker *shrink,
					struct shrink_control *sc)
{
	return atomic_long_read(&erofs_global_shrink_cnt);
}

static unsigned long erofs_shrink_scan(struct shrinker *shrink,
				       struct shrink_control *sc)
{
	struct erofs_sb_info *sbi;
	struct list_head *p;

	unsigned long nr = sc->nr_to_scan;
	unsigned int run_no;
	unsigned long freed = 0;

	spin_lock(&erofs_sb_list_lock);
	do
		run_no = ++shrinker_run_no;
	while (run_no == 0);

	/* Iterate over all mounted superblocks and try to shrink them */
	p = erofs_sb_list.next;
	while (p != &erofs_sb_list) {
		sbi = list_entry(p, struct erofs_sb_info, list);

		/*
		 * We move the ones we do to the end of the list, so we stop
		 * when we see one we have already done.
		 */
		if (sbi->shrinker_run_no == run_no)
			break;

		if (!mutex_trylock(&sbi->umount_mutex)) {
			p = p->next;
			continue;
		}

		spin_unlock(&erofs_sb_list_lock);
		sbi->shrinker_run_no = run_no;

#ifdef CONFIG_EROFS_FS_ZIP
		freed += erofs_shrink_workstation(sbi, nr, false);
#endif

		spin_lock(&erofs_sb_list_lock);
		/* Get the next list element before we move this one */
		p = p->next;

		/*
		 * Move this one to the end of the list to provide some
		 * fairness.
		 */
		list_move_tail(&sbi->list, &erofs_sb_list);
		mutex_unlock(&sbi->umount_mutex);

		if (freed >= nr)
			break;
	}
	spin_unlock(&erofs_sb_list_lock);
	return freed;
}

struct shrinker erofs_shrinker_info = {
	.scan_objects = erofs_shrink_scan,
	.count_objects = erofs_shrink_count,
	.seeks = DEFAULT_SEEKS,
};
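
/*
 * Illustrative sketch: erofs_shrinker_info above is presumably registered
 * once with the MM shrinker core at module init time and torn down again
 * on exit, roughly
 *
 *	register_shrinker(&erofs_shrinker_info);
 *	...
 *	unregister_shrinker(&erofs_shrinker_info);
 *
 * while erofs_register_super()/erofs_unregister_super() add and remove
 * each mounted instance to/from erofs_sb_list so that erofs_shrink_scan()
 * can walk all live superblocks.
 */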