// SPDX-License-Identifier: GPL-2.0
/*
 * linux/drivers/staging/erofs/utils.c
 *
 * Copyright (C) 2018 HUAWEI, Inc.
 *             http://www.huawei.com/
 * Created by Gao Xiang <gaoxiang25@huawei.com>
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License. See the file COPYING in the main directory of the Linux
 * distribution for more details.
 */

#include "internal.h"
#include <linux/pagevec.h>

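/*
 * Take a page from the caller-supplied free page pool when one is
 * available; otherwise fall back to the page allocator.  __GFP_NOFAIL
 * guarantees the allocation never fails, so callers need no NULL check.
 */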
struct page *erofs_allocpage(struct list_head *pool, gfp_t gfp)
{
	struct page *page;

	if (!list_empty(pool)) {
		page = lru_to_page(pool);
		list_del(&page->lru);
	} else {
		page = alloc_pages(gfp | __GFP_NOFAIL, 0);
	}
	return page;
}

/* global shrink count (for all mounted EROFS instances) */
static atomic_long_t erofs_global_shrink_cnt;

#ifdef CONFIG_EROFS_FS_ZIP
#define __erofs_workgroup_get(grp)	atomic_inc(&(grp)->refcount)
#define __erofs_workgroup_put(grp)	atomic_dec(&(grp)->refcount)

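/*
 * Lockless reference grab: wait until any concurrent freezer has released
 * the workgroup, then bump the refcount with cmpxchg, retrying on races.
 * Returns -1 if the observed refcount is <= 0, i.e. the workgroup is
 * already on its way out and must not be reused.
 */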
static int erofs_workgroup_get(struct erofs_workgroup *grp)
{
	int o;

repeat:
	o = erofs_wait_on_workgroup_freezed(grp);
	if (unlikely(o <= 0))
		return -1;

	if (unlikely(atomic_cmpxchg(&grp->refcount, o, o + 1) != o))
		goto repeat;

	/*
	 * decrease the global shrink count bumped by
	 * erofs_workgroup_put when the refcount dropped to 1
	 */
	if (unlikely(o == 1))
		atomic_long_dec(&erofs_global_shrink_cnt);
	return 0;
}

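/*
 * Look up a workgroup by index under RCU and take a reference on it.
 * If the reference cannot be grabbed (the workgroup is being freed),
 * leave the RCU read section and retry the lookup.
 */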
struct erofs_workgroup *erofs_find_workgroup(struct super_block *sb,
					     pgoff_t index, bool *tag)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);
	struct erofs_workgroup *grp;

repeat:
	rcu_read_lock();
	grp = radix_tree_lookup(&sbi->workstn_tree, index);
	if (grp) {
		*tag = xa_pointer_tag(grp);
		grp = xa_untag_pointer(grp);

		if (erofs_workgroup_get(grp)) {
			/*
			 * the workgroup is dying; relax the RCU
			 * read side and retry the lookup
			 */
			rcu_read_unlock();
			goto repeat;
		}

		DBG_BUGON(index != grp->index);
	}
	rcu_read_unlock();
	return grp;
}

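/*
 * Insert a freshly created workgroup into the per-sb workstation radix
 * tree.  An extra reference is taken before the workgroup becomes visible
 * so that concurrent lookups cannot free it underneath us.
 */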
int erofs_register_workgroup(struct super_block *sb,
			     struct erofs_workgroup *grp,
			     bool tag)
{
	struct erofs_sb_info *sbi;
	int err;

	/* grp shouldn't be broken or used before being registered */
	if (unlikely(atomic_read(&grp->refcount) != 1)) {
		DBG_BUGON(1);
		return -EINVAL;
	}

	err = radix_tree_preload(GFP_NOFS);
	if (err)
		return err;

	sbi = EROFS_SB(sb);
	erofs_workstn_lock(sbi);

	grp = xa_tag_pointer(grp, tag);

	/*
	 * Bump up the reference count before making this workgroup
	 * visible to other users in order to avoid potential UAF
	 * when it is not serialized by erofs_workstn_lock.
	 */
	__erofs_workgroup_get(grp);

	err = radix_tree_insert(&sbi->workstn_tree,
				grp->index, grp);
	if (unlikely(err))
		/*
		 * it's safe to decrease here since the workgroup isn't
		 * visible yet and its refcount is >= 2 (cannot be frozen).
		 */
		__erofs_workgroup_put(grp);

	erofs_workstn_unlock(sbi);
	radix_tree_preload_end();
	return err;
}

static void __erofs_workgroup_free(struct erofs_workgroup *grp)
{
	atomic_long_dec(&erofs_global_shrink_cnt);
	erofs_workgroup_free_rcu(grp);
}

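/*
 * Drop one reference.  When the refcount reaches 1 only the workstation
 * radix tree still holds the workgroup, so it becomes reclaimable and the
 * global shrink count is bumped; at 0 the workgroup is freed via RCU.
 */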
int erofs_workgroup_put(struct erofs_workgroup *grp)
{
	int count = atomic_dec_return(&grp->refcount);

	if (count == 1)
		atomic_long_inc(&erofs_global_shrink_cnt);
	else if (!count)
		__erofs_workgroup_free(grp);
	return count;
}

#ifdef EROFS_FS_HAS_MANAGED_CACHE
/* for the cache-managed case, customized reclaim paths exist */
static void erofs_workgroup_unfreeze_final(struct erofs_workgroup *grp)
{
	erofs_workgroup_unfreeze(grp, 0);
	__erofs_workgroup_free(grp);
}

static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
					   struct erofs_workgroup *grp,
					   bool cleanup)
{
	/*
	 * With managed cache enabled, workgroup refcounts can be < 0
	 * (frozen), so there is no guarantee that every refcount is > 0
	 * in this configuration.
	 */
	if (!erofs_workgroup_try_to_freeze(grp, 1))
		return false;

	/*
	 * note that all cached pages should be unlinked before the
	 * workgroup is deleted from the radix tree.  Otherwise some
	 * cached pages of an orphaned old workgroup could still be
	 * linked after the new one becomes available.
	 */
	if (erofs_try_to_free_all_cached_pages(sbi, grp)) {
		erofs_workgroup_unfreeze(grp, 1);
		return false;
	}

	/*
	 * the delete cannot fail after the workgroup is frozen, but add
	 * a DBG_BUGON to observe any race condition in advance.
	 */
	DBG_BUGON(xa_untag_pointer(radix_tree_delete(&sbi->workstn_tree,
						     grp->index)) != grp);

	/*
	 * if managed cache is enabled, the last refcount
	 * should indicate the related workstation.
	 */
	erofs_workgroup_unfreeze_final(grp);
	return true;
}

#else
/* for nocache case, no customized reclaim path at all */
static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
					   struct erofs_workgroup *grp,
					   bool cleanup)
{
	int cnt = atomic_read(&grp->refcount);

	DBG_BUGON(cnt <= 0);
	DBG_BUGON(cleanup && cnt != 1);

	if (cnt > 1)
		return false;

	DBG_BUGON(xa_untag_pointer(radix_tree_delete(&sbi->workstn_tree,
						     grp->index)) != grp);

	/* (rarely) could be grabbed again when freeing */
	erofs_workgroup_put(grp);
	return true;
}

#endif

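/*
 * Scan the workstation radix tree in PAGEVEC_SIZE-sized batches and try to
 * release every workgroup found, until nr_shrink workgroups have been freed
 * or the whole tree has been walked.
 */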
unsigned long erofs_shrink_workstation(struct erofs_sb_info *sbi,
				       unsigned long nr_shrink,
				       bool cleanup)
{
	pgoff_t first_index = 0;
	void *batch[PAGEVEC_SIZE];
	unsigned int freed = 0;

	int i, found;
repeat:
	erofs_workstn_lock(sbi);

	found = radix_tree_gang_lookup(&sbi->workstn_tree,
				       batch, first_index, PAGEVEC_SIZE);

	for (i = 0; i < found; ++i) {
		struct erofs_workgroup *grp = xa_untag_pointer(batch[i]);

		first_index = grp->index + 1;

		/* try to shrink each valid workgroup */
		if (!erofs_try_to_release_workgroup(sbi, grp, cleanup))
			continue;

		++freed;
		if (unlikely(!--nr_shrink))
			break;
	}
	erofs_workstn_unlock(sbi);

	if (i && nr_shrink)
		goto repeat;
	return freed;
}

#endif

/* protected by 'erofs_sb_list_lock' */
static unsigned int shrinker_run_no;

/* protects the mounted 'erofs_sb_list' */
static DEFINE_SPINLOCK(erofs_sb_list_lock);
static LIST_HEAD(erofs_sb_list);

void erofs_register_super(struct super_block *sb)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);

	mutex_init(&sbi->umount_mutex);

	spin_lock(&erofs_sb_list_lock);
	list_add(&sbi->list, &erofs_sb_list);
	spin_unlock(&erofs_sb_list_lock);
}

void erofs_unregister_super(struct super_block *sb)
{
	spin_lock(&erofs_sb_list_lock);
	list_del(&EROFS_SB(sb)->list);
	spin_unlock(&erofs_sb_list_lock);
}

static unsigned long erofs_shrink_count(struct shrinker *shrink,
					struct shrink_control *sc)
{
	return atomic_long_read(&erofs_global_shrink_cnt);
}

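/*
 * Round-robin over all mounted erofs instances: each superblock that has
 * been shrunk is stamped with the current run number and moved to the tail
 * of erofs_sb_list, so repeated scans spread reclaim work fairly.
 */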
static unsigned long erofs_shrink_scan(struct shrinker *shrink,
				       struct shrink_control *sc)
{
	struct erofs_sb_info *sbi;
	struct list_head *p;

	unsigned long nr = sc->nr_to_scan;
	unsigned int run_no;
	unsigned long freed = 0;

	spin_lock(&erofs_sb_list_lock);
	do
		run_no = ++shrinker_run_no;
	while (run_no == 0);

	/* Iterate over all mounted superblocks and try to shrink them */
	p = erofs_sb_list.next;
	while (p != &erofs_sb_list) {
		sbi = list_entry(p, struct erofs_sb_info, list);

		/*
		 * We move the ones we do to the end of the list, so we stop
		 * when we see one we have already done.
		 */
		if (sbi->shrinker_run_no == run_no)
			break;

		if (!mutex_trylock(&sbi->umount_mutex)) {
			p = p->next;
			continue;
		}

		spin_unlock(&erofs_sb_list_lock);
		sbi->shrinker_run_no = run_no;

#ifdef CONFIG_EROFS_FS_ZIP
		freed += erofs_shrink_workstation(sbi, nr, false);
#endif

		spin_lock(&erofs_sb_list_lock);
		/* Get the next list element before we move this one */
		p = p->next;

		/*
		 * Move this one to the end of the list to provide some
		 * fairness.
		 */
		list_move_tail(&sbi->list, &erofs_sb_list);
		mutex_unlock(&sbi->umount_mutex);

		if (freed >= nr)
			break;
	}
	spin_unlock(&erofs_sb_list_lock);
	return freed;
}

struct shrinker erofs_shrinker_info = {
	.scan_objects = erofs_shrink_scan,
	.count_objects = erofs_shrink_count,
	.seeks = DEFAULT_SEEKS,
};
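
/*
 * Note: erofs_shrinker_info is expected to be hooked into the MM shrinker
 * framework elsewhere (at module init time) via register_shrinker() and
 * torn down with unregister_shrinker(); this file only provides the
 * count/scan callbacks above.
 */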