Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2018 HUAWEI, Inc.
 * https://www.huawei.com/
 * Created by Gao Xiang <gaoxiang25@huawei.com>
 */
7#include "internal.h"
8#include <linux/pagevec.h>
9
10struct page *erofs_allocpage(struct list_head *pool, gfp_t gfp)
11{
12 struct page *page;
13
14 if (!list_empty(pool)) {
15 page = lru_to_page(pool);
16 DBG_BUGON(page_ref_count(page) != 1);
17 list_del(&page->lru);
18 } else {
19 page = alloc_page(gfp);
20 }
21 return page;
22}
23
24#ifdef CONFIG_EROFS_FS_ZIP
25/* global shrink count (for all mounted EROFS instances) */
26static atomic_long_t erofs_global_shrink_cnt;
27
28static int erofs_workgroup_get(struct erofs_workgroup *grp)
29{
30 int o;
31
32repeat:
33 o = erofs_wait_on_workgroup_freezed(grp);
34 if (o <= 0)
35 return -1;
36
37 if (atomic_cmpxchg(&grp->refcount, o, o + 1) != o)
38 goto repeat;
39
40 /* decrease refcount paired by erofs_workgroup_put */
41 if (o == 1)
42 atomic_long_dec(&erofs_global_shrink_cnt);
43 return 0;
44}
45
46struct erofs_workgroup *erofs_find_workgroup(struct super_block *sb,
47 pgoff_t index)
48{
49 struct erofs_sb_info *sbi = EROFS_SB(sb);
50 struct erofs_workgroup *grp;
51
52repeat:
53 rcu_read_lock();
54 grp = xa_load(&sbi->managed_pslots, index);
55 if (grp) {
56 if (erofs_workgroup_get(grp)) {
57 /* prefer to relax rcu read side */
58 rcu_read_unlock();
59 goto repeat;
60 }
61
62 DBG_BUGON(index != grp->index);
63 }
64 rcu_read_unlock();
65 return grp;
66}
67
68struct erofs_workgroup *erofs_insert_workgroup(struct super_block *sb,
69 struct erofs_workgroup *grp)
70{
71 struct erofs_sb_info *const sbi = EROFS_SB(sb);
72 struct erofs_workgroup *pre;
73
74 /*
75 * Bump up a reference count before making this visible
76 * to others for the XArray in order to avoid potential
77 * UAF without serialized by xa_lock.
78 */
79 atomic_inc(&grp->refcount);
80
81repeat:
82 xa_lock(&sbi->managed_pslots);
83 pre = __xa_cmpxchg(&sbi->managed_pslots, grp->index,
84 NULL, grp, GFP_NOFS);
85 if (pre) {
86 if (xa_is_err(pre)) {
87 pre = ERR_PTR(xa_err(pre));
88 } else if (erofs_workgroup_get(pre)) {
89 /* try to legitimize the current in-tree one */
90 xa_unlock(&sbi->managed_pslots);
91 cond_resched();
92 goto repeat;
93 }
94 atomic_dec(&grp->refcount);
95 grp = pre;
96 }
97 xa_unlock(&sbi->managed_pslots);
98 return grp;
99}
100
101static void __erofs_workgroup_free(struct erofs_workgroup *grp)
102{
103 atomic_long_dec(&erofs_global_shrink_cnt);
104 erofs_workgroup_free_rcu(grp);
105}
106
107int erofs_workgroup_put(struct erofs_workgroup *grp)
108{
109 int count = atomic_dec_return(&grp->refcount);
110
111 if (count == 1)
112 atomic_long_inc(&erofs_global_shrink_cnt);
113 else if (!count)
114 __erofs_workgroup_free(grp);
115 return count;
116}
117
118static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
119 struct erofs_workgroup *grp)
120{
121 /*
122 * If managed cache is on, refcount of workgroups
123 * themselves could be < 0 (freezed). In other words,
124 * there is no guarantee that all refcounts > 0.
125 */
126 if (!erofs_workgroup_try_to_freeze(grp, 1))
127 return false;
128
129 /*
130 * Note that all cached pages should be unattached
131 * before deleted from the XArray. Otherwise some
132 * cached pages could be still attached to the orphan
133 * old workgroup when the new one is available in the tree.
134 */
135 if (erofs_try_to_free_all_cached_pages(sbi, grp)) {
136 erofs_workgroup_unfreeze(grp, 1);
137 return false;
138 }
139
140 /*
141 * It's impossible to fail after the workgroup is freezed,
142 * however in order to avoid some race conditions, add a
143 * DBG_BUGON to observe this in advance.
144 */
145 DBG_BUGON(xa_erase(&sbi->managed_pslots, grp->index) != grp);
146
147 /* last refcount should be connected with its managed pslot. */
148 erofs_workgroup_unfreeze(grp, 0);
149 __erofs_workgroup_free(grp);
150 return true;
151}
152
153static unsigned long erofs_shrink_workstation(struct erofs_sb_info *sbi,
154 unsigned long nr_shrink)
155{
156 struct erofs_workgroup *grp;
157 unsigned int freed = 0;
158 unsigned long index;
159
160 xa_for_each(&sbi->managed_pslots, index, grp) {
161 /* try to shrink each valid workgroup */
162 if (!erofs_try_to_release_workgroup(sbi, grp))
163 continue;
164
165 ++freed;
166 if (!--nr_shrink)
167 break;
168 }
169 return freed;
170}
171
/*
 * List of mounted EROFS instances (linked through erofs_sb_info.list)
 * and the spinlock guarding it.
 */
static DEFINE_SPINLOCK(erofs_sb_list_lock);
static LIST_HEAD(erofs_sb_list);

/* number of the current shrinker run; protected by 'erofs_sb_list_lock' */
static unsigned int shrinker_run_no;
178
179void erofs_shrinker_register(struct super_block *sb)
180{
181 struct erofs_sb_info *sbi = EROFS_SB(sb);
182
183 mutex_init(&sbi->umount_mutex);
184
185 spin_lock(&erofs_sb_list_lock);
186 list_add(&sbi->list, &erofs_sb_list);
187 spin_unlock(&erofs_sb_list_lock);
188}
189
190void erofs_shrinker_unregister(struct super_block *sb)
191{
192 struct erofs_sb_info *const sbi = EROFS_SB(sb);
193
194 mutex_lock(&sbi->umount_mutex);
195 /* clean up all remaining workgroups in memory */
196 erofs_shrink_workstation(sbi, ~0UL);
197
198 spin_lock(&erofs_sb_list_lock);
199 list_del(&sbi->list);
200 spin_unlock(&erofs_sb_list_lock);
201 mutex_unlock(&sbi->umount_mutex);
202}
203
204static unsigned long erofs_shrink_count(struct shrinker *shrink,
205 struct shrink_control *sc)
206{
207 return atomic_long_read(&erofs_global_shrink_cnt);
208}
209
210static unsigned long erofs_shrink_scan(struct shrinker *shrink,
211 struct shrink_control *sc)
212{
213 struct erofs_sb_info *sbi;
214 struct list_head *p;
215
216 unsigned long nr = sc->nr_to_scan;
217 unsigned int run_no;
218 unsigned long freed = 0;
219
220 spin_lock(&erofs_sb_list_lock);
221 do {
222 run_no = ++shrinker_run_no;
223 } while (run_no == 0);
224
225 /* Iterate over all mounted superblocks and try to shrink them */
226 p = erofs_sb_list.next;
227 while (p != &erofs_sb_list) {
228 sbi = list_entry(p, struct erofs_sb_info, list);
229
230 /*
231 * We move the ones we do to the end of the list, so we stop
232 * when we see one we have already done.
233 */
234 if (sbi->shrinker_run_no == run_no)
235 break;
236
237 if (!mutex_trylock(&sbi->umount_mutex)) {
238 p = p->next;
239 continue;
240 }
241
242 spin_unlock(&erofs_sb_list_lock);
243 sbi->shrinker_run_no = run_no;
244
245 freed += erofs_shrink_workstation(sbi, nr - freed);
246
247 spin_lock(&erofs_sb_list_lock);
248 /* Get the next list element before we move this one */
249 p = p->next;
250
251 /*
252 * Move this one to the end of the list to provide some
253 * fairness.
254 */
255 list_move_tail(&sbi->list, &erofs_sb_list);
256 mutex_unlock(&sbi->umount_mutex);
257
258 if (freed >= nr)
259 break;
260 }
261 spin_unlock(&erofs_sb_list_lock);
262 return freed;
263}
264
265static struct shrinker erofs_shrinker_info = {
266 .scan_objects = erofs_shrink_scan,
267 .count_objects = erofs_shrink_count,
268 .seeks = DEFAULT_SEEKS,
269};
270
271int __init erofs_init_shrinker(void)
272{
273 return register_shrinker(&erofs_shrinker_info);
274}
275
276void erofs_exit_shrinker(void)
277{
278 unregister_shrinker(&erofs_shrinker_info);
279}
280#endif /* !CONFIG_EROFS_FS_ZIP */
281