// SPDX-License-Identifier: GPL-2.0
/*
 * linux/drivers/staging/erofs/utils.c
 *
 * Copyright (C) 2018 HUAWEI, Inc.
 *             http://www.huawei.com/
 * Created by Gao Xiang <gaoxiang25@huawei.com>
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License. See the file COPYING in the main directory of the Linux
 * distribution for more details.
 */

#include "internal.h"
#include <linux/pagevec.h>

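/*
 * Take a page from the caller-supplied free page pool when one is
 * available; otherwise fall back to the page allocator.  __GFP_NOFAIL
 * guarantees the allocation never fails, so callers need no NULL check.
 */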
struct page *erofs_allocpage(struct list_head *pool, gfp_t gfp)
{
	struct page *page;

	if (!list_empty(pool)) {
		page = lru_to_page(pool);
		list_del(&page->lru);
	} else {
		page = alloc_pages(gfp | __GFP_NOFAIL, 0);
	}
	return page;
}

/* global shrink count (for all mounted EROFS instances) */
static atomic_long_t erofs_global_shrink_cnt;

#ifdef CONFIG_EROFS_FS_ZIP
#define __erofs_workgroup_get(grp)	atomic_inc(&(grp)->refcount)
#define __erofs_workgroup_put(grp)	atomic_dec(&(grp)->refcount)

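/*
 * Lockless reference grab: wait until any concurrent freezer has released
 * the workgroup, then bump the refcount with cmpxchg, retrying on races.
 * Returns -1 if the observed refcount is <= 0, i.e. the workgroup is
 * already on its way out and must not be reused.
 */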
static int erofs_workgroup_get(struct erofs_workgroup *grp)
{
	int o;

repeat:
	o = erofs_wait_on_workgroup_freezed(grp);
	if (unlikely(o <= 0))
		return -1;

	if (unlikely(atomic_cmpxchg(&grp->refcount, o, o + 1) != o))
		goto repeat;

	/*
	 * decrease the global shrink count bumped by
	 * erofs_workgroup_put when the refcount dropped to 1
	 */
	if (unlikely(o == 1))
		atomic_long_dec(&erofs_global_shrink_cnt);
	return 0;
}

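/*
 * Look up a workgroup by index under RCU and take a reference on it.
 * If the reference cannot be grabbed (the workgroup is being freed),
 * leave the RCU read section and retry the lookup.
 */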
struct erofs_workgroup *erofs_find_workgroup(struct super_block *sb,
					     pgoff_t index, bool *tag)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);
	struct erofs_workgroup *grp;

repeat:
	rcu_read_lock();
	grp = radix_tree_lookup(&sbi->workstn_tree, index);
	if (grp) {
		*tag = xa_pointer_tag(grp);
		grp = xa_untag_pointer(grp);

		if (erofs_workgroup_get(grp)) {
			/*
			 * the workgroup is dying; relax the RCU
			 * read side and retry the lookup
			 */
			rcu_read_unlock();
			goto repeat;
		}

		DBG_BUGON(index != grp->index);
	}
	rcu_read_unlock();
	return grp;
}

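/*
 * Insert a freshly created workgroup into the per-sb workstation radix
 * tree.  An extra reference is taken before the workgroup becomes visible
 * so that concurrent lookups cannot free it underneath us.
 */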
int erofs_register_workgroup(struct super_block *sb,
			     struct erofs_workgroup *grp,
			     bool tag)
{
	struct erofs_sb_info *sbi;
	int err;

	/* grp shouldn't be broken or used before being registered */
	if (unlikely(atomic_read(&grp->refcount) != 1)) {
		DBG_BUGON(1);
		return -EINVAL;
	}

	err = radix_tree_preload(GFP_NOFS);
	if (err)
		return err;

	sbi = EROFS_SB(sb);
	erofs_workstn_lock(sbi);

	grp = xa_tag_pointer(grp, tag);

	/*
	 * Bump up the reference count before making this workgroup
	 * visible to other users in order to avoid potential UAF
	 * when it is not serialized by erofs_workstn_lock.
	 */
	__erofs_workgroup_get(grp);

	err = radix_tree_insert(&sbi->workstn_tree,
				grp->index, grp);
	if (unlikely(err))
		/*
		 * it's safe to decrease here since the workgroup isn't
		 * visible yet and its refcount is >= 2 (cannot be frozen).
		 */
		__erofs_workgroup_put(grp);

	erofs_workstn_unlock(sbi);
	radix_tree_preload_end();
	return err;
}

static void __erofs_workgroup_free(struct erofs_workgroup *grp)
{
	atomic_long_dec(&erofs_global_shrink_cnt);
	erofs_workgroup_free_rcu(grp);
}

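/*
 * Drop one reference.  When the refcount reaches 1 only the workstation
 * radix tree still holds the workgroup, so it becomes reclaimable and the
 * global shrink count is bumped; at 0 the workgroup is freed via RCU.
 */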
int erofs_workgroup_put(struct erofs_workgroup *grp)
{
	int count = atomic_dec_return(&grp->refcount);

	if (count == 1)
		atomic_long_inc(&erofs_global_shrink_cnt);
	else if (!count)
		__erofs_workgroup_free(grp);
	return count;
}

#ifdef EROFS_FS_HAS_MANAGED_CACHE
/* for the cache-managed case, customized reclaim paths exist */
static void erofs_workgroup_unfreeze_final(struct erofs_workgroup *grp)
{
	erofs_workgroup_unfreeze(grp, 0);
	__erofs_workgroup_free(grp);
}

static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
					   struct erofs_workgroup *grp,
					   bool cleanup)
{
	/*
	 * With managed cache enabled, workgroup refcounts can be < 0
	 * (frozen), so there is no guarantee that every refcount is > 0
	 * in this configuration.
	 */
	if (!erofs_workgroup_try_to_freeze(grp, 1))
		return false;

	/*
	 * note that all cached pages should be unlinked before the
	 * workgroup is deleted from the radix tree.  Otherwise some
	 * cached pages of an orphaned old workgroup could still be
	 * linked after the new one becomes available.
	 */
	if (erofs_try_to_free_all_cached_pages(sbi, grp)) {
		erofs_workgroup_unfreeze(grp, 1);
		return false;
	}

	/*
	 * the delete cannot fail after the workgroup is frozen, but add
	 * a DBG_BUGON to observe any race condition in advance.
	 */
	DBG_BUGON(xa_untag_pointer(radix_tree_delete(&sbi->workstn_tree,
						     grp->index)) != grp);

	/*
	 * if managed cache is enabled, the last refcount
	 * should indicate the related workstation.
	 */
	erofs_workgroup_unfreeze_final(grp);
	return true;
}

#else
/* for nocache case, no customized reclaim path at all */
static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
					   struct erofs_workgroup *grp,
					   bool cleanup)
{
	int cnt = atomic_read(&grp->refcount);

	DBG_BUGON(cnt <= 0);
	DBG_BUGON(cleanup && cnt != 1);

	if (cnt > 1)
		return false;

	DBG_BUGON(xa_untag_pointer(radix_tree_delete(&sbi->workstn_tree,
						     grp->index)) != grp);

	/* (rarely) could be grabbed again when freeing */
	erofs_workgroup_put(grp);
	return true;
}

#endif

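/*
 * Scan the workstation radix tree in PAGEVEC_SIZE-sized batches and try to
 * release every workgroup found, until nr_shrink workgroups have been freed
 * or the whole tree has been walked.
 */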
unsigned long erofs_shrink_workstation(struct erofs_sb_info *sbi,
				       unsigned long nr_shrink,
				       bool cleanup)
{
	pgoff_t first_index = 0;
	void *batch[PAGEVEC_SIZE];
	unsigned int freed = 0;

	int i, found;
repeat:
	erofs_workstn_lock(sbi);

	found = radix_tree_gang_lookup(&sbi->workstn_tree,
				       batch, first_index, PAGEVEC_SIZE);

	for (i = 0; i < found; ++i) {
		struct erofs_workgroup *grp = xa_untag_pointer(batch[i]);

		first_index = grp->index + 1;

		/* try to shrink each valid workgroup */
		if (!erofs_try_to_release_workgroup(sbi, grp, cleanup))
			continue;

		++freed;
		if (unlikely(!--nr_shrink))
			break;
	}
	erofs_workstn_unlock(sbi);

	if (i && nr_shrink)
		goto repeat;
	return freed;
}

#endif

/* protected by 'erofs_sb_list_lock' */
static unsigned int shrinker_run_no;

/* protects the mounted 'erofs_sb_list' */
static DEFINE_SPINLOCK(erofs_sb_list_lock);
static LIST_HEAD(erofs_sb_list);

void erofs_register_super(struct super_block *sb)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);

	mutex_init(&sbi->umount_mutex);

	spin_lock(&erofs_sb_list_lock);
	list_add(&sbi->list, &erofs_sb_list);
	spin_unlock(&erofs_sb_list_lock);
}

void erofs_unregister_super(struct super_block *sb)
{
	spin_lock(&erofs_sb_list_lock);
	list_del(&EROFS_SB(sb)->list);
	spin_unlock(&erofs_sb_list_lock);
}

static unsigned long erofs_shrink_count(struct shrinker *shrink,
					struct shrink_control *sc)
{
	return atomic_long_read(&erofs_global_shrink_cnt);
}

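/*
 * Round-robin over all mounted erofs instances: each superblock that has
 * been shrunk is stamped with the current run number and moved to the tail
 * of erofs_sb_list, so repeated scans spread reclaim work fairly.
 */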
static unsigned long erofs_shrink_scan(struct shrinker *shrink,
				       struct shrink_control *sc)
{
	struct erofs_sb_info *sbi;
	struct list_head *p;

	unsigned long nr = sc->nr_to_scan;
	unsigned int run_no;
	unsigned long freed = 0;

	spin_lock(&erofs_sb_list_lock);
	do
		run_no = ++shrinker_run_no;
	while (run_no == 0);

	/* Iterate over all mounted superblocks and try to shrink them */
	p = erofs_sb_list.next;
	while (p != &erofs_sb_list) {
		sbi = list_entry(p, struct erofs_sb_info, list);

		/*
		 * We move the ones we do to the end of the list, so we stop
		 * when we see one we have already done.
		 */
		if (sbi->shrinker_run_no == run_no)
			break;

		if (!mutex_trylock(&sbi->umount_mutex)) {
			p = p->next;
			continue;
		}

		spin_unlock(&erofs_sb_list_lock);
		sbi->shrinker_run_no = run_no;

#ifdef CONFIG_EROFS_FS_ZIP
		freed += erofs_shrink_workstation(sbi, nr, false);
#endif

		spin_lock(&erofs_sb_list_lock);
		/* Get the next list element before we move this one */
		p = p->next;

		/*
		 * Move this one to the end of the list to provide some
		 * fairness.
		 */
		list_move_tail(&sbi->list, &erofs_sb_list);
		mutex_unlock(&sbi->umount_mutex);

		if (freed >= nr)
			break;
	}
	spin_unlock(&erofs_sb_list_lock);
	return freed;
}

struct shrinker erofs_shrinker_info = {
	.scan_objects = erofs_shrink_scan,
	.count_objects = erofs_shrink_count,
	.seeks = DEFAULT_SEEKS,
};
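
/*
 * Note: erofs_shrinker_info is expected to be hooked into the MM shrinker
 * framework elsewhere (at module init time) via register_shrinker() and
 * torn down with unregister_shrinker(); this file only provides the
 * count/scan callbacks above.
 */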