drivers/staging/erofs/utils.c at v5.3

tjh.dev / kernel
Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
kernel / drivers / staging / erofs / utils.c
at v5.3 353 lines 8.3 kB view raw
wrap content
  1// SPDX-License-Identifier: GPL-2.0
  2/*
  3 * linux/drivers/staging/erofs/utils.c
  4 *
  5 * Copyright (C) 2018 HUAWEI, Inc.
  6 *             http://www.huawei.com/
  7 * Created by Gao Xiang <gaoxiang25@huawei.com>
  8 *
  9 * This file is subject to the terms and conditions of the GNU General Public
 10 * License.  See the file COPYING in the main directory of the Linux
 11 * distribution for more details.
 12 */
 13
 14#include "internal.h"
 15#include <linux/pagevec.h>
 16
 17struct page *erofs_allocpage(struct list_head *pool, gfp_t gfp)
 18{
 19	struct page *page;
 20
 21	if (!list_empty(pool)) {
 22		page = lru_to_page(pool);
 23		list_del(&page->lru);
 24	} else {
 25		page = alloc_pages(gfp | __GFP_NOFAIL, 0);
 26	}
 27	return page;
 28}
 29
 30#if (EROFS_PCPUBUF_NR_PAGES > 0)
 31static struct {
 32	u8 data[PAGE_SIZE * EROFS_PCPUBUF_NR_PAGES];
 33} ____cacheline_aligned_in_smp erofs_pcpubuf[NR_CPUS];
 34
 35void *erofs_get_pcpubuf(unsigned int pagenr)
 36{
 37	preempt_disable();
 38	return &erofs_pcpubuf[smp_processor_id()].data[pagenr * PAGE_SIZE];
 39}
 40#endif
 41
 42/* global shrink count (for all mounted EROFS instances) */
 43static atomic_long_t erofs_global_shrink_cnt;
 44
 45#ifdef CONFIG_EROFS_FS_ZIP
 46#define __erofs_workgroup_get(grp)	atomic_inc(&(grp)->refcount)
 47#define __erofs_workgroup_put(grp)	atomic_dec(&(grp)->refcount)
 48
 49static int erofs_workgroup_get(struct erofs_workgroup *grp)
 50{
 51	int o;
 52
 53repeat:
 54	o = erofs_wait_on_workgroup_freezed(grp);
 55	if (unlikely(o <= 0))
 56		return -1;
 57
 58	if (unlikely(atomic_cmpxchg(&grp->refcount, o, o + 1) != o))
 59		goto repeat;
 60
 61	/* decrease refcount paired by erofs_workgroup_put */
 62	if (unlikely(o == 1))
 63		atomic_long_dec(&erofs_global_shrink_cnt);
 64	return 0;
 65}
 66
 67struct erofs_workgroup *erofs_find_workgroup(struct super_block *sb,
 68					     pgoff_t index, bool *tag)
 69{
 70	struct erofs_sb_info *sbi = EROFS_SB(sb);
 71	struct erofs_workgroup *grp;
 72
 73repeat:
 74	rcu_read_lock();
 75	grp = radix_tree_lookup(&sbi->workstn_tree, index);
 76	if (grp) {
 77		*tag = xa_pointer_tag(grp);
 78		grp = xa_untag_pointer(grp);
 79
 80		if (erofs_workgroup_get(grp)) {
 81			/* prefer to relax rcu read side */
 82			rcu_read_unlock();
 83			goto repeat;
 84		}
 85
 86		DBG_BUGON(index != grp->index);
 87	}
 88	rcu_read_unlock();
 89	return grp;
 90}
 91
 92int erofs_register_workgroup(struct super_block *sb,
 93			     struct erofs_workgroup *grp,
 94			     bool tag)
 95{
 96	struct erofs_sb_info *sbi;
 97	int err;
 98
 99	/* grp shouldn't be broken or used before */
100	if (unlikely(atomic_read(&grp->refcount) != 1)) {
101		DBG_BUGON(1);
102		return -EINVAL;
103	}
104
105	err = radix_tree_preload(GFP_NOFS);
106	if (err)
107		return err;
108
109	sbi = EROFS_SB(sb);
110	erofs_workstn_lock(sbi);
111
112	grp = xa_tag_pointer(grp, tag);
113
114	/*
115	 * Bump up reference count before making this workgroup
116	 * visible to other users in order to avoid potential UAF
117	 * without serialized by erofs_workstn_lock.
118	 */
119	__erofs_workgroup_get(grp);
120
121	err = radix_tree_insert(&sbi->workstn_tree,
122				grp->index, grp);
123	if (unlikely(err))
124		/*
125		 * it's safe to decrease since the workgroup isn't visible
126		 * and refcount >= 2 (cannot be freezed).
127		 */
128		__erofs_workgroup_put(grp);
129
130	erofs_workstn_unlock(sbi);
131	radix_tree_preload_end();
132	return err;
133}
134
135static void  __erofs_workgroup_free(struct erofs_workgroup *grp)
136{
137	atomic_long_dec(&erofs_global_shrink_cnt);
138	erofs_workgroup_free_rcu(grp);
139}
140
141int erofs_workgroup_put(struct erofs_workgroup *grp)
142{
143	int count = atomic_dec_return(&grp->refcount);
144
145	if (count == 1)
146		atomic_long_inc(&erofs_global_shrink_cnt);
147	else if (!count)
148		__erofs_workgroup_free(grp);
149	return count;
150}
151
152#ifdef EROFS_FS_HAS_MANAGED_CACHE
/*
 * Managed-cache case only (customized reclaim paths exist there):
 * unfreeze @grp with a refcount of 0 and then free it.
 */
static void erofs_workgroup_unfreeze_final(struct erofs_workgroup *grp)
{
	erofs_workgroup_unfreeze(grp, 0);
	__erofs_workgroup_free(grp);
}
159
160static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
161					   struct erofs_workgroup *grp,
162					   bool cleanup)
163{
164	/*
165	 * for managed cache enabled, the refcount of workgroups
166	 * themselves could be < 0 (freezed). So there is no guarantee
167	 * that all refcount > 0 if managed cache is enabled.
168	 */
169	if (!erofs_workgroup_try_to_freeze(grp, 1))
170		return false;
171
172	/*
173	 * note that all cached pages should be unlinked
174	 * before delete it from the radix tree.
175	 * Otherwise some cached pages of an orphan old workgroup
176	 * could be still linked after the new one is available.
177	 */
178	if (erofs_try_to_free_all_cached_pages(sbi, grp)) {
179		erofs_workgroup_unfreeze(grp, 1);
180		return false;
181	}
182
183	/*
184	 * it is impossible to fail after the workgroup is freezed,
185	 * however in order to avoid some race conditions, add a
186	 * DBG_BUGON to observe this in advance.
187	 */
188	DBG_BUGON(xa_untag_pointer(radix_tree_delete(&sbi->workstn_tree,
189						     grp->index)) != grp);
190
191	/*
192	 * if managed cache is enable, the last refcount
193	 * should indicate the related workstation.
194	 */
195	erofs_workgroup_unfreeze_final(grp);
196	return true;
197}
198
199#else
200/* for nocache case, no customized reclaim path at all */
201static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
202					   struct erofs_workgroup *grp,
203					   bool cleanup)
204{
205	int cnt = atomic_read(&grp->refcount);
206
207	DBG_BUGON(cnt <= 0);
208	DBG_BUGON(cleanup && cnt != 1);
209
210	if (cnt > 1)
211		return false;
212
213	DBG_BUGON(xa_untag_pointer(radix_tree_delete(&sbi->workstn_tree,
214						     grp->index)) != grp);
215
216	/* (rarely) could be grabbed again when freeing */
217	erofs_workgroup_put(grp);
218	return true;
219}
220
221#endif
222
223unsigned long erofs_shrink_workstation(struct erofs_sb_info *sbi,
224				       unsigned long nr_shrink,
225				       bool cleanup)
226{
227	pgoff_t first_index = 0;
228	void *batch[PAGEVEC_SIZE];
229	unsigned int freed = 0;
230
231	int i, found;
232repeat:
233	erofs_workstn_lock(sbi);
234
235	found = radix_tree_gang_lookup(&sbi->workstn_tree,
236				       batch, first_index, PAGEVEC_SIZE);
237
238	for (i = 0; i < found; ++i) {
239		struct erofs_workgroup *grp = xa_untag_pointer(batch[i]);
240
241		first_index = grp->index + 1;
242
243		/* try to shrink each valid workgroup */
244		if (!erofs_try_to_release_workgroup(sbi, grp, cleanup))
245			continue;
246
247		++freed;
248		if (unlikely(!--nr_shrink))
249			break;
250	}
251	erofs_workstn_unlock(sbi);
252
253	if (i && nr_shrink)
254		goto repeat;
255	return freed;
256}
257
258#endif
259
/* sequence number of the current shrinker pass; guarded by 'erofs_sb_list_lock' */
static unsigned int shrinker_run_no;

/* guards 'erofs_sb_list', the list of mounted instances */
static DEFINE_SPINLOCK(erofs_sb_list_lock);
static LIST_HEAD(erofs_sb_list);
266
267void erofs_register_super(struct super_block *sb)
268{
269	struct erofs_sb_info *sbi = EROFS_SB(sb);
270
271	mutex_init(&sbi->umount_mutex);
272
273	spin_lock(&erofs_sb_list_lock);
274	list_add(&sbi->list, &erofs_sb_list);
275	spin_unlock(&erofs_sb_list_lock);
276}
277
278void erofs_unregister_super(struct super_block *sb)
279{
280	spin_lock(&erofs_sb_list_lock);
281	list_del(&EROFS_SB(sb)->list);
282	spin_unlock(&erofs_sb_list_lock);
283}
284
285static unsigned long erofs_shrink_count(struct shrinker *shrink,
286					struct shrink_control *sc)
287{
288	return atomic_long_read(&erofs_global_shrink_cnt);
289}
290
291static unsigned long erofs_shrink_scan(struct shrinker *shrink,
292				       struct shrink_control *sc)
293{
294	struct erofs_sb_info *sbi;
295	struct list_head *p;
296
297	unsigned long nr = sc->nr_to_scan;
298	unsigned int run_no;
299	unsigned long freed = 0;
300
301	spin_lock(&erofs_sb_list_lock);
302	do
303		run_no = ++shrinker_run_no;
304	while (run_no == 0);
305
306	/* Iterate over all mounted superblocks and try to shrink them */
307	p = erofs_sb_list.next;
308	while (p != &erofs_sb_list) {
309		sbi = list_entry(p, struct erofs_sb_info, list);
310
311		/*
312		 * We move the ones we do to the end of the list, so we stop
313		 * when we see one we have already done.
314		 */
315		if (sbi->shrinker_run_no == run_no)
316			break;
317
318		if (!mutex_trylock(&sbi->umount_mutex)) {
319			p = p->next;
320			continue;
321		}
322
323		spin_unlock(&erofs_sb_list_lock);
324		sbi->shrinker_run_no = run_no;
325
326#ifdef CONFIG_EROFS_FS_ZIP
327		freed += erofs_shrink_workstation(sbi, nr, false);
328#endif
329
330		spin_lock(&erofs_sb_list_lock);
331		/* Get the next list element before we move this one */
332		p = p->next;
333
334		/*
335		 * Move this one to the end of the list to provide some
336		 * fairness.
337		 */
338		list_move_tail(&sbi->list, &erofs_sb_list);
339		mutex_unlock(&sbi->umount_mutex);
340
341		if (freed >= nr)
342			break;
343	}
344	spin_unlock(&erofs_sb_list_lock);
345	return freed;
346}
347
348struct shrinker erofs_shrinker_info = {
349	.scan_objects = erofs_shrink_scan,
350	.count_objects = erofs_shrink_count,
351	.seeks = DEFAULT_SEEKS,
352};
353