Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1// SPDX-License-Identifier: GPL-2.0-or-later
2/*
3 * Copyright (C) 2017-2023 Oracle. All Rights Reserved.
4 * Author: Darrick J. Wong <djwong@kernel.org>
5 */
6#ifndef __XFS_SCRUB_SCRUB_H__
7#define __XFS_SCRUB_SCRUB_H__
8
9struct xfs_scrub;
10
/* State for rate-limiting cond_resched() calls during a long scrub loop. */
struct xchk_relax {
	unsigned long	next_resched;	/* jiffies time of the next allowed resched */
	unsigned int	resched_nr;	/* calls since the last expensive check */
	bool		interruptible;	/* may a fatal signal abort the scan? */
};
16
/* Yield to the scheduler at most 10x per second. */
#define XCHK_RELAX_NEXT	(jiffies + (HZ / 10))

/* Initializer: arm the first resched deadline; scans are interruptible. */
#define INIT_XCHK_RELAX \
	(struct xchk_relax){ \
		.next_resched	= XCHK_RELAX_NEXT, \
		.resched_nr	= 0, \
		.interruptible	= true, \
	}
26
27/*
28 * Relax during a scrub operation and exit if there's a fatal signal pending.
29 *
30 * If preemption is disabled, we need to yield to the scheduler every now and
31 * then so that we don't run afoul of the soft lockup watchdog or RCU stall
32 * detector. cond_resched calls are somewhat expensive (~5ns) so we want to
33 * ratelimit this to 10x per second. Amortize the cost of the other checks by
34 * only doing it once every 100 calls.
35 */
36static inline int xchk_maybe_relax(struct xchk_relax *widget)
37{
38 /* Amortize the cost of scheduling and checking signals. */
39 if (likely(++widget->resched_nr < 100))
40 return 0;
41 widget->resched_nr = 0;
42
43 if (unlikely(widget->next_resched <= jiffies)) {
44 cond_resched();
45 widget->next_resched = XCHK_RELAX_NEXT;
46 }
47
48 if (widget->interruptible && fatal_signal_pending(current))
49 return -EINTR;
50
51 return 0;
52}
53
/*
 * Standard flags for allocating memory within scrub.  NOFS context is
 * configured by the process allocation scope.  Scrub and repair must be able
 * to back out gracefully if there isn't enough memory.  Force-cast to avoid
 * complaints from static checkers.
 */
#define XCHK_GFP_FLAGS	((__force gfp_t)(GFP_KERNEL | __GFP_NOWARN | \
					 __GFP_RETRY_MAYFAIL))

/*
 * For opening files by handle for fsck operations, we don't trust the inumber
 * or the allocation state; therefore, perform an untrusted lookup.  We don't
 * want these inodes to pollute the cache, so mark them for immediate removal.
 */
#define XCHK_IGET_FLAGS	(XFS_IGET_UNTRUSTED | XFS_IGET_DONTCACHE)
69
/*
 * Type info and names for the scrub types.  Values start at 1, so a
 * zero-initialized ops struct has an invalid type (NOTE(review): presumably
 * intentional — confirm against the ops table).
 */
enum xchk_type {
	ST_NONE = 1,	/* disabled */
	ST_PERAG,	/* per-AG metadata */
	ST_FS,		/* per-FS metadata */
	ST_INODE,	/* per-inode metadata */
	ST_GENERIC,	/* determined by the scrubber */
	ST_RTGROUP,	/* rtgroup metadata */
};
79
/* Operations vector describing how to check/repair one type of metadata. */
struct xchk_meta_ops {
	/* Acquire whatever resources are needed for the operation. */
	int		(*setup)(struct xfs_scrub *sc);

	/* Examine metadata for errors. */
	int		(*scrub)(struct xfs_scrub *);

	/* Repair or optimize the metadata. */
	int		(*repair)(struct xfs_scrub *);

	/*
	 * Re-scrub the metadata we repaired, in case there's extra work that
	 * we need to do to check our repair work.  If this is NULL, we'll use
	 * the ->scrub function pointer, assuming that the regular scrub is
	 * sufficient.
	 */
	int		(*repair_eval)(struct xfs_scrub *sc);

	/* Decide if we even have this piece of metadata. */
	bool		(*has)(const struct xfs_mount *);

	/* type describing required/allowed inputs */
	enum xchk_type	type;
};
104
/* Buffer pointers and btree cursors for an entire AG. */
struct xchk_ag {
	struct xfs_perag	*pag;

	/* AG btree roots */
	struct xfs_buf		*agf_bp;
	struct xfs_buf		*agi_bp;

	/* AG btrees */
	struct xfs_btree_cur	*bno_cur;	/* bnobt cursor */
	struct xfs_btree_cur	*cnt_cur;	/* cntbt cursor */
	struct xfs_btree_cur	*ino_cur;	/* inobt cursor */
	struct xfs_btree_cur	*fino_cur;	/* finobt cursor */
	struct xfs_btree_cur	*rmap_cur;	/* rmapbt cursor */
	struct xfs_btree_cur	*refc_cur;	/* refcountbt cursor */
};
121
/* Inode lock state for the RT volume. */
struct xchk_rt {
	/* incore rtgroup, if applicable */
	struct xfs_rtgroup	*rtg;

	/* XFS_RTGLOCK_* lock state if locked */
	unsigned int		rtlock_flags;

	/* rtgroup btrees */
	struct xfs_btree_cur	*rmap_cur;	/* rt rmap btree cursor */
	struct xfs_btree_cur	*refc_cur;	/* rt refcount btree cursor */
};
134
/* Context for a single online scrub/repair operation. */
struct xfs_scrub {
	/* General scrub state. */
	struct xfs_mount		*mp;	/* filesystem being checked */
	struct xfs_scrub_metadata	*sm;	/* scrub request being serviced */
	const struct xchk_meta_ops	*ops;	/* ops for the requested type */
	struct xfs_trans		*tp;	/* current transaction, if any */

	/* File that scrub was called with. */
	struct file			*file;

	/*
	 * File that is undergoing the scrub operation.  This can differ from
	 * the file that scrub was called with if we're checking file-based fs
	 * metadata (e.g. rt bitmaps) or if we're doing a scrub-by-handle for
	 * something that can't be opened directly (e.g. symlinks).
	 */
	struct xfs_inode		*ip;

	/* Kernel memory buffer used by scrubbers; freed at teardown. */
	void				*buf;

	/*
	 * Clean up resources owned by whatever is in the buffer.  Cleanup can
	 * be deferred with this hook as a means for scrub functions to pass
	 * data to repair functions.  This function must not free the buffer
	 * itself.
	 */
	void				(*buf_cleanup)(void *buf);

	/* xfile used by the scrubbers; freed at teardown. */
	struct xfile			*xfile;

	/* buffer target for in-memory btrees; also freed at teardown. */
	struct xfs_buftarg		*xmbtp;

	/* Lock flags for @ip. */
	uint				ilock_flags;

	/* The orphanage, for stashing files that have lost their parent. */
	uint				orphanage_ilock_flags;
	struct xfs_inode		*orphanage;

	/* A temporary file on this filesystem, for staging new metadata. */
	struct xfs_inode		*tempip;
	uint				temp_ilock_flags;

	/* See the XCHK/XREP state flags below. */
	unsigned int			flags;

	/*
	 * The XFS_SICK_* flags that correspond to the metadata being scrubbed
	 * or repaired.  We will use this mask to update the in-core fs health
	 * status with whatever we find.
	 */
	unsigned int			sick_mask;

	/*
	 * Clear these XFS_SICK_* flags but only if the scan is ok.  Useful for
	 * removing ZAPPED flags after a repair.
	 */
	unsigned int			healthy_mask;

	/* next time we want to cond_resched() */
	struct xchk_relax		relax;

	/* State tracking for single-AG operations. */
	struct xchk_ag			sa;

	/* State tracking for realtime operations. */
	struct xchk_rt			sr;
};
206
/* XCHK state flags grow up from zero, XREP state flags grow down from 2^31 */
#define XCHK_TRY_HARDER		(1U << 0)  /* can't get resources, try again */
#define XCHK_HAVE_FREEZE_PROT	(1U << 1)  /* do we have freeze protection? */
#define XCHK_FSGATES_DRAIN	(1U << 2)  /* defer ops draining enabled */
#define XCHK_NEED_DRAIN		(1U << 3)  /* scrub needs to drain defer ops */
#define XCHK_FSGATES_QUOTA	(1U << 4)  /* quota live update enabled */
#define XCHK_FSGATES_DIRENTS	(1U << 5)  /* directory live update enabled */
#define XCHK_FSGATES_RMAP	(1U << 6)  /* rmapbt live update enabled */
#define XREP_RESET_PERAG_RESV	(1U << 30) /* must reset AG space reservation */
#define XREP_ALREADY_FIXED	(1U << 31) /* checking our repair work */

/*
 * The XCHK_FSGATES* flags reflect functionality in the main filesystem that
 * is only enabled for this particular online fsck.  When not in use, the
 * features are gated off via dynamic code patching, which is why the state
 * must be enabled during scrub setup and can only be torn down afterwards.
 */
#define XCHK_FSGATES_ALL	(XCHK_FSGATES_DRAIN | \
				 XCHK_FSGATES_QUOTA | \
				 XCHK_FSGATES_DIRENTS | \
				 XCHK_FSGATES_RMAP)
228
/*
 * Context for a subordinate scrub operation spawned from a parent scrub.
 * The parent's sm type/flags are stashed here — presumably so they can be
 * restored by xchk_scrub_free_subord; confirm against scrub.c.
 */
struct xfs_scrub_subord {
	struct xfs_scrub	sc;		/* context for the nested operation */
	struct xfs_scrub	*parent_sc;	/* scrub that spawned this one */
	unsigned int		old_smtype;	/* saved parent sm type */
	unsigned int		old_smflags;	/* saved parent sm flags */
};
235
236struct xfs_scrub_subord *xchk_scrub_create_subord(struct xfs_scrub *sc,
237 unsigned int subtype);
238void xchk_scrub_free_subord(struct xfs_scrub_subord *sub);
239
240/*
241 * We /could/ terminate a scrub/repair operation early. If we're not
242 * in a good place to continue (fatal signal, etc.) then bail out.
243 * Note that we're careful not to make any judgements about *error.
244 */
245static inline bool
246xchk_should_terminate(
247 struct xfs_scrub *sc,
248 int *error)
249{
250 if (xchk_maybe_relax(&sc->relax)) {
251 if (*error == 0)
252 *error = -EINTR;
253 return true;
254 }
255 return false;
256}
257
/*
 * Stub scrubber used (via the CONFIG_* #else branches below) for metadata
 * types that are compiled out of this kernel.
 */
static inline int xchk_nothing(struct xfs_scrub *sc)
{
	return -ENOENT;
}
262
263/* Metadata scrubbers */
264int xchk_tester(struct xfs_scrub *sc);
265int xchk_superblock(struct xfs_scrub *sc);
266int xchk_agf(struct xfs_scrub *sc);
267int xchk_agfl(struct xfs_scrub *sc);
268int xchk_agi(struct xfs_scrub *sc);
269int xchk_allocbt(struct xfs_scrub *sc);
270int xchk_iallocbt(struct xfs_scrub *sc);
271int xchk_rmapbt(struct xfs_scrub *sc);
272int xchk_refcountbt(struct xfs_scrub *sc);
273int xchk_inode(struct xfs_scrub *sc);
274int xchk_bmap_data(struct xfs_scrub *sc);
275int xchk_bmap_attr(struct xfs_scrub *sc);
276int xchk_bmap_cow(struct xfs_scrub *sc);
277int xchk_directory(struct xfs_scrub *sc);
278int xchk_xattr(struct xfs_scrub *sc);
279int xchk_symlink(struct xfs_scrub *sc);
280int xchk_parent(struct xfs_scrub *sc);
281int xchk_dirtree(struct xfs_scrub *sc);
282int xchk_metapath(struct xfs_scrub *sc);
283#ifdef CONFIG_XFS_RT
284int xchk_rtbitmap(struct xfs_scrub *sc);
285int xchk_rtsummary(struct xfs_scrub *sc);
286int xchk_rgsuperblock(struct xfs_scrub *sc);
287int xchk_rtrmapbt(struct xfs_scrub *sc);
288int xchk_rtrefcountbt(struct xfs_scrub *sc);
289#else
290# define xchk_rtbitmap xchk_nothing
291# define xchk_rtsummary xchk_nothing
292# define xchk_rgsuperblock xchk_nothing
293# define xchk_rtrmapbt xchk_nothing
294# define xchk_rtrefcountbt xchk_nothing
295#endif
296#ifdef CONFIG_XFS_QUOTA
297int xchk_quota(struct xfs_scrub *sc);
298int xchk_quotacheck(struct xfs_scrub *sc);
299#else
300# define xchk_quota xchk_nothing
301# define xchk_quotacheck xchk_nothing
302#endif
303int xchk_fscounters(struct xfs_scrub *sc);
304int xchk_nlinks(struct xfs_scrub *sc);
305
306/* cross-referencing helpers */
307void xchk_xref_is_used_space(struct xfs_scrub *sc, xfs_agblock_t agbno,
308 xfs_extlen_t len);
309void xchk_xref_is_not_inode_chunk(struct xfs_scrub *sc, xfs_agblock_t agbno,
310 xfs_extlen_t len);
311void xchk_xref_is_inode_chunk(struct xfs_scrub *sc, xfs_agblock_t agbno,
312 xfs_extlen_t len);
313void xchk_xref_is_only_owned_by(struct xfs_scrub *sc, xfs_agblock_t agbno,
314 xfs_extlen_t len, const struct xfs_owner_info *oinfo);
315void xchk_xref_is_not_owned_by(struct xfs_scrub *sc, xfs_agblock_t agbno,
316 xfs_extlen_t len, const struct xfs_owner_info *oinfo);
317void xchk_xref_has_no_owner(struct xfs_scrub *sc, xfs_agblock_t agbno,
318 xfs_extlen_t len);
319void xchk_xref_is_cow_staging(struct xfs_scrub *sc, xfs_agblock_t bno,
320 xfs_extlen_t len);
321void xchk_xref_is_not_shared(struct xfs_scrub *sc, xfs_agblock_t bno,
322 xfs_extlen_t len);
323void xchk_xref_is_not_cow_staging(struct xfs_scrub *sc, xfs_agblock_t bno,
324 xfs_extlen_t len);
325#ifdef CONFIG_XFS_RT
326void xchk_xref_is_used_rt_space(struct xfs_scrub *sc, xfs_rtblock_t rtbno,
327 xfs_extlen_t len);
328void xchk_xref_has_no_rt_owner(struct xfs_scrub *sc, xfs_rgblock_t rgbno,
329 xfs_extlen_t len);
330void xchk_xref_has_rt_owner(struct xfs_scrub *sc, xfs_rgblock_t rgbno,
331 xfs_extlen_t len);
332void xchk_xref_is_only_rt_owned_by(struct xfs_scrub *sc, xfs_rgblock_t rgbno,
333 xfs_extlen_t len, const struct xfs_owner_info *oinfo);
334void xchk_xref_is_rt_cow_staging(struct xfs_scrub *sc, xfs_rgblock_t rgbno,
335 xfs_extlen_t len);
336void xchk_xref_is_not_rt_shared(struct xfs_scrub *sc, xfs_rgblock_t rgbno,
337 xfs_extlen_t len);
338void xchk_xref_is_not_rt_cow_staging(struct xfs_scrub *sc, xfs_rgblock_t rgbno,
339 xfs_extlen_t len);
340#else
341# define xchk_xref_is_used_rt_space(sc, rtbno, len) do { } while (0)
342# define xchk_xref_has_no_rt_owner(sc, rtbno, len) do { } while (0)
343# define xchk_xref_has_rt_owner(sc, rtbno, len) do { } while (0)
344# define xchk_xref_is_only_rt_owned_by(sc, bno, len, oinfo) do { } while (0)
345# define xchk_xref_is_rt_cow_staging(sc, bno, len) do { } while (0)
346# define xchk_xref_is_not_rt_shared(sc, bno, len) do { } while (0)
347# define xchk_xref_is_not_rt_cow_staging(sc, bno, len) do { } while (0)
348#endif
349
350#endif /* __XFS_SCRUB_SCRUB_H__ */