Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1// SPDX-License-Identifier: GPL-2.0-or-later
2/*
3 * Copyright (c) 2018-2024 Oracle. All Rights Reserved.
4 * Author: Darrick J. Wong <djwong@kernel.org>
5 */
6#include "xfs_platform.h"
7#include "xfs_fs.h"
8#include "xfs_shared.h"
9#include "xfs_format.h"
10#include "xfs_log_format.h"
11#include "xfs_trans_resv.h"
12#include "xfs_bit.h"
13#include "xfs_sb.h"
14#include "xfs_mount.h"
15#include "xfs_defer.h"
16#include "xfs_trans.h"
17#include "xfs_metafile.h"
18#include "xfs_trace.h"
19#include "xfs_inode.h"
20#include "xfs_quota.h"
21#include "xfs_errortag.h"
22#include "xfs_error.h"
23#include "xfs_alloc.h"
24#include "xfs_rtgroup.h"
25#include "xfs_rtrmap_btree.h"
26#include "xfs_rtrefcount_btree.h"
27
28static const struct {
29 enum xfs_metafile_type mtype;
30 const char *name;
31} xfs_metafile_type_strs[] = { XFS_METAFILE_TYPE_STR };
32
33const char *
34xfs_metafile_type_str(enum xfs_metafile_type metatype)
35{
36 unsigned int i;
37
38 for (i = 0; i < ARRAY_SIZE(xfs_metafile_type_strs); i++) {
39 if (xfs_metafile_type_strs[i].mtype == metatype)
40 return xfs_metafile_type_strs[i].name;
41 }
42
43 return NULL;
44}
45
46/* Set up an inode to be recognized as a metadata directory inode. */
47void
48xfs_metafile_set_iflag(
49 struct xfs_trans *tp,
50 struct xfs_inode *ip,
51 enum xfs_metafile_type metafile_type)
52{
53 VFS_I(ip)->i_mode &= ~0777;
54 VFS_I(ip)->i_uid = GLOBAL_ROOT_UID;
55 VFS_I(ip)->i_gid = GLOBAL_ROOT_GID;
56 if (S_ISDIR(VFS_I(ip)->i_mode))
57 ip->i_diflags |= XFS_METADIR_DIFLAGS;
58 else
59 ip->i_diflags |= XFS_METAFILE_DIFLAGS;
60 ip->i_diflags2 &= ~XFS_DIFLAG2_DAX;
61 ip->i_diflags2 |= XFS_DIFLAG2_METADATA;
62 ip->i_metatype = metafile_type;
63 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
64
65 XFS_STATS_DEC(ip->i_mount, xs_inodes_active);
66 XFS_STATS_INC(ip->i_mount, xs_inodes_meta);
67}
68
69/* Clear the metadata directory inode flag. */
70void
71xfs_metafile_clear_iflag(
72 struct xfs_trans *tp,
73 struct xfs_inode *ip)
74{
75 ASSERT(xfs_is_metadir_inode(ip));
76 ASSERT(VFS_I(ip)->i_nlink == 0);
77
78 ip->i_diflags2 &= ~XFS_DIFLAG2_METADATA;
79 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
80 XFS_STATS_INC(ip->i_mount, xs_inodes_active);
81 XFS_STATS_DEC(ip->i_mount, xs_inodes_meta);
82}
83
84/*
85 * Is the metafile reservations at or beneath a certain threshold?
86 */
87static inline bool
88xfs_metafile_resv_can_cover(
89 struct xfs_mount *mp,
90 int64_t rhs)
91{
92 /*
93 * The amount of space that can be allocated to this metadata file is
94 * the remaining reservation for the particular metadata file + the
95 * global free block count. Take care of the first case to avoid
96 * touching the per-cpu counter.
97 */
98 if (mp->m_metafile_resv_avail >= rhs)
99 return true;
100
101 /*
102 * There aren't enough blocks left in the inode's reservation, but it
103 * isn't critical unless there also isn't enough free space.
104 */
105 return xfs_compare_freecounter(mp, XC_FREE_BLOCKS,
106 rhs - mp->m_metafile_resv_avail, 2048) >= 0;
107}
108
109/*
110 * Is the metafile reservation critically low on blocks? For now we'll define
111 * that as the number of blocks we can get our hands on being less than 10% of
112 * what we reserved or less than some arbitrary number (maximum btree height).
113 */
114bool
115xfs_metafile_resv_critical(
116 struct xfs_mount *mp)
117{
118 ASSERT(xfs_has_metadir(mp));
119
120 trace_xfs_metafile_resv_critical(mp, 0);
121
122 if (!xfs_metafile_resv_can_cover(mp, mp->m_rtbtree_maxlevels))
123 return true;
124
125 if (!xfs_metafile_resv_can_cover(mp,
126 div_u64(mp->m_metafile_resv_target, 10)))
127 return true;
128
129 return XFS_TEST_ERROR(mp, XFS_ERRTAG_METAFILE_RESV_CRITICAL);
130}
131
132/* Allocate a block from the metadata file's reservation. */
133void
134xfs_metafile_resv_alloc_space(
135 struct xfs_inode *ip,
136 struct xfs_alloc_arg *args)
137{
138 struct xfs_mount *mp = ip->i_mount;
139 int64_t len = args->len;
140
141 ASSERT(xfs_is_metadir_inode(ip));
142 ASSERT(args->resv == XFS_AG_RESV_METAFILE);
143
144 trace_xfs_metafile_resv_alloc_space(mp, args->len);
145
146 /*
147 * Allocate the blocks from the metadata inode's block reservation
148 * and update the ondisk sb counter.
149 */
150 mutex_lock(&mp->m_metafile_resv_lock);
151 if (mp->m_metafile_resv_avail > 0) {
152 int64_t from_resv;
153
154 from_resv = min_t(int64_t, len, mp->m_metafile_resv_avail);
155 mp->m_metafile_resv_avail -= from_resv;
156 xfs_mod_delalloc(ip, 0, -from_resv);
157 xfs_trans_mod_sb(args->tp, XFS_TRANS_SB_RES_FDBLOCKS,
158 -from_resv);
159 len -= from_resv;
160 }
161
162 /*
163 * Any allocation in excess of the reservation requires in-core and
164 * on-disk fdblocks updates. If we can grab @len blocks from the
165 * in-core fdblocks then all we need to do is update the on-disk
166 * superblock; if not, then try to steal some from the transaction's
167 * block reservation. Overruns are only expected for rmap btrees.
168 */
169 if (len) {
170 unsigned int field;
171 int error;
172
173 error = xfs_dec_fdblocks(ip->i_mount, len, true);
174 if (error)
175 field = XFS_TRANS_SB_FDBLOCKS;
176 else
177 field = XFS_TRANS_SB_RES_FDBLOCKS;
178
179 xfs_trans_mod_sb(args->tp, field, -len);
180 }
181
182 mp->m_metafile_resv_used += args->len;
183 mutex_unlock(&mp->m_metafile_resv_lock);
184
185 ip->i_nblocks += args->len;
186 xfs_trans_log_inode(args->tp, ip, XFS_ILOG_CORE);
187}
188
189/* Free a block to the metadata file's reservation. */
190void
191xfs_metafile_resv_free_space(
192 struct xfs_inode *ip,
193 struct xfs_trans *tp,
194 xfs_filblks_t len)
195{
196 struct xfs_mount *mp = ip->i_mount;
197 int64_t to_resv;
198
199 ASSERT(xfs_is_metadir_inode(ip));
200
201 trace_xfs_metafile_resv_free_space(mp, len);
202
203 ip->i_nblocks -= len;
204 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
205
206 mutex_lock(&mp->m_metafile_resv_lock);
207 mp->m_metafile_resv_used -= len;
208
209 /*
210 * Add the freed blocks back into the inode's delalloc reservation
211 * until it reaches the maximum size. Update the ondisk fdblocks only.
212 */
213 to_resv = mp->m_metafile_resv_target -
214 (mp->m_metafile_resv_used + mp->m_metafile_resv_avail);
215 if (to_resv > 0) {
216 to_resv = min_t(int64_t, to_resv, len);
217 mp->m_metafile_resv_avail += to_resv;
218 xfs_mod_delalloc(ip, 0, to_resv);
219 xfs_trans_mod_sb(tp, XFS_TRANS_SB_RES_FDBLOCKS, to_resv);
220 len -= to_resv;
221 }
222 mutex_unlock(&mp->m_metafile_resv_lock);
223
224 /*
225 * Everything else goes back to the filesystem, so update the in-core
226 * and on-disk counters.
227 */
228 if (len)
229 xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, len);
230}
231
232static void
233__xfs_metafile_resv_free(
234 struct xfs_mount *mp)
235{
236 if (mp->m_metafile_resv_avail) {
237 xfs_mod_sb_delalloc(mp, -(int64_t)mp->m_metafile_resv_avail);
238 xfs_add_fdblocks(mp, mp->m_metafile_resv_avail);
239 }
240 mp->m_metafile_resv_avail = 0;
241 mp->m_metafile_resv_used = 0;
242 mp->m_metafile_resv_target = 0;
243}
244
245/* Release unused metafile space reservation. */
246void
247xfs_metafile_resv_free(
248 struct xfs_mount *mp)
249{
250 if (!xfs_has_metadir(mp))
251 return;
252
253 trace_xfs_metafile_resv_free(mp, 0);
254
255 mutex_lock(&mp->m_metafile_resv_lock);
256 __xfs_metafile_resv_free(mp);
257 mutex_unlock(&mp->m_metafile_resv_lock);
258}
259
260/* Set up a metafile space reservation. */
261int
262xfs_metafile_resv_init(
263 struct xfs_mount *mp)
264{
265 struct xfs_rtgroup *rtg = NULL;
266 xfs_filblks_t used = 0, target = 0;
267 xfs_filblks_t hidden_space;
268 xfs_rfsblock_t dblocks_avail = mp->m_sb.sb_dblocks / 4;
269 int error = 0;
270
271 if (!xfs_has_metadir(mp))
272 return 0;
273
274 /*
275 * Free any previous reservation to have a clean slate.
276 */
277 mutex_lock(&mp->m_metafile_resv_lock);
278 __xfs_metafile_resv_free(mp);
279
280 /*
281 * Currently the only btree metafiles that require reservations are the
282 * rtrmap and the rtrefcount. Anything new will have to be added here
283 * as well.
284 */
285 while ((rtg = xfs_rtgroup_next(mp, rtg))) {
286 if (xfs_has_rtrmapbt(mp)) {
287 used += rtg_rmap(rtg)->i_nblocks;
288 target += xfs_rtrmapbt_calc_reserves(mp);
289 }
290 if (xfs_has_rtreflink(mp)) {
291 used += rtg_refcount(rtg)->i_nblocks;
292 target += xfs_rtrefcountbt_calc_reserves(mp);
293 }
294 }
295
296 if (!target)
297 goto out_unlock;
298
299 /*
300 * Space taken by the per-AG metadata btrees are accounted on-disk as
301 * used space. We therefore only hide the space that is reserved but
302 * not used by the trees.
303 */
304 if (used > target)
305 target = used;
306 else if (target > dblocks_avail)
307 target = dblocks_avail;
308 hidden_space = target - used;
309
310 error = xfs_dec_fdblocks(mp, hidden_space, true);
311 if (error) {
312 trace_xfs_metafile_resv_init_error(mp, 0);
313 goto out_unlock;
314 }
315
316 xfs_mod_sb_delalloc(mp, hidden_space);
317
318 mp->m_metafile_resv_target = target;
319 mp->m_metafile_resv_used = used;
320 mp->m_metafile_resv_avail = hidden_space;
321
322 trace_xfs_metafile_resv_init(mp, target);
323
324out_unlock:
325 mutex_unlock(&mp->m_metafile_resv_lock);
326 return error;
327}