// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2023-2025 Christoph Hellwig.
 * Copyright (c) 2024-2025, Western Digital Corporation or its affiliates.
 */
#include "xfs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_rtbitmap.h"
#include "xfs_icache.h"
#include "xfs_zone_alloc.h"
#include "xfs_zone_priv.h"
#include "xfs_zones.h"

/*
 * Note: the zoned allocator does not support a rtextsize > 1, so this code
 * and the allocator itself use file system blocks interchangeably with
 * realtime extents without doing the otherwise required conversions.
 */

/*
 * Per-task space reservation.
 *
 * Tasks that need to wait for GC to free up space allocate one of these
 * on-stack and add it to the per-mount zi_reclaim_reservations list.
 * The GC thread will then wake the tasks in order when space becomes
 * available.
 */
struct xfs_zone_reservation {
	struct list_head	entry;
	struct task_struct	*task;
	xfs_filblks_t		count_fsb;
};
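
/*
 * Illustrative sketch (not part of the original source): the handshake
 * between a waiting task and the GC thread follows the usual
 * set_current_state()/schedule() pattern.  A waiter queues its on-stack
 * reservation on zi_reclaim_reservations and then loops roughly like this:
 *
 *	set_current_state(TASK_KILLABLE);
 *	if (xfs_dec_freecounter(...) != -ENOSPC)
 *		break;
 *	schedule();
 *
 * while xfs_zoned_add_available() wakes the queued tasks in list (FIFO)
 * order as blocks become available.  See xfs_zoned_reserve_available()
 * below for the real loop.
 */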

/*
 * Calculate the number of reserved blocks.
 *
 * XC_FREE_RTEXTENTS counts the user-available capacity, up to which the
 * file system can be filled, while XC_FREE_RTAVAILABLE counts the blocks
 * instantly available for writes without waiting for GC.
 *
 * For XC_FREE_RTAVAILABLE only the smaller reservation required for GC and
 * block zeroing is excluded from the user capacity, while XC_FREE_RTEXTENTS
 * is further restricted by at least one zone as well as the optional
 * persistently reserved blocks.  This allows the allocator to run more
 * smoothly by not always triggering GC.
 */
uint64_t
xfs_zoned_default_resblks(
	struct xfs_mount	*mp,
	enum xfs_free_counter	ctr)
{
	switch (ctr) {
	case XC_FREE_RTEXTENTS:
		return xfs_rtgs_to_rfsbs(mp, XFS_RESERVED_ZONES) +
			mp->m_sb.sb_rtreserved;
	case XC_FREE_RTAVAILABLE:
		return xfs_rtgs_to_rfsbs(mp, XFS_GC_ZONES);
	default:
		ASSERT(0);
		return 0;
	}
}
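
/*
 * Worked example (hypothetical numbers, not from the source): assume
 * XFS_GC_ZONES == 1, XFS_RESERVED_ZONES == 2, and a zone size of 65536
 * blocks.  Then XC_FREE_RTAVAILABLE excludes 1 * 65536 = 65536 blocks for
 * GC and block zeroing, while XC_FREE_RTEXTENTS excludes 2 * 65536 =
 * 131072 blocks plus whatever sb_rtreserved adds on top.
 */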

void
xfs_zoned_resv_wake_all(
	struct xfs_mount	*mp)
{
	struct xfs_zone_info		*zi = mp->m_zone_info;
	struct xfs_zone_reservation	*reservation;

	spin_lock(&zi->zi_reservation_lock);
	list_for_each_entry(reservation, &zi->zi_reclaim_reservations, entry)
		wake_up_process(reservation->task);
	spin_unlock(&zi->zi_reservation_lock);
}

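/*
 * Return freed blocks to the available pool and wake waiting reservations.
 *
 * Waiters are woken in list (FIFO) order, and only for as long as the
 * freshly summed XC_FREE_RTAVAILABLE count still covers each waiter's
 * requested block count.
 */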
void
xfs_zoned_add_available(
	struct xfs_mount	*mp,
	xfs_filblks_t		count_fsb)
{
	struct xfs_zone_info		*zi = mp->m_zone_info;
	struct xfs_zone_reservation	*reservation;

	if (list_empty_careful(&zi->zi_reclaim_reservations)) {
		xfs_add_freecounter(mp, XC_FREE_RTAVAILABLE, count_fsb);
		return;
	}

	spin_lock(&zi->zi_reservation_lock);
	xfs_add_freecounter(mp, XC_FREE_RTAVAILABLE, count_fsb);
	count_fsb = xfs_sum_freecounter(mp, XC_FREE_RTAVAILABLE);
	list_for_each_entry(reservation, &zi->zi_reclaim_reservations, entry) {
		if (reservation->count_fsb > count_fsb)
			break;
		wake_up_process(reservation->task);
		count_fsb -= reservation->count_fsb;
	}
	spin_unlock(&zi->zi_reservation_lock);
}

static int
xfs_zoned_space_wait_error(
	struct xfs_mount	*mp)
{
	if (xfs_is_shutdown(mp))
		return -EIO;
	if (fatal_signal_pending(current))
		return -EINTR;
	return 0;
}

static int
xfs_zoned_reserve_available(
	struct xfs_mount	*mp,
	xfs_filblks_t		count_fsb,
	unsigned int		flags)
{
	struct xfs_zone_info		*zi = mp->m_zone_info;
	struct xfs_zone_reservation	reservation = {
		.task		= current,
		.count_fsb	= count_fsb,
	};
	int				error;

	/*
	 * If there are no waiters, try to directly grab the available blocks
	 * from the percpu counter.
	 *
	 * If the caller wants to dip into the reserved pool, also bypass the
	 * wait list.  This relies on the fact that we have a generously sized
	 * reserved pool that always has enough space.  If the reserved
	 * allocations fail, we're in trouble.
	 */
	if (likely(list_empty_careful(&zi->zi_reclaim_reservations) ||
	    (flags & XFS_ZR_RESERVED))) {
		error = xfs_dec_freecounter(mp, XC_FREE_RTAVAILABLE, count_fsb,
				flags & XFS_ZR_RESERVED);
		if (error != -ENOSPC)
			return error;
	}

	if (flags & XFS_ZR_NOWAIT)
		return -EAGAIN;

	spin_lock(&zi->zi_reservation_lock);
	list_add_tail(&reservation.entry, &zi->zi_reclaim_reservations);
	while ((error = xfs_zoned_space_wait_error(mp)) == 0) {
		set_current_state(TASK_KILLABLE);

		error = xfs_dec_freecounter(mp, XC_FREE_RTAVAILABLE, count_fsb,
				flags & XFS_ZR_RESERVED);
		if (error != -ENOSPC)
			break;

		/*
		 * Make sure to start GC if it is not running already.  As we
		 * check the rtavailable count when filling up zones, GC is
		 * normally already running at this point, but in some setups
		 * with very few zones we may completely run out of non-
		 * reserved blocks in between filling zones.
		 */
		if (!xfs_is_zonegc_running(mp))
			wake_up_process(zi->zi_gc_thread);

		/*
		 * If there is no reclaimable group left and we aren't still
		 * processing a pending GC request, give up, as we're fully
		 * out of space.
		 */
		if (!xfs_zoned_have_reclaimable(mp->m_zone_info) &&
		    !xfs_is_zonegc_running(mp))
			break;

		spin_unlock(&zi->zi_reservation_lock);
		schedule();
		spin_lock(&zi->zi_reservation_lock);
	}
	list_del(&reservation.entry);
	spin_unlock(&zi->zi_reservation_lock);

	__set_current_state(TASK_RUNNING);
	return error;
}
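
/*
 * Summary of the reservation flags used above (illustrative, derived from
 * the code in this file):
 *
 *	XFS_ZR_NOWAIT	fail with -EAGAIN instead of sleeping on the
 *			reclaim reservation list
 *	XFS_ZR_RESERVED	may dip into the reserved pool and bypasses the
 *			wait list entirely
 *	XFS_ZR_GREEDY	on ENOSPC, shrink the request to whatever is left
 *			(see xfs_zoned_reserve_extents_greedy() below)
 */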

/*
 * Implement greedy space allocation for short writes by trying to grab all
 * that is left after locking out other threads from trying to do the same.
 *
 * This isn't exactly optimal and can hopefully be replaced by a proper
 * percpu_counter primitive one day.
 */
static int
xfs_zoned_reserve_extents_greedy(
	struct xfs_mount	*mp,
	xfs_filblks_t		*count_fsb,
	unsigned int		flags)
{
	struct xfs_zone_info	*zi = mp->m_zone_info;
	s64			len = *count_fsb;
	int			error = -ENOSPC;

	spin_lock(&zi->zi_reservation_lock);
	len = min(len, xfs_sum_freecounter(mp, XC_FREE_RTEXTENTS));
	if (len > 0) {
		*count_fsb = len;
		error = xfs_dec_freecounter(mp, XC_FREE_RTEXTENTS, *count_fsb,
				flags & XFS_ZR_RESERVED);
	}
	spin_unlock(&zi->zi_reservation_lock);
	return error;
}
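
/*
 * Example of the greedy path (hypothetical numbers): a caller asking for 16
 * blocks while only 7 are free in XC_FREE_RTEXTENTS has *count_fsb shrunk
 * to 7 and the reservation succeeds, allowing a short write instead of a
 * hard -ENOSPC failure.
 */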

int
xfs_zoned_space_reserve(
	struct xfs_mount	*mp,
	xfs_filblks_t		count_fsb,
	unsigned int		flags,
	struct xfs_zone_alloc_ctx *ac)
{
	int			error;

	ASSERT(ac->reserved_blocks == 0);
	ASSERT(ac->open_zone == NULL);

	error = xfs_dec_freecounter(mp, XC_FREE_RTEXTENTS, count_fsb,
			flags & XFS_ZR_RESERVED);
	if (error == -ENOSPC && !(flags & XFS_ZR_NOWAIT)) {
		xfs_inodegc_flush(mp);
		error = xfs_dec_freecounter(mp, XC_FREE_RTEXTENTS, count_fsb,
				flags & XFS_ZR_RESERVED);
	}
	if (error == -ENOSPC && (flags & XFS_ZR_GREEDY) && count_fsb > 1)
		error = xfs_zoned_reserve_extents_greedy(mp, &count_fsb, flags);
	if (error)
		return error;

	error = xfs_zoned_reserve_available(mp, count_fsb, flags);
	if (error) {
		xfs_add_freecounter(mp, XC_FREE_RTEXTENTS, count_fsb);
		return error;
	}
	ac->reserved_blocks = count_fsb;
	return 0;
}

void
xfs_zoned_space_unreserve(
	struct xfs_mount	*mp,
	struct xfs_zone_alloc_ctx *ac)
{
	if (ac->reserved_blocks > 0) {
		xfs_zoned_add_available(mp, ac->reserved_blocks);
		xfs_add_freecounter(mp, XC_FREE_RTEXTENTS, ac->reserved_blocks);
	}
	if (ac->open_zone)
		xfs_open_zone_put(ac->open_zone);
}
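
/*
 * Usage sketch (illustrative only, not part of the original file): a
 * typical write path reserves space up front and always pairs the
 * reservation with an unreserve once the I/O has been mapped.  The
 * function name below is made up; mp, count_fsb, the flags, and the
 * allocation context come from this file's API.
 */
static inline int
xfs_zoned_example_write_reserve(
	struct xfs_mount	*mp,
	xfs_filblks_t		count_fsb)
{
	struct xfs_zone_alloc_ctx ac = { };
	int			error;

	/*
	 * With XFS_ZR_GREEDY the request may be shrunk on ENOSPC;
	 * ac.reserved_blocks holds the amount actually reserved.
	 */
	error = xfs_zoned_space_reserve(mp, count_fsb, XFS_ZR_GREEDY, &ac);
	if (error)
		return error;

	/* ... allocate and write up to ac.reserved_blocks blocks ... */

	xfs_zoned_space_unreserve(mp, &ac);
	return 0;
}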
262}