fs/xfs/xfs_refcount_item.c at v5.7

tjh.dev / kernel
fork
Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
fork
kernel / fs / xfs / xfs_refcount_item.c
at v5.7 592 lines 16 kB view raw
wrap content
  1// SPDX-License-Identifier: GPL-2.0+
  2/*
  3 * Copyright (C) 2016 Oracle.  All Rights Reserved.
  4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
  5 */
  6#include "xfs.h"
  7#include "xfs_fs.h"
  8#include "xfs_format.h"
  9#include "xfs_log_format.h"
 10#include "xfs_trans_resv.h"
 11#include "xfs_bit.h"
 12#include "xfs_shared.h"
 13#include "xfs_mount.h"
 14#include "xfs_defer.h"
 15#include "xfs_trans.h"
 16#include "xfs_trans_priv.h"
 17#include "xfs_refcount_item.h"
 18#include "xfs_log.h"
 19#include "xfs_refcount.h"
 20#include "xfs_error.h"
 21
 22kmem_zone_t	*xfs_cui_zone;
 23kmem_zone_t	*xfs_cud_zone;
 24
 25static inline struct xfs_cui_log_item *CUI_ITEM(struct xfs_log_item *lip)
 26{
 27	return container_of(lip, struct xfs_cui_log_item, cui_item);
 28}
 29
 30void
 31xfs_cui_item_free(
 32	struct xfs_cui_log_item	*cuip)
 33{
 34	if (cuip->cui_format.cui_nextents > XFS_CUI_MAX_FAST_EXTENTS)
 35		kmem_free(cuip);
 36	else
 37		kmem_cache_free(xfs_cui_zone, cuip);
 38}
 39
 40/*
 41 * Freeing the CUI requires that we remove it from the AIL if it has already
 42 * been placed there. However, the CUI may not yet have been placed in the AIL
 43 * when called by xfs_cui_release() from CUD processing due to the ordering of
 44 * committed vs unpin operations in bulk insert operations. Hence the reference
 45 * count to ensure only the last caller frees the CUI.
 46 */
 47void
 48xfs_cui_release(
 49	struct xfs_cui_log_item	*cuip)
 50{
 51	ASSERT(atomic_read(&cuip->cui_refcount) > 0);
 52	if (atomic_dec_and_test(&cuip->cui_refcount)) {
 53		xfs_trans_ail_remove(&cuip->cui_item, SHUTDOWN_LOG_IO_ERROR);
 54		xfs_cui_item_free(cuip);
 55	}
 56}
 57
 58
 59STATIC void
 60xfs_cui_item_size(
 61	struct xfs_log_item	*lip,
 62	int			*nvecs,
 63	int			*nbytes)
 64{
 65	struct xfs_cui_log_item	*cuip = CUI_ITEM(lip);
 66
 67	*nvecs += 1;
 68	*nbytes += xfs_cui_log_format_sizeof(cuip->cui_format.cui_nextents);
 69}
 70
 71/*
 72 * This is called to fill in the vector of log iovecs for the
 73 * given cui log item. We use only 1 iovec, and we point that
 74 * at the cui_log_format structure embedded in the cui item.
 75 * It is at this point that we assert that all of the extent
 76 * slots in the cui item have been filled.
 77 */
 78STATIC void
 79xfs_cui_item_format(
 80	struct xfs_log_item	*lip,
 81	struct xfs_log_vec	*lv)
 82{
 83	struct xfs_cui_log_item	*cuip = CUI_ITEM(lip);
 84	struct xfs_log_iovec	*vecp = NULL;
 85
 86	ASSERT(atomic_read(&cuip->cui_next_extent) ==
 87			cuip->cui_format.cui_nextents);
 88
 89	cuip->cui_format.cui_type = XFS_LI_CUI;
 90	cuip->cui_format.cui_size = 1;
 91
 92	xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_CUI_FORMAT, &cuip->cui_format,
 93			xfs_cui_log_format_sizeof(cuip->cui_format.cui_nextents));
 94}
 95
 96/*
 97 * The unpin operation is the last place an CUI is manipulated in the log. It is
 98 * either inserted in the AIL or aborted in the event of a log I/O error. In
 99 * either case, the CUI transaction has been successfully committed to make it
100 * this far. Therefore, we expect whoever committed the CUI to either construct
101 * and commit the CUD or drop the CUD's reference in the event of error. Simply
102 * drop the log's CUI reference now that the log is done with it.
103 */
104STATIC void
105xfs_cui_item_unpin(
106	struct xfs_log_item	*lip,
107	int			remove)
108{
109	struct xfs_cui_log_item	*cuip = CUI_ITEM(lip);
110
111	xfs_cui_release(cuip);
112}
113
114/*
115 * The CUI has been either committed or aborted if the transaction has been
116 * cancelled. If the transaction was cancelled, an CUD isn't going to be
117 * constructed and thus we free the CUI here directly.
118 */
119STATIC void
120xfs_cui_item_release(
121	struct xfs_log_item	*lip)
122{
123	xfs_cui_release(CUI_ITEM(lip));
124}
125
126static const struct xfs_item_ops xfs_cui_item_ops = {
127	.iop_size	= xfs_cui_item_size,
128	.iop_format	= xfs_cui_item_format,
129	.iop_unpin	= xfs_cui_item_unpin,
130	.iop_release	= xfs_cui_item_release,
131};
132
133/*
134 * Allocate and initialize an cui item with the given number of extents.
135 */
136struct xfs_cui_log_item *
137xfs_cui_init(
138	struct xfs_mount		*mp,
139	uint				nextents)
140
141{
142	struct xfs_cui_log_item		*cuip;
143
144	ASSERT(nextents > 0);
145	if (nextents > XFS_CUI_MAX_FAST_EXTENTS)
146		cuip = kmem_zalloc(xfs_cui_log_item_sizeof(nextents),
147				0);
148	else
149		cuip = kmem_zone_zalloc(xfs_cui_zone, 0);
150
151	xfs_log_item_init(mp, &cuip->cui_item, XFS_LI_CUI, &xfs_cui_item_ops);
152	cuip->cui_format.cui_nextents = nextents;
153	cuip->cui_format.cui_id = (uintptr_t)(void *)cuip;
154	atomic_set(&cuip->cui_next_extent, 0);
155	atomic_set(&cuip->cui_refcount, 2);
156
157	return cuip;
158}
159
160static inline struct xfs_cud_log_item *CUD_ITEM(struct xfs_log_item *lip)
161{
162	return container_of(lip, struct xfs_cud_log_item, cud_item);
163}
164
165STATIC void
166xfs_cud_item_size(
167	struct xfs_log_item	*lip,
168	int			*nvecs,
169	int			*nbytes)
170{
171	*nvecs += 1;
172	*nbytes += sizeof(struct xfs_cud_log_format);
173}
174
175/*
176 * This is called to fill in the vector of log iovecs for the
177 * given cud log item. We use only 1 iovec, and we point that
178 * at the cud_log_format structure embedded in the cud item.
179 * It is at this point that we assert that all of the extent
180 * slots in the cud item have been filled.
181 */
182STATIC void
183xfs_cud_item_format(
184	struct xfs_log_item	*lip,
185	struct xfs_log_vec	*lv)
186{
187	struct xfs_cud_log_item	*cudp = CUD_ITEM(lip);
188	struct xfs_log_iovec	*vecp = NULL;
189
190	cudp->cud_format.cud_type = XFS_LI_CUD;
191	cudp->cud_format.cud_size = 1;
192
193	xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_CUD_FORMAT, &cudp->cud_format,
194			sizeof(struct xfs_cud_log_format));
195}
196
197/*
198 * The CUD is either committed or aborted if the transaction is cancelled. If
199 * the transaction is cancelled, drop our reference to the CUI and free the
200 * CUD.
201 */
202STATIC void
203xfs_cud_item_release(
204	struct xfs_log_item	*lip)
205{
206	struct xfs_cud_log_item	*cudp = CUD_ITEM(lip);
207
208	xfs_cui_release(cudp->cud_cuip);
209	kmem_cache_free(xfs_cud_zone, cudp);
210}
211
212static const struct xfs_item_ops xfs_cud_item_ops = {
213	.flags		= XFS_ITEM_RELEASE_WHEN_COMMITTED,
214	.iop_size	= xfs_cud_item_size,
215	.iop_format	= xfs_cud_item_format,
216	.iop_release	= xfs_cud_item_release,
217};
218
219static struct xfs_cud_log_item *
220xfs_trans_get_cud(
221	struct xfs_trans		*tp,
222	struct xfs_cui_log_item		*cuip)
223{
224	struct xfs_cud_log_item		*cudp;
225
226	cudp = kmem_zone_zalloc(xfs_cud_zone, 0);
227	xfs_log_item_init(tp->t_mountp, &cudp->cud_item, XFS_LI_CUD,
228			  &xfs_cud_item_ops);
229	cudp->cud_cuip = cuip;
230	cudp->cud_format.cud_cui_id = cuip->cui_format.cui_id;
231
232	xfs_trans_add_item(tp, &cudp->cud_item);
233	return cudp;
234}
235
236/*
237 * Finish an refcount update and log it to the CUD. Note that the
238 * transaction is marked dirty regardless of whether the refcount
239 * update succeeds or fails to support the CUI/CUD lifecycle rules.
240 */
241static int
242xfs_trans_log_finish_refcount_update(
243	struct xfs_trans		*tp,
244	struct xfs_cud_log_item		*cudp,
245	enum xfs_refcount_intent_type	type,
246	xfs_fsblock_t			startblock,
247	xfs_extlen_t			blockcount,
248	xfs_fsblock_t			*new_fsb,
249	xfs_extlen_t			*new_len,
250	struct xfs_btree_cur		**pcur)
251{
252	int				error;
253
254	error = xfs_refcount_finish_one(tp, type, startblock,
255			blockcount, new_fsb, new_len, pcur);
256
257	/*
258	 * Mark the transaction dirty, even on error. This ensures the
259	 * transaction is aborted, which:
260	 *
261	 * 1.) releases the CUI and frees the CUD
262	 * 2.) shuts down the filesystem
263	 */
264	tp->t_flags |= XFS_TRANS_DIRTY;
265	set_bit(XFS_LI_DIRTY, &cudp->cud_item.li_flags);
266
267	return error;
268}
269
270/* Sort refcount intents by AG. */
271static int
272xfs_refcount_update_diff_items(
273	void				*priv,
274	struct list_head		*a,
275	struct list_head		*b)
276{
277	struct xfs_mount		*mp = priv;
278	struct xfs_refcount_intent	*ra;
279	struct xfs_refcount_intent	*rb;
280
281	ra = container_of(a, struct xfs_refcount_intent, ri_list);
282	rb = container_of(b, struct xfs_refcount_intent, ri_list);
283	return  XFS_FSB_TO_AGNO(mp, ra->ri_startblock) -
284		XFS_FSB_TO_AGNO(mp, rb->ri_startblock);
285}
286
287/* Get an CUI. */
288STATIC void *
289xfs_refcount_update_create_intent(
290	struct xfs_trans		*tp,
291	unsigned int			count)
292{
293	struct xfs_cui_log_item		*cuip;
294
295	ASSERT(tp != NULL);
296	ASSERT(count > 0);
297
298	cuip = xfs_cui_init(tp->t_mountp, count);
299	ASSERT(cuip != NULL);
300
301	/*
302	 * Get a log_item_desc to point at the new item.
303	 */
304	xfs_trans_add_item(tp, &cuip->cui_item);
305	return cuip;
306}
307
308/* Set the phys extent flags for this reverse mapping. */
309static void
310xfs_trans_set_refcount_flags(
311	struct xfs_phys_extent		*refc,
312	enum xfs_refcount_intent_type	type)
313{
314	refc->pe_flags = 0;
315	switch (type) {
316	case XFS_REFCOUNT_INCREASE:
317	case XFS_REFCOUNT_DECREASE:
318	case XFS_REFCOUNT_ALLOC_COW:
319	case XFS_REFCOUNT_FREE_COW:
320		refc->pe_flags |= type;
321		break;
322	default:
323		ASSERT(0);
324	}
325}
326
327/* Log refcount updates in the intent item. */
328STATIC void
329xfs_refcount_update_log_item(
330	struct xfs_trans		*tp,
331	void				*intent,
332	struct list_head		*item)
333{
334	struct xfs_cui_log_item		*cuip = intent;
335	struct xfs_refcount_intent	*refc;
336	uint				next_extent;
337	struct xfs_phys_extent		*ext;
338
339	refc = container_of(item, struct xfs_refcount_intent, ri_list);
340
341	tp->t_flags |= XFS_TRANS_DIRTY;
342	set_bit(XFS_LI_DIRTY, &cuip->cui_item.li_flags);
343
344	/*
345	 * atomic_inc_return gives us the value after the increment;
346	 * we want to use it as an array index so we need to subtract 1 from
347	 * it.
348	 */
349	next_extent = atomic_inc_return(&cuip->cui_next_extent) - 1;
350	ASSERT(next_extent < cuip->cui_format.cui_nextents);
351	ext = &cuip->cui_format.cui_extents[next_extent];
352	ext->pe_startblock = refc->ri_startblock;
353	ext->pe_len = refc->ri_blockcount;
354	xfs_trans_set_refcount_flags(ext, refc->ri_type);
355}
356
357/* Get an CUD so we can process all the deferred refcount updates. */
358STATIC void *
359xfs_refcount_update_create_done(
360	struct xfs_trans		*tp,
361	void				*intent,
362	unsigned int			count)
363{
364	return xfs_trans_get_cud(tp, intent);
365}
366
367/* Process a deferred refcount update. */
368STATIC int
369xfs_refcount_update_finish_item(
370	struct xfs_trans		*tp,
371	struct list_head		*item,
372	void				*done_item,
373	void				**state)
374{
375	struct xfs_refcount_intent	*refc;
376	xfs_fsblock_t			new_fsb;
377	xfs_extlen_t			new_aglen;
378	int				error;
379
380	refc = container_of(item, struct xfs_refcount_intent, ri_list);
381	error = xfs_trans_log_finish_refcount_update(tp, done_item,
382			refc->ri_type,
383			refc->ri_startblock,
384			refc->ri_blockcount,
385			&new_fsb, &new_aglen,
386			(struct xfs_btree_cur **)state);
387	/* Did we run out of reservation?  Requeue what we didn't finish. */
388	if (!error && new_aglen > 0) {
389		ASSERT(refc->ri_type == XFS_REFCOUNT_INCREASE ||
390		       refc->ri_type == XFS_REFCOUNT_DECREASE);
391		refc->ri_startblock = new_fsb;
392		refc->ri_blockcount = new_aglen;
393		return -EAGAIN;
394	}
395	kmem_free(refc);
396	return error;
397}
398
399/* Clean up after processing deferred refcounts. */
400STATIC void
401xfs_refcount_update_finish_cleanup(
402	struct xfs_trans	*tp,
403	void			*state,
404	int			error)
405{
406	struct xfs_btree_cur	*rcur = state;
407
408	xfs_refcount_finish_one_cleanup(tp, rcur, error);
409}
410
411/* Abort all pending CUIs. */
412STATIC void
413xfs_refcount_update_abort_intent(
414	void				*intent)
415{
416	xfs_cui_release(intent);
417}
418
419/* Cancel a deferred refcount update. */
420STATIC void
421xfs_refcount_update_cancel_item(
422	struct list_head		*item)
423{
424	struct xfs_refcount_intent	*refc;
425
426	refc = container_of(item, struct xfs_refcount_intent, ri_list);
427	kmem_free(refc);
428}
429
430const struct xfs_defer_op_type xfs_refcount_update_defer_type = {
431	.max_items	= XFS_CUI_MAX_FAST_EXTENTS,
432	.diff_items	= xfs_refcount_update_diff_items,
433	.create_intent	= xfs_refcount_update_create_intent,
434	.abort_intent	= xfs_refcount_update_abort_intent,
435	.log_item	= xfs_refcount_update_log_item,
436	.create_done	= xfs_refcount_update_create_done,
437	.finish_item	= xfs_refcount_update_finish_item,
438	.finish_cleanup = xfs_refcount_update_finish_cleanup,
439	.cancel_item	= xfs_refcount_update_cancel_item,
440};
441
442/*
443 * Process a refcount update intent item that was recovered from the log.
444 * We need to update the refcountbt.
445 */
446int
447xfs_cui_recover(
448	struct xfs_trans		*parent_tp,
449	struct xfs_cui_log_item		*cuip)
450{
451	int				i;
452	int				error = 0;
453	unsigned int			refc_type;
454	struct xfs_phys_extent		*refc;
455	xfs_fsblock_t			startblock_fsb;
456	bool				op_ok;
457	struct xfs_cud_log_item		*cudp;
458	struct xfs_trans		*tp;
459	struct xfs_btree_cur		*rcur = NULL;
460	enum xfs_refcount_intent_type	type;
461	xfs_fsblock_t			new_fsb;
462	xfs_extlen_t			new_len;
463	struct xfs_bmbt_irec		irec;
464	bool				requeue_only = false;
465	struct xfs_mount		*mp = parent_tp->t_mountp;
466
467	ASSERT(!test_bit(XFS_CUI_RECOVERED, &cuip->cui_flags));
468
469	/*
470	 * First check the validity of the extents described by the
471	 * CUI.  If any are bad, then assume that all are bad and
472	 * just toss the CUI.
473	 */
474	for (i = 0; i < cuip->cui_format.cui_nextents; i++) {
475		refc = &cuip->cui_format.cui_extents[i];
476		startblock_fsb = XFS_BB_TO_FSB(mp,
477				   XFS_FSB_TO_DADDR(mp, refc->pe_startblock));
478		switch (refc->pe_flags & XFS_REFCOUNT_EXTENT_TYPE_MASK) {
479		case XFS_REFCOUNT_INCREASE:
480		case XFS_REFCOUNT_DECREASE:
481		case XFS_REFCOUNT_ALLOC_COW:
482		case XFS_REFCOUNT_FREE_COW:
483			op_ok = true;
484			break;
485		default:
486			op_ok = false;
487			break;
488		}
489		if (!op_ok || startblock_fsb == 0 ||
490		    refc->pe_len == 0 ||
491		    startblock_fsb >= mp->m_sb.sb_dblocks ||
492		    refc->pe_len >= mp->m_sb.sb_agblocks ||
493		    (refc->pe_flags & ~XFS_REFCOUNT_EXTENT_FLAGS)) {
494			/*
495			 * This will pull the CUI from the AIL and
496			 * free the memory associated with it.
497			 */
498			set_bit(XFS_CUI_RECOVERED, &cuip->cui_flags);
499			xfs_cui_release(cuip);
500			return -EFSCORRUPTED;
501		}
502	}
503
504	/*
505	 * Under normal operation, refcount updates are deferred, so we
506	 * wouldn't be adding them directly to a transaction.  All
507	 * refcount updates manage reservation usage internally and
508	 * dynamically by deferring work that won't fit in the
509	 * transaction.  Normally, any work that needs to be deferred
510	 * gets attached to the same defer_ops that scheduled the
511	 * refcount update.  However, we're in log recovery here, so we
512	 * we use the passed in defer_ops and to finish up any work that
513	 * doesn't fit.  We need to reserve enough blocks to handle a
514	 * full btree split on either end of the refcount range.
515	 */
516	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate,
517			mp->m_refc_maxlevels * 2, 0, XFS_TRANS_RESERVE, &tp);
518	if (error)
519		return error;
520	/*
521	 * Recovery stashes all deferred ops during intent processing and
522	 * finishes them on completion. Transfer current dfops state to this
523	 * transaction and transfer the result back before we return.
524	 */
525	xfs_defer_move(tp, parent_tp);
526	cudp = xfs_trans_get_cud(tp, cuip);
527
528	for (i = 0; i < cuip->cui_format.cui_nextents; i++) {
529		refc = &cuip->cui_format.cui_extents[i];
530		refc_type = refc->pe_flags & XFS_REFCOUNT_EXTENT_TYPE_MASK;
531		switch (refc_type) {
532		case XFS_REFCOUNT_INCREASE:
533		case XFS_REFCOUNT_DECREASE:
534		case XFS_REFCOUNT_ALLOC_COW:
535		case XFS_REFCOUNT_FREE_COW:
536			type = refc_type;
537			break;
538		default:
539			XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
540			error = -EFSCORRUPTED;
541			goto abort_error;
542		}
543		if (requeue_only) {
544			new_fsb = refc->pe_startblock;
545			new_len = refc->pe_len;
546		} else
547			error = xfs_trans_log_finish_refcount_update(tp, cudp,
548				type, refc->pe_startblock, refc->pe_len,
549				&new_fsb, &new_len, &rcur);
550		if (error)
551			goto abort_error;
552
553		/* Requeue what we didn't finish. */
554		if (new_len > 0) {
555			irec.br_startblock = new_fsb;
556			irec.br_blockcount = new_len;
557			switch (type) {
558			case XFS_REFCOUNT_INCREASE:
559				xfs_refcount_increase_extent(tp, &irec);
560				break;
561			case XFS_REFCOUNT_DECREASE:
562				xfs_refcount_decrease_extent(tp, &irec);
563				break;
564			case XFS_REFCOUNT_ALLOC_COW:
565				xfs_refcount_alloc_cow_extent(tp,
566						irec.br_startblock,
567						irec.br_blockcount);
568				break;
569			case XFS_REFCOUNT_FREE_COW:
570				xfs_refcount_free_cow_extent(tp,
571						irec.br_startblock,
572						irec.br_blockcount);
573				break;
574			default:
575				ASSERT(0);
576			}
577			requeue_only = true;
578		}
579	}
580
581	xfs_refcount_finish_one_cleanup(tp, rcur, error);
582	set_bit(XFS_CUI_RECOVERED, &cuip->cui_flags);
583	xfs_defer_move(parent_tp, tp);
584	error = xfs_trans_commit(tp);
585	return error;
586
587abort_error:
588	xfs_refcount_finish_one_cleanup(tp, rcur, error);
589	xfs_defer_move(parent_tp, tp);
590	xfs_trans_cancel(tp);
591	return error;
592}
Configure Feed

Configure Feed