// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
 * Copyright (C) 2004-2008 Red Hat, Inc.  All rights reserved.
 */

#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/swap.h>
#include <linux/delay.h>
#include <linux/bio.h>
#include <linux/gfs2_ondisk.h>

#include "gfs2.h"
#include "incore.h"
#include "glock.h"
#include "glops.h"
#include "inode.h"
#include "log.h"
#include "lops.h"
#include "meta_io.h"
#include "rgrp.h"
#include "trans.h"
#include "util.h"
#include "trace_gfs2.h"

static void gfs2_aspace_write_folio(struct folio *folio,
		struct writeback_control *wbc)
{
	struct buffer_head *bh, *head;
	int nr_underway = 0;
	blk_opf_t write_flags = REQ_META | REQ_PRIO | wbc_to_write_flags(wbc);

	BUG_ON(!folio_test_locked(folio));

	head = folio_buffers(folio);
	bh = head;

	do {
		if (!buffer_mapped(bh))
			continue;
		/*
		 * If it's a fully non-blocking write attempt and we cannot
		 * lock the buffer then redirty the folio.  Note that this can
		 * potentially cause a busy-wait loop from flusher thread and
		 * kswapd activity, but those code paths have their own
		 * higher-level throttling.
		 */
		if (wbc->sync_mode != WB_SYNC_NONE) {
			lock_buffer(bh);
		} else if (!trylock_buffer(bh)) {
			folio_redirty_for_writepage(wbc, folio);
			continue;
		}
		if (test_clear_buffer_dirty(bh)) {
			mark_buffer_async_write(bh);
		} else {
			unlock_buffer(bh);
		}
	} while ((bh = bh->b_this_page) != head);

	/*
	 * The folio and its buffers are protected from truncation by
	 * the writeback flag, so we can drop the bh refcounts early.
	 */
	BUG_ON(folio_test_writeback(folio));
	folio_start_writeback(folio);

	do {
		struct buffer_head *next = bh->b_this_page;
		if (buffer_async_write(bh)) {
			submit_bh(REQ_OP_WRITE | write_flags, bh);
			nr_underway++;
		}
		bh = next;
	} while (bh != head);
	folio_unlock(folio);

	if (nr_underway == 0)
		folio_end_writeback(folio);
}

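/*
 * Illustrative note (editor's addition): with folios larger than the
 * filesystem block size (e.g. 4 KiB folios over 1 KiB blocks), the first
 * pass above can mark several buffers async-write, and the second pass then
 * submits one bio per such buffer.  folio_end_writeback() runs either here
 * (when no buffer was dirty) or from the buffers' async end_io path once
 * the last one completes.
 */
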
static int gfs2_aspace_writepages(struct address_space *mapping,
		struct writeback_control *wbc)
{
	struct folio *folio = NULL;
	int error;

	while ((folio = writeback_iter(mapping, wbc, folio, &error)))
		gfs2_aspace_write_folio(folio, wbc);

	return error;
}

const struct address_space_operations gfs2_meta_aops = {
	.dirty_folio = block_dirty_folio,
	.invalidate_folio = block_invalidate_folio,
	.writepages = gfs2_aspace_writepages,
	.release_folio = gfs2_release_folio,
	.migrate_folio = buffer_migrate_folio_norefs,
};

const struct address_space_operations gfs2_rgrp_aops = {
	.dirty_folio = block_dirty_folio,
	.invalidate_folio = block_invalidate_folio,
	.writepages = gfs2_aspace_writepages,
	.release_folio = gfs2_release_folio,
	.migrate_folio = buffer_migrate_folio_norefs,
};

/**
 * gfs2_getbuf - Get a buffer with a given address space
 * @gl: the glock
 * @blkno: the block number (filesystem scope)
 * @create: 1 if the buffer should be created
 *
 * Returns: the buffer
 */

struct buffer_head *gfs2_getbuf(struct gfs2_glock *gl, u64 blkno, int create)
{
	struct address_space *mapping = gfs2_glock2aspace(gl);
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
	struct folio *folio;
	struct buffer_head *bh;
	unsigned int shift;
	unsigned long index;
	unsigned int bufnum;

	if (mapping == NULL)
		mapping = gfs2_aspace(sdp);

	shift = PAGE_SHIFT - sdp->sd_sb.sb_bsize_shift;
	index = blkno >> shift;             /* convert block to page */
	bufnum = blkno - (index << shift);  /* block buf index within page */
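	/*
	 * Worked example (editor's illustration): with PAGE_SHIFT == 12 and
	 * a 1 KiB block size (sb_bsize_shift == 10), shift is 2, so each
	 * page holds four blocks.  blkno 11 then maps to page index
	 * 11 >> 2 == 2 and buffer number 11 - (2 << 2) == 3 in that page.
	 */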

	if (create) {
		folio = __filemap_get_folio(mapping, index,
				FGP_LOCK | FGP_ACCESSED | FGP_CREAT,
				mapping_gfp_mask(mapping) | __GFP_NOFAIL);
		bh = folio_buffers(folio);
		if (!bh)
			bh = create_empty_buffers(folio,
				sdp->sd_sb.sb_bsize, 0);
	} else {
		folio = __filemap_get_folio(mapping, index,
				FGP_LOCK | FGP_ACCESSED, 0);
		if (IS_ERR(folio))
			return NULL;
		bh = folio_buffers(folio);
	}

	if (!bh)
		goto out_unlock;

	bh = get_nth_bh(bh, bufnum);
	if (!buffer_mapped(bh))
		map_bh(bh, sdp->sd_vfs, blkno);

out_unlock:
	folio_unlock(folio);
	folio_put(folio);

	return bh;
}

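/*
 * Usage sketch (editor's illustration, not part of the original file): with
 * CREATE, the __GFP_NOFAIL allocation above means a buffer is always
 * returned; with NO_CREATE, a NULL return means the block is not cached.
 * The caller owns a reference either way:
 *
 *	struct buffer_head *bh = gfs2_getbuf(gl, blkno, NO_CREATE);
 *
 *	if (bh) {
 *		... inspect bh->b_data ...
 *		brelse(bh);
 *	}
 */
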
static void meta_prep_new(struct buffer_head *bh)
{
	struct gfs2_meta_header *mh = (struct gfs2_meta_header *)bh->b_data;

	lock_buffer(bh);
	clear_buffer_dirty(bh);
	set_buffer_uptodate(bh);
	unlock_buffer(bh);

	mh->mh_magic = cpu_to_be32(GFS2_MAGIC);
}

/**
 * gfs2_meta_new - Get a block
 * @gl: The glock associated with this block
 * @blkno: The block number
 *
 * Returns: The buffer
 */

struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, u64 blkno)
{
	struct buffer_head *bh;
	bh = gfs2_getbuf(gl, blkno, CREATE);
	meta_prep_new(bh);
	return bh;
}

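/*
 * Illustrative note (editor's addition): gfs2_meta_new() is meant for
 * freshly allocated blocks, so the buffer is marked uptodate without a disk
 * read and only mh_magic is pre-filled; the caller is expected to fill in
 * the rest of the metadata header and the payload before the block is
 * journaled.
 */
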
static void gfs2_meta_read_endio(struct bio *bio)
{
	struct folio_iter fi;

	bio_for_each_folio_all(fi, bio) {
		struct folio *folio = fi.folio;
		struct buffer_head *bh = folio_buffers(folio);
		size_t len = fi.length;

		while (bh_offset(bh) < fi.offset)
			bh = bh->b_this_page;
		do {
			struct buffer_head *next = bh->b_this_page;
			len -= bh->b_size;
			bh->b_end_io(bh, !bio->bi_status);
			bh = next;
		} while (bh && len);
	}
	bio_put(bio);
}

/*
 * Submit several consecutive buffer head I/O requests as a single bio I/O
 * request.  (See submit_bh_wbc.)
 */
static void gfs2_submit_bhs(blk_opf_t opf, struct buffer_head *bhs[], int num)
{
	while (num > 0) {
		struct buffer_head *bh = *bhs;
		struct bio *bio;

		bio = bio_alloc(bh->b_bdev, num, opf, GFP_NOIO);
		bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> SECTOR_SHIFT);
		while (num > 0) {
			bh = *bhs;
			if (!bio_add_folio(bio, bh->b_folio, bh->b_size, bh_offset(bh))) {
				BUG_ON(bio->bi_iter.bi_size == 0);
				break;
			}
			bhs++;
			num--;
		}
		bio->bi_end_io = gfs2_meta_read_endio;
		submit_bio(bio);
	}
}

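/*
 * Illustrative note (editor's addition): in the gfs2_meta_read() path below,
 * "num" is at most 2 (the requested block plus one read-ahead block).  Since
 * those buffers are disk-adjacent, bio_add_folio() normally accepts both and
 * a single bio covers them; if it ever refuses, the outer loop restarts and
 * the remainder goes out in a second bio.
 */
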
/**
 * gfs2_meta_read - Read a block from disk
 * @gl: The glock covering the block
 * @blkno: The block number
 * @flags: flags
 * @rahead: Do read-ahead
 * @bhp: the place where the buffer is returned (NULL on failure)
 *
 * Returns: errno
 */

int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags,
		   int rahead, struct buffer_head **bhp)
{
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
	struct buffer_head *bh, *bhs[2];
	int num = 0;

	if (gfs2_withdrawn(sdp)) {
		*bhp = NULL;
		return -EIO;
	}

	*bhp = bh = gfs2_getbuf(gl, blkno, CREATE);

	lock_buffer(bh);
	if (buffer_uptodate(bh)) {
		unlock_buffer(bh);
		flags &= ~DIO_WAIT;
	} else {
		bh->b_end_io = end_buffer_read_sync;
		get_bh(bh);
		bhs[num++] = bh;
	}

	if (rahead) {
		bh = gfs2_getbuf(gl, blkno + 1, CREATE);

		lock_buffer(bh);
		if (buffer_uptodate(bh)) {
			unlock_buffer(bh);
			brelse(bh);
		} else {
			bh->b_end_io = end_buffer_read_sync;
			bhs[num++] = bh;
		}
	}

	gfs2_submit_bhs(REQ_OP_READ | REQ_META | REQ_PRIO, bhs, num);
	if (!(flags & DIO_WAIT))
		return 0;

	bh = *bhp;
	wait_on_buffer(bh);
	if (unlikely(!buffer_uptodate(bh))) {
		struct gfs2_trans *tr = current->journal_info;
		if (tr && test_bit(TR_TOUCHED, &tr->tr_flags))
			gfs2_io_error_bh(sdp, bh);
		brelse(bh);
		*bhp = NULL;
		return -EIO;
	}

	return 0;
}

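/*
 * Usage sketch (editor's illustration): a typical synchronous read of one
 * metadata block with no read-ahead:
 *
 *	struct buffer_head *bh;
 *	int error;
 *
 *	error = gfs2_meta_read(gl, blkno, DIO_WAIT, 0, &bh);
 *	if (error)
 *		return error;
 *	... use bh->b_data ...
 *	brelse(bh);
 *
 * Without DIO_WAIT the read is merely started; the caller must then use
 * gfs2_meta_wait() (or wait_on_buffer()) before trusting the contents.
 */
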
/**
 * gfs2_meta_wait - Wait for a block read to complete
 * @sdp: the filesystem
 * @bh: The block to wait for
 *
 * Returns: errno
 */

int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh)
{
	if (gfs2_withdrawn(sdp))
		return -EIO;

	wait_on_buffer(bh);

	if (!buffer_uptodate(bh)) {
		struct gfs2_trans *tr = current->journal_info;
		if (tr && test_bit(TR_TOUCHED, &tr->tr_flags))
			gfs2_io_error_bh(sdp, bh);
		return -EIO;
	}
	if (gfs2_withdrawn(sdp))
		return -EIO;

	return 0;
}

void gfs2_remove_from_journal(struct buffer_head *bh, int meta)
{
	struct address_space *mapping = bh->b_folio->mapping;
	struct gfs2_sbd *sdp = gfs2_mapping2sbd(mapping);
	struct gfs2_bufdata *bd = bh->b_private;
	struct gfs2_trans *tr = current->journal_info;
	int was_pinned = 0;

	if (test_clear_buffer_pinned(bh)) {
		trace_gfs2_pin(bd, 0);
		atomic_dec(&sdp->sd_log_pinned);
		list_del_init(&bd->bd_list);
		if (meta == REMOVE_META)
			tr->tr_num_buf_rm++;
		else
			tr->tr_num_databuf_rm++;
		set_bit(TR_TOUCHED, &tr->tr_flags);
		was_pinned = 1;
		brelse(bh);
	}
	if (bd) {
		if (bd->bd_tr) {
			gfs2_trans_add_revoke(sdp, bd);
		} else if (was_pinned) {
			bh->b_private = NULL;
			kmem_cache_free(gfs2_bufdata_cachep, bd);
		} else if (!list_empty(&bd->bd_ail_st_list) &&
			   !list_empty(&bd->bd_ail_gl_list)) {
			gfs2_remove_from_ail(bd);
		}
	}
	clear_buffer_dirty(bh);
	clear_buffer_uptodate(bh);
}

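/*
 * Illustrative note (editor's addition): as the call sites in this file
 * show, callers hold the log lock and sd_ail_lock here (gfs2_journal_wipe()
 * below also takes the buffer lock), so the pinned/bufdata state examined
 * above cannot change underneath us.
 */
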
/**
 * gfs2_ail1_wipe - remove deleted/freed buffers from the ail1 list
 * @sdp: superblock
 * @bstart: starting block address of buffers to remove
 * @blen: length of buffers to be removed
 *
 * This function is called from gfs2_journal_wipe, whose job is to remove
 * buffers, corresponding to deleted blocks, from the journal.  If we find
 * any bufdata elements on the system ail1 list, they haven't been written
 * to the journal yet.  So we remove them.
 */
static void gfs2_ail1_wipe(struct gfs2_sbd *sdp, u64 bstart, u32 blen)
{
	struct gfs2_trans *tr, *s;
	struct gfs2_bufdata *bd, *bs;
	struct buffer_head *bh;
	u64 end = bstart + blen;

	gfs2_log_lock(sdp);
	spin_lock(&sdp->sd_ail_lock);
	list_for_each_entry_safe(tr, s, &sdp->sd_ail1_list, tr_list) {
		list_for_each_entry_safe(bd, bs, &tr->tr_ail1_list,
					 bd_ail_st_list) {
			bh = bd->bd_bh;
			if (bh->b_blocknr < bstart || bh->b_blocknr >= end)
				continue;

			gfs2_remove_from_journal(bh, REMOVE_JDATA);
		}
	}
	spin_unlock(&sdp->sd_ail_lock);
	gfs2_log_unlock(sdp);
}

static struct buffer_head *gfs2_getjdatabuf(struct gfs2_inode *ip, u64 blkno)
{
	struct address_space *mapping = ip->i_inode.i_mapping;
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct folio *folio;
	struct buffer_head *bh;
	unsigned int shift = PAGE_SHIFT - sdp->sd_sb.sb_bsize_shift;
	unsigned long index = blkno >> shift;  /* convert block to page */
	unsigned int bufnum = blkno - (index << shift);

	folio = __filemap_get_folio(mapping, index, FGP_LOCK | FGP_ACCESSED, 0);
	if (IS_ERR(folio))
		return NULL;
	bh = folio_buffers(folio);
	if (bh)
		bh = get_nth_bh(bh, bufnum);
	folio_unlock(folio);
	folio_put(folio);
	return bh;
}

/**
 * gfs2_journal_wipe - ensure an inode's buffers are no longer dirty or pinned
 * @ip: the inode that owns the buffers
 * @bstart: the first buffer in the run
 * @blen: the number of buffers in the run
 */

void gfs2_journal_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct buffer_head *bh;
	int ty;

	/* This can only happen during incomplete inode creation. */
	if (!ip->i_gl)
		return;

	gfs2_ail1_wipe(sdp, bstart, blen);
	while (blen) {
		ty = REMOVE_META;
		bh = gfs2_getbuf(ip->i_gl, bstart, NO_CREATE);
		if (!bh && gfs2_is_jdata(ip)) {
			bh = gfs2_getjdatabuf(ip, bstart);
			ty = REMOVE_JDATA;
		}
		if (bh) {
			lock_buffer(bh);
			gfs2_log_lock(sdp);
			spin_lock(&sdp->sd_ail_lock);
			gfs2_remove_from_journal(bh, ty);
			spin_unlock(&sdp->sd_ail_lock);
			gfs2_log_unlock(sdp);
			unlock_buffer(bh);
			brelse(bh);
		}

		bstart++;
		blen--;
	}
}

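/*
 * Illustrative note (editor's addition): this is the deallocation-side
 * cleanup for a run of blocks [bstart, bstart + blen).  Each cached block is
 * handled one at a time under the lock order shown above (buffer lock, log
 * lock, then sd_ail_lock), covering both ordinary metadata and jdata
 * buffers.
 */
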
/**
 * gfs2_meta_buffer - Get a metadata buffer
 * @ip: The GFS2 inode
 * @mtype: The block type (GFS2_METATYPE_*)
 * @num: The block number (device relative) of the buffer
 * @bhp: the buffer is returned here
 *
 * Returns: errno
 */

int gfs2_meta_buffer(struct gfs2_inode *ip, u32 mtype, u64 num,
		     struct buffer_head **bhp)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct gfs2_glock *gl = ip->i_gl;
	struct buffer_head *bh;
	int ret = 0;
	int rahead = 0;

	if (num == ip->i_no_addr)
		rahead = ip->i_rahead;

	ret = gfs2_meta_read(gl, num, DIO_WAIT, rahead, &bh);
	if (ret == 0 && gfs2_metatype_check(sdp, bh, mtype)) {
		brelse(bh);
		ret = -EIO;
	} else {
		*bhp = bh;
	}
	return ret;
}

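/*
 * Usage sketch (editor's illustration): reading an inode's own dinode block
 * and verifying its on-disk type in one call:
 *
 *	struct buffer_head *dibh;
 *	int error;
 *
 *	error = gfs2_meta_buffer(ip, GFS2_METATYPE_DI, ip->i_no_addr, &dibh);
 *	if (error)
 *		return error;
 *	... dibh->b_data now holds a struct gfs2_dinode ...
 *	brelse(dibh);
 */
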
/**
 * gfs2_meta_ra - start readahead on an extent of a file
 * @gl: the glock the blocks belong to
 * @dblock: the starting disk block
 * @extlen: the number of blocks in the extent
 *
 * returns: the first buffer in the extent
 */

struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen)
{
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
	struct buffer_head *first_bh, *bh;
	u32 max_ra = gfs2_tune_get(sdp, gt_max_readahead) >>
		     sdp->sd_sb.sb_bsize_shift;

	BUG_ON(!extlen);

	if (max_ra < 1)
		max_ra = 1;
	if (extlen > max_ra)
		extlen = max_ra;

	first_bh = gfs2_getbuf(gl, dblock, CREATE);

	if (buffer_uptodate(first_bh))
		goto out;
	bh_read_nowait(first_bh, REQ_META | REQ_PRIO);

	dblock++;
	extlen--;

	while (extlen) {
		bh = gfs2_getbuf(gl, dblock, CREATE);

		bh_readahead(bh, REQ_RAHEAD | REQ_META | REQ_PRIO);
		brelse(bh);
		dblock++;
		extlen--;
		if (!buffer_locked(first_bh) && buffer_uptodate(first_bh))
			goto out;
	}

	wait_on_buffer(first_bh);
out:
	return first_bh;
}

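/*
 * Usage sketch (editor's illustration): the returned buffer carries a
 * reference, and the function waits for the first block's read to finish,
 * but a failed read leaves it !uptodate, so callers should still check:
 *
 *	struct buffer_head *bh = gfs2_meta_ra(gl, dblock, extlen);
 *
 *	if (!buffer_uptodate(bh)) {
 *		brelse(bh);
 *		return -EIO;
 *	}
 *	... use bh ...
 *	brelse(bh);
 */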