// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
 * Copyright (C) 2004-2008 Red Hat, Inc.  All rights reserved.
 */

#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/swap.h>
#include <linux/delay.h>
#include <linux/bio.h>
#include <linux/gfs2_ondisk.h>

#include "gfs2.h"
#include "incore.h"
#include "glock.h"
#include "glops.h"
#include "inode.h"
#include "log.h"
#include "lops.h"
#include "meta_io.h"
#include "rgrp.h"
#include "trans.h"
#include "util.h"
#include "trace_gfs2.h"

static void gfs2_aspace_write_folio(struct folio *folio,
		struct writeback_control *wbc)
{
	struct buffer_head *bh, *head;
	int nr_underway = 0;
	blk_opf_t write_flags = REQ_META | REQ_PRIO | wbc_to_write_flags(wbc);

	BUG_ON(!folio_test_locked(folio));

	head = folio_buffers(folio);
	bh = head;

	do {
		if (!buffer_mapped(bh))
			continue;
		/*
		 * If it's a fully non-blocking write attempt and we cannot
		 * lock the buffer then redirty the folio.  Note that this can
		 * potentially cause a busy-wait loop from flusher thread and
		 * kswapd activity, but those code paths have their own
		 * higher-level throttling.
		 */
		if (wbc->sync_mode != WB_SYNC_NONE) {
			lock_buffer(bh);
		} else if (!trylock_buffer(bh)) {
			folio_redirty_for_writepage(wbc, folio);
			continue;
		}
		if (test_clear_buffer_dirty(bh)) {
			mark_buffer_async_write(bh);
		} else {
			unlock_buffer(bh);
		}
	} while ((bh = bh->b_this_page) != head);

	/*
	 * The folio and its buffers are protected from truncation by
	 * the writeback flag, so we can drop the bh refcounts early.
	 */
	BUG_ON(folio_test_writeback(folio));
	folio_start_writeback(folio);

	do {
		struct buffer_head *next = bh->b_this_page;
		if (buffer_async_write(bh)) {
			submit_bh(REQ_OP_WRITE | write_flags, bh);
			nr_underway++;
		}
		bh = next;
	} while (bh != head);
	folio_unlock(folio);

	if (nr_underway == 0)
		folio_end_writeback(folio);
}

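/*
 * Illustrative note (editor's addition): with folios larger than the
 * filesystem block size (e.g. 4 KiB folios over 1 KiB blocks), the first
 * pass above can mark several buffers async-write, and the second pass then
 * submits one bio per such buffer.  folio_end_writeback() runs either here
 * (when no buffer was dirty) or from the buffers' async end_io path once
 * the last one completes.
 */
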
static int gfs2_aspace_writepages(struct address_space *mapping,
		struct writeback_control *wbc)
{
	struct folio *folio = NULL;
	int error;

	while ((folio = writeback_iter(mapping, wbc, folio, &error)))
		gfs2_aspace_write_folio(folio, wbc);

	return error;
}

const struct address_space_operations gfs2_meta_aops = {
	.dirty_folio = block_dirty_folio,
	.invalidate_folio = block_invalidate_folio,
	.writepages = gfs2_aspace_writepages,
	.release_folio = gfs2_release_folio,
	.migrate_folio = buffer_migrate_folio_norefs,
};

const struct address_space_operations gfs2_rgrp_aops = {
	.dirty_folio = block_dirty_folio,
	.invalidate_folio = block_invalidate_folio,
	.writepages = gfs2_aspace_writepages,
	.release_folio = gfs2_release_folio,
	.migrate_folio = buffer_migrate_folio_norefs,
};

/**
 * gfs2_getbuf - Get a buffer with a given address space
 * @gl: the glock
 * @blkno: the block number (filesystem scope)
 * @create: 1 if the buffer should be created
 *
 * Returns: the buffer
 */

struct buffer_head *gfs2_getbuf(struct gfs2_glock *gl, u64 blkno, int create)
{
	struct address_space *mapping = gfs2_glock2aspace(gl);
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
	struct folio *folio;
	struct buffer_head *bh;
	unsigned int shift;
	unsigned long index;
	unsigned int bufnum;

	if (mapping == NULL)
		mapping = gfs2_aspace(sdp);

	shift = PAGE_SHIFT - sdp->sd_sb.sb_bsize_shift;
	index = blkno >> shift;             /* convert block to page */
	bufnum = blkno - (index << shift);  /* block buf index within page */
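	/*
	 * Worked example (editor's illustration): with PAGE_SHIFT == 12 and
	 * a 1 KiB block size (sb_bsize_shift == 10), shift is 2, so each
	 * page holds four blocks.  blkno 11 then maps to page index
	 * 11 >> 2 == 2 and buffer number 11 - (2 << 2) == 3 in that page.
	 */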

	if (create) {
		folio = __filemap_get_folio(mapping, index,
				FGP_LOCK | FGP_ACCESSED | FGP_CREAT,
				mapping_gfp_mask(mapping) | __GFP_NOFAIL);
		bh = folio_buffers(folio);
		if (!bh)
			bh = create_empty_buffers(folio,
				sdp->sd_sb.sb_bsize, 0);
	} else {
		folio = __filemap_get_folio(mapping, index,
				FGP_LOCK | FGP_ACCESSED, 0);
		if (IS_ERR(folio))
			return NULL;
		bh = folio_buffers(folio);
	}

	if (!bh)
		goto out_unlock;

	bh = get_nth_bh(bh, bufnum);
	if (!buffer_mapped(bh))
		map_bh(bh, sdp->sd_vfs, blkno);

out_unlock:
	folio_unlock(folio);
	folio_put(folio);

	return bh;
}

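/*
 * Usage sketch (editor's illustration, not part of the original file): with
 * CREATE, the __GFP_NOFAIL allocation above means a buffer is always
 * returned; with NO_CREATE, a NULL return means the block is not cached.
 * The caller owns a reference either way:
 *
 *	struct buffer_head *bh = gfs2_getbuf(gl, blkno, NO_CREATE);
 *
 *	if (bh) {
 *		... inspect bh->b_data ...
 *		brelse(bh);
 *	}
 */
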
static void meta_prep_new(struct buffer_head *bh)
{
	struct gfs2_meta_header *mh = (struct gfs2_meta_header *)bh->b_data;

	lock_buffer(bh);
	clear_buffer_dirty(bh);
	set_buffer_uptodate(bh);
	unlock_buffer(bh);

	mh->mh_magic = cpu_to_be32(GFS2_MAGIC);
}

/**
 * gfs2_meta_new - Get a block
 * @gl: The glock associated with this block
 * @blkno: The block number
 *
 * Returns: The buffer
 */

struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, u64 blkno)
{
	struct buffer_head *bh;
	bh = gfs2_getbuf(gl, blkno, CREATE);
	meta_prep_new(bh);
	return bh;
}

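/*
 * Illustrative note (editor's addition): gfs2_meta_new() is meant for
 * freshly allocated blocks, so the buffer is marked uptodate without a disk
 * read and only mh_magic is pre-filled; the caller is expected to fill in
 * the rest of the metadata header and the payload before the block is
 * journaled.
 */
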
static void gfs2_meta_read_endio(struct bio *bio)
{
	struct folio_iter fi;

	bio_for_each_folio_all(fi, bio) {
		struct folio *folio = fi.folio;
		struct buffer_head *bh = folio_buffers(folio);
		size_t len = fi.length;

		while (bh_offset(bh) < fi.offset)
			bh = bh->b_this_page;
		do {
			struct buffer_head *next = bh->b_this_page;
			len -= bh->b_size;
			bh->b_end_io(bh, !bio->bi_status);
			bh = next;
		} while (bh && len);
	}
	bio_put(bio);
}

/*
 * Submit several consecutive buffer head I/O requests as a single bio I/O
 * request.  (See submit_bh_wbc.)
 */
static void gfs2_submit_bhs(blk_opf_t opf, struct buffer_head *bhs[], int num)
{
	while (num > 0) {
		struct buffer_head *bh = *bhs;
		struct bio *bio;

		bio = bio_alloc(bh->b_bdev, num, opf, GFP_NOIO);
		bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> SECTOR_SHIFT);
		while (num > 0) {
			bh = *bhs;
			if (!bio_add_folio(bio, bh->b_folio, bh->b_size, bh_offset(bh))) {
				BUG_ON(bio->bi_iter.bi_size == 0);
				break;
			}
			bhs++;
			num--;
		}
		bio->bi_end_io = gfs2_meta_read_endio;
		submit_bio(bio);
	}
}

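/*
 * Illustrative note (editor's addition): in the gfs2_meta_read() path below,
 * "num" is at most 2 (the requested block plus one read-ahead block).  Since
 * those buffers are disk-adjacent, bio_add_folio() normally accepts both and
 * a single bio covers them; if it ever refuses, the outer loop restarts and
 * the remainder goes out in a second bio.
 */
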
/**
 * gfs2_meta_read - Read a block from disk
 * @gl: The glock covering the block
 * @blkno: The block number
 * @flags: flags
 * @rahead: Do read-ahead
 * @bhp: the place where the buffer is returned (NULL on failure)
 *
 * Returns: errno
 */

int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags,
		   int rahead, struct buffer_head **bhp)
{
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
	struct buffer_head *bh, *bhs[2];
	int num = 0;

	if (gfs2_withdrawn(sdp)) {
		*bhp = NULL;
		return -EIO;
	}

	*bhp = bh = gfs2_getbuf(gl, blkno, CREATE);

	lock_buffer(bh);
	if (buffer_uptodate(bh)) {
		unlock_buffer(bh);
		flags &= ~DIO_WAIT;
	} else {
		bh->b_end_io = end_buffer_read_sync;
		get_bh(bh);
		bhs[num++] = bh;
	}

	if (rahead) {
		bh = gfs2_getbuf(gl, blkno + 1, CREATE);

		lock_buffer(bh);
		if (buffer_uptodate(bh)) {
			unlock_buffer(bh);
			brelse(bh);
		} else {
			bh->b_end_io = end_buffer_read_sync;
			bhs[num++] = bh;
		}
	}

	gfs2_submit_bhs(REQ_OP_READ | REQ_META | REQ_PRIO, bhs, num);
	if (!(flags & DIO_WAIT))
		return 0;

	bh = *bhp;
	wait_on_buffer(bh);
	if (unlikely(!buffer_uptodate(bh))) {
		struct gfs2_trans *tr = current->journal_info;
		if (tr && test_bit(TR_TOUCHED, &tr->tr_flags))
			gfs2_io_error_bh(sdp, bh);
		brelse(bh);
		*bhp = NULL;
		return -EIO;
	}

	return 0;
}

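/*
 * Usage sketch (editor's illustration): a typical synchronous read of one
 * metadata block with no read-ahead:
 *
 *	struct buffer_head *bh;
 *	int error;
 *
 *	error = gfs2_meta_read(gl, blkno, DIO_WAIT, 0, &bh);
 *	if (error)
 *		return error;
 *	... use bh->b_data ...
 *	brelse(bh);
 *
 * Without DIO_WAIT the read is merely started; the caller must then use
 * gfs2_meta_wait() (or wait_on_buffer()) before trusting the contents.
 */
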
/**
 * gfs2_meta_wait - Wait for a block read to complete
 * @sdp: the filesystem
 * @bh: The block to wait for
 *
 * Returns: errno
 */

int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh)
{
	if (gfs2_withdrawn(sdp))
		return -EIO;

	wait_on_buffer(bh);

	if (!buffer_uptodate(bh)) {
		struct gfs2_trans *tr = current->journal_info;
		if (tr && test_bit(TR_TOUCHED, &tr->tr_flags))
			gfs2_io_error_bh(sdp, bh);
		return -EIO;
	}
	if (gfs2_withdrawn(sdp))
		return -EIO;

	return 0;
}

void gfs2_remove_from_journal(struct buffer_head *bh, int meta)
{
	struct address_space *mapping = bh->b_folio->mapping;
	struct gfs2_sbd *sdp = gfs2_mapping2sbd(mapping);
	struct gfs2_bufdata *bd = bh->b_private;
	struct gfs2_trans *tr = current->journal_info;
	int was_pinned = 0;

	if (test_clear_buffer_pinned(bh)) {
		trace_gfs2_pin(bd, 0);
		atomic_dec(&sdp->sd_log_pinned);
		list_del_init(&bd->bd_list);
		if (meta == REMOVE_META)
			tr->tr_num_buf_rm++;
		else
			tr->tr_num_databuf_rm++;
		set_bit(TR_TOUCHED, &tr->tr_flags);
		was_pinned = 1;
		brelse(bh);
	}
	if (bd) {
		if (bd->bd_tr) {
			gfs2_trans_add_revoke(sdp, bd);
		} else if (was_pinned) {
			bh->b_private = NULL;
			kmem_cache_free(gfs2_bufdata_cachep, bd);
		} else if (!list_empty(&bd->bd_ail_st_list) &&
			   !list_empty(&bd->bd_ail_gl_list)) {
			gfs2_remove_from_ail(bd);
		}
	}
	clear_buffer_dirty(bh);
	clear_buffer_uptodate(bh);
}

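/*
 * Illustrative note (editor's addition): as the call sites in this file
 * show, callers hold the log lock and sd_ail_lock here (gfs2_journal_wipe()
 * below also takes the buffer lock), so the pinned/bufdata state examined
 * above cannot change underneath us.
 */
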
/**
 * gfs2_ail1_wipe - remove deleted/freed buffers from the ail1 list
 * @sdp: superblock
 * @bstart: starting block address of buffers to remove
 * @blen: length of buffers to be removed
 *
 * This function is called from gfs2_journal_wipe, whose job is to remove
 * buffers, corresponding to deleted blocks, from the journal.  If we find
 * any bufdata elements on the system ail1 list, they haven't been written
 * to the journal yet.  So we remove them.
 */
static void gfs2_ail1_wipe(struct gfs2_sbd *sdp, u64 bstart, u32 blen)
{
	struct gfs2_trans *tr, *s;
	struct gfs2_bufdata *bd, *bs;
	struct buffer_head *bh;
	u64 end = bstart + blen;

	gfs2_log_lock(sdp);
	spin_lock(&sdp->sd_ail_lock);
	list_for_each_entry_safe(tr, s, &sdp->sd_ail1_list, tr_list) {
		list_for_each_entry_safe(bd, bs, &tr->tr_ail1_list,
					 bd_ail_st_list) {
			bh = bd->bd_bh;
			if (bh->b_blocknr < bstart || bh->b_blocknr >= end)
				continue;

			gfs2_remove_from_journal(bh, REMOVE_JDATA);
		}
	}
	spin_unlock(&sdp->sd_ail_lock);
	gfs2_log_unlock(sdp);
}

static struct buffer_head *gfs2_getjdatabuf(struct gfs2_inode *ip, u64 blkno)
{
	struct address_space *mapping = ip->i_inode.i_mapping;
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct folio *folio;
	struct buffer_head *bh;
	unsigned int shift = PAGE_SHIFT - sdp->sd_sb.sb_bsize_shift;
	unsigned long index = blkno >> shift;  /* convert block to page */
	unsigned int bufnum = blkno - (index << shift);

	folio = __filemap_get_folio(mapping, index, FGP_LOCK | FGP_ACCESSED, 0);
	if (IS_ERR(folio))
		return NULL;
	bh = folio_buffers(folio);
	if (bh)
		bh = get_nth_bh(bh, bufnum);
	folio_unlock(folio);
	folio_put(folio);
	return bh;
}

/**
 * gfs2_journal_wipe - ensure an inode's buffers are no longer dirty or pinned
 * @ip: the inode that owns the buffers
 * @bstart: the first buffer in the run
 * @blen: the number of buffers in the run
 */

void gfs2_journal_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct buffer_head *bh;
	int ty;

	/* This can only happen during incomplete inode creation. */
	if (!ip->i_gl)
		return;

	gfs2_ail1_wipe(sdp, bstart, blen);
	while (blen) {
		ty = REMOVE_META;
		bh = gfs2_getbuf(ip->i_gl, bstart, NO_CREATE);
		if (!bh && gfs2_is_jdata(ip)) {
			bh = gfs2_getjdatabuf(ip, bstart);
			ty = REMOVE_JDATA;
		}
		if (bh) {
			lock_buffer(bh);
			gfs2_log_lock(sdp);
			spin_lock(&sdp->sd_ail_lock);
			gfs2_remove_from_journal(bh, ty);
			spin_unlock(&sdp->sd_ail_lock);
			gfs2_log_unlock(sdp);
			unlock_buffer(bh);
			brelse(bh);
		}

		bstart++;
		blen--;
	}
}

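/*
 * Illustrative note (editor's addition): this is the deallocation-side
 * cleanup for a run of blocks [bstart, bstart + blen).  Each cached block is
 * handled one at a time under the lock order shown above (buffer lock, log
 * lock, then sd_ail_lock), covering both ordinary metadata and jdata
 * buffers.
 */
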
/**
 * gfs2_meta_buffer - Get a metadata buffer
 * @ip: The GFS2 inode
 * @mtype: The block type (GFS2_METATYPE_*)
 * @num: The block number (device relative) of the buffer
 * @bhp: the buffer is returned here
 *
 * Returns: errno
 */

int gfs2_meta_buffer(struct gfs2_inode *ip, u32 mtype, u64 num,
		     struct buffer_head **bhp)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct gfs2_glock *gl = ip->i_gl;
	struct buffer_head *bh;
	int ret = 0;
	int rahead = 0;

	if (num == ip->i_no_addr)
		rahead = ip->i_rahead;

	ret = gfs2_meta_read(gl, num, DIO_WAIT, rahead, &bh);
	if (ret == 0 && gfs2_metatype_check(sdp, bh, mtype)) {
		brelse(bh);
		ret = -EIO;
	} else {
		*bhp = bh;
	}
	return ret;
}

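/*
 * Usage sketch (editor's illustration): reading an inode's own dinode block
 * and verifying its on-disk type in one call:
 *
 *	struct buffer_head *dibh;
 *	int error;
 *
 *	error = gfs2_meta_buffer(ip, GFS2_METATYPE_DI, ip->i_no_addr, &dibh);
 *	if (error)
 *		return error;
 *	... dibh->b_data now holds a struct gfs2_dinode ...
 *	brelse(dibh);
 */
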
/**
 * gfs2_meta_ra - start readahead on an extent of a file
 * @gl: the glock the blocks belong to
 * @dblock: the starting disk block
 * @extlen: the number of blocks in the extent
 *
 * returns: the first buffer in the extent
 */

struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen)
{
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
	struct buffer_head *first_bh, *bh;
	u32 max_ra = gfs2_tune_get(sdp, gt_max_readahead) >>
		     sdp->sd_sb.sb_bsize_shift;

	BUG_ON(!extlen);

	if (max_ra < 1)
		max_ra = 1;
	if (extlen > max_ra)
		extlen = max_ra;

	first_bh = gfs2_getbuf(gl, dblock, CREATE);

	if (buffer_uptodate(first_bh))
		goto out;
	bh_read_nowait(first_bh, REQ_META | REQ_PRIO);

	dblock++;
	extlen--;

	while (extlen) {
		bh = gfs2_getbuf(gl, dblock, CREATE);

		bh_readahead(bh, REQ_RAHEAD | REQ_META | REQ_PRIO);
		brelse(bh);
		dblock++;
		extlen--;
		if (!buffer_locked(first_bh) && buffer_uptodate(first_bh))
			goto out;
	}

	wait_on_buffer(first_bh);
out:
	return first_bh;
}

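/*
 * Usage sketch (editor's illustration): the returned buffer carries a
 * reference, and the function waits for the first block's read to finish,
 * but a failed read leaves it !uptodate, so callers should still check:
 *
 *	struct buffer_head *bh = gfs2_meta_ra(gl, dblock, extlen);
 *
 *	if (!buffer_uptodate(bh)) {
 *		brelse(bh);
 *		return -EIO;
 *	}
 *	... use bh ...
 *	brelse(bh);
 */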