Merge tag 'gfs2-4.19.fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/gfs2/linux-gfs2

+2 -4

fs/gfs2/acl.c

··· 82 82 int __gfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type) 83 83 { 84 84 int error; 85 - int len; 85 + size_t len; 86 86 char *data; 87 87 const char *name = gfs2_acl_name(type); 88 88 89 89 if (acl) { 90 - len = posix_acl_to_xattr(&init_user_ns, acl, NULL, 0); 91 - if (len == 0) 92 - return 0; 90 + len = posix_acl_xattr_size(acl->a_count); 93 91 data = kmalloc(len, GFP_NOFS); 94 92 if (data == NULL) 95 93 return -ENOMEM;

+17 -320

fs/gfs2/aops.c

··· 22 22 #include <linux/backing-dev.h> 23 23 #include <linux/uio.h> 24 24 #include <trace/events/writeback.h> 25 + #include <linux/sched/signal.h> 25 26 26 27 #include "gfs2.h" 27 28 #include "incore.h" ··· 37 36 #include "super.h" 38 37 #include "util.h" 39 38 #include "glops.h" 39 + #include "aops.h" 40 40 41 41 42 - static void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page, 43 - unsigned int from, unsigned int len) 42 + void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page, 43 + unsigned int from, unsigned int len) 44 44 { 45 45 struct buffer_head *head = page_buffers(page); 46 46 unsigned int bsize = head->b_size; ··· 82 80 if (!buffer_mapped(bh_result)) 83 81 return -EIO; 84 82 return 0; 85 - } 86 - 87 - static int gfs2_get_block_direct(struct inode *inode, sector_t lblock, 88 - struct buffer_head *bh_result, int create) 89 - { 90 - return gfs2_block_map(inode, lblock, bh_result, 0); 91 83 } 92 84 93 85 /** ··· 458 462 * Returns: errno 459 463 */ 460 464 461 - static int stuffed_readpage(struct gfs2_inode *ip, struct page *page) 465 + int stuffed_readpage(struct gfs2_inode *ip, struct page *page) 462 466 { 463 467 struct buffer_head *dibh; 464 468 u64 dsize = i_size_read(&ip->i_inode); ··· 508 512 { 509 513 struct gfs2_inode *ip = GFS2_I(page->mapping->host); 510 514 struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host); 515 + 511 516 int error; 512 517 513 - if (gfs2_is_stuffed(ip)) { 518 + if (i_blocksize(page->mapping->host) == PAGE_SIZE && 519 + !page_has_buffers(page)) { 520 + error = iomap_readpage(page, &gfs2_iomap_ops); 521 + } else if (gfs2_is_stuffed(ip)) { 514 522 error = stuffed_readpage(ip, page); 515 523 unlock_page(page); 516 524 } else { ··· 644 644 } 645 645 646 646 /** 647 - * gfs2_write_begin - Begin to write to a file 648 - * @file: The file to write to 649 - * @mapping: The mapping in which to write 650 - * @pos: The file offset at which to start writing 651 - * @len: Length of the write 652 - * @flags: 
Various flags 653 - * @pagep: Pointer to return the page 654 - * @fsdata: Pointer to return fs data (unused by GFS2) 655 - * 656 - * Returns: errno 657 - */ 658 - 659 - static int gfs2_write_begin(struct file *file, struct address_space *mapping, 660 - loff_t pos, unsigned len, unsigned flags, 661 - struct page **pagep, void **fsdata) 662 - { 663 - struct gfs2_inode *ip = GFS2_I(mapping->host); 664 - struct gfs2_sbd *sdp = GFS2_SB(mapping->host); 665 - struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode); 666 - unsigned int data_blocks = 0, ind_blocks = 0, rblocks; 667 - unsigned requested = 0; 668 - int alloc_required; 669 - int error = 0; 670 - pgoff_t index = pos >> PAGE_SHIFT; 671 - unsigned from = pos & (PAGE_SIZE - 1); 672 - struct page *page; 673 - 674 - gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh); 675 - error = gfs2_glock_nq(&ip->i_gh); 676 - if (unlikely(error)) 677 - goto out_uninit; 678 - if (&ip->i_inode == sdp->sd_rindex) { 679 - error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE, 680 - GL_NOCACHE, &m_ip->i_gh); 681 - if (unlikely(error)) { 682 - gfs2_glock_dq(&ip->i_gh); 683 - goto out_uninit; 684 - } 685 - } 686 - 687 - alloc_required = gfs2_write_alloc_required(ip, pos, len); 688 - 689 - if (alloc_required || gfs2_is_jdata(ip)) 690 - gfs2_write_calc_reserv(ip, len, &data_blocks, &ind_blocks); 691 - 692 - if (alloc_required) { 693 - struct gfs2_alloc_parms ap = { .aflags = 0, }; 694 - requested = data_blocks + ind_blocks; 695 - ap.target = requested; 696 - error = gfs2_quota_lock_check(ip, &ap); 697 - if (error) 698 - goto out_unlock; 699 - 700 - error = gfs2_inplace_reserve(ip, &ap); 701 - if (error) 702 - goto out_qunlock; 703 - } 704 - 705 - rblocks = RES_DINODE + ind_blocks; 706 - if (gfs2_is_jdata(ip)) 707 - rblocks += data_blocks ? 
data_blocks : 1; 708 - if (ind_blocks || data_blocks) 709 - rblocks += RES_STATFS + RES_QUOTA; 710 - if (&ip->i_inode == sdp->sd_rindex) 711 - rblocks += 2 * RES_STATFS; 712 - if (alloc_required) 713 - rblocks += gfs2_rg_blocks(ip, requested); 714 - 715 - error = gfs2_trans_begin(sdp, rblocks, 716 - PAGE_SIZE/sdp->sd_sb.sb_bsize); 717 - if (error) 718 - goto out_trans_fail; 719 - 720 - error = -ENOMEM; 721 - flags |= AOP_FLAG_NOFS; 722 - page = grab_cache_page_write_begin(mapping, index, flags); 723 - *pagep = page; 724 - if (unlikely(!page)) 725 - goto out_endtrans; 726 - 727 - if (gfs2_is_stuffed(ip)) { 728 - error = 0; 729 - if (pos + len > gfs2_max_stuffed_size(ip)) { 730 - error = gfs2_unstuff_dinode(ip, page); 731 - if (error == 0) 732 - goto prepare_write; 733 - } else if (!PageUptodate(page)) { 734 - error = stuffed_readpage(ip, page); 735 - } 736 - goto out; 737 - } 738 - 739 - prepare_write: 740 - error = __block_write_begin(page, from, len, gfs2_block_map); 741 - out: 742 - if (error == 0) 743 - return 0; 744 - 745 - unlock_page(page); 746 - put_page(page); 747 - 748 - gfs2_trans_end(sdp); 749 - if (alloc_required) { 750 - gfs2_inplace_release(ip); 751 - if (pos + len > ip->i_inode.i_size) 752 - gfs2_trim_blocks(&ip->i_inode); 753 - } 754 - goto out_qunlock; 755 - 756 - out_endtrans: 757 - gfs2_trans_end(sdp); 758 - out_trans_fail: 759 - if (alloc_required) 760 - gfs2_inplace_release(ip); 761 - out_qunlock: 762 - if (alloc_required) 763 - gfs2_quota_unlock(ip); 764 - out_unlock: 765 - if (&ip->i_inode == sdp->sd_rindex) { 766 - gfs2_glock_dq(&m_ip->i_gh); 767 - gfs2_holder_uninit(&m_ip->i_gh); 768 - } 769 - gfs2_glock_dq(&ip->i_gh); 770 - out_uninit: 771 - gfs2_holder_uninit(&ip->i_gh); 772 - return error; 773 - } 774 - 775 - /** 776 647 * adjust_fs_space - Adjusts the free space available due to gfs2_grow 777 648 * @inode: the rindex inode 778 649 */ 779 - static void adjust_fs_space(struct inode *inode) 650 + void adjust_fs_space(struct inode *inode) 
780 651 { 781 652 struct gfs2_sbd *sdp = inode->i_sb->s_fs_info; 782 653 struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode); ··· 693 822 * This copies the data from the page into the inode block after 694 823 * the inode data structure itself. 695 824 * 696 - * Returns: errno 825 + * Returns: copied bytes or errno 697 826 */ 698 - static int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh, 699 - loff_t pos, unsigned copied, 700 - struct page *page) 827 + int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh, 828 + loff_t pos, unsigned copied, 829 + struct page *page) 701 830 { 702 831 struct gfs2_inode *ip = GFS2_I(inode); 703 832 u64 to = pos + copied; ··· 721 850 mark_inode_dirty(inode); 722 851 } 723 852 return copied; 724 - } 725 - 726 - /** 727 - * gfs2_write_end 728 - * @file: The file to write to 729 - * @mapping: The address space to write to 730 - * @pos: The file position 731 - * @len: The length of the data 732 - * @copied: How much was actually copied by the VFS 733 - * @page: The page that has been written 734 - * @fsdata: The fsdata (unused in GFS2) 735 - * 736 - * The main write_end function for GFS2. We just put our locking around the VFS 737 - * provided functions. 
738 - * 739 - * Returns: errno 740 - */ 741 - 742 - static int gfs2_write_end(struct file *file, struct address_space *mapping, 743 - loff_t pos, unsigned len, unsigned copied, 744 - struct page *page, void *fsdata) 745 - { 746 - struct inode *inode = page->mapping->host; 747 - struct gfs2_inode *ip = GFS2_I(inode); 748 - struct gfs2_sbd *sdp = GFS2_SB(inode); 749 - struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode); 750 - struct buffer_head *dibh; 751 - int ret; 752 - struct gfs2_trans *tr = current->journal_info; 753 - BUG_ON(!tr); 754 - 755 - BUG_ON(gfs2_glock_is_locked_by_me(ip->i_gl) == NULL); 756 - 757 - ret = gfs2_meta_inode_buffer(ip, &dibh); 758 - if (unlikely(ret)) 759 - goto out; 760 - 761 - if (gfs2_is_stuffed(ip)) { 762 - ret = gfs2_stuffed_write_end(inode, dibh, pos, copied, page); 763 - page = NULL; 764 - goto out2; 765 - } 766 - 767 - if (gfs2_is_jdata(ip)) 768 - gfs2_page_add_databufs(ip, page, pos & ~PAGE_MASK, len); 769 - else 770 - gfs2_ordered_add_inode(ip); 771 - 772 - ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata); 773 - page = NULL; 774 - if (tr->tr_num_buf_new) 775 - __mark_inode_dirty(inode, I_DIRTY_DATASYNC); 776 - else 777 - gfs2_trans_add_meta(ip->i_gl, dibh); 778 - 779 - out2: 780 - if (inode == sdp->sd_rindex) { 781 - adjust_fs_space(inode); 782 - sdp->sd_rindex_uptodate = 0; 783 - } 784 - 785 - brelse(dibh); 786 - out: 787 - if (page) { 788 - unlock_page(page); 789 - put_page(page); 790 - } 791 - gfs2_trans_end(sdp); 792 - gfs2_inplace_release(ip); 793 - if (ip->i_qadata && ip->i_qadata->qa_qd_num) 794 - gfs2_quota_unlock(ip); 795 - if (inode == sdp->sd_rindex) { 796 - gfs2_glock_dq(&m_ip->i_gh); 797 - gfs2_holder_uninit(&m_ip->i_gh); 798 - } 799 - gfs2_glock_dq(&ip->i_gh); 800 - gfs2_holder_uninit(&ip->i_gh); 801 - return ret; 802 853 } 803 854 804 855 /** ··· 816 1023 } 817 1024 818 1025 /** 819 - * gfs2_ok_for_dio - check that dio is valid on this file 820 - * @ip: The inode 821 - * @offset: The offset 
at which we are reading or writing 822 - * 823 - * Returns: 0 (to ignore the i/o request and thus fall back to buffered i/o) 824 - * 1 (to accept the i/o request) 825 - */ 826 - static int gfs2_ok_for_dio(struct gfs2_inode *ip, loff_t offset) 827 - { 828 - /* 829 - * Should we return an error here? I can't see that O_DIRECT for 830 - * a stuffed file makes any sense. For now we'll silently fall 831 - * back to buffered I/O 832 - */ 833 - if (gfs2_is_stuffed(ip)) 834 - return 0; 835 - 836 - if (offset >= i_size_read(&ip->i_inode)) 837 - return 0; 838 - return 1; 839 - } 840 - 841 - 842 - 843 - static ssize_t gfs2_direct_IO(struct kiocb *iocb, struct iov_iter *iter) 844 - { 845 - struct file *file = iocb->ki_filp; 846 - struct inode *inode = file->f_mapping->host; 847 - struct address_space *mapping = inode->i_mapping; 848 - struct gfs2_inode *ip = GFS2_I(inode); 849 - loff_t offset = iocb->ki_pos; 850 - struct gfs2_holder gh; 851 - int rv; 852 - 853 - /* 854 - * Deferred lock, even if its a write, since we do no allocation 855 - * on this path. All we need change is atime, and this lock mode 856 - * ensures that other nodes have flushed their buffered read caches 857 - * (i.e. their page cache entries for this inode). We do not, 858 - * unfortunately have the option of only flushing a range like 859 - * the VFS does. 860 - */ 861 - gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, 0, &gh); 862 - rv = gfs2_glock_nq(&gh); 863 - if (rv) 864 - goto out_uninit; 865 - rv = gfs2_ok_for_dio(ip, offset); 866 - if (rv != 1) 867 - goto out; /* dio not valid, fall back to buffered i/o */ 868 - 869 - /* 870 - * Now since we are holding a deferred (CW) lock at this point, you 871 - * might be wondering why this is ever needed. There is a case however 872 - * where we've granted a deferred local lock against a cached exclusive 873 - * glock. 
That is ok provided all granted local locks are deferred, but 874 - * it also means that it is possible to encounter pages which are 875 - * cached and possibly also mapped. So here we check for that and sort 876 - * them out ahead of the dio. The glock state machine will take care of 877 - * everything else. 878 - * 879 - * If in fact the cached glock state (gl->gl_state) is deferred (CW) in 880 - * the first place, mapping->nr_pages will always be zero. 881 - */ 882 - if (mapping->nrpages) { 883 - loff_t lstart = offset & ~(PAGE_SIZE - 1); 884 - loff_t len = iov_iter_count(iter); 885 - loff_t end = PAGE_ALIGN(offset + len) - 1; 886 - 887 - rv = 0; 888 - if (len == 0) 889 - goto out; 890 - if (test_and_clear_bit(GIF_SW_PAGED, &ip->i_flags)) 891 - unmap_shared_mapping_range(ip->i_inode.i_mapping, offset, len); 892 - rv = filemap_write_and_wait_range(mapping, lstart, end); 893 - if (rv) 894 - goto out; 895 - if (iov_iter_rw(iter) == WRITE) 896 - truncate_inode_pages_range(mapping, lstart, end); 897 - } 898 - 899 - rv = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, iter, 900 - gfs2_get_block_direct, NULL, NULL, 0); 901 - out: 902 - gfs2_glock_dq(&gh); 903 - out_uninit: 904 - gfs2_holder_uninit(&gh); 905 - return rv; 906 - } 907 - 908 - /** 909 1026 * gfs2_releasepage - free the metadata associated with a page 910 1027 * @page: the page that's being released 911 1028 * @gfp_mask: passed from Linux VFS, ignored by us ··· 890 1187 .writepages = gfs2_writepages, 891 1188 .readpage = gfs2_readpage, 892 1189 .readpages = gfs2_readpages, 893 - .write_begin = gfs2_write_begin, 894 - .write_end = gfs2_write_end, 895 1190 .bmap = gfs2_bmap, 896 1191 .invalidatepage = gfs2_invalidatepage, 897 1192 .releasepage = gfs2_releasepage, 898 - .direct_IO = gfs2_direct_IO, 1193 + .direct_IO = noop_direct_IO, 899 1194 .migratepage = buffer_migrate_page, 900 1195 .is_partially_uptodate = block_is_partially_uptodate, 901 1196 .error_remove_page = generic_error_remove_page, ··· 904 
1203 .writepages = gfs2_writepages, 905 1204 .readpage = gfs2_readpage, 906 1205 .readpages = gfs2_readpages, 907 - .write_begin = gfs2_write_begin, 908 - .write_end = gfs2_write_end, 909 1206 .set_page_dirty = __set_page_dirty_buffers, 910 1207 .bmap = gfs2_bmap, 911 1208 .invalidatepage = gfs2_invalidatepage, 912 1209 .releasepage = gfs2_releasepage, 913 - .direct_IO = gfs2_direct_IO, 1210 + .direct_IO = noop_direct_IO, 914 1211 .migratepage = buffer_migrate_page, 915 1212 .is_partially_uptodate = block_is_partially_uptodate, 916 1213 .error_remove_page = generic_error_remove_page, ··· 919 1220 .writepages = gfs2_jdata_writepages, 920 1221 .readpage = gfs2_readpage, 921 1222 .readpages = gfs2_readpages, 922 - .write_begin = gfs2_write_begin, 923 - .write_end = gfs2_write_end, 924 1223 .set_page_dirty = jdata_set_page_dirty, 925 1224 .bmap = gfs2_bmap, 926 1225 .invalidatepage = gfs2_invalidatepage,

+19

fs/gfs2/aops.h

··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + /* 3 + * Copyright (C) 2018 Red Hat, Inc. All rights reserved. 4 + */ 5 + 6 + #ifndef __AOPS_DOT_H__ 7 + #define __AOPS_DOT_H__ 8 + 9 + #include "incore.h" 10 + 11 + extern int stuffed_readpage(struct gfs2_inode *ip, struct page *page); 12 + extern int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh, 13 + loff_t pos, unsigned copied, 14 + struct page *page); 15 + extern void adjust_fs_space(struct inode *inode); 16 + extern void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page, 17 + unsigned int from, unsigned int len); 18 + 19 + #endif /* __AOPS_DOT_H__ */

+335 -79

fs/gfs2/bmap.c

··· 28 28 #include "trans.h" 29 29 #include "dir.h" 30 30 #include "util.h" 31 + #include "aops.h" 31 32 #include "trace_gfs2.h" 32 33 33 34 /* This doesn't need to be that large as max 64 bit pointers in a 4k ··· 41 40 int mp_fheight; /* find_metapath height */ 42 41 int mp_aheight; /* actual height (lookup height) */ 43 42 }; 43 + 44 + static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length); 44 45 45 46 /** 46 47 * gfs2_unstuffer_page - unstuff a stuffed inode into a block cached by a page ··· 392 389 return mp->mp_aheight - x - 1; 393 390 } 394 391 395 - static inline void release_metapath(struct metapath *mp) 392 + static void release_metapath(struct metapath *mp) 396 393 { 397 394 int i; 398 395 ··· 400 397 if (mp->mp_bh[i] == NULL) 401 398 break; 402 399 brelse(mp->mp_bh[i]); 400 + mp->mp_bh[i] = NULL; 403 401 } 404 402 } 405 403 406 404 /** 407 405 * gfs2_extent_length - Returns length of an extent of blocks 408 - * @start: Start of the buffer 409 - * @len: Length of the buffer in bytes 410 - * @ptr: Current position in the buffer 411 - * @limit: Max extent length to return (0 = unlimited) 406 + * @bh: The metadata block 407 + * @ptr: Current position in @bh 408 + * @limit: Max extent length to return 412 409 * @eob: Set to 1 if we hit "end of block" 413 - * 414 - * If the first block is zero (unallocated) it will return the number of 415 - * unallocated blocks in the extent, otherwise it will return the number 416 - * of contiguous blocks in the extent. 
417 410 * 418 411 * Returns: The length of the extent (minimum of one block) 419 412 */ 420 413 421 - static inline unsigned int gfs2_extent_length(void *start, unsigned int len, __be64 *ptr, size_t limit, int *eob) 414 + static inline unsigned int gfs2_extent_length(struct buffer_head *bh, __be64 *ptr, size_t limit, int *eob) 422 415 { 423 - const __be64 *end = (start + len); 416 + const __be64 *end = (__be64 *)(bh->b_data + bh->b_size); 424 417 const __be64 *first = ptr; 425 418 u64 d = be64_to_cpu(*ptr); 426 419 ··· 425 426 ptr++; 426 427 if (ptr >= end) 427 428 break; 428 - if (limit && --limit == 0) 429 - break; 430 - if (d) 431 - d++; 429 + d++; 432 430 } while(be64_to_cpu(*ptr) == d); 433 431 if (ptr >= end) 434 432 *eob = 1; 435 - return (ptr - first); 433 + return ptr - first; 436 434 } 437 435 438 436 typedef const __be64 *(*gfs2_metadata_walker)( ··· 605 609 * ii) Indirect blocks to fill in lower part of the metadata tree 606 610 * iii) Data blocks 607 611 * 608 - * The function is in two parts. The first part works out the total 609 - * number of blocks which we need. The second part does the actual 610 - * allocation asking for an extent at a time (if enough contiguous free 611 - * blocks are available, there will only be one request per bmap call) 612 - * and uses the state machine to initialise the blocks in order. 612 + * This function is called after gfs2_iomap_get, which works out the 613 + * total number of blocks which we need via gfs2_alloc_size. 614 + * 615 + * We then do the actual allocation asking for an extent at a time (if 616 + * enough contiguous free blocks are available, there will only be one 617 + * allocation request per call) and uses the state machine to initialise 618 + * the blocks in order. 
613 619 * 614 620 * Right now, this function will allocate at most one indirect block 615 621 * worth of data -- with a default block size of 4K, that's slightly ··· 631 633 struct buffer_head *dibh = mp->mp_bh[0]; 632 634 u64 bn; 633 635 unsigned n, i, blks, alloced = 0, iblks = 0, branch_start = 0; 634 - unsigned dblks = 0; 635 - unsigned ptrs_per_blk; 636 + size_t dblks = iomap->length >> inode->i_blkbits; 636 637 const unsigned end_of_metadata = mp->mp_fheight - 1; 637 638 int ret; 638 639 enum alloc_state state; 639 640 __be64 *ptr; 640 641 __be64 zero_bn = 0; 641 - size_t maxlen = iomap->length >> inode->i_blkbits; 642 642 643 643 BUG_ON(mp->mp_aheight < 1); 644 644 BUG_ON(dibh == NULL); 645 + BUG_ON(dblks < 1); 645 646 646 647 gfs2_trans_add_meta(ip->i_gl, dibh); 647 648 648 649 down_write(&ip->i_rw_mutex); 649 650 650 651 if (mp->mp_fheight == mp->mp_aheight) { 651 - struct buffer_head *bh; 652 - int eob; 653 - 654 - /* Bottom indirect block exists, find unalloced extent size */ 655 - ptr = metapointer(end_of_metadata, mp); 656 - bh = mp->mp_bh[end_of_metadata]; 657 - dblks = gfs2_extent_length(bh->b_data, bh->b_size, ptr, 658 - maxlen, &eob); 659 - BUG_ON(dblks < 1); 652 + /* Bottom indirect block exists */ 660 653 state = ALLOC_DATA; 661 654 } else { 662 655 /* Need to allocate indirect blocks */ 663 - ptrs_per_blk = mp->mp_fheight > 1 ? 
sdp->sd_inptrs : 664 - sdp->sd_diptrs; 665 - dblks = min(maxlen, (size_t)(ptrs_per_blk - 666 - mp->mp_list[end_of_metadata])); 667 656 if (mp->mp_fheight == ip->i_height) { 668 657 /* Writing into existing tree, extend tree down */ 669 658 iblks = mp->mp_fheight - mp->mp_aheight; ··· 735 750 } 736 751 } while (iomap->addr == IOMAP_NULL_ADDR); 737 752 753 + iomap->type = IOMAP_MAPPED; 738 754 iomap->length = (u64)dblks << inode->i_blkbits; 739 755 ip->i_height = mp->mp_fheight; 740 756 gfs2_add_inode_blocks(&ip->i_inode, alloced); ··· 745 759 return ret; 746 760 } 747 761 748 - static void gfs2_stuffed_iomap(struct inode *inode, struct iomap *iomap) 762 + #define IOMAP_F_GFS2_BOUNDARY IOMAP_F_PRIVATE 763 + 764 + /** 765 + * gfs2_alloc_size - Compute the maximum allocation size 766 + * @inode: The inode 767 + * @mp: The metapath 768 + * @size: Requested size in blocks 769 + * 770 + * Compute the maximum size of the next allocation at @mp. 771 + * 772 + * Returns: size in blocks 773 + */ 774 + static u64 gfs2_alloc_size(struct inode *inode, struct metapath *mp, u64 size) 749 775 { 750 776 struct gfs2_inode *ip = GFS2_I(inode); 777 + struct gfs2_sbd *sdp = GFS2_SB(inode); 778 + const __be64 *first, *ptr, *end; 751 779 752 - iomap->addr = (ip->i_no_addr << inode->i_blkbits) + 753 - sizeof(struct gfs2_dinode); 754 - iomap->offset = 0; 755 - iomap->length = i_size_read(inode); 756 - iomap->type = IOMAP_INLINE; 780 + /* 781 + * For writes to stuffed files, this function is called twice via 782 + * gfs2_iomap_get, before and after unstuffing. The size we return the 783 + * first time needs to be large enough to get the reservation and 784 + * allocation sizes right. The size we return the second time must 785 + * be exact or else gfs2_iomap_alloc won't do the right thing. 786 + */ 787 + 788 + if (gfs2_is_stuffed(ip) || mp->mp_fheight != mp->mp_aheight) { 789 + unsigned int maxsize = mp->mp_fheight > 1 ? 
790 + sdp->sd_inptrs : sdp->sd_diptrs; 791 + maxsize -= mp->mp_list[mp->mp_fheight - 1]; 792 + if (size > maxsize) 793 + size = maxsize; 794 + return size; 795 + } 796 + 797 + first = metapointer(ip->i_height - 1, mp); 798 + end = metaend(ip->i_height - 1, mp); 799 + if (end - first > size) 800 + end = first + size; 801 + for (ptr = first; ptr < end; ptr++) { 802 + if (*ptr) 803 + break; 804 + } 805 + return ptr - first; 757 806 } 758 - 759 - #define IOMAP_F_GFS2_BOUNDARY IOMAP_F_PRIVATE 760 807 761 808 /** 762 809 * gfs2_iomap_get - Map blocks from an inode to disk blocks ··· 808 789 { 809 790 struct gfs2_inode *ip = GFS2_I(inode); 810 791 struct gfs2_sbd *sdp = GFS2_SB(inode); 792 + loff_t size = i_size_read(inode); 811 793 __be64 *ptr; 812 794 sector_t lblock; 813 795 sector_t lblock_stop; 814 796 int ret; 815 797 int eob; 816 798 u64 len; 817 - struct buffer_head *bh; 799 + struct buffer_head *dibh = NULL, *bh; 818 800 u8 height; 819 801 820 802 if (!length) 821 803 return -EINVAL; 822 804 805 + down_read(&ip->i_rw_mutex); 806 + 807 + ret = gfs2_meta_inode_buffer(ip, &dibh); 808 + if (ret) 809 + goto unlock; 810 + iomap->private = dibh; 811 + 823 812 if (gfs2_is_stuffed(ip)) { 824 - if (flags & IOMAP_REPORT) { 825 - if (pos >= i_size_read(inode)) 826 - return -ENOENT; 827 - gfs2_stuffed_iomap(inode, iomap); 828 - return 0; 813 + if (flags & IOMAP_WRITE) { 814 + loff_t max_size = gfs2_max_stuffed_size(ip); 815 + 816 + if (pos + length > max_size) 817 + goto unstuff; 818 + iomap->length = max_size; 819 + } else { 820 + if (pos >= size) { 821 + if (flags & IOMAP_REPORT) { 822 + ret = -ENOENT; 823 + goto unlock; 824 + } else { 825 + /* report a hole */ 826 + iomap->offset = pos; 827 + iomap->length = length; 828 + goto do_alloc; 829 + } 830 + } 831 + iomap->length = size; 829 832 } 830 - BUG_ON(!(flags & IOMAP_WRITE)); 833 + iomap->addr = (ip->i_no_addr << inode->i_blkbits) + 834 + sizeof(struct gfs2_dinode); 835 + iomap->type = IOMAP_INLINE; 836 + 
iomap->inline_data = dibh->b_data + sizeof(struct gfs2_dinode); 837 + goto out; 831 838 } 839 + 840 + unstuff: 832 841 lblock = pos >> inode->i_blkbits; 833 842 iomap->offset = lblock << inode->i_blkbits; 834 843 lblock_stop = (pos + length - 1) >> inode->i_blkbits; 835 844 len = lblock_stop - lblock + 1; 845 + iomap->length = len << inode->i_blkbits; 836 846 837 - down_read(&ip->i_rw_mutex); 838 - 839 - ret = gfs2_meta_inode_buffer(ip, &mp->mp_bh[0]); 840 - if (ret) 841 - goto unlock; 847 + get_bh(dibh); 848 + mp->mp_bh[0] = dibh; 842 849 843 850 height = ip->i_height; 844 851 while ((lblock + 1) * sdp->sd_sb.sb_bsize > sdp->sd_heightsize[height]) ··· 885 840 goto do_alloc; 886 841 887 842 bh = mp->mp_bh[ip->i_height - 1]; 888 - len = gfs2_extent_length(bh->b_data, bh->b_size, ptr, len, &eob); 843 + len = gfs2_extent_length(bh, ptr, len, &eob); 889 844 890 845 iomap->addr = be64_to_cpu(*ptr) << inode->i_blkbits; 891 846 iomap->length = len << inode->i_blkbits; 892 847 iomap->type = IOMAP_MAPPED; 893 - iomap->flags = IOMAP_F_MERGED; 848 + iomap->flags |= IOMAP_F_MERGED; 894 849 if (eob) 895 850 iomap->flags |= IOMAP_F_GFS2_BOUNDARY; 896 851 ··· 898 853 iomap->bdev = inode->i_sb->s_bdev; 899 854 unlock: 900 855 up_read(&ip->i_rw_mutex); 856 + if (ret && dibh) 857 + brelse(dibh); 901 858 return ret; 902 859 903 860 do_alloc: 904 861 iomap->addr = IOMAP_NULL_ADDR; 905 - iomap->length = len << inode->i_blkbits; 906 862 iomap->type = IOMAP_HOLE; 907 - iomap->flags = 0; 908 863 if (flags & IOMAP_REPORT) { 909 - loff_t size = i_size_read(inode); 910 864 if (pos >= size) 911 865 ret = -ENOENT; 912 866 else if (height == ip->i_height) 913 867 ret = gfs2_hole_size(inode, lblock, len, mp, iomap); 914 868 else 915 869 iomap->length = size - pos; 870 + } else if (flags & IOMAP_WRITE) { 871 + u64 alloc_size; 872 + 873 + if (flags & IOMAP_DIRECT) 874 + goto out; /* (see gfs2_file_direct_write) */ 875 + 876 + len = gfs2_alloc_size(inode, mp, len); 877 + alloc_size = len << 
inode->i_blkbits; 878 + if (alloc_size < iomap->length) 879 + iomap->length = alloc_size; 880 + } else { 881 + if (pos < size && height == ip->i_height) 882 + ret = gfs2_hole_size(inode, lblock, len, mp, iomap); 916 883 } 917 884 goto out; 885 + } 886 + 887 + static int gfs2_write_lock(struct inode *inode) 888 + { 889 + struct gfs2_inode *ip = GFS2_I(inode); 890 + struct gfs2_sbd *sdp = GFS2_SB(inode); 891 + int error; 892 + 893 + gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh); 894 + error = gfs2_glock_nq(&ip->i_gh); 895 + if (error) 896 + goto out_uninit; 897 + if (&ip->i_inode == sdp->sd_rindex) { 898 + struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode); 899 + 900 + error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE, 901 + GL_NOCACHE, &m_ip->i_gh); 902 + if (error) 903 + goto out_unlock; 904 + } 905 + return 0; 906 + 907 + out_unlock: 908 + gfs2_glock_dq(&ip->i_gh); 909 + out_uninit: 910 + gfs2_holder_uninit(&ip->i_gh); 911 + return error; 912 + } 913 + 914 + static void gfs2_write_unlock(struct inode *inode) 915 + { 916 + struct gfs2_inode *ip = GFS2_I(inode); 917 + struct gfs2_sbd *sdp = GFS2_SB(inode); 918 + 919 + if (&ip->i_inode == sdp->sd_rindex) { 920 + struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode); 921 + 922 + gfs2_glock_dq_uninit(&m_ip->i_gh); 923 + } 924 + gfs2_glock_dq_uninit(&ip->i_gh); 925 + } 926 + 927 + static void gfs2_iomap_journaled_page_done(struct inode *inode, loff_t pos, 928 + unsigned copied, struct page *page, 929 + struct iomap *iomap) 930 + { 931 + struct gfs2_inode *ip = GFS2_I(inode); 932 + 933 + gfs2_page_add_databufs(ip, page, offset_in_page(pos), copied); 934 + } 935 + 936 + static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos, 937 + loff_t length, unsigned flags, 938 + struct iomap *iomap) 939 + { 940 + struct metapath mp = { .mp_aheight = 1, }; 941 + struct gfs2_inode *ip = GFS2_I(inode); 942 + struct gfs2_sbd *sdp = GFS2_SB(inode); 943 + unsigned int data_blocks = 0, ind_blocks = 0, 
rblocks; 944 + bool unstuff, alloc_required; 945 + int ret; 946 + 947 + ret = gfs2_write_lock(inode); 948 + if (ret) 949 + return ret; 950 + 951 + unstuff = gfs2_is_stuffed(ip) && 952 + pos + length > gfs2_max_stuffed_size(ip); 953 + 954 + ret = gfs2_iomap_get(inode, pos, length, flags, iomap, &mp); 955 + if (ret) 956 + goto out_release; 957 + 958 + alloc_required = unstuff || iomap->type == IOMAP_HOLE; 959 + 960 + if (alloc_required || gfs2_is_jdata(ip)) 961 + gfs2_write_calc_reserv(ip, iomap->length, &data_blocks, 962 + &ind_blocks); 963 + 964 + if (alloc_required) { 965 + struct gfs2_alloc_parms ap = { 966 + .target = data_blocks + ind_blocks 967 + }; 968 + 969 + ret = gfs2_quota_lock_check(ip, &ap); 970 + if (ret) 971 + goto out_release; 972 + 973 + ret = gfs2_inplace_reserve(ip, &ap); 974 + if (ret) 975 + goto out_qunlock; 976 + } 977 + 978 + rblocks = RES_DINODE + ind_blocks; 979 + if (gfs2_is_jdata(ip)) 980 + rblocks += data_blocks; 981 + if (ind_blocks || data_blocks) 982 + rblocks += RES_STATFS + RES_QUOTA; 983 + if (inode == sdp->sd_rindex) 984 + rblocks += 2 * RES_STATFS; 985 + if (alloc_required) 986 + rblocks += gfs2_rg_blocks(ip, data_blocks + ind_blocks); 987 + 988 + ret = gfs2_trans_begin(sdp, rblocks, iomap->length >> inode->i_blkbits); 989 + if (ret) 990 + goto out_trans_fail; 991 + 992 + if (unstuff) { 993 + ret = gfs2_unstuff_dinode(ip, NULL); 994 + if (ret) 995 + goto out_trans_end; 996 + release_metapath(&mp); 997 + brelse(iomap->private); 998 + iomap->private = NULL; 999 + ret = gfs2_iomap_get(inode, iomap->offset, iomap->length, 1000 + flags, iomap, &mp); 1001 + if (ret) 1002 + goto out_trans_end; 1003 + } 1004 + 1005 + if (iomap->type == IOMAP_HOLE) { 1006 + ret = gfs2_iomap_alloc(inode, iomap, flags, &mp); 1007 + if (ret) { 1008 + gfs2_trans_end(sdp); 1009 + gfs2_inplace_release(ip); 1010 + punch_hole(ip, iomap->offset, iomap->length); 1011 + goto out_qunlock; 1012 + } 1013 + } 1014 + release_metapath(&mp); 1015 + if (gfs2_is_jdata(ip)) 
1016 + iomap->page_done = gfs2_iomap_journaled_page_done; 1017 + return 0; 1018 + 1019 + out_trans_end: 1020 + gfs2_trans_end(sdp); 1021 + out_trans_fail: 1022 + if (alloc_required) 1023 + gfs2_inplace_release(ip); 1024 + out_qunlock: 1025 + if (alloc_required) 1026 + gfs2_quota_unlock(ip); 1027 + out_release: 1028 + if (iomap->private) 1029 + brelse(iomap->private); 1030 + release_metapath(&mp); 1031 + gfs2_write_unlock(inode); 1032 + return ret; 918 1033 } 919 1034 920 1035 static int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length, ··· 1084 879 struct metapath mp = { .mp_aheight = 1, }; 1085 880 int ret; 1086 881 882 + iomap->flags |= IOMAP_F_BUFFER_HEAD; 883 + 1087 884 trace_gfs2_iomap_start(ip, pos, length, flags); 1088 - if (flags & IOMAP_WRITE) { 1089 - ret = gfs2_iomap_get(inode, pos, length, flags, iomap, &mp); 1090 - if (!ret && iomap->type == IOMAP_HOLE) 1091 - ret = gfs2_iomap_alloc(inode, iomap, flags, &mp); 1092 - release_metapath(&mp); 885 + if ((flags & IOMAP_WRITE) && !(flags & IOMAP_DIRECT)) { 886 + ret = gfs2_iomap_begin_write(inode, pos, length, flags, iomap); 1093 887 } else { 1094 888 ret = gfs2_iomap_get(inode, pos, length, flags, iomap, &mp); 1095 889 release_metapath(&mp); 890 + /* 891 + * Silently fall back to buffered I/O for stuffed files or if 892 + * we've hit a hole (see gfs2_file_direct_write). 
893 + */ 894 + if ((flags & IOMAP_WRITE) && (flags & IOMAP_DIRECT) && 895 + iomap->type != IOMAP_MAPPED) 896 + ret = -ENOTBLK; 1096 897 } 1097 898 trace_gfs2_iomap_end(ip, iomap, ret); 1098 899 return ret; 1099 900 } 1100 901 902 + static int gfs2_iomap_end(struct inode *inode, loff_t pos, loff_t length, 903 + ssize_t written, unsigned flags, struct iomap *iomap) 904 + { 905 + struct gfs2_inode *ip = GFS2_I(inode); 906 + struct gfs2_sbd *sdp = GFS2_SB(inode); 907 + struct gfs2_trans *tr = current->journal_info; 908 + struct buffer_head *dibh = iomap->private; 909 + 910 + if ((flags & (IOMAP_WRITE | IOMAP_DIRECT)) != IOMAP_WRITE) 911 + goto out; 912 + 913 + if (iomap->type != IOMAP_INLINE) { 914 + gfs2_ordered_add_inode(ip); 915 + 916 + if (tr->tr_num_buf_new) 917 + __mark_inode_dirty(inode, I_DIRTY_DATASYNC); 918 + else 919 + gfs2_trans_add_meta(ip->i_gl, dibh); 920 + } 921 + 922 + if (inode == sdp->sd_rindex) { 923 + adjust_fs_space(inode); 924 + sdp->sd_rindex_uptodate = 0; 925 + } 926 + 927 + gfs2_trans_end(sdp); 928 + gfs2_inplace_release(ip); 929 + 930 + if (length != written && (iomap->flags & IOMAP_F_NEW)) { 931 + /* Deallocate blocks that were just allocated. 
*/ 932 + loff_t blockmask = i_blocksize(inode) - 1; 933 + loff_t end = (pos + length) & ~blockmask; 934 + 935 + pos = (pos + written + blockmask) & ~blockmask; 936 + if (pos < end) { 937 + truncate_pagecache_range(inode, pos, end - 1); 938 + punch_hole(ip, pos, end - pos); 939 + } 940 + } 941 + 942 + if (ip->i_qadata && ip->i_qadata->qa_qd_num) 943 + gfs2_quota_unlock(ip); 944 + gfs2_write_unlock(inode); 945 + 946 + out: 947 + if (dibh) 948 + brelse(dibh); 949 + return 0; 950 + } 951 + 1101 952 const struct iomap_ops gfs2_iomap_ops = { 1102 953 .iomap_begin = gfs2_iomap_begin, 954 + .iomap_end = gfs2_iomap_end, 1103 955 }; 1104 956 1105 957 /** ··· 1203 941 } else { 1204 942 ret = gfs2_iomap_get(inode, pos, length, 0, &iomap, &mp); 1205 943 release_metapath(&mp); 1206 - 1207 - /* Return unmapped buffer beyond the end of file. */ 1208 - if (ret == -ENOENT) { 1209 - ret = 0; 1210 - goto out; 1211 - } 1212 944 } 1213 945 if (ret) 1214 946 goto out; ··· 2316 2060 end_of_file = (i_size_read(&ip->i_inode) + sdp->sd_sb.sb_bsize - 1) >> shift; 2317 2061 lblock = offset >> shift; 2318 2062 lblock_stop = (offset + len + sdp->sd_sb.sb_bsize - 1) >> shift; 2319 - if (lblock_stop > end_of_file) 2063 + if (lblock_stop > end_of_file && ip != GFS2_I(sdp->sd_rindex)) 2320 2064 return 1; 2321 2065 2322 2066 size = (lblock_stop - lblock) << shift; ··· 2410 2154 if (error) 2411 2155 goto out; 2412 2156 } else { 2413 - unsigned int start_off, end_off, blocksize; 2157 + unsigned int start_off, end_len, blocksize; 2414 2158 2415 2159 blocksize = i_blocksize(inode); 2416 2160 start_off = offset & (blocksize - 1); 2417 - end_off = (offset + length) & (blocksize - 1); 2161 + end_len = (offset + length) & (blocksize - 1); 2418 2162 if (start_off) { 2419 2163 unsigned int len = length; 2420 2164 if (length > blocksize - start_off) ··· 2423 2167 if (error) 2424 2168 goto out; 2425 2169 if (start_off + length < blocksize) 2426 - end_off = 0; 2170 + end_len = 0; 2427 2171 } 2428 - if (end_off) { 
2172 + if (end_len) { 2429 2173 error = gfs2_block_zero_range(inode, 2430 - offset + length - end_off, end_off); 2174 + offset + length - end_len, end_len); 2431 2175 if (error) 2432 2176 goto out; 2433 2177 }

+1 -3

fs/gfs2/dir.c

··· 1011 1011 u64 bn, leaf_no; 1012 1012 __be64 *lp; 1013 1013 u32 index; 1014 - int x, moved = 0; 1014 + int x; 1015 1015 int error; 1016 1016 1017 1017 index = name->hash >> (32 - dip->i_depth); ··· 1113 1113 1114 1114 if (!prev) 1115 1115 prev = dent; 1116 - 1117 - moved = 1; 1118 1116 } else { 1119 1117 prev = dent; 1120 1118 }

+150 -11

fs/gfs2/file.c

··· 26 26 #include <linux/dlm.h> 27 27 #include <linux/dlm_plock.h> 28 28 #include <linux/delay.h> 29 + #include <linux/backing-dev.h> 29 30 30 31 #include "gfs2.h" 31 32 #include "incore.h" 32 33 #include "bmap.h" 34 + #include "aops.h" 33 35 #include "dir.h" 34 36 #include "glock.h" 35 37 #include "glops.h" ··· 389 387 * blocks allocated on disk to back that page. 390 388 */ 391 389 392 - static int gfs2_page_mkwrite(struct vm_fault *vmf) 390 + static vm_fault_t gfs2_page_mkwrite(struct vm_fault *vmf) 393 391 { 394 392 struct page *page = vmf->page; 395 393 struct inode *inode = file_inode(vmf->vma->vm_file); ··· 690 688 return ret ? ret : ret1; 691 689 } 692 690 691 + static ssize_t gfs2_file_direct_read(struct kiocb *iocb, struct iov_iter *to) 692 + { 693 + struct file *file = iocb->ki_filp; 694 + struct gfs2_inode *ip = GFS2_I(file->f_mapping->host); 695 + size_t count = iov_iter_count(to); 696 + struct gfs2_holder gh; 697 + ssize_t ret; 698 + 699 + if (!count) 700 + return 0; /* skip atime */ 701 + 702 + gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, 0, &gh); 703 + ret = gfs2_glock_nq(&gh); 704 + if (ret) 705 + goto out_uninit; 706 + 707 + ret = iomap_dio_rw(iocb, to, &gfs2_iomap_ops, NULL); 708 + 709 + gfs2_glock_dq(&gh); 710 + out_uninit: 711 + gfs2_holder_uninit(&gh); 712 + return ret; 713 + } 714 + 715 + static ssize_t gfs2_file_direct_write(struct kiocb *iocb, struct iov_iter *from) 716 + { 717 + struct file *file = iocb->ki_filp; 718 + struct inode *inode = file->f_mapping->host; 719 + struct gfs2_inode *ip = GFS2_I(inode); 720 + size_t len = iov_iter_count(from); 721 + loff_t offset = iocb->ki_pos; 722 + struct gfs2_holder gh; 723 + ssize_t ret; 724 + 725 + /* 726 + * Deferred lock, even if its a write, since we do no allocation on 727 + * this path. All we need to change is the atime, and this lock mode 728 + * ensures that other nodes have flushed their buffered read caches 729 + * (i.e. their page cache entries for this inode). 
We do not, 730 + * unfortunately, have the option of only flushing a range like the 731 + * VFS does. 732 + */ 733 + gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, 0, &gh); 734 + ret = gfs2_glock_nq(&gh); 735 + if (ret) 736 + goto out_uninit; 737 + 738 + /* Silently fall back to buffered I/O when writing beyond EOF */ 739 + if (offset + len > i_size_read(&ip->i_inode)) 740 + goto out; 741 + 742 + ret = iomap_dio_rw(iocb, from, &gfs2_iomap_ops, NULL); 743 + 744 + out: 745 + gfs2_glock_dq(&gh); 746 + out_uninit: 747 + gfs2_holder_uninit(&gh); 748 + return ret; 749 + } 750 + 751 + static ssize_t gfs2_file_read_iter(struct kiocb *iocb, struct iov_iter *to) 752 + { 753 + ssize_t ret; 754 + 755 + if (iocb->ki_flags & IOCB_DIRECT) { 756 + ret = gfs2_file_direct_read(iocb, to); 757 + if (likely(ret != -ENOTBLK)) 758 + return ret; 759 + iocb->ki_flags &= ~IOCB_DIRECT; 760 + } 761 + return generic_file_read_iter(iocb, to); 762 + } 763 + 693 764 /** 694 765 * gfs2_file_write_iter - Perform a write to a file 695 766 * @iocb: The io context 696 - * @iov: The data to write 697 - * @nr_segs: Number of @iov segments 698 - * @pos: The file position 767 + * @from: The data to write 699 768 * 700 769 * We have to do a lock/unlock here to refresh the inode size for 701 770 * O_APPEND writes, otherwise we can land up writing at the wrong ··· 778 705 static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from) 779 706 { 780 707 struct file *file = iocb->ki_filp; 781 - struct gfs2_inode *ip = GFS2_I(file_inode(file)); 782 - int ret; 708 + struct inode *inode = file_inode(file); 709 + struct gfs2_inode *ip = GFS2_I(inode); 710 + ssize_t written = 0, ret; 783 711 784 712 ret = gfs2_rsqa_alloc(ip); 785 713 if (ret) ··· 797 723 gfs2_glock_dq_uninit(&gh); 798 724 } 799 725 800 - return generic_file_write_iter(iocb, from); 726 + inode_lock(inode); 727 + ret = generic_write_checks(iocb, from); 728 + if (ret <= 0) 729 + goto out; 730 + 731 + /* We can write back this queue in page 
reclaim */ 732 + current->backing_dev_info = inode_to_bdi(inode); 733 + 734 + ret = file_remove_privs(file); 735 + if (ret) 736 + goto out2; 737 + 738 + ret = file_update_time(file); 739 + if (ret) 740 + goto out2; 741 + 742 + if (iocb->ki_flags & IOCB_DIRECT) { 743 + struct address_space *mapping = file->f_mapping; 744 + loff_t pos, endbyte; 745 + ssize_t buffered; 746 + 747 + written = gfs2_file_direct_write(iocb, from); 748 + if (written < 0 || !iov_iter_count(from)) 749 + goto out2; 750 + 751 + ret = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops); 752 + if (unlikely(ret < 0)) 753 + goto out2; 754 + buffered = ret; 755 + 756 + /* 757 + * We need to ensure that the page cache pages are written to 758 + * disk and invalidated to preserve the expected O_DIRECT 759 + * semantics. 760 + */ 761 + pos = iocb->ki_pos; 762 + endbyte = pos + buffered - 1; 763 + ret = filemap_write_and_wait_range(mapping, pos, endbyte); 764 + if (!ret) { 765 + iocb->ki_pos += buffered; 766 + written += buffered; 767 + invalidate_mapping_pages(mapping, 768 + pos >> PAGE_SHIFT, 769 + endbyte >> PAGE_SHIFT); 770 + } else { 771 + /* 772 + * We don't know how much we wrote, so just return 773 + * the number of bytes which were direct-written 774 + */ 775 + } 776 + } else { 777 + ret = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops); 778 + if (likely(ret > 0)) 779 + iocb->ki_pos += ret; 780 + } 781 + 782 + out2: 783 + current->backing_dev_info = NULL; 784 + out: 785 + inode_unlock(inode); 786 + if (likely(ret > 0)) { 787 + /* Handle various SYNC-type writes */ 788 + ret = generic_write_sync(iocb, ret); 789 + } 790 + return written ? 
written : ret; 801 791 } 802 792 803 793 static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len, ··· 871 733 struct gfs2_inode *ip = GFS2_I(inode); 872 734 loff_t end = offset + len; 873 735 struct buffer_head *dibh; 874 - struct iomap iomap = { }; 875 736 int error; 876 737 877 738 error = gfs2_meta_inode_buffer(ip, &dibh); ··· 886 749 } 887 750 888 751 while (offset < end) { 752 + struct iomap iomap = { }; 753 + 889 754 error = gfs2_iomap_get_alloc(inode, offset, end - offset, 890 755 &iomap); 891 756 if (error) 892 757 goto out; 893 758 offset = iomap.offset + iomap.length; 894 - if (iomap.type != IOMAP_HOLE) 759 + if (!(iomap.flags & IOMAP_F_NEW)) 895 760 continue; 896 761 error = sb_issue_zeroout(sb, iomap.addr >> inode->i_blkbits, 897 762 iomap.length >> inode->i_blkbits, ··· 1264 1125 1265 1126 const struct file_operations gfs2_file_fops = { 1266 1127 .llseek = gfs2_llseek, 1267 - .read_iter = generic_file_read_iter, 1128 + .read_iter = gfs2_file_read_iter, 1268 1129 .write_iter = gfs2_file_write_iter, 1269 1130 .unlocked_ioctl = gfs2_ioctl, 1270 1131 .mmap = gfs2_mmap, ··· 1294 1155 1295 1156 const struct file_operations gfs2_file_fops_nolock = { 1296 1157 .llseek = gfs2_llseek, 1297 - .read_iter = generic_file_read_iter, 1158 + .read_iter = gfs2_file_read_iter, 1298 1159 .write_iter = gfs2_file_write_iter, 1299 1160 .unlocked_ioctl = gfs2_ioctl, 1300 1161 .mmap = gfs2_mmap,

+21 -2

fs/gfs2/incore.h

··· 65 65 66 66 #define GBF_FULL 1 67 67 68 + /** 69 + * Clone bitmaps (bi_clone): 70 + * 71 + * - When a block is freed, we remember the previous state of the block in the 72 + * clone bitmap, and only mark the block as free in the real bitmap. 73 + * 74 + * - When looking for a block to allocate, we check for a free block in the 75 + * clone bitmap, and if no clone bitmap exists, in the real bitmap. 76 + * 77 + * - For allocating a block, we mark it as allocated in the real bitmap, and if 78 + * a clone bitmap exists, also in the clone bitmap. 79 + * 80 + * - At the end of a log_flush, we copy the real bitmap into the clone bitmap 81 + * to make the clone bitmap reflect the current allocation state. 82 + * (Alternatively, we could remove the clone bitmap.) 83 + * 84 + * The clone bitmaps are in-core only, and are never written to disk. 85 + * 86 + * These steps ensure that blocks which have been freed in a transaction cannot 87 + * be reallocated in that same transaction. 88 + */ 68 89 struct gfs2_bitmap { 69 90 struct buffer_head *bi_bh; 70 91 char *bi_clone; ··· 316 295 struct rb_node rs_node; /* link to other block reservations */ 317 296 struct gfs2_rbm rs_rbm; /* Start of reservation */ 318 297 u32 rs_free; /* how many blocks are still free */ 319 - u64 rs_inum; /* Inode number for reservation */ 320 298 }; 321 299 322 300 /* ··· 418 398 struct gfs2_holder i_gh; /* for prepare/commit_write only */ 419 399 struct gfs2_qadata *i_qadata; /* quota allocation data */ 420 400 struct gfs2_blkreserv i_res; /* rgrp multi-block reservation */ 421 - struct gfs2_rgrpd *i_rgd; 422 401 u64 i_goal; /* goal block for allocations */ 423 402 struct rw_semaphore i_rw_mutex; 424 403 struct list_head i_ordered;

+17 -3

fs/gfs2/lock_dlm.c

··· 821 821 goto fail; 822 822 } 823 823 824 + /** 825 + * If we're a spectator, we don't want to take the lock in EX because 826 + * we cannot do the first-mount responsibility it implies: recovery. 827 + */ 828 + if (sdp->sd_args.ar_spectator) 829 + goto locks_done; 830 + 824 831 error = mounted_lock(sdp, DLM_LOCK_EX, DLM_LKF_CONVERT|DLM_LKF_NOQUEUE); 825 832 if (!error) { 826 833 mounted_mode = DLM_LOCK_EX; ··· 903 896 if (lvb_gen < mount_gen) { 904 897 /* wait for mounted nodes to update control_lock lvb to our 905 898 generation, which might include new recovery bits set */ 906 - fs_info(sdp, "control_mount wait1 block %u start %u mount %u " 907 - "lvb %u flags %lx\n", block_gen, start_gen, mount_gen, 908 - lvb_gen, ls->ls_recover_flags); 899 + if (sdp->sd_args.ar_spectator) { 900 + fs_info(sdp, "Recovery is required. Waiting for a " 901 + "non-spectator to mount.\n"); 902 + msleep_interruptible(1000); 903 + } else { 904 + fs_info(sdp, "control_mount wait1 block %u start %u " 905 + "mount %u lvb %u flags %lx\n", block_gen, 906 + start_gen, mount_gen, lvb_gen, 907 + ls->ls_recover_flags); 908 + } 909 909 spin_unlock(&ls->ls_recover_spin); 910 910 goto restart; 911 911 }

+20 -8

fs/gfs2/log.c

··· 92 92 93 93 static int gfs2_ail1_start_one(struct gfs2_sbd *sdp, 94 94 struct writeback_control *wbc, 95 - struct gfs2_trans *tr) 95 + struct gfs2_trans *tr, 96 + bool *withdraw) 96 97 __releases(&sdp->sd_ail_lock) 97 98 __acquires(&sdp->sd_ail_lock) 98 99 { ··· 108 107 gfs2_assert(sdp, bd->bd_tr == tr); 109 108 110 109 if (!buffer_busy(bh)) { 111 - if (!buffer_uptodate(bh)) 110 + if (!buffer_uptodate(bh)) { 112 111 gfs2_io_error_bh(sdp, bh); 112 + *withdraw = true; 113 + } 113 114 list_move(&bd->bd_ail_st_list, &tr->tr_ail2_list); 114 115 continue; 115 116 } ··· 151 148 struct list_head *head = &sdp->sd_ail1_list; 152 149 struct gfs2_trans *tr; 153 150 struct blk_plug plug; 151 + bool withdraw = false; 154 152 155 153 trace_gfs2_ail_flush(sdp, wbc, 1); 156 154 blk_start_plug(&plug); ··· 160 156 list_for_each_entry_reverse(tr, head, tr_list) { 161 157 if (wbc->nr_to_write <= 0) 162 158 break; 163 - if (gfs2_ail1_start_one(sdp, wbc, tr)) 159 + if (gfs2_ail1_start_one(sdp, wbc, tr, &withdraw)) 164 160 goto restart; 165 161 } 166 162 spin_unlock(&sdp->sd_ail_lock); 167 163 blk_finish_plug(&plug); 164 + if (withdraw) 165 + gfs2_lm_withdraw(sdp, NULL); 168 166 trace_gfs2_ail_flush(sdp, wbc, 0); 169 167 } 170 168 ··· 194 188 * 195 189 */ 196 190 197 - static void gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_trans *tr) 191 + static void gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_trans *tr, 192 + bool *withdraw) 198 193 { 199 194 struct gfs2_bufdata *bd, *s; 200 195 struct buffer_head *bh; ··· 206 199 gfs2_assert(sdp, bd->bd_tr == tr); 207 200 if (buffer_busy(bh)) 208 201 continue; 209 - if (!buffer_uptodate(bh)) 202 + if (!buffer_uptodate(bh)) { 210 203 gfs2_io_error_bh(sdp, bh); 204 + *withdraw = true; 205 + } 211 206 list_move(&bd->bd_ail_st_list, &tr->tr_ail2_list); 212 207 } 213 - 214 208 } 215 209 216 210 /** ··· 226 218 struct gfs2_trans *tr, *s; 227 219 int oldest_tr = 1; 228 220 int ret; 221 + bool withdraw = false; 229 222 230 223 
spin_lock(&sdp->sd_ail_lock); 231 224 list_for_each_entry_safe_reverse(tr, s, &sdp->sd_ail1_list, tr_list) { 232 - gfs2_ail1_empty_one(sdp, tr); 225 + gfs2_ail1_empty_one(sdp, tr, &withdraw); 233 226 if (list_empty(&tr->tr_ail1_list) && oldest_tr) 234 227 list_move(&tr->tr_list, &sdp->sd_ail2_list); 235 228 else ··· 238 229 } 239 230 ret = list_empty(&sdp->sd_ail1_list); 240 231 spin_unlock(&sdp->sd_ail_lock); 232 + 233 + if (withdraw) 234 + gfs2_lm_withdraw(sdp, "fatal: I/O error(s)\n"); 241 235 242 236 return ret; 243 237 } ··· 701 689 hash = ~crc32(~0, lh, LH_V1_SIZE); 702 690 lh->lh_hash = cpu_to_be32(hash); 703 691 704 - tv = current_kernel_time64(); 692 + ktime_get_coarse_real_ts64(&tv); 705 693 lh->lh_nsec = cpu_to_be32(tv.tv_nsec); 706 694 lh->lh_sec = cpu_to_be64(tv.tv_sec); 707 695 addr = gfs2_log_bmap(sdp);

+1 -1

fs/gfs2/lops.c

··· 49 49 if (test_set_buffer_pinned(bh)) 50 50 gfs2_assert_withdraw(sdp, 0); 51 51 if (!buffer_uptodate(bh)) 52 - gfs2_io_error_bh(sdp, bh); 52 + gfs2_io_error_bh_wd(sdp, bh); 53 53 bd = bh->b_private; 54 54 /* If this buffer is in the AIL and it has already been written 55 55 * to in-place disk block, remove it from the AIL.

+2 -2

fs/gfs2/meta_io.c

··· 293 293 if (unlikely(!buffer_uptodate(bh))) { 294 294 struct gfs2_trans *tr = current->journal_info; 295 295 if (tr && test_bit(TR_TOUCHED, &tr->tr_flags)) 296 - gfs2_io_error_bh(sdp, bh); 296 + gfs2_io_error_bh_wd(sdp, bh); 297 297 brelse(bh); 298 298 *bhp = NULL; 299 299 return -EIO; ··· 320 320 if (!buffer_uptodate(bh)) { 321 321 struct gfs2_trans *tr = current->journal_info; 322 322 if (tr && test_bit(TR_TOUCHED, &tr->tr_flags)) 323 - gfs2_io_error_bh(sdp, bh); 323 + gfs2_io_error_bh_wd(sdp, bh); 324 324 return -EIO; 325 325 } 326 326 if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))

+4 -3

fs/gfs2/recovery.c

··· 413 413 ktime_t t_start, t_jlck, t_jhd, t_tlck, t_rep; 414 414 int ro = 0; 415 415 unsigned int pass; 416 - int error; 416 + int error = 0; 417 417 int jlocked = 0; 418 418 419 419 t_start = ktime_get(); 420 - if (sdp->sd_args.ar_spectator || 421 - (jd->jd_jid != sdp->sd_lockstruct.ls_jid)) { 420 + if (sdp->sd_args.ar_spectator) 421 + goto fail; 422 + if (jd->jd_jid != sdp->sd_lockstruct.ls_jid) { 422 423 fs_info(sdp, "jid=%u: Trying to acquire journal lock...\n", 423 424 jd->jd_jid); 424 425 jlocked = 1;

+99 -70

fs/gfs2/rgrp.c

··· 123 123 /** 124 124 * gfs2_testbit - test a bit in the bitmaps 125 125 * @rbm: The bit to test 126 + * @use_clone: If true, test the clone bitmap, not the official bitmap. 127 + * 128 + * Some callers like gfs2_unaligned_extlen need to test the clone bitmaps, 129 + * not the "real" bitmaps, to avoid allocating recently freed blocks. 126 130 * 127 131 * Returns: The two bit block state of the requested bit 128 132 */ 129 133 130 - static inline u8 gfs2_testbit(const struct gfs2_rbm *rbm) 134 + static inline u8 gfs2_testbit(const struct gfs2_rbm *rbm, bool use_clone) 131 135 { 132 136 struct gfs2_bitmap *bi = rbm_bi(rbm); 133 - const u8 *buffer = bi->bi_bh->b_data + bi->bi_offset; 137 + const u8 *buffer; 134 138 const u8 *byte; 135 139 unsigned int bit; 136 140 141 + if (use_clone && bi->bi_clone) 142 + buffer = bi->bi_clone; 143 + else 144 + buffer = bi->bi_bh->b_data; 145 + buffer += bi->bi_offset; 137 146 byte = buffer + (rbm->offset / GFS2_NBBY); 138 147 bit = (rbm->offset % GFS2_NBBY) * GFS2_BIT_SIZE; 139 148 ··· 331 322 u8 res; 332 323 333 324 for (n = 0; n < n_unaligned; n++) { 334 - res = gfs2_testbit(rbm); 325 + res = gfs2_testbit(rbm, true); 335 326 if (res != GFS2_BLKST_FREE) 336 327 return true; 337 328 (*len)--; ··· 616 607 617 608 static void dump_rs(struct seq_file *seq, const struct gfs2_blkreserv *rs) 618 609 { 610 + struct gfs2_inode *ip = container_of(rs, struct gfs2_inode, i_res); 611 + 619 612 gfs2_print_dbg(seq, " B: n:%llu s:%llu b:%u f:%u\n", 620 - (unsigned long long)rs->rs_inum, 613 + (unsigned long long)ip->i_no_addr, 621 614 (unsigned long long)gfs2_rbm_to_block(&rs->rs_rbm), 622 615 rs->rs_rbm.offset, rs->rs_free); 623 616 } ··· 1062 1051 /* rd_data0, rd_data and rd_bitbytes already set from rindex */ 1063 1052 } 1064 1053 1054 + static void gfs2_rgrp_ondisk2lvb(struct gfs2_rgrp_lvb *rgl, const void *buf) 1055 + { 1056 + const struct gfs2_rgrp *str = buf; 1057 + 1058 + rgl->rl_magic = cpu_to_be32(GFS2_MAGIC); 1059 + rgl->rl_flags = 
str->rg_flags; 1060 + rgl->rl_free = str->rg_free; 1061 + rgl->rl_dinodes = str->rg_dinodes; 1062 + rgl->rl_igeneration = str->rg_igeneration; 1063 + rgl->__pad = 0UL; 1064 + } 1065 + 1065 1066 static void gfs2_rgrp_out(struct gfs2_rgrpd *rgd, void *buf) 1066 1067 { 1067 1068 struct gfs2_rgrpd *next = gfs2_rgrpd_get_next(rgd); ··· 1096 1073 str->rg_crc = cpu_to_be32(crc); 1097 1074 1098 1075 memset(&str->rg_reserved, 0, sizeof(str->rg_reserved)); 1076 + gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, buf); 1099 1077 } 1100 1078 1101 1079 static int gfs2_rgrp_lvb_valid(struct gfs2_rgrpd *rgd) ··· 1109 1085 rgl->rl_igeneration != str->rg_igeneration) 1110 1086 return 0; 1111 1087 return 1; 1112 - } 1113 - 1114 - static void gfs2_rgrp_ondisk2lvb(struct gfs2_rgrp_lvb *rgl, const void *buf) 1115 - { 1116 - const struct gfs2_rgrp *str = buf; 1117 - 1118 - rgl->rl_magic = cpu_to_be32(GFS2_MAGIC); 1119 - rgl->rl_flags = str->rg_flags; 1120 - rgl->rl_free = str->rg_free; 1121 - rgl->rl_dinodes = str->rg_dinodes; 1122 - rgl->rl_igeneration = str->rg_igeneration; 1123 - rgl->__pad = 0UL; 1124 - } 1125 - 1126 - static void update_rgrp_lvb_unlinked(struct gfs2_rgrpd *rgd, u32 change) 1127 - { 1128 - struct gfs2_rgrp_lvb *rgl = rgd->rd_rgl; 1129 - u32 unlinked = be32_to_cpu(rgl->rl_unlinked) + change; 1130 - rgl->rl_unlinked = cpu_to_be32(unlinked); 1131 1088 } 1132 1089 1133 1090 static u32 count_unlinked(struct gfs2_rgrpd *rgd) ··· 1429 1424 rgd->rd_flags |= GFS2_RGF_TRIMMED; 1430 1425 gfs2_trans_add_meta(rgd->rd_gl, bh); 1431 1426 gfs2_rgrp_out(rgd, bh->b_data); 1432 - gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, bh->b_data); 1433 1427 gfs2_trans_end(sdp); 1434 1428 } 1435 1429 } ··· 1492 1488 } 1493 1489 1494 1490 /** 1491 + * rgd_free - return the number of free blocks we can allocate. 1492 + * @rgd: the resource group 1493 + * 1494 + * This function returns the number of free blocks for an rgrp. 
1495 + * That's the clone-free blocks (blocks that are free, not including those 1496 + * still being used for unlinked files that haven't been deleted.) 1497 + * 1498 + * It also subtracts any blocks reserved by someone else, but does not 1499 + * include free blocks that are still part of our current reservation, 1500 + * because obviously we can (and will) allocate them. 1501 + */ 1502 + static inline u32 rgd_free(struct gfs2_rgrpd *rgd, struct gfs2_blkreserv *rs) 1503 + { 1504 + u32 tot_reserved, tot_free; 1505 + 1506 + if (WARN_ON_ONCE(rgd->rd_reserved < rs->rs_free)) 1507 + return 0; 1508 + tot_reserved = rgd->rd_reserved - rs->rs_free; 1509 + 1510 + if (rgd->rd_free_clone < tot_reserved) 1511 + tot_reserved = 0; 1512 + 1513 + tot_free = rgd->rd_free_clone - tot_reserved; 1514 + 1515 + return tot_free; 1516 + } 1517 + 1518 + /** 1495 1519 * rg_mblk_search - find a group of multiple free blocks to form a reservation 1496 1520 * @rgd: the resource group descriptor 1497 1521 * @ip: pointer to the inode for which we're reserving blocks ··· 1534 1502 u64 goal; 1535 1503 struct gfs2_blkreserv *rs = &ip->i_res; 1536 1504 u32 extlen; 1537 - u32 free_blocks = rgd->rd_free_clone - rgd->rd_reserved; 1505 + u32 free_blocks = rgd_free(rgd, rs); 1538 1506 int ret; 1539 1507 struct inode *inode = &ip->i_inode; 1540 1508 ··· 1560 1528 if (ret == 0) { 1561 1529 rs->rs_rbm = rbm; 1562 1530 rs->rs_free = extlen; 1563 - rs->rs_inum = ip->i_no_addr; 1564 1531 rs_insert(ip); 1565 1532 } else { 1566 1533 if (goal == rgd->rd_last_alloc + rgd->rd_data0) ··· 1717 1686 1718 1687 while(1) { 1719 1688 bi = rbm_bi(rbm); 1720 - if (test_bit(GBF_FULL, &bi->bi_flags) && 1689 + if ((ip == NULL || !gfs2_rs_active(&ip->i_res)) && 1690 + test_bit(GBF_FULL, &bi->bi_flags) && 1721 1691 (state == GFS2_BLKST_FREE)) 1722 1692 goto next_bitmap; 1723 1693 ··· 2015 1983 int error = 0, rg_locked, flags = 0; 2016 1984 u64 last_unlinked = NO_BLOCK; 2017 1985 int loops = 0; 2018 - u32 skip = 0; 1986 + u32 
free_blocks, skip = 0; 2019 1987 2020 1988 if (sdp->sd_args.ar_rgrplvb) 2021 1989 flags |= GL_SKIP; ··· 2023 1991 return -EINVAL; 2024 1992 if (gfs2_rs_active(rs)) { 2025 1993 begin = rs->rs_rbm.rgd; 2026 - } else if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal)) { 2027 - rs->rs_rbm.rgd = begin = ip->i_rgd; 1994 + } else if (rs->rs_rbm.rgd && 1995 + rgrp_contains_block(rs->rs_rbm.rgd, ip->i_goal)) { 1996 + begin = rs->rs_rbm.rgd; 2028 1997 } else { 2029 1998 check_and_update_goal(ip); 2030 1999 rs->rs_rbm.rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1); ··· 2086 2053 goto check_rgrp; 2087 2054 2088 2055 /* If rgrp has enough free space, use it */ 2089 - if (rs->rs_rbm.rgd->rd_free_clone >= ap->target || 2056 + free_blocks = rgd_free(rs->rs_rbm.rgd, rs); 2057 + if (free_blocks >= ap->target || 2090 2058 (loops == 2 && ap->min_target && 2091 - rs->rs_rbm.rgd->rd_free_clone >= ap->min_target)) { 2092 - ip->i_rgd = rs->rs_rbm.rgd; 2093 - ap->allowed = ip->i_rgd->rd_free_clone; 2059 + free_blocks >= ap->min_target)) { 2060 + ap->allowed = free_blocks; 2094 2061 return 0; 2095 2062 } 2096 2063 check_rgrp: ··· 2149 2116 } 2150 2117 2151 2118 /** 2152 - * gfs2_get_block_type - Check a block in a RG is of given type 2153 - * @rgd: the resource group holding the block 2154 - * @block: the block number 2155 - * 2156 - * Returns: The block type (GFS2_BLKST_*) 2157 - */ 2158 - 2159 - static unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block) 2160 - { 2161 - struct gfs2_rbm rbm = { .rgd = rgd, }; 2162 - int ret; 2163 - 2164 - ret = gfs2_rbm_from_block(&rbm, block); 2165 - WARN_ON_ONCE(ret != 0); 2166 - 2167 - return gfs2_testbit(&rbm); 2168 - } 2169 - 2170 - 2171 - /** 2172 2119 * gfs2_alloc_extent - allocate an extent from a given bitmap 2173 2120 * @rbm: the resource group information 2174 2121 * @dinode: TRUE if the first block we allocate is for a dinode ··· 2172 2159 block++; 2173 2160 while (*n < elen) { 2174 2161 ret = 
gfs2_rbm_from_block(&pos, block); 2175 - if (ret || gfs2_testbit(&pos) != GFS2_BLKST_FREE) 2162 + if (ret || gfs2_testbit(&pos, true) != GFS2_BLKST_FREE) 2176 2163 break; 2177 2164 gfs2_trans_add_meta(pos.rgd->rd_gl, rbm_bi(&pos)->bi_bh); 2178 2165 gfs2_setbit(&pos, true, GFS2_BLKST_USED); ··· 2348 2335 { 2349 2336 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 2350 2337 struct buffer_head *dibh; 2351 - struct gfs2_rbm rbm = { .rgd = ip->i_rgd, }; 2338 + struct gfs2_rbm rbm = { .rgd = ip->i_res.rs_rbm.rgd, }; 2352 2339 unsigned int ndata; 2353 2340 u64 block; /* block, within the file system scope */ 2354 2341 int error; ··· 2406 2393 2407 2394 gfs2_trans_add_meta(rbm.rgd->rd_gl, rbm.rgd->rd_bits[0].bi_bh); 2408 2395 gfs2_rgrp_out(rbm.rgd, rbm.rgd->rd_bits[0].bi_bh->b_data); 2409 - gfs2_rgrp_ondisk2lvb(rbm.rgd->rd_rgl, rbm.rgd->rd_bits[0].bi_bh->b_data); 2410 2396 2411 2397 gfs2_statfs_change(sdp, 0, -(s64)*nblocks, dinode ? 1 : 0); 2412 2398 if (dinode) ··· 2446 2434 rgd->rd_flags &= ~GFS2_RGF_TRIMMED; 2447 2435 gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh); 2448 2436 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); 2449 - gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data); 2450 2437 2451 2438 /* Directories keep their data in the metadata address space */ 2452 2439 if (meta || ip->i_depth) ··· 2482 2471 trace_gfs2_block_alloc(ip, rgd, blkno, 1, GFS2_BLKST_UNLINKED); 2483 2472 gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh); 2484 2473 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); 2485 - gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data); 2486 - update_rgrp_lvb_unlinked(rgd, 1); 2474 + be32_add_cpu(&rgd->rd_rgl->rl_unlinked, 1); 2487 2475 } 2488 2476 2489 2477 void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip) ··· 2502 2492 2503 2493 gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh); 2504 2494 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); 2505 - gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, 
rgd->rd_bits[0].bi_bh->b_data); 2506 - update_rgrp_lvb_unlinked(rgd, -1); 2495 + be32_add_cpu(&rgd->rd_rgl->rl_unlinked, -1); 2507 2496 2508 2497 gfs2_statfs_change(sdp, 0, +1, -1); 2509 2498 trace_gfs2_block_alloc(ip, rgd, ip->i_no_addr, 1, GFS2_BLKST_FREE); ··· 2525 2516 { 2526 2517 struct gfs2_rgrpd *rgd; 2527 2518 struct gfs2_holder rgd_gh; 2519 + struct gfs2_rbm rbm; 2528 2520 int error = -EINVAL; 2529 2521 2530 2522 rgd = gfs2_blk2rgrpd(sdp, no_addr, 1); ··· 2536 2526 if (error) 2537 2527 goto fail; 2538 2528 2539 - if (gfs2_get_block_type(rgd, no_addr) != type) 2529 + rbm.rgd = rgd; 2530 + error = gfs2_rbm_from_block(&rbm, no_addr); 2531 + WARN_ON_ONCE(error != 0); 2532 + 2533 + if (gfs2_testbit(&rbm, false) != type) 2540 2534 error = -ESTALE; 2541 2535 2542 2536 gfs2_glock_dq_uninit(&rgd_gh); ··· 2572 2558 if (gfs2_assert_warn(sdp, !rlist->rl_ghs)) 2573 2559 return; 2574 2560 2575 - if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, block)) 2576 - rgd = ip->i_rgd; 2577 - else 2561 + /* 2562 + * The resource group last accessed is kept in the last position. 
2563 + */ 2564 + 2565 + if (rlist->rl_rgrps) { 2566 + rgd = rlist->rl_rgd[rlist->rl_rgrps - 1]; 2567 + if (rgrp_contains_block(rgd, block)) 2568 + return; 2578 2569 rgd = gfs2_blk2rgrpd(sdp, block, 1); 2570 + } else { 2571 + rgd = ip->i_res.rs_rbm.rgd; 2572 + if (!rgd || !rgrp_contains_block(rgd, block)) 2573 + rgd = gfs2_blk2rgrpd(sdp, block, 1); 2574 + } 2575 + 2579 2576 if (!rgd) { 2580 - fs_err(sdp, "rlist_add: no rgrp for block %llu\n", (unsigned long long)block); 2577 + fs_err(sdp, "rlist_add: no rgrp for block %llu\n", 2578 + (unsigned long long)block); 2581 2579 return; 2582 2580 } 2583 - ip->i_rgd = rgd; 2584 2581 2585 - for (x = 0; x < rlist->rl_rgrps; x++) 2586 - if (rlist->rl_rgd[x] == rgd) 2582 + for (x = 0; x < rlist->rl_rgrps; x++) { 2583 + if (rlist->rl_rgd[x] == rgd) { 2584 + swap(rlist->rl_rgd[x], 2585 + rlist->rl_rgd[rlist->rl_rgrps - 1]); 2587 2586 return; 2587 + } 2588 + } 2588 2589 2589 2590 if (rlist->rl_rgrps == rlist->rl_space) { 2590 2591 new_space = rlist->rl_space + 10;

-1

fs/gfs2/super.c

··· 1729 1729 if (ip) { 1730 1730 ip->i_flags = 0; 1731 1731 ip->i_gl = NULL; 1732 - ip->i_rgd = NULL; 1733 1732 memset(&ip->i_res, 0, sizeof(ip->i_res)); 1734 1733 RB_CLEAR_NODE(&ip->i_res.rs_node); 1735 1734 ip->i_rahead = 0;

+9 -2

fs/gfs2/sys.c

··· 429 429 430 430 spin_lock(&sdp->sd_jindex_spin); 431 431 rv = -EBUSY; 432 - if (sdp->sd_jdesc->jd_jid == jid) 432 + /** 433 + * If we're a spectator, we use journal0, but it's not really ours. 434 + * So we need to wait for its recovery too. If we skip it we'd never 435 + * queue work to the recovery workqueue, and so its completion would 436 + * never clear the DFL_BLOCK_LOCKS flag, so all our locks would 437 + * permanently stop working. 438 + */ 439 + if (sdp->sd_jdesc->jd_jid == jid && !sdp->sd_args.ar_spectator) 433 440 goto out; 434 441 rv = -ENOENT; 435 442 list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) { 436 - if (jd->jd_jid != jid) 443 + if (jd->jd_jid != jid && !sdp->sd_args.ar_spectator) 437 444 continue; 438 445 rv = gfs2_recover_journal(jd, false); 439 446 break;

+2 -1

fs/gfs2/trace_gfs2.h

··· 606 606 __entry->rd_addr = rs->rs_rbm.rgd->rd_addr; 607 607 __entry->rd_free_clone = rs->rs_rbm.rgd->rd_free_clone; 608 608 __entry->rd_reserved = rs->rs_rbm.rgd->rd_reserved; 609 - __entry->inum = rs->rs_inum; 609 + __entry->inum = container_of(rs, struct gfs2_inode, 610 + i_res)->i_no_addr; 610 611 __entry->start = gfs2_rbm_to_block(&rs->rs_rbm); 611 612 __entry->free = rs->rs_free; 612 613 __entry->func = func;

+4 -2

fs/gfs2/trans.h

··· 30 30 * block, or all of the blocks in the rg, whichever is smaller */ 31 31 static inline unsigned int gfs2_rg_blocks(const struct gfs2_inode *ip, unsigned requested) 32 32 { 33 - if (requested < ip->i_rgd->rd_length) 33 + struct gfs2_rgrpd *rgd = ip->i_res.rs_rbm.rgd; 34 + 35 + if (requested < rgd->rd_length) 34 36 return requested + 1; 35 - return ip->i_rgd->rd_length; 37 + return rgd->rd_length; 36 38 } 37 39 38 40 extern int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks,

+20 -18

fs/gfs2/util.c

··· 46 46 test_and_set_bit(SDF_SHUTDOWN, &sdp->sd_flags)) 47 47 return 0; 48 48 49 - va_start(args, fmt); 49 + if (fmt) { 50 + va_start(args, fmt); 50 51 51 - vaf.fmt = fmt; 52 - vaf.va = &args; 52 + vaf.fmt = fmt; 53 + vaf.va = &args; 53 54 54 - fs_err(sdp, "%pV", &vaf); 55 + fs_err(sdp, "%pV", &vaf); 55 56 56 - va_end(args); 57 + va_end(args); 58 + } 57 59 58 60 if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW) { 59 61 fs_err(sdp, "about to withdraw this file system\n"); ··· 248 246 } 249 247 250 248 /** 251 - * gfs2_io_error_bh_i - Flag a buffer I/O error and withdraw 252 - * Returns: -1 if this call withdrew the machine, 253 - * 0 if it was already withdrawn 249 + * gfs2_io_error_bh_i - Flag a buffer I/O error 250 + * @withdraw: withdraw the filesystem 254 251 */ 255 252 256 - int gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh, 257 - const char *function, char *file, unsigned int line) 253 + void gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh, 254 + const char *function, char *file, unsigned int line, 255 + bool withdraw) 258 256 { 259 - int rv; 260 - rv = gfs2_lm_withdraw(sdp, 261 - "fatal: I/O error\n" 262 - " block = %llu\n" 263 - " function = %s, file = %s, line = %u\n", 264 - (unsigned long long)bh->b_blocknr, 265 - function, file, line); 266 - return rv; 257 + fs_err(sdp, 258 + "fatal: I/O error\n" 259 + " block = %llu\n" 260 + " function = %s, file = %s, line = %u\n", 261 + (unsigned long long)bh->b_blocknr, 262 + function, file, line); 263 + if (withdraw) 264 + gfs2_lm_withdraw(sdp, NULL); 267 265 } 268 266

+7 -3

fs/gfs2/util.h

··· 136 136 gfs2_io_error_i((sdp), __func__, __FILE__, __LINE__); 137 137 138 138 139 - int gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh, 140 - const char *function, char *file, unsigned int line); 139 + void gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh, 140 + const char *function, char *file, unsigned int line, 141 + bool withdraw); 142 + 143 + #define gfs2_io_error_bh_wd(sdp, bh) \ 144 + gfs2_io_error_bh_i((sdp), (bh), __func__, __FILE__, __LINE__, true); 141 145 142 146 #define gfs2_io_error_bh(sdp, bh) \ 143 - gfs2_io_error_bh_i((sdp), (bh), __func__, __FILE__, __LINE__); 147 + gfs2_io_error_bh_i((sdp), (bh), __func__, __FILE__, __LINE__, false); 144 148 145 149 146 150 extern struct kmem_cache *gfs2_glock_cachep;

+22 -37

fs/gfs2/xattr.c

··· 343 343 unsigned int ei_size; 344 344 }; 345 345 346 - static inline unsigned int gfs2_ea_strlen(struct gfs2_ea_header *ea) 347 - { 348 - switch (ea->ea_type) { 349 - case GFS2_EATYPE_USR: 350 - return 5 + ea->ea_name_len + 1; 351 - case GFS2_EATYPE_SYS: 352 - return 7 + ea->ea_name_len + 1; 353 - case GFS2_EATYPE_SECURITY: 354 - return 9 + ea->ea_name_len + 1; 355 - default: 356 - return 0; 357 - } 358 - } 359 - 360 346 static int ea_list_i(struct gfs2_inode *ip, struct buffer_head *bh, 361 347 struct gfs2_ea_header *ea, struct gfs2_ea_header *prev, 362 348 void *private) 363 349 { 364 350 struct ea_list *ei = private; 365 351 struct gfs2_ea_request *er = ei->ei_er; 366 - unsigned int ea_size = gfs2_ea_strlen(ea); 352 + unsigned int ea_size; 353 + char *prefix; 354 + unsigned int l; 367 355 368 356 if (ea->ea_type == GFS2_EATYPE_UNUSED) 369 357 return 0; 370 358 371 - if (er->er_data_len) { 372 - char *prefix = NULL; 373 - unsigned int l = 0; 374 - char c = 0; 359 + switch (ea->ea_type) { 360 + case GFS2_EATYPE_USR: 361 + prefix = "user."; 362 + l = 5; 363 + break; 364 + case GFS2_EATYPE_SYS: 365 + prefix = "system."; 366 + l = 7; 367 + break; 368 + case GFS2_EATYPE_SECURITY: 369 + prefix = "security."; 370 + l = 9; 371 + break; 372 + default: 373 + BUG(); 374 + } 375 375 376 + ea_size = l + ea->ea_name_len + 1; 377 + if (er->er_data_len) { 376 378 if (ei->ei_size + ea_size > er->er_data_len) 377 379 return -ERANGE; 378 - 379 - switch (ea->ea_type) { 380 - case GFS2_EATYPE_USR: 381 - prefix = "user."; 382 - l = 5; 383 - break; 384 - case GFS2_EATYPE_SYS: 385 - prefix = "system."; 386 - l = 7; 387 - break; 388 - case GFS2_EATYPE_SECURITY: 389 - prefix = "security."; 390 - l = 9; 391 - break; 392 - } 393 - 394 - BUG_ON(l == 0); 395 380 396 381 memcpy(er->er_data + ei->ei_size, prefix, l); 397 382 memcpy(er->er_data + ei->ei_size + l, GFS2_EA2NAME(ea), 398 383 ea->ea_name_len); 399 - memcpy(er->er_data + ei->ei_size + ea_size - 1, &c, 1); 384 + 
er->er_data[ei->ei_size + ea_size - 1] = 0; 400 385 } 401 386 402 387 ei->ei_size += ea_size;