Merge tag 'xfs-for-linus-4.10-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs

+4 -8

Documentation/filesystems/xfs.txt

··· 51 51 CRC enabled filesystems always use the attr2 format, and so 52 52 will reject the noattr2 mount option if it is set. 53 53 54 - barrier (*) 55 - nobarrier 56 - Enables/disables the use of block layer write barriers for 57 - writes into the journal and for data integrity operations. 58 - This allows for drive level write caching to be enabled, for 59 - devices that support write barriers. 60 - 61 54 discard 62 55 nodiscard (*) 63 56 Enable/disable the issuing of commands to let the block ··· 221 228 Deprecated Mount Options 222 229 ======================== 223 230 224 - None at present. 231 + Name Removal Schedule 232 + ---- ---------------- 233 + barrier no earlier than v4.15 234 + nobarrier no earlier than v4.15 225 235 226 236 227 237 Removed Mount Options

+1 -1

fs/direct-io.c

··· 554 554 * filesystems that don't need it and also allows us to create the workqueue 555 555 * late enough so the we can include s_id in the name of the workqueue. 556 556 */ 557 - static int sb_init_dio_done_wq(struct super_block *sb) 557 + int sb_init_dio_done_wq(struct super_block *sb) 558 558 { 559 559 struct workqueue_struct *old; 560 560 struct workqueue_struct *wq = alloc_workqueue("dio/%s",

+3

fs/internal.h

··· 184 184 loff_t iomap_apply(struct inode *inode, loff_t pos, loff_t length, 185 185 unsigned flags, struct iomap_ops *ops, void *data, 186 186 iomap_actor_t actor); 187 + 188 + /* direct-io.c: */ 189 + int sb_init_dio_done_wq(struct super_block *sb);

+373

fs/iomap.c

··· 24 24 #include <linux/uio.h> 25 25 #include <linux/backing-dev.h> 26 26 #include <linux/buffer_head.h> 27 + #include <linux/task_io_accounting_ops.h> 27 28 #include <linux/dax.h> 28 29 #include "internal.h" 29 30 ··· 585 584 return 0; 586 585 } 587 586 EXPORT_SYMBOL_GPL(iomap_fiemap); 587 + 588 + /* 589 + * Private flags for iomap_dio, must not overlap with the public ones in 590 + * iomap.h: 591 + */ 592 + #define IOMAP_DIO_WRITE (1 << 30) 593 + #define IOMAP_DIO_DIRTY (1 << 31) 594 + 595 + struct iomap_dio { 596 + struct kiocb *iocb; 597 + iomap_dio_end_io_t *end_io; 598 + loff_t i_size; 599 + loff_t size; 600 + atomic_t ref; 601 + unsigned flags; 602 + int error; 603 + 604 + union { 605 + /* used during submission and for synchronous completion: */ 606 + struct { 607 + struct iov_iter *iter; 608 + struct task_struct *waiter; 609 + struct request_queue *last_queue; 610 + blk_qc_t cookie; 611 + } submit; 612 + 613 + /* used for aio completion: */ 614 + struct { 615 + struct work_struct work; 616 + } aio; 617 + }; 618 + }; 619 + 620 + static ssize_t iomap_dio_complete(struct iomap_dio *dio) 621 + { 622 + struct kiocb *iocb = dio->iocb; 623 + ssize_t ret; 624 + 625 + if (dio->end_io) { 626 + ret = dio->end_io(iocb, 627 + dio->error ? 
dio->error : dio->size, 628 + dio->flags); 629 + } else { 630 + ret = dio->error; 631 + } 632 + 633 + if (likely(!ret)) { 634 + ret = dio->size; 635 + /* check for short read */ 636 + if (iocb->ki_pos + ret > dio->i_size && 637 + !(dio->flags & IOMAP_DIO_WRITE)) 638 + ret = dio->i_size - iocb->ki_pos; 639 + iocb->ki_pos += ret; 640 + } 641 + 642 + inode_dio_end(file_inode(iocb->ki_filp)); 643 + kfree(dio); 644 + 645 + return ret; 646 + } 647 + 648 + static void iomap_dio_complete_work(struct work_struct *work) 649 + { 650 + struct iomap_dio *dio = container_of(work, struct iomap_dio, aio.work); 651 + struct kiocb *iocb = dio->iocb; 652 + bool is_write = (dio->flags & IOMAP_DIO_WRITE); 653 + ssize_t ret; 654 + 655 + ret = iomap_dio_complete(dio); 656 + if (is_write && ret > 0) 657 + ret = generic_write_sync(iocb, ret); 658 + iocb->ki_complete(iocb, ret, 0); 659 + } 660 + 661 + /* 662 + * Set an error in the dio if none is set yet. We have to use cmpxchg 663 + * as the submission context and the completion context(s) can race to 664 + * update the error. 
665 + */ 666 + static inline void iomap_dio_set_error(struct iomap_dio *dio, int ret) 667 + { 668 + cmpxchg(&dio->error, 0, ret); 669 + } 670 + 671 + static void iomap_dio_bio_end_io(struct bio *bio) 672 + { 673 + struct iomap_dio *dio = bio->bi_private; 674 + bool should_dirty = (dio->flags & IOMAP_DIO_DIRTY); 675 + 676 + if (bio->bi_error) 677 + iomap_dio_set_error(dio, bio->bi_error); 678 + 679 + if (atomic_dec_and_test(&dio->ref)) { 680 + if (is_sync_kiocb(dio->iocb)) { 681 + struct task_struct *waiter = dio->submit.waiter; 682 + 683 + WRITE_ONCE(dio->submit.waiter, NULL); 684 + wake_up_process(waiter); 685 + } else if (dio->flags & IOMAP_DIO_WRITE) { 686 + struct inode *inode = file_inode(dio->iocb->ki_filp); 687 + 688 + INIT_WORK(&dio->aio.work, iomap_dio_complete_work); 689 + queue_work(inode->i_sb->s_dio_done_wq, &dio->aio.work); 690 + } else { 691 + iomap_dio_complete_work(&dio->aio.work); 692 + } 693 + } 694 + 695 + if (should_dirty) { 696 + bio_check_pages_dirty(bio); 697 + } else { 698 + struct bio_vec *bvec; 699 + int i; 700 + 701 + bio_for_each_segment_all(bvec, bio, i) 702 + put_page(bvec->bv_page); 703 + bio_put(bio); 704 + } 705 + } 706 + 707 + static blk_qc_t 708 + iomap_dio_zero(struct iomap_dio *dio, struct iomap *iomap, loff_t pos, 709 + unsigned len) 710 + { 711 + struct page *page = ZERO_PAGE(0); 712 + struct bio *bio; 713 + 714 + bio = bio_alloc(GFP_KERNEL, 1); 715 + bio->bi_bdev = iomap->bdev; 716 + bio->bi_iter.bi_sector = 717 + iomap->blkno + ((pos - iomap->offset) >> 9); 718 + bio->bi_private = dio; 719 + bio->bi_end_io = iomap_dio_bio_end_io; 720 + 721 + get_page(page); 722 + if (bio_add_page(bio, page, len, 0) != len) 723 + BUG(); 724 + bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_SYNC | REQ_IDLE); 725 + 726 + atomic_inc(&dio->ref); 727 + return submit_bio(bio); 728 + } 729 + 730 + static loff_t 731 + iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length, 732 + void *data, struct iomap *iomap) 733 + { 734 + struct iomap_dio *dio = 
data; 735 + unsigned blkbits = blksize_bits(bdev_logical_block_size(iomap->bdev)); 736 + unsigned fs_block_size = (1 << inode->i_blkbits), pad; 737 + unsigned align = iov_iter_alignment(dio->submit.iter); 738 + struct iov_iter iter; 739 + struct bio *bio; 740 + bool need_zeroout = false; 741 + int nr_pages, ret; 742 + 743 + if ((pos | length | align) & ((1 << blkbits) - 1)) 744 + return -EINVAL; 745 + 746 + switch (iomap->type) { 747 + case IOMAP_HOLE: 748 + if (WARN_ON_ONCE(dio->flags & IOMAP_DIO_WRITE)) 749 + return -EIO; 750 + /*FALLTHRU*/ 751 + case IOMAP_UNWRITTEN: 752 + if (!(dio->flags & IOMAP_DIO_WRITE)) { 753 + iov_iter_zero(length, dio->submit.iter); 754 + dio->size += length; 755 + return length; 756 + } 757 + dio->flags |= IOMAP_DIO_UNWRITTEN; 758 + need_zeroout = true; 759 + break; 760 + case IOMAP_MAPPED: 761 + if (iomap->flags & IOMAP_F_SHARED) 762 + dio->flags |= IOMAP_DIO_COW; 763 + if (iomap->flags & IOMAP_F_NEW) 764 + need_zeroout = true; 765 + break; 766 + default: 767 + WARN_ON_ONCE(1); 768 + return -EIO; 769 + } 770 + 771 + /* 772 + * Operate on a partial iter trimmed to the extent we were called for. 773 + * We'll update the iter in the dio once we're done with this extent. 
774 + */ 775 + iter = *dio->submit.iter; 776 + iov_iter_truncate(&iter, length); 777 + 778 + nr_pages = iov_iter_npages(&iter, BIO_MAX_PAGES); 779 + if (nr_pages <= 0) 780 + return nr_pages; 781 + 782 + if (need_zeroout) { 783 + /* zero out from the start of the block to the write offset */ 784 + pad = pos & (fs_block_size - 1); 785 + if (pad) 786 + iomap_dio_zero(dio, iomap, pos - pad, pad); 787 + } 788 + 789 + do { 790 + if (dio->error) 791 + return 0; 792 + 793 + bio = bio_alloc(GFP_KERNEL, nr_pages); 794 + bio->bi_bdev = iomap->bdev; 795 + bio->bi_iter.bi_sector = 796 + iomap->blkno + ((pos - iomap->offset) >> 9); 797 + bio->bi_private = dio; 798 + bio->bi_end_io = iomap_dio_bio_end_io; 799 + 800 + ret = bio_iov_iter_get_pages(bio, &iter); 801 + if (unlikely(ret)) { 802 + bio_put(bio); 803 + return ret; 804 + } 805 + 806 + if (dio->flags & IOMAP_DIO_WRITE) { 807 + bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_SYNC | REQ_IDLE); 808 + task_io_account_write(bio->bi_iter.bi_size); 809 + } else { 810 + bio_set_op_attrs(bio, REQ_OP_READ, 0); 811 + if (dio->flags & IOMAP_DIO_DIRTY) 812 + bio_set_pages_dirty(bio); 813 + } 814 + 815 + dio->size += bio->bi_iter.bi_size; 816 + pos += bio->bi_iter.bi_size; 817 + 818 + nr_pages = iov_iter_npages(&iter, BIO_MAX_PAGES); 819 + 820 + atomic_inc(&dio->ref); 821 + 822 + dio->submit.last_queue = bdev_get_queue(iomap->bdev); 823 + dio->submit.cookie = submit_bio(bio); 824 + } while (nr_pages); 825 + 826 + if (need_zeroout) { 827 + /* zero out from the end of the write to the end of the block */ 828 + pad = pos & (fs_block_size - 1); 829 + if (pad) 830 + iomap_dio_zero(dio, iomap, pos, fs_block_size - pad); 831 + } 832 + 833 + iov_iter_advance(dio->submit.iter, length); 834 + return length; 835 + } 836 + 837 + ssize_t 838 + iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, struct iomap_ops *ops, 839 + iomap_dio_end_io_t end_io) 840 + { 841 + struct address_space *mapping = iocb->ki_filp->f_mapping; 842 + struct inode *inode = 
file_inode(iocb->ki_filp); 843 + size_t count = iov_iter_count(iter); 844 + loff_t pos = iocb->ki_pos, end = iocb->ki_pos + count - 1, ret = 0; 845 + unsigned int flags = IOMAP_DIRECT; 846 + struct blk_plug plug; 847 + struct iomap_dio *dio; 848 + 849 + lockdep_assert_held(&inode->i_rwsem); 850 + 851 + if (!count) 852 + return 0; 853 + 854 + dio = kmalloc(sizeof(*dio), GFP_KERNEL); 855 + if (!dio) 856 + return -ENOMEM; 857 + 858 + dio->iocb = iocb; 859 + atomic_set(&dio->ref, 1); 860 + dio->size = 0; 861 + dio->i_size = i_size_read(inode); 862 + dio->end_io = end_io; 863 + dio->error = 0; 864 + dio->flags = 0; 865 + 866 + dio->submit.iter = iter; 867 + if (is_sync_kiocb(iocb)) { 868 + dio->submit.waiter = current; 869 + dio->submit.cookie = BLK_QC_T_NONE; 870 + dio->submit.last_queue = NULL; 871 + } 872 + 873 + if (iov_iter_rw(iter) == READ) { 874 + if (pos >= dio->i_size) 875 + goto out_free_dio; 876 + 877 + if (iter->type == ITER_IOVEC) 878 + dio->flags |= IOMAP_DIO_DIRTY; 879 + } else { 880 + dio->flags |= IOMAP_DIO_WRITE; 881 + flags |= IOMAP_WRITE; 882 + } 883 + 884 + if (mapping->nrpages) { 885 + ret = filemap_write_and_wait_range(mapping, iocb->ki_pos, end); 886 + if (ret) 887 + goto out_free_dio; 888 + 889 + ret = invalidate_inode_pages2_range(mapping, 890 + iocb->ki_pos >> PAGE_SHIFT, end >> PAGE_SHIFT); 891 + WARN_ON_ONCE(ret); 892 + ret = 0; 893 + } 894 + 895 + inode_dio_begin(inode); 896 + 897 + blk_start_plug(&plug); 898 + do { 899 + ret = iomap_apply(inode, pos, count, flags, ops, dio, 900 + iomap_dio_actor); 901 + if (ret <= 0) { 902 + /* magic error code to fall back to buffered I/O */ 903 + if (ret == -ENOTBLK) 904 + ret = 0; 905 + break; 906 + } 907 + pos += ret; 908 + } while ((count = iov_iter_count(iter)) > 0); 909 + blk_finish_plug(&plug); 910 + 911 + if (ret < 0) 912 + iomap_dio_set_error(dio, ret); 913 + 914 + if (ret >= 0 && iov_iter_rw(iter) == WRITE && !is_sync_kiocb(iocb) && 915 + !inode->i_sb->s_dio_done_wq) { 916 + ret = 
sb_init_dio_done_wq(inode->i_sb); 917 + if (ret < 0) 918 + iomap_dio_set_error(dio, ret); 919 + } 920 + 921 + if (!atomic_dec_and_test(&dio->ref)) { 922 + if (!is_sync_kiocb(iocb)) 923 + return -EIOCBQUEUED; 924 + 925 + for (;;) { 926 + set_current_state(TASK_UNINTERRUPTIBLE); 927 + if (!READ_ONCE(dio->submit.waiter)) 928 + break; 929 + 930 + if (!(iocb->ki_flags & IOCB_HIPRI) || 931 + !dio->submit.last_queue || 932 + !blk_mq_poll(dio->submit.last_queue, 933 + dio->submit.cookie)) 934 + io_schedule(); 935 + } 936 + __set_current_state(TASK_RUNNING); 937 + } 938 + 939 + /* 940 + * Try again to invalidate clean pages which might have been cached by 941 + * non-direct readahead, or faulted in by get_user_pages() if the source 942 + * of the write was an mmap'ed region of the file we're writing. Either 943 + * one is a pretty crazy thing to do, so we don't support it 100%. If 944 + * this invalidation fails, tough, the write still worked... 945 + */ 946 + if (iov_iter_rw(iter) == WRITE && mapping->nrpages) { 947 + ret = invalidate_inode_pages2_range(mapping, 948 + iocb->ki_pos >> PAGE_SHIFT, end >> PAGE_SHIFT); 949 + WARN_ON_ONCE(ret); 950 + } 951 + 952 + return iomap_dio_complete(dio); 953 + 954 + out_free_dio: 955 + kfree(dio); 956 + return ret; 957 + } 958 + EXPORT_SYMBOL_GPL(iomap_dio_rw);

+7 -3

fs/xfs/libxfs/xfs_alloc.c

··· 2455 2455 be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp))) 2456 2456 return false; 2457 2457 2458 - if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) > XFS_BTREE_MAXLEVELS || 2458 + if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) < 1 || 2459 + be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) < 1 || 2460 + be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) > XFS_BTREE_MAXLEVELS || 2459 2461 be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) > XFS_BTREE_MAXLEVELS) 2460 2462 return false; 2461 2463 2462 2464 if (xfs_sb_version_hasrmapbt(&mp->m_sb) && 2463 - be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) > XFS_BTREE_MAXLEVELS) 2465 + (be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) < 1 || 2466 + be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) > XFS_BTREE_MAXLEVELS)) 2464 2467 return false; 2465 2468 2466 2469 /* ··· 2480 2477 return false; 2481 2478 2482 2479 if (xfs_sb_version_hasreflink(&mp->m_sb) && 2483 - be32_to_cpu(agf->agf_refcount_level) > XFS_BTREE_MAXLEVELS) 2480 + (be32_to_cpu(agf->agf_refcount_level) < 1 || 2481 + be32_to_cpu(agf->agf_refcount_level) > XFS_BTREE_MAXLEVELS)) 2484 2482 return false; 2485 2483 2486 2484 return true;;

+5 -1

fs/xfs/libxfs/xfs_alloc_btree.c

··· 421 421 422 422 ASSERT(btnum == XFS_BTNUM_BNO || btnum == XFS_BTNUM_CNT); 423 423 424 - cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_SLEEP); 424 + cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_NOFS); 425 425 426 426 cur->bc_tp = tp; 427 427 cur->bc_mp = mp; 428 428 cur->bc_btnum = btnum; 429 429 cur->bc_blocklog = mp->m_sb.sb_blocklog; 430 430 cur->bc_ops = &xfs_allocbt_ops; 431 + if (btnum == XFS_BTNUM_BNO) 432 + cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_abtb_2); 433 + else 434 + cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_abtc_2); 431 435 432 436 if (btnum == XFS_BTNUM_CNT) { 433 437 cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]);

+7 -1

fs/xfs/libxfs/xfs_attr_leaf.c

··· 253 253 { 254 254 struct xfs_mount *mp = bp->b_target->bt_mount; 255 255 struct xfs_attr_leafblock *leaf = bp->b_addr; 256 + struct xfs_perag *pag = bp->b_pag; 256 257 struct xfs_attr3_icleaf_hdr ichdr; 257 258 258 259 xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr, leaf); ··· 274 273 if (ichdr.magic != XFS_ATTR_LEAF_MAGIC) 275 274 return false; 276 275 } 277 - if (ichdr.count == 0) 276 + /* 277 + * In recovery there is a transient state where count == 0 is valid 278 + * because we may have transitioned an empty shortform attr to a leaf 279 + * if the attr didn't fit in shortform. 280 + */ 281 + if (pag && pag->pagf_init && ichdr.count == 0) 278 282 return false; 279 283 280 284 /* XXX: need to range check rest of attr header values */

+2 -2

fs/xfs/libxfs/xfs_attr_leaf.h

··· 51 51 int xfs_attr_shortform_to_leaf(struct xfs_da_args *args); 52 52 int xfs_attr_shortform_remove(struct xfs_da_args *args); 53 53 int xfs_attr_shortform_allfit(struct xfs_buf *bp, struct xfs_inode *dp); 54 - int xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes); 54 + int xfs_attr_shortform_bytesfit(struct xfs_inode *dp, int bytes); 55 55 void xfs_attr_fork_remove(struct xfs_inode *ip, struct xfs_trans *tp); 56 56 57 57 /* ··· 77 77 struct xfs_da_args *args); 78 78 int xfs_attr3_leaf_remove(struct xfs_buf *leaf_buffer, 79 79 struct xfs_da_args *args); 80 - int xfs_attr3_leaf_list_int(struct xfs_buf *bp, 80 + void xfs_attr3_leaf_list_int(struct xfs_buf *bp, 81 81 struct xfs_attr_list_context *context); 82 82 83 83 /*

+135 -201

fs/xfs/libxfs/xfs_bmap.c

··· 49 49 #include "xfs_rmap.h" 50 50 #include "xfs_ag_resv.h" 51 51 #include "xfs_refcount.h" 52 + #include "xfs_rmap_btree.h" 53 + #include "xfs_icache.h" 52 54 53 55 54 56 kmem_zone_t *xfs_bmap_free_item_zone; ··· 192 190 int maxrecs; /* maximum record count at this level */ 193 191 xfs_mount_t *mp; /* mount structure */ 194 192 xfs_filblks_t rval; /* return value */ 193 + xfs_filblks_t orig_len; 195 194 196 195 mp = ip->i_mount; 196 + 197 + /* Calculate the worst-case size of the bmbt. */ 198 + orig_len = len; 197 199 maxrecs = mp->m_bmap_dmxr[0]; 198 200 for (level = 0, rval = 0; 199 201 level < XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK); ··· 205 199 len += maxrecs - 1; 206 200 do_div(len, maxrecs); 207 201 rval += len; 208 - if (len == 1) 209 - return rval + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) - 202 + if (len == 1) { 203 + rval += XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) - 210 204 level - 1; 205 + break; 206 + } 211 207 if (level == 0) 212 208 maxrecs = mp->m_bmap_dmxr[1]; 213 209 } 210 + 211 + /* Calculate the worst-case size of the rmapbt. 
*/ 212 + if (xfs_sb_version_hasrmapbt(&mp->m_sb)) 213 + rval += 1 + xfs_rmapbt_calc_size(mp, orig_len) + 214 + mp->m_rmap_maxlevels; 215 + 214 216 return rval; 215 217 } 216 218 ··· 518 504 xfs_bmap_trace_exlist( 519 505 xfs_inode_t *ip, /* incore inode pointer */ 520 506 xfs_extnum_t cnt, /* count of entries in the list */ 521 - int whichfork, /* data or attr fork */ 507 + int whichfork, /* data or attr or cow fork */ 522 508 unsigned long caller_ip) 523 509 { 524 510 xfs_extnum_t idx; /* extent record index */ ··· 527 513 528 514 if (whichfork == XFS_ATTR_FORK) 529 515 state |= BMAP_ATTRFORK; 516 + else if (whichfork == XFS_COW_FORK) 517 + state |= BMAP_COWFORK; 530 518 531 519 ifp = XFS_IFORK_PTR(ip, whichfork); 532 - ASSERT(cnt == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))); 520 + ASSERT(cnt == xfs_iext_count(ifp)); 533 521 for (idx = 0; idx < cnt; idx++) 534 - trace_xfs_extlist(ip, idx, whichfork, caller_ip); 522 + trace_xfs_extlist(ip, idx, state, caller_ip); 535 523 } 536 524 537 525 /* ··· 827 811 XFS_BTREE_LONG_PTRS); 828 812 829 813 arp = XFS_BMBT_REC_ADDR(mp, ablock, 1); 830 - nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 814 + nextents = xfs_iext_count(ifp); 831 815 for (cnt = i = 0; i < nextents; i++) { 832 816 ep = xfs_iext_get_ext(ifp, i); 833 817 if (!isnullstartblock(xfs_bmbt_get_startblock(ep))) { ··· 1153 1137 goto trans_cancel; 1154 1138 if (XFS_IFORK_Q(ip)) 1155 1139 goto trans_cancel; 1140 + if (ip->i_d.di_anextents != 0) { 1141 + error = -EFSCORRUPTED; 1142 + goto trans_cancel; 1143 + } 1156 1144 if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS) { 1157 1145 /* 1158 1146 * For inodes coming from pre-6.2 filesystems. 
··· 1164 1144 ASSERT(ip->i_d.di_aformat == 0); 1165 1145 ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS; 1166 1146 } 1167 - ASSERT(ip->i_d.di_anextents == 0); 1168 1147 1169 1148 xfs_trans_ijoin(tp, ip, 0); 1170 1149 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); ··· 1315 1296 /* 1316 1297 * Here with bp and block set to the leftmost leaf node in the tree. 1317 1298 */ 1318 - room = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 1299 + room = xfs_iext_count(ifp); 1319 1300 i = 0; 1320 1301 /* 1321 1302 * Loop over all leaf nodes. Copy information to the extent records. ··· 1380 1361 return error; 1381 1362 block = XFS_BUF_TO_BLOCK(bp); 1382 1363 } 1383 - ASSERT(i == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))); 1384 - ASSERT(i == XFS_IFORK_NEXTENTS(ip, whichfork)); 1364 + if (i != XFS_IFORK_NEXTENTS(ip, whichfork)) 1365 + return -EFSCORRUPTED; 1366 + ASSERT(i == xfs_iext_count(ifp)); 1385 1367 XFS_BMAP_TRACE_EXLIST(ip, i, whichfork); 1386 1368 return 0; 1387 1369 error0: 1388 1370 xfs_trans_brelse(tp, bp); 1389 1371 return -EFSCORRUPTED; 1390 - } 1391 - 1392 - 1393 - /* 1394 - * Search the extent records for the entry containing block bno. 1395 - * If bno lies in a hole, point to the next entry. If bno lies 1396 - * past eof, *eofp will be set, and *prevp will contain the last 1397 - * entry (null if none). Else, *lastxp will be set to the index 1398 - * of the found entry; *gotp will contain the entry. 
1399 - */ 1400 - STATIC xfs_bmbt_rec_host_t * /* pointer to found extent entry */ 1401 - xfs_bmap_search_multi_extents( 1402 - xfs_ifork_t *ifp, /* inode fork pointer */ 1403 - xfs_fileoff_t bno, /* block number searched for */ 1404 - int *eofp, /* out: end of file found */ 1405 - xfs_extnum_t *lastxp, /* out: last extent index */ 1406 - xfs_bmbt_irec_t *gotp, /* out: extent entry found */ 1407 - xfs_bmbt_irec_t *prevp) /* out: previous extent entry found */ 1408 - { 1409 - xfs_bmbt_rec_host_t *ep; /* extent record pointer */ 1410 - xfs_extnum_t lastx; /* last extent index */ 1411 - 1412 - /* 1413 - * Initialize the extent entry structure to catch access to 1414 - * uninitialized br_startblock field. 1415 - */ 1416 - gotp->br_startoff = 0xffa5a5a5a5a5a5a5LL; 1417 - gotp->br_blockcount = 0xa55a5a5a5a5a5a5aLL; 1418 - gotp->br_state = XFS_EXT_INVALID; 1419 - gotp->br_startblock = 0xffffa5a5a5a5a5a5LL; 1420 - prevp->br_startoff = NULLFILEOFF; 1421 - 1422 - ep = xfs_iext_bno_to_ext(ifp, bno, &lastx); 1423 - if (lastx > 0) { 1424 - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, lastx - 1), prevp); 1425 - } 1426 - if (lastx < (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))) { 1427 - xfs_bmbt_get_all(ep, gotp); 1428 - *eofp = 0; 1429 - } else { 1430 - if (lastx > 0) { 1431 - *gotp = *prevp; 1432 - } 1433 - *eofp = 1; 1434 - ep = NULL; 1435 - } 1436 - *lastxp = lastx; 1437 - return ep; 1438 - } 1439 - 1440 - /* 1441 - * Search the extents list for the inode, for the extent containing bno. 1442 - * If bno lies in a hole, point to the next entry. If bno lies past eof, 1443 - * *eofp will be set, and *prevp will contain the last entry (null if none). 1444 - * Else, *lastxp will be set to the index of the found 1445 - * entry; *gotp will contain the entry. 
1446 - */ 1447 - xfs_bmbt_rec_host_t * /* pointer to found extent entry */ 1448 - xfs_bmap_search_extents( 1449 - xfs_inode_t *ip, /* incore inode pointer */ 1450 - xfs_fileoff_t bno, /* block number searched for */ 1451 - int fork, /* data or attr fork */ 1452 - int *eofp, /* out: end of file found */ 1453 - xfs_extnum_t *lastxp, /* out: last extent index */ 1454 - xfs_bmbt_irec_t *gotp, /* out: extent entry found */ 1455 - xfs_bmbt_irec_t *prevp) /* out: previous extent entry found */ 1456 - { 1457 - xfs_ifork_t *ifp; /* inode fork pointer */ 1458 - xfs_bmbt_rec_host_t *ep; /* extent record pointer */ 1459 - 1460 - XFS_STATS_INC(ip->i_mount, xs_look_exlist); 1461 - ifp = XFS_IFORK_PTR(ip, fork); 1462 - 1463 - ep = xfs_bmap_search_multi_extents(ifp, bno, eofp, lastxp, gotp, prevp); 1464 - 1465 - if (unlikely(!(gotp->br_startblock) && (*lastxp != NULLEXTNUM) && 1466 - !(XFS_IS_REALTIME_INODE(ip) && fork == XFS_DATA_FORK))) { 1467 - xfs_alert_tag(ip->i_mount, XFS_PTAG_FSBLOCK_ZERO, 1468 - "Access to block zero in inode %llu " 1469 - "start_block: %llx start_off: %llx " 1470 - "blkcnt: %llx extent-state: %x lastx: %x", 1471 - (unsigned long long)ip->i_ino, 1472 - (unsigned long long)gotp->br_startblock, 1473 - (unsigned long long)gotp->br_startoff, 1474 - (unsigned long long)gotp->br_blockcount, 1475 - gotp->br_state, *lastxp); 1476 - *lastxp = NULLEXTNUM; 1477 - *eofp = 1; 1478 - return NULL; 1479 - } 1480 - return ep; 1481 1372 } 1482 1373 1483 1374 /* ··· 1426 1497 (error = xfs_iread_extents(tp, ip, whichfork))) 1427 1498 return error; 1428 1499 lowest = *first_unused; 1429 - nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 1500 + nextents = xfs_iext_count(ifp); 1430 1501 for (idx = 0, lastaddr = 0, max = lowest; idx < nextents; idx++) { 1431 1502 xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, idx); 1432 1503 off = xfs_bmbt_get_startoff(ep); ··· 1452 1523 */ 1453 1524 int /* error */ 1454 1525 xfs_bmap_last_before( 1455 - xfs_trans_t *tp, /* transaction 
pointer */ 1456 - xfs_inode_t *ip, /* incore inode */ 1457 - xfs_fileoff_t *last_block, /* last block */ 1458 - int whichfork) /* data or attr fork */ 1526 + struct xfs_trans *tp, /* transaction pointer */ 1527 + struct xfs_inode *ip, /* incore inode */ 1528 + xfs_fileoff_t *last_block, /* last block */ 1529 + int whichfork) /* data or attr fork */ 1459 1530 { 1460 - xfs_fileoff_t bno; /* input file offset */ 1461 - int eof; /* hit end of file */ 1462 - xfs_bmbt_rec_host_t *ep; /* pointer to last extent */ 1463 - int error; /* error return value */ 1464 - xfs_bmbt_irec_t got; /* current extent value */ 1465 - xfs_ifork_t *ifp; /* inode fork pointer */ 1466 - xfs_extnum_t lastx; /* last extent used */ 1467 - xfs_bmbt_irec_t prev; /* previous extent value */ 1531 + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); 1532 + struct xfs_bmbt_irec got; 1533 + xfs_extnum_t idx; 1534 + int error; 1468 1535 1469 - if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE && 1470 - XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS && 1471 - XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL) 1472 - return -EIO; 1473 - if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) { 1536 + switch (XFS_IFORK_FORMAT(ip, whichfork)) { 1537 + case XFS_DINODE_FMT_LOCAL: 1474 1538 *last_block = 0; 1475 1539 return 0; 1540 + case XFS_DINODE_FMT_BTREE: 1541 + case XFS_DINODE_FMT_EXTENTS: 1542 + break; 1543 + default: 1544 + return -EIO; 1476 1545 } 1477 - ifp = XFS_IFORK_PTR(ip, whichfork); 1478 - if (!(ifp->if_flags & XFS_IFEXTENTS) && 1479 - (error = xfs_iread_extents(tp, ip, whichfork))) 1480 - return error; 1481 - bno = *last_block - 1; 1482 - ep = xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got, 1483 - &prev); 1484 - if (eof || xfs_bmbt_get_startoff(ep) > bno) { 1485 - if (prev.br_startoff == NULLFILEOFF) 1486 - *last_block = 0; 1487 - else 1488 - *last_block = prev.br_startoff + prev.br_blockcount; 1546 + 1547 + if (!(ifp->if_flags & 
XFS_IFEXTENTS)) { 1548 + error = xfs_iread_extents(tp, ip, whichfork); 1549 + if (error) 1550 + return error; 1489 1551 } 1490 - /* 1491 - * Otherwise *last_block is already the right answer. 1492 - */ 1552 + 1553 + if (xfs_iext_lookup_extent(ip, ifp, *last_block - 1, &idx, &got)) { 1554 + if (got.br_startoff <= *last_block - 1) 1555 + return 0; 1556 + } 1557 + 1558 + if (xfs_iext_get_extent(ifp, idx - 1, &got)) { 1559 + *last_block = got.br_startoff + got.br_blockcount; 1560 + return 0; 1561 + } 1562 + 1563 + *last_block = 0; 1493 1564 return 0; 1494 1565 } 1495 1566 ··· 1511 1582 return error; 1512 1583 } 1513 1584 1514 - nextents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t); 1585 + nextents = xfs_iext_count(ifp); 1515 1586 if (nextents == 0) { 1516 1587 *is_empty = 1; 1517 1588 return 0; ··· 1664 1735 &bma->ip->i_d.di_nextents); 1665 1736 1666 1737 ASSERT(bma->idx >= 0); 1667 - ASSERT(bma->idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec)); 1738 + ASSERT(bma->idx <= xfs_iext_count(ifp)); 1668 1739 ASSERT(!isnullstartblock(new->br_startblock)); 1669 1740 ASSERT(!bma->cur || 1670 1741 (bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL)); ··· 1723 1794 * Don't set contiguous if the combined extent would be too large. 1724 1795 * Also check for all-three-contiguous being too large. 1725 1796 */ 1726 - if (bma->idx < ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) { 1797 + if (bma->idx < xfs_iext_count(ifp) - 1) { 1727 1798 state |= BMAP_RIGHT_VALID; 1728 1799 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx + 1), &RIGHT); 1729 1800 ··· 2229 2300 ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); 2230 2301 2231 2302 ASSERT(*idx >= 0); 2232 - ASSERT(*idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec)); 2303 + ASSERT(*idx <= xfs_iext_count(ifp)); 2233 2304 ASSERT(!isnullstartblock(new->br_startblock)); 2234 2305 2235 2306 XFS_STATS_INC(mp, xs_add_exlist); ··· 2285 2356 * Don't set contiguous if the combined extent would be too large. 
2286 2357 * Also check for all-three-contiguous being too large. 2287 2358 */ 2288 - if (*idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) { 2359 + if (*idx < xfs_iext_count(&ip->i_df) - 1) { 2289 2360 state |= BMAP_RIGHT_VALID; 2290 2361 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx + 1), &RIGHT); 2291 2362 if (isnullstartblock(RIGHT.br_startblock)) ··· 2765 2836 * Check and set flags if the current (right) segment exists. 2766 2837 * If it doesn't exist, we're converting the hole at end-of-file. 2767 2838 */ 2768 - if (*idx < ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) { 2839 + if (*idx < xfs_iext_count(ifp)) { 2769 2840 state |= BMAP_RIGHT_VALID; 2770 2841 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx), &right); 2771 2842 ··· 2895 2966 ifp = XFS_IFORK_PTR(bma->ip, whichfork); 2896 2967 2897 2968 ASSERT(bma->idx >= 0); 2898 - ASSERT(bma->idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec)); 2969 + ASSERT(bma->idx <= xfs_iext_count(ifp)); 2899 2970 ASSERT(!isnullstartblock(new->br_startblock)); 2900 2971 ASSERT(!bma->cur || 2901 2972 !(bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL)); ··· 2921 2992 * Check and set flags if this segment has a current value. 2922 2993 * Not true if we're inserting into the "hole" at eof. 
2923 2994 */ 2924 - if (bma->idx < ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) { 2995 + if (bma->idx < xfs_iext_count(ifp)) { 2925 2996 state |= BMAP_RIGHT_VALID; 2926 2997 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx), &right); 2927 2998 if (isnullstartblock(right.br_startblock)) ··· 4074 4145 struct xfs_mount *mp = ip->i_mount; 4075 4146 struct xfs_ifork *ifp; 4076 4147 struct xfs_bmbt_irec got; 4077 - struct xfs_bmbt_irec prev; 4078 4148 xfs_fileoff_t obno; 4079 4149 xfs_fileoff_t end; 4080 - xfs_extnum_t lastx; 4150 + xfs_extnum_t idx; 4081 4151 int error; 4082 - int eof; 4152 + bool eof = false; 4083 4153 int n = 0; 4084 4154 int whichfork = xfs_bmapi_whichfork(flags); 4085 4155 ··· 4118 4190 return error; 4119 4191 } 4120 4192 4121 - xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got, &prev); 4193 + if (!xfs_iext_lookup_extent(ip, ifp, bno, &idx, &got)) 4194 + eof = true; 4122 4195 end = bno + len; 4123 4196 obno = bno; 4124 4197 ··· 4150 4221 break; 4151 4222 4152 4223 /* Else go on to the next record. */ 4153 - if (++lastx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t)) 4154 - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, lastx), &got); 4155 - else 4156 - eof = 1; 4224 + if (!xfs_iext_get_extent(ifp, ++idx, &got)) 4225 + eof = true; 4157 4226 } 4158 4227 *nmap = n; 4159 4228 return 0; ··· 4161 4234 xfs_bmapi_reserve_delalloc( 4162 4235 struct xfs_inode *ip, 4163 4236 int whichfork, 4164 - xfs_fileoff_t aoff, 4237 + xfs_fileoff_t off, 4165 4238 xfs_filblks_t len, 4239 + xfs_filblks_t prealloc, 4166 4240 struct xfs_bmbt_irec *got, 4167 - struct xfs_bmbt_irec *prev, 4168 4241 xfs_extnum_t *lastx, 4169 4242 int eof) 4170 4243 { ··· 4175 4248 char rt = XFS_IS_REALTIME_INODE(ip); 4176 4249 xfs_extlen_t extsz; 4177 4250 int error; 4251 + xfs_fileoff_t aoff = off; 4178 4252 4179 - alen = XFS_FILBLKS_MIN(len, MAXEXTLEN); 4253 + /* 4254 + * Cap the alloc length. Keep track of prealloc so we know whether to 4255 + * tag the inode before we return. 
4256 + */ 4257 + alen = XFS_FILBLKS_MIN(len + prealloc, MAXEXTLEN); 4180 4258 if (!eof) 4181 4259 alen = XFS_FILBLKS_MIN(alen, got->br_startoff - aoff); 4260 + if (prealloc && alen >= len) 4261 + prealloc = alen - len; 4182 4262 4183 4263 /* Figure out the extent size, adjust alen */ 4184 4264 if (whichfork == XFS_COW_FORK) ··· 4193 4259 else 4194 4260 extsz = xfs_get_extsz_hint(ip); 4195 4261 if (extsz) { 4196 - error = xfs_bmap_extsize_align(mp, got, prev, extsz, rt, eof, 4262 + struct xfs_bmbt_irec prev; 4263 + 4264 + if (!xfs_iext_get_extent(ifp, *lastx - 1, &prev)) 4265 + prev.br_startoff = NULLFILEOFF; 4266 + 4267 + error = xfs_bmap_extsize_align(mp, got, &prev, extsz, rt, eof, 4197 4268 1, 0, &aoff, &alen); 4198 4269 ASSERT(!error); 4199 4270 } ··· 4251 4312 */ 4252 4313 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *lastx), got); 4253 4314 4315 + /* 4316 + * Tag the inode if blocks were preallocated. Note that COW fork 4317 + * preallocation can occur at the start or end of the extent, even when 4318 + * prealloc == 0, so we must also check the aligned offset and length. 
4319 + */ 4320 + if (whichfork == XFS_DATA_FORK && prealloc) 4321 + xfs_inode_set_eofblocks_tag(ip); 4322 + if (whichfork == XFS_COW_FORK && (prealloc || aoff < off || alen > len)) 4323 + xfs_inode_set_cowblocks_tag(ip); 4324 + 4254 4325 ASSERT(got->br_startoff <= aoff); 4255 4326 ASSERT(got->br_startoff + got->br_blockcount >= aoff + alen); 4256 4327 ASSERT(isnullstartblock(got->br_startblock)); ··· 4298 4349 if (bma->wasdel) { 4299 4350 bma->length = (xfs_extlen_t)bma->got.br_blockcount; 4300 4351 bma->offset = bma->got.br_startoff; 4301 - if (bma->idx != NULLEXTNUM && bma->idx) { 4352 + if (bma->idx) { 4302 4353 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx - 1), 4303 4354 &bma->prev); 4304 4355 } ··· 4512 4563 struct xfs_ifork *ifp; 4513 4564 struct xfs_bmalloca bma = { NULL }; /* args for xfs_bmap_alloc */ 4514 4565 xfs_fileoff_t end; /* end of mapped file region */ 4515 - int eof; /* after the end of extents */ 4566 + bool eof = false; /* after the end of extents */ 4516 4567 int error; /* error return */ 4517 4568 int n; /* current extent index */ 4518 4569 xfs_fileoff_t obno; /* old block number (offset) */ ··· 4590 4641 goto error0; 4591 4642 } 4592 4643 4593 - xfs_bmap_search_extents(ip, bno, whichfork, &eof, &bma.idx, &bma.got, 4594 - &bma.prev); 4595 4644 n = 0; 4596 4645 end = bno + len; 4597 4646 obno = bno; 4598 4647 4648 + if (!xfs_iext_lookup_extent(ip, ifp, bno, &bma.idx, &bma.got)) 4649 + eof = true; 4650 + if (!xfs_iext_get_extent(ifp, bma.idx - 1, &bma.prev)) 4651 + bma.prev.br_startoff = NULLFILEOFF; 4599 4652 bma.tp = tp; 4600 4653 bma.ip = ip; 4601 4654 bma.total = total; ··· 4684 4733 4685 4734 /* Else go on to the next record. 
*/ 4686 4735 bma.prev = bma.got; 4687 - if (++bma.idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t)) { 4688 - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma.idx), 4689 - &bma.got); 4690 - } else 4691 - eof = 1; 4736 + if (!xfs_iext_get_extent(ifp, ++bma.idx, &bma.got)) 4737 + eof = true; 4692 4738 } 4693 4739 *nmap = n; 4694 4740 ··· 4833 4885 da_new = 0; 4834 4886 4835 4887 ASSERT(*idx >= 0); 4836 - ASSERT(*idx < ifp->if_bytes / sizeof(struct xfs_bmbt_rec)); 4888 + ASSERT(*idx <= xfs_iext_count(ifp)); 4837 4889 ASSERT(del->br_blockcount > 0); 4838 4890 ASSERT(got->br_startoff <= del->br_startoff); 4839 4891 ASSERT(got_endoff >= del_endoff); ··· 4850 4902 * sb counters as we might have to borrow some blocks for the 4851 4903 * indirect block accounting. 4852 4904 */ 4853 - xfs_trans_reserve_quota_nblks(NULL, ip, -((long)del->br_blockcount), 0, 4905 + error = xfs_trans_reserve_quota_nblks(NULL, ip, 4906 + -((long)del->br_blockcount), 0, 4854 4907 isrt ? XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS); 4908 + if (error) 4909 + return error; 4855 4910 ip->i_delayed_blks -= del->br_blockcount; 4856 4911 4857 4912 if (whichfork == XFS_COW_FORK) ··· 4964 5013 got_endoff = got->br_startoff + got->br_blockcount; 4965 5014 4966 5015 ASSERT(*idx >= 0); 4967 - ASSERT(*idx < ifp->if_bytes / sizeof(struct xfs_bmbt_rec)); 5016 + ASSERT(*idx <= xfs_iext_count(ifp)); 4968 5017 ASSERT(del->br_blockcount > 0); 4969 5018 ASSERT(got->br_startoff <= del->br_startoff); 4970 5019 ASSERT(got_endoff >= del_endoff); ··· 5070 5119 state |= BMAP_COWFORK; 5071 5120 5072 5121 ifp = XFS_IFORK_PTR(ip, whichfork); 5073 - ASSERT((*idx >= 0) && (*idx < ifp->if_bytes / 5074 - (uint)sizeof(xfs_bmbt_rec_t))); 5122 + ASSERT((*idx >= 0) && (*idx < xfs_iext_count(ifp))); 5075 5123 ASSERT(del->br_blockcount > 0); 5076 5124 ep = xfs_iext_get_ext(ifp, *idx); 5077 5125 xfs_bmbt_get_all(ep, &got); ··· 5384 5434 { 5385 5435 xfs_btree_cur_t *cur; /* bmap btree cursor */ 5386 5436 xfs_bmbt_irec_t del; /* extent being 
deleted */ 5387 - int eof; /* is deleting at eof */ 5388 - xfs_bmbt_rec_host_t *ep; /* extent record pointer */ 5389 5437 int error; /* error return value */ 5390 5438 xfs_extnum_t extno; /* extent number in list */ 5391 5439 xfs_bmbt_irec_t got; /* current extent record */ ··· 5393 5445 int logflags; /* transaction logging flags */ 5394 5446 xfs_extlen_t mod; /* rt extent offset */ 5395 5447 xfs_mount_t *mp; /* mount structure */ 5396 - xfs_extnum_t nextents; /* number of file extents */ 5397 - xfs_bmbt_irec_t prev; /* previous extent record */ 5398 5448 xfs_fileoff_t start; /* first file offset deleted */ 5399 5449 int tmp_logflags; /* partial logging flags */ 5400 5450 int wasdel; /* was a delayed alloc extent */ ··· 5423 5477 if (!(ifp->if_flags & XFS_IFEXTENTS) && 5424 5478 (error = xfs_iread_extents(tp, ip, whichfork))) 5425 5479 return error; 5426 - nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 5427 - if (nextents == 0) { 5480 + if (xfs_iext_count(ifp) == 0) { 5428 5481 *rlen = 0; 5429 5482 return 0; 5430 5483 } ··· 5431 5486 isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip); 5432 5487 start = bno; 5433 5488 bno = start + len - 1; 5434 - ep = xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got, 5435 - &prev); 5436 5489 5437 5490 /* 5438 5491 * Check to see if the given block number is past the end of the 5439 5492 * file, back up to the last block if so... 
5440 5493 */ 5441 - if (eof) { 5442 - ep = xfs_iext_get_ext(ifp, --lastx); 5443 - xfs_bmbt_get_all(ep, &got); 5494 + if (!xfs_iext_lookup_extent(ip, ifp, bno, &lastx, &got)) { 5495 + ASSERT(lastx > 0); 5496 + xfs_iext_get_extent(ifp, --lastx, &got); 5444 5497 bno = got.br_startoff + got.br_blockcount - 1; 5445 5498 } 5499 + 5446 5500 logflags = 0; 5447 5501 if (ifp->if_flags & XFS_IFBROOT) { 5448 5502 ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE); ··· 5472 5528 if (got.br_startoff > bno) { 5473 5529 if (--lastx < 0) 5474 5530 break; 5475 - ep = xfs_iext_get_ext(ifp, lastx); 5476 - xfs_bmbt_get_all(ep, &got); 5531 + xfs_iext_get_extent(ifp, lastx, &got); 5477 5532 } 5478 5533 /* 5479 5534 * Is the last block of this extent before the range ··· 5486 5543 * Then deal with the (possibly delayed) allocated space 5487 5544 * we found. 5488 5545 */ 5489 - ASSERT(ep != NULL); 5490 5546 del = got; 5491 5547 wasdel = isnullstartblock(del.br_startblock); 5492 5548 if (got.br_startoff < start) { ··· 5566 5624 */ 5567 5625 ASSERT(bno >= del.br_blockcount); 5568 5626 bno -= del.br_blockcount; 5569 - if (got.br_startoff > bno) { 5570 - if (--lastx >= 0) { 5571 - ep = xfs_iext_get_ext(ifp, 5572 - lastx); 5573 - xfs_bmbt_get_all(ep, &got); 5574 - } 5575 - } 5627 + if (got.br_startoff > bno && --lastx >= 0) 5628 + xfs_iext_get_extent(ifp, lastx, &got); 5576 5629 continue; 5577 5630 } else if (del.br_state == XFS_EXT_UNWRITTEN) { 5631 + struct xfs_bmbt_irec prev; 5632 + 5578 5633 /* 5579 5634 * This one is already unwritten. 5580 5635 * It must have a written left neighbor. ··· 5579 5640 * try again. 
5580 5641 */ 5581 5642 ASSERT(lastx > 0); 5582 - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, 5583 - lastx - 1), &prev); 5643 + xfs_iext_get_extent(ifp, lastx - 1, &prev); 5584 5644 ASSERT(prev.br_state == XFS_EXT_NORM); 5585 5645 ASSERT(!isnullstartblock(prev.br_startblock)); 5586 5646 ASSERT(del.br_startblock == ··· 5677 5739 */ 5678 5740 if (bno != (xfs_fileoff_t)-1 && bno >= start) { 5679 5741 if (lastx >= 0) { 5680 - ep = xfs_iext_get_ext(ifp, lastx); 5681 - if (xfs_bmbt_get_startoff(ep) > bno) { 5682 - if (--lastx >= 0) 5683 - ep = xfs_iext_get_ext(ifp, 5684 - lastx); 5685 - } 5686 - xfs_bmbt_get_all(ep, &got); 5742 + xfs_iext_get_extent(ifp, lastx, &got); 5743 + if (got.br_startoff > bno && --lastx >= 0) 5744 + xfs_iext_get_extent(ifp, lastx, &got); 5687 5745 } 5688 5746 extno++; 5689 5747 } ··· 5897 5963 5898 5964 mp = ip->i_mount; 5899 5965 ifp = XFS_IFORK_PTR(ip, whichfork); 5900 - total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t); 5966 + total_extents = xfs_iext_count(ifp); 5901 5967 5902 5968 xfs_bmbt_get_all(gotp, &got); 5903 5969 ··· 6074 6140 * are collapsing out, so we cannot use the count of real extents here. 6075 6141 * Instead we have to calculate it from the incore fork. 6076 6142 */ 6077 - total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t); 6143 + total_extents = xfs_iext_count(ifp); 6078 6144 if (total_extents == 0) { 6079 6145 *done = 1; 6080 6146 goto del_cursor; ··· 6134 6200 * count can change. Update the total and grade the next record. 6135 6201 */ 6136 6202 if (direction == SHIFT_LEFT) { 6137 - total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t); 6203 + total_extents = xfs_iext_count(ifp); 6138 6204 stop_extent = total_extents; 6139 6205 } 6140 6206

+2 -7

fs/xfs/libxfs/xfs_bmap.h

··· 237 237 struct xfs_defer_ops *dfops, enum shift_direction direction, 238 238 int num_exts); 239 239 int xfs_bmap_split_extent(struct xfs_inode *ip, xfs_fileoff_t split_offset); 240 - struct xfs_bmbt_rec_host * 241 - xfs_bmap_search_extents(struct xfs_inode *ip, xfs_fileoff_t bno, 242 - int fork, int *eofp, xfs_extnum_t *lastxp, 243 - struct xfs_bmbt_irec *gotp, struct xfs_bmbt_irec *prevp); 244 240 int xfs_bmapi_reserve_delalloc(struct xfs_inode *ip, int whichfork, 245 - xfs_fileoff_t aoff, xfs_filblks_t len, 246 - struct xfs_bmbt_irec *got, struct xfs_bmbt_irec *prev, 247 - xfs_extnum_t *lastx, int eof); 241 + xfs_fileoff_t off, xfs_filblks_t len, xfs_filblks_t prealloc, 242 + struct xfs_bmbt_irec *got, xfs_extnum_t *lastx, int eof); 248 243 249 244 enum xfs_bmap_intent_type { 250 245 XFS_BMAP_MAP = 1,

+2 -1

fs/xfs/libxfs/xfs_bmap_btree.c

··· 796 796 struct xfs_btree_cur *cur; 797 797 ASSERT(whichfork != XFS_COW_FORK); 798 798 799 - cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_SLEEP); 799 + cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_NOFS); 800 800 801 801 cur->bc_tp = tp; 802 802 cur->bc_mp = mp; 803 803 cur->bc_nlevels = be16_to_cpu(ifp->if_broot->bb_level) + 1; 804 804 cur->bc_btnum = XFS_BTNUM_BMAP; 805 805 cur->bc_blocklog = mp->m_sb.sb_blocklog; 806 + cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_bmbt_2); 806 807 807 808 cur->bc_ops = &xfs_bmbt_ops; 808 809 cur->bc_flags = XFS_BTREE_LONG_PTRS | XFS_BTREE_ROOT_IN_INODE;

+20

fs/xfs/libxfs/xfs_btree.c

··· 1769 1769 if (error) 1770 1770 return error; 1771 1771 1772 + /* Check the inode owner since the verifiers don't. */ 1773 + if (xfs_sb_version_hascrc(&cur->bc_mp->m_sb) && 1774 + (cur->bc_flags & XFS_BTREE_LONG_PTRS) && 1775 + be64_to_cpu((*blkp)->bb_u.l.bb_owner) != 1776 + cur->bc_private.b.ip->i_ino) 1777 + goto out_bad; 1778 + 1779 + /* Did we get the level we were looking for? */ 1780 + if (be16_to_cpu((*blkp)->bb_level) != level) 1781 + goto out_bad; 1782 + 1783 + /* Check that internal nodes have at least one record. */ 1784 + if (level != 0 && be16_to_cpu((*blkp)->bb_numrecs) == 0) 1785 + goto out_bad; 1786 + 1772 1787 xfs_btree_setbuf(cur, level, bp); 1773 1788 return 0; 1789 + 1790 + out_bad: 1791 + *blkp = NULL; 1792 + xfs_trans_brelse(cur->bc_tp, bp); 1793 + return -EFSCORRUPTED; 1774 1794 } 1775 1795 1776 1796 /*

+4 -39

fs/xfs/libxfs/xfs_btree.h

··· 96 96 /* 97 97 * Generic stats interface 98 98 */ 99 - #define __XFS_BTREE_STATS_INC(mp, type, stat) \ 100 - XFS_STATS_INC(mp, xs_ ## type ## _2_ ## stat) 101 99 #define XFS_BTREE_STATS_INC(cur, stat) \ 102 - do { \ 103 - struct xfs_mount *__mp = cur->bc_mp; \ 104 - switch (cur->bc_btnum) { \ 105 - case XFS_BTNUM_BNO: __XFS_BTREE_STATS_INC(__mp, abtb, stat); break; \ 106 - case XFS_BTNUM_CNT: __XFS_BTREE_STATS_INC(__mp, abtc, stat); break; \ 107 - case XFS_BTNUM_BMAP: __XFS_BTREE_STATS_INC(__mp, bmbt, stat); break; \ 108 - case XFS_BTNUM_INO: __XFS_BTREE_STATS_INC(__mp, ibt, stat); break; \ 109 - case XFS_BTNUM_FINO: __XFS_BTREE_STATS_INC(__mp, fibt, stat); break; \ 110 - case XFS_BTNUM_RMAP: __XFS_BTREE_STATS_INC(__mp, rmap, stat); break; \ 111 - case XFS_BTNUM_REFC: __XFS_BTREE_STATS_INC(__mp, refcbt, stat); break; \ 112 - case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \ 113 - } \ 114 - } while (0) 115 - 116 - #define __XFS_BTREE_STATS_ADD(mp, type, stat, val) \ 117 - XFS_STATS_ADD(mp, xs_ ## type ## _2_ ## stat, val) 118 - #define XFS_BTREE_STATS_ADD(cur, stat, val) \ 119 - do { \ 120 - struct xfs_mount *__mp = cur->bc_mp; \ 121 - switch (cur->bc_btnum) { \ 122 - case XFS_BTNUM_BNO: \ 123 - __XFS_BTREE_STATS_ADD(__mp, abtb, stat, val); break; \ 124 - case XFS_BTNUM_CNT: \ 125 - __XFS_BTREE_STATS_ADD(__mp, abtc, stat, val); break; \ 126 - case XFS_BTNUM_BMAP: \ 127 - __XFS_BTREE_STATS_ADD(__mp, bmbt, stat, val); break; \ 128 - case XFS_BTNUM_INO: \ 129 - __XFS_BTREE_STATS_ADD(__mp, ibt, stat, val); break; \ 130 - case XFS_BTNUM_FINO: \ 131 - __XFS_BTREE_STATS_ADD(__mp, fibt, stat, val); break; \ 132 - case XFS_BTNUM_RMAP: \ 133 - __XFS_BTREE_STATS_ADD(__mp, rmap, stat, val); break; \ 134 - case XFS_BTNUM_REFC: \ 135 - __XFS_BTREE_STATS_ADD(__mp, refcbt, stat, val); break; \ 136 - case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \ 137 - } \ 138 - } while (0) 100 + XFS_STATS_INC_OFF((cur)->bc_mp, (cur)->bc_statoff + __XBTS_ ## stat) 101 + 
#define XFS_BTREE_STATS_ADD(cur, stat, val) \ 102 + XFS_STATS_ADD_OFF((cur)->bc_mp, (cur)->bc_statoff + __XBTS_ ## stat, val) 139 103 140 104 #define XFS_BTREE_MAXLEVELS 9 /* max of all btrees */ 141 105 ··· 217 253 __uint8_t bc_nlevels; /* number of levels in the tree */ 218 254 __uint8_t bc_blocklog; /* log2(blocksize) of btree blocks */ 219 255 xfs_btnum_t bc_btnum; /* identifies which btree type */ 256 + int bc_statoff; /* offset of btree stats array */ 220 257 union { 221 258 struct { /* needed for BNO, CNT, INO */ 222 259 struct xfs_buf *agbp; /* agf/agi buffer pointer */

+22 -4

fs/xfs/libxfs/xfs_cksum.h

··· 6 6 /* 7 7 * Calculate the intermediate checksum for a buffer that has the CRC field 8 8 * inside it. The offset of the 32bit crc fields is passed as the 9 - * cksum_offset parameter. 9 + * cksum_offset parameter. We do not modify the buffer during verification, 10 + * hence we have to split the CRC calculation across the cksum_offset. 10 11 */ 11 12 static inline __uint32_t 12 - xfs_start_cksum(char *buffer, size_t length, unsigned long cksum_offset) 13 + xfs_start_cksum_safe(char *buffer, size_t length, unsigned long cksum_offset) 13 14 { 14 15 __uint32_t zero = 0; 15 16 __uint32_t crc; ··· 24 23 /* Calculate the rest of the CRC. */ 25 24 return crc32c(crc, &buffer[cksum_offset + sizeof(__be32)], 26 25 length - (cksum_offset + sizeof(__be32))); 26 + } 27 + 28 + /* 29 + * Fast CRC method where the buffer is modified. Callers must have exclusive 30 + * access to the buffer while the calculation takes place. 31 + */ 32 + static inline __uint32_t 33 + xfs_start_cksum_update(char *buffer, size_t length, unsigned long cksum_offset) 34 + { 35 + /* zero the CRC field */ 36 + *(__le32 *)(buffer + cksum_offset) = 0; 37 + 38 + /* single pass CRC calculation for the entire buffer */ 39 + return crc32c(XFS_CRC_SEED, buffer, length); 27 40 } 28 41 29 42 /* ··· 55 40 56 41 /* 57 42 * Helper to generate the checksum for a buffer. 43 + * 44 + * This modifies the buffer temporarily - callers must have exclusive 45 + * access to the buffer while the calculation takes place. 
58 46 */ 59 47 static inline void 60 48 xfs_update_cksum(char *buffer, size_t length, unsigned long cksum_offset) 61 49 { 62 - __uint32_t crc = xfs_start_cksum(buffer, length, cksum_offset); 50 + __uint32_t crc = xfs_start_cksum_update(buffer, length, cksum_offset); 63 51 64 52 *(__le32 *)(buffer + cksum_offset) = xfs_end_cksum(crc); 65 53 } ··· 73 55 static inline int 74 56 xfs_verify_cksum(char *buffer, size_t length, unsigned long cksum_offset) 75 57 { 76 - __uint32_t crc = xfs_start_cksum(buffer, length, cksum_offset); 58 + __uint32_t crc = xfs_start_cksum_safe(buffer, length, cksum_offset); 77 59 78 60 return *(__le32 *)(buffer + cksum_offset) == xfs_end_cksum(crc); 79 61 }

+1 -1

fs/xfs/libxfs/xfs_dir2.c

··· 93 93 return result; 94 94 } 95 95 96 - static struct xfs_nameops xfs_ascii_ci_nameops = { 96 + static const struct xfs_nameops xfs_ascii_ci_nameops = { 97 97 .hashname = xfs_ascii_ci_hashname, 98 98 .compname = xfs_ascii_ci_compname, 99 99 };

+5

fs/xfs/libxfs/xfs_dir2.h

··· 157 157 extern int xfs_dir2_shrink_inode(struct xfs_da_args *args, xfs_dir2_db_t db, 158 158 struct xfs_buf *bp); 159 159 160 + extern void xfs_dir2_data_freescan_int(struct xfs_da_geometry *geo, 161 + const struct xfs_dir_ops *ops, 162 + struct xfs_dir2_data_hdr *hdr, int *loghead); 160 163 extern void xfs_dir2_data_freescan(struct xfs_inode *dp, 161 164 struct xfs_dir2_data_hdr *hdr, int *loghead); 162 165 extern void xfs_dir2_data_log_entry(struct xfs_da_args *args, ··· 179 176 extern struct xfs_dir2_data_free *xfs_dir2_data_freefind( 180 177 struct xfs_dir2_data_hdr *hdr, struct xfs_dir2_data_free *bf, 181 178 struct xfs_dir2_data_unused *dup); 179 + 180 + extern int xfs_dir_ino_validate(struct xfs_mount *mp, xfs_ino_t ino); 182 181 183 182 extern const struct xfs_buf_ops xfs_dir3_block_buf_ops; 184 183 extern const struct xfs_buf_ops xfs_dir3_leafn_buf_ops;

+18 -8

fs/xfs/libxfs/xfs_dir2_data.c

··· 329 329 330 330 err = xfs_da_read_buf(tp, dp, bno, mapped_bno, bpp, 331 331 XFS_DATA_FORK, &xfs_dir3_data_buf_ops); 332 - if (!err && tp) 332 + if (!err && tp && *bpp) 333 333 xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_DATA_BUF); 334 334 return err; 335 335 } ··· 505 505 * Given a data block, reconstruct its bestfree map. 506 506 */ 507 507 void 508 - xfs_dir2_data_freescan( 509 - struct xfs_inode *dp, 508 + xfs_dir2_data_freescan_int( 509 + struct xfs_da_geometry *geo, 510 + const struct xfs_dir_ops *ops, 510 511 struct xfs_dir2_data_hdr *hdr, 511 512 int *loghead) 512 513 { ··· 517 516 struct xfs_dir2_data_free *bf; 518 517 char *endp; /* end of block's data */ 519 518 char *p; /* current entry pointer */ 520 - struct xfs_da_geometry *geo = dp->i_mount->m_dir_geo; 521 519 522 520 ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || 523 521 hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) || ··· 526 526 /* 527 527 * Start by clearing the table. 528 528 */ 529 - bf = dp->d_ops->data_bestfree_p(hdr); 529 + bf = ops->data_bestfree_p(hdr); 530 530 memset(bf, 0, sizeof(*bf) * XFS_DIR2_DATA_FD_COUNT); 531 531 *loghead = 1; 532 532 /* 533 533 * Set up pointers. 
534 534 */ 535 - p = (char *)dp->d_ops->data_entry_p(hdr); 535 + p = (char *)ops->data_entry_p(hdr); 536 536 if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || 537 537 hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) { 538 538 btp = xfs_dir2_block_tail_p(geo, hdr); ··· 559 559 else { 560 560 dep = (xfs_dir2_data_entry_t *)p; 561 561 ASSERT((char *)dep - (char *)hdr == 562 - be16_to_cpu(*dp->d_ops->data_entry_tag_p(dep))); 563 - p += dp->d_ops->data_entsize(dep->namelen); 562 + be16_to_cpu(*ops->data_entry_tag_p(dep))); 563 + p += ops->data_entsize(dep->namelen); 564 564 } 565 565 } 566 + } 567 + 568 + void 569 + xfs_dir2_data_freescan( 570 + struct xfs_inode *dp, 571 + struct xfs_dir2_data_hdr *hdr, 572 + int *loghead) 573 + { 574 + return xfs_dir2_data_freescan_int(dp->i_mount->m_dir_geo, dp->d_ops, 575 + hdr, loghead); 566 576 } 567 577 568 578 /*

-1

fs/xfs/libxfs/xfs_dir2_priv.h

··· 21 21 struct dir_context; 22 22 23 23 /* xfs_dir2.c */ 24 - extern int xfs_dir_ino_validate(struct xfs_mount *mp, xfs_ino_t ino); 25 24 extern int xfs_dir2_grow_inode(struct xfs_da_args *args, int space, 26 25 xfs_dir2_db_t *dbp); 27 26 extern int xfs_dir_cilookup_result(struct xfs_da_args *args,

+13 -5

fs/xfs/libxfs/xfs_ialloc.c

··· 2344 2344 2345 2345 imap->im_blkno = XFS_AGB_TO_DADDR(mp, agno, agbno); 2346 2346 imap->im_len = XFS_FSB_TO_BB(mp, 1); 2347 - imap->im_boffset = (ushort)(offset << mp->m_sb.sb_inodelog); 2347 + imap->im_boffset = (unsigned short)(offset << 2348 + mp->m_sb.sb_inodelog); 2348 2349 return 0; 2349 2350 } 2350 2351 ··· 2373 2372 2374 2373 imap->im_blkno = XFS_AGB_TO_DADDR(mp, agno, cluster_agbno); 2375 2374 imap->im_len = XFS_FSB_TO_BB(mp, blks_per_cluster); 2376 - imap->im_boffset = (ushort)(offset << mp->m_sb.sb_inodelog); 2375 + imap->im_boffset = (unsigned short)(offset << mp->m_sb.sb_inodelog); 2377 2376 2378 2377 /* 2379 2378 * If the inode number maps to a block outside the bounds ··· 2451 2450 ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC)); 2452 2451 #endif 2453 2452 2454 - xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGI_BUF); 2455 - 2456 2453 /* 2457 2454 * Compute byte offsets for the first and last fields in the first 2458 2455 * region and log the agi buffer. This only logs up through ··· 2511 2512 if (!XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum))) 2512 2513 return false; 2513 2514 2514 - if (be32_to_cpu(agi->agi_level) > XFS_BTREE_MAXLEVELS) 2515 + if (be32_to_cpu(agi->agi_level) < 1 || 2516 + be32_to_cpu(agi->agi_level) > XFS_BTREE_MAXLEVELS) 2515 2517 return false; 2518 + 2519 + if (xfs_sb_version_hasfinobt(&mp->m_sb) && 2520 + (be32_to_cpu(agi->agi_free_level) < 1 || 2521 + be32_to_cpu(agi->agi_free_level) > XFS_BTREE_MAXLEVELS)) 2522 + return false; 2523 + 2516 2524 /* 2517 2525 * during growfs operations, the perag is not fully initialised, 2518 2526 * so we can't use it for any useful checking. growfs ensures we can't ··· 2598 2592 XFS_FSS_TO_BB(mp, 1), 0, bpp, &xfs_agi_buf_ops); 2599 2593 if (error) 2600 2594 return error; 2595 + if (tp) 2596 + xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_AGI_BUF); 2601 2597 2602 2598 xfs_buf_set_ref(*bpp, XFS_AGI_REF); 2603 2599 return 0;

+3 -1

fs/xfs/libxfs/xfs_ialloc_btree.c

··· 357 357 struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp); 358 358 struct xfs_btree_cur *cur; 359 359 360 - cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_SLEEP); 360 + cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_NOFS); 361 361 362 362 cur->bc_tp = tp; 363 363 cur->bc_mp = mp; ··· 365 365 if (btnum == XFS_BTNUM_INO) { 366 366 cur->bc_nlevels = be32_to_cpu(agi->agi_level); 367 367 cur->bc_ops = &xfs_inobt_ops; 368 + cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_ibt_2); 368 369 } else { 369 370 cur->bc_nlevels = be32_to_cpu(agi->agi_free_level); 370 371 cur->bc_ops = &xfs_finobt_ops; 372 + cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_fibt_2); 371 373 } 372 374 373 375 cur->bc_blocklog = mp->m_sb.sb_blocklog;

+12 -4

fs/xfs/libxfs/xfs_inode_buf.c

··· 383 383 static bool 384 384 xfs_dinode_verify( 385 385 struct xfs_mount *mp, 386 - struct xfs_inode *ip, 386 + xfs_ino_t ino, 387 387 struct xfs_dinode *dip) 388 388 { 389 389 uint16_t flags; 390 390 uint64_t flags2; 391 391 392 392 if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC)) 393 + return false; 394 + 395 + /* don't allow invalid i_size */ 396 + if (be64_to_cpu(dip->di_size) & (1ULL << 63)) 397 + return false; 398 + 399 + /* No zero-length symlinks. */ 400 + if (S_ISLNK(be16_to_cpu(dip->di_mode)) && dip->di_size == 0) 393 401 return false; 394 402 395 403 /* only version 3 or greater inodes are extensively verified here */ ··· 409 401 if (!xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize, 410 402 XFS_DINODE_CRC_OFF)) 411 403 return false; 412 - if (be64_to_cpu(dip->di_ino) != ip->i_ino) 404 + if (be64_to_cpu(dip->di_ino) != ino) 413 405 return false; 414 406 if (!uuid_equal(&dip->di_uuid, &mp->m_sb.sb_meta_uuid)) 415 407 return false; ··· 444 436 return; 445 437 446 438 ASSERT(xfs_sb_version_hascrc(&mp->m_sb)); 447 - crc = xfs_start_cksum((char *)dip, mp->m_sb.sb_inodesize, 439 + crc = xfs_start_cksum_update((char *)dip, mp->m_sb.sb_inodesize, 448 440 XFS_DINODE_CRC_OFF); 449 441 dip->di_crc = xfs_end_cksum(crc); 450 442 } ··· 501 493 return error; 502 494 503 495 /* even unallocated inodes are verified */ 504 - if (!xfs_dinode_verify(mp, ip, dip)) { 496 + if (!xfs_dinode_verify(mp, ip->i_ino, dip)) { 505 497 xfs_alert(mp, "%s: validation failed for inode %lld failed", 506 498 __func__, ip->i_ino); 507 499

+2 -2

fs/xfs/libxfs/xfs_inode_buf.h

··· 58 58 */ 59 59 struct xfs_imap { 60 60 xfs_daddr_t im_blkno; /* starting BB of inode chunk */ 61 - ushort im_len; /* length in BBs of inode chunk */ 62 - ushort im_boffset; /* inode offset in block in bytes */ 61 + unsigned short im_len; /* length in BBs of inode chunk */ 62 + unsigned short im_boffset; /* inode offset in block in bytes */ 63 63 }; 64 64 65 65 int xfs_imap_to_bp(struct xfs_mount *, struct xfs_trans *,

+65 -12

fs/xfs/libxfs/xfs_inode_fork.c

··· 775 775 } 776 776 } 777 777 778 + /* Count number of incore extents based on if_bytes */ 779 + xfs_extnum_t 780 + xfs_iext_count(struct xfs_ifork *ifp) 781 + { 782 + return ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 783 + } 784 + 778 785 /* 779 786 * Convert in-core extents to on-disk form 780 787 * ··· 810 803 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); 811 804 ASSERT(ifp->if_bytes > 0); 812 805 813 - nrecs = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 806 + nrecs = xfs_iext_count(ifp); 814 807 XFS_BMAP_TRACE_EXLIST(ip, nrecs, whichfork); 815 808 ASSERT(nrecs > 0); 816 809 ··· 948 941 xfs_extnum_t idx) /* index of target extent */ 949 942 { 950 943 ASSERT(idx >= 0); 951 - ASSERT(idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t)); 944 + ASSERT(idx < xfs_iext_count(ifp)); 952 945 953 946 if ((ifp->if_flags & XFS_IFEXTIREC) && (idx == 0)) { 954 947 return ifp->if_u1.if_ext_irec->er_extbuf; ··· 1024 1017 int new_size; /* size of extents after adding */ 1025 1018 xfs_extnum_t nextents; /* number of extents in file */ 1026 1019 1027 - nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 1020 + nextents = xfs_iext_count(ifp); 1028 1021 ASSERT((idx >= 0) && (idx <= nextents)); 1029 1022 byte_diff = ext_diff * sizeof(xfs_bmbt_rec_t); 1030 1023 new_size = ifp->if_bytes + byte_diff; ··· 1248 1241 trace_xfs_iext_remove(ip, idx, state, _RET_IP_); 1249 1242 1250 1243 ASSERT(ext_diff > 0); 1251 - nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 1244 + nextents = xfs_iext_count(ifp); 1252 1245 new_size = (nextents - ext_diff) * sizeof(xfs_bmbt_rec_t); 1253 1246 1254 1247 if (new_size == 0) { ··· 1277 1270 1278 1271 ASSERT(!(ifp->if_flags & XFS_IFEXTIREC)); 1279 1272 ASSERT(idx < XFS_INLINE_EXTS); 1280 - nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 1273 + nextents = xfs_iext_count(ifp); 1281 1274 ASSERT(((nextents - ext_diff) > 0) && 1282 1275 (nextents - ext_diff) < XFS_INLINE_EXTS); 1283 1276 ··· 1316 1309 ASSERT(!(ifp->if_flags & 
XFS_IFEXTIREC)); 1317 1310 new_size = ifp->if_bytes - 1318 1311 (ext_diff * sizeof(xfs_bmbt_rec_t)); 1319 - nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 1312 + nextents = xfs_iext_count(ifp); 1320 1313 1321 1314 if (new_size == 0) { 1322 1315 xfs_iext_destroy(ifp); ··· 1553 1546 int size; /* size of file extents */ 1554 1547 1555 1548 ASSERT(ifp->if_flags & XFS_IFEXTIREC); 1556 - nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 1549 + nextents = xfs_iext_count(ifp); 1557 1550 ASSERT(nextents <= XFS_LINEAR_EXTS); 1558 1551 size = nextents * sizeof(xfs_bmbt_rec_t); 1559 1552 ··· 1627 1620 xfs_extnum_t nextents; /* number of file extents */ 1628 1621 xfs_fileoff_t startoff = 0; /* start offset of extent */ 1629 1622 1630 - nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 1623 + nextents = xfs_iext_count(ifp); 1631 1624 if (nextents == 0) { 1632 1625 *idxp = 0; 1633 1626 return NULL; ··· 1740 1733 1741 1734 ASSERT(ifp->if_flags & XFS_IFEXTIREC); 1742 1735 ASSERT(page_idx >= 0); 1743 - ASSERT(page_idx <= ifp->if_bytes / sizeof(xfs_bmbt_rec_t)); 1744 - ASSERT(page_idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t) || realloc); 1736 + ASSERT(page_idx <= xfs_iext_count(ifp)); 1737 + ASSERT(page_idx < xfs_iext_count(ifp) || realloc); 1745 1738 1746 1739 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; 1747 1740 erp_idx = 0; ··· 1789 1782 xfs_extnum_t nextents; /* number of extents in file */ 1790 1783 1791 1784 ASSERT(!(ifp->if_flags & XFS_IFEXTIREC)); 1792 - nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 1785 + nextents = xfs_iext_count(ifp); 1793 1786 ASSERT(nextents <= XFS_LINEAR_EXTS); 1794 1787 1795 1788 erp = kmem_alloc(sizeof(xfs_ext_irec_t), KM_NOFS); ··· 1913 1906 1914 1907 ASSERT(ifp->if_flags & XFS_IFEXTIREC); 1915 1908 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; 1916 - nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 1909 + nextents = xfs_iext_count(ifp); 1917 1910 1918 1911 if (nextents == 0) { 1919 1912 
xfs_iext_destroy(ifp); ··· 2002 1995 ip->i_cowfp->if_flags = XFS_IFEXTENTS; 2003 1996 ip->i_cformat = XFS_DINODE_FMT_EXTENTS; 2004 1997 ip->i_cnextents = 0; 1998 + } 1999 + 2000 + /* 2001 + * Lookup the extent covering bno. 2002 + * 2003 + * If there is an extent covering bno return true, and store the 2004 + * expanded extent structure in *gotp, and the extent index in *idxp. 2005 + * If there is no extent covering bno, but there is an extent after it (e.g. 2006 + * it lies in a hole) return that extent in *gotp and its index in *idxp 2007 + * instead. 2008 + * If bno is beyond the last extent return false, and return the index after 2009 + * the last valid index in *idxp. 2010 + */ 2011 + bool 2012 + xfs_iext_lookup_extent( 2013 + struct xfs_inode *ip, 2014 + struct xfs_ifork *ifp, 2015 + xfs_fileoff_t bno, 2016 + xfs_extnum_t *idxp, 2017 + struct xfs_bmbt_irec *gotp) 2018 + { 2019 + struct xfs_bmbt_rec_host *ep; 2020 + 2021 + XFS_STATS_INC(ip->i_mount, xs_look_exlist); 2022 + 2023 + ep = xfs_iext_bno_to_ext(ifp, bno, idxp); 2024 + if (!ep) 2025 + return false; 2026 + xfs_bmbt_get_all(ep, gotp); 2027 + return true; 2028 + } 2029 + 2030 + /* 2031 + * Return true if there is an extent at index idx, and return the expanded 2032 + * extent structure at idx in that case. Else return false. 2033 + */ 2034 + bool 2035 + xfs_iext_get_extent( 2036 + struct xfs_ifork *ifp, 2037 + xfs_extnum_t idx, 2038 + struct xfs_bmbt_irec *gotp) 2039 + { 2040 + if (idx < 0 || idx >= xfs_iext_count(ifp)) 2041 + return false; 2042 + xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx), gotp); 2043 + return true; 2044 + }

+7

fs/xfs/libxfs/xfs_inode_fork.h

··· 152 152 153 153 struct xfs_bmbt_rec_host * 154 154 xfs_iext_get_ext(struct xfs_ifork *, xfs_extnum_t); 155 + xfs_extnum_t xfs_iext_count(struct xfs_ifork *); 155 156 void xfs_iext_insert(struct xfs_inode *, xfs_extnum_t, xfs_extnum_t, 156 157 struct xfs_bmbt_irec *, int); 157 158 void xfs_iext_add(struct xfs_ifork *, xfs_extnum_t, int); ··· 181 180 void xfs_iext_irec_compact_pages(struct xfs_ifork *); 182 181 void xfs_iext_irec_compact_full(struct xfs_ifork *); 183 182 void xfs_iext_irec_update_extoffs(struct xfs_ifork *, int, int); 183 + 184 + bool xfs_iext_lookup_extent(struct xfs_inode *ip, 185 + struct xfs_ifork *ifp, xfs_fileoff_t bno, 186 + xfs_extnum_t *idxp, struct xfs_bmbt_irec *gotp); 187 + bool xfs_iext_get_extent(struct xfs_ifork *ifp, xfs_extnum_t idx, 188 + struct xfs_bmbt_irec *gotp); 184 189 185 190 extern struct kmem_zone *xfs_ifork_zone; 186 191

+2 -2

fs/xfs/libxfs/xfs_log_format.h

··· 481 481 typedef struct xfs_buf_log_format { 482 482 unsigned short blf_type; /* buf log item type indicator */ 483 483 unsigned short blf_size; /* size of this item */ 484 - ushort blf_flags; /* misc state */ 485 - ushort blf_len; /* number of blocks in this buf */ 484 + unsigned short blf_flags; /* misc state */ 485 + unsigned short blf_len; /* number of blocks in this buf */ 486 486 __int64_t blf_blkno; /* starting blkno of this buf */ 487 487 unsigned int blf_map_size; /* used size of data bitmap in words */ 488 488 unsigned int blf_data_map[XFS_BLF_DATAMAP_SIZE]; /* dirty bitmap */

+1 -1

fs/xfs/libxfs/xfs_log_recover.h

··· 52 52 struct list_head r_itemq; /* q for items */ 53 53 } xlog_recover_t; 54 54 55 - #define ITEM_TYPE(i) (*(ushort *)(i)->ri_buf[0].i_addr) 55 + #define ITEM_TYPE(i) (*(unsigned short *)(i)->ri_buf[0].i_addr) 56 56 57 57 /* 58 58 * This is the number of entries in the l_buf_cancel_table used during

+1

fs/xfs/libxfs/xfs_refcount_btree.c

··· 354 354 cur->bc_btnum = XFS_BTNUM_REFC; 355 355 cur->bc_blocklog = mp->m_sb.sb_blocklog; 356 356 cur->bc_ops = &xfs_refcountbt_ops; 357 + cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_refcbt_2); 357 358 358 359 cur->bc_nlevels = be32_to_cpu(agf->agf_refcount_level); 359 360

+1

fs/xfs/libxfs/xfs_rmap_btree.c

··· 484 484 cur->bc_blocklog = mp->m_sb.sb_blocklog; 485 485 cur->bc_ops = &xfs_rmapbt_ops; 486 486 cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]); 487 + cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_rmap_2); 487 488 488 489 cur->bc_private.a.agbp = agbp; 489 490 cur->bc_private.a.agno = agno;

-1

fs/xfs/libxfs/xfs_rtbitmap.c

··· 1016 1016 } 1017 1017 return 0; 1018 1018 } 1019 -

+11 -2

fs/xfs/libxfs/xfs_sb.c

··· 262 262 return -EFSCORRUPTED; 263 263 } 264 264 265 + if (xfs_sb_version_hascrc(&mp->m_sb) && 266 + sbp->sb_blocksize < XFS_MIN_CRC_BLOCKSIZE) { 267 + xfs_notice(mp, "v5 SB sanity check failed"); 268 + return -EFSCORRUPTED; 269 + } 270 + 265 271 /* 266 272 * Until this is fixed only page-sized or smaller data blocks work. 267 273 */ ··· 344 338 XFS_PQUOTA_CHKD : XFS_GQUOTA_CHKD; 345 339 sbp->sb_qflags &= ~(XFS_OQUOTA_ENFD | XFS_OQUOTA_CHKD); 346 340 347 - if (sbp->sb_qflags & XFS_PQUOTA_ACCT) { 341 + if (sbp->sb_qflags & XFS_PQUOTA_ACCT && 342 + sbp->sb_gquotino != NULLFSINO) { 348 343 /* 349 344 * In older version of superblock, on-disk superblock only 350 345 * has sb_gquotino, and in-core superblock has both sb_gquotino 351 346 * and sb_pquotino. But, only one of them is supported at any 352 347 * point of time. So, if PQUOTA is set in disk superblock, 353 - * copy over sb_gquotino to sb_pquotino. 348 + * copy over sb_gquotino to sb_pquotino. The NULLFSINO test 349 + * above is to make sure we don't do this twice and wipe them 350 + * both out! 354 351 */ 355 352 sbp->sb_pquotino = sbp->sb_gquotino; 356 353 sbp->sb_gquotino = NULLFSINO;

+3 -1

fs/xfs/libxfs/xfs_types.h

··· 57 57 58 58 #define NULLAGBLOCK ((xfs_agblock_t)-1) 59 59 #define NULLAGNUMBER ((xfs_agnumber_t)-1) 60 - #define NULLEXTNUM ((xfs_extnum_t)-1) 61 60 62 61 #define NULLCOMMITLSN ((xfs_lsn_t)-1) 63 62 ··· 74 75 * Minimum and maximum blocksize and sectorsize. 75 76 * The blocksize upper limit is pretty much arbitrary. 76 77 * The sectorsize upper limit is due to sizeof(sb_sectsize). 78 + * CRC enabled filesystems use 512 byte inodes, meaning 512 byte block sizes 79 + * cannot be used. 77 80 */ 78 81 #define XFS_MIN_BLOCKSIZE_LOG 9 /* i.e. 512 bytes */ 79 82 #define XFS_MAX_BLOCKSIZE_LOG 16 /* i.e. 65536 bytes */ 80 83 #define XFS_MIN_BLOCKSIZE (1 << XFS_MIN_BLOCKSIZE_LOG) 81 84 #define XFS_MAX_BLOCKSIZE (1 << XFS_MAX_BLOCKSIZE_LOG) 85 + #define XFS_MIN_CRC_BLOCKSIZE (1 << (XFS_MIN_BLOCKSIZE_LOG + 1)) 82 86 #define XFS_MIN_SECTORSIZE_LOG 9 /* i.e. 512 bytes */ 83 87 #define XFS_MAX_SECTORSIZE_LOG 15 /* i.e. 32768 bytes */ 84 88 #define XFS_MIN_SECTORSIZE (1 << XFS_MIN_SECTORSIZE_LOG)

+13 -273

fs/xfs/xfs_aops.c

··· 37 37 #include <linux/pagevec.h> 38 38 #include <linux/writeback.h> 39 39 40 - /* flags for direct write completions */ 41 - #define XFS_DIO_FLAG_UNWRITTEN (1 << 0) 42 - #define XFS_DIO_FLAG_APPEND (1 << 1) 43 - #define XFS_DIO_FLAG_COW (1 << 2) 44 - 45 40 /* 46 41 * structure owned by writepages passed to individual writepage calls 47 42 */ ··· 771 776 { 772 777 struct xfs_inode *ip = XFS_I(inode); 773 778 struct xfs_bmbt_irec imap; 774 - bool is_cow = false, need_alloc = false; 779 + bool is_cow = false; 775 780 int error; 776 781 777 782 /* ··· 789 794 * Else we need to check if there is a COW mapping at this offset. 790 795 */ 791 796 xfs_ilock(ip, XFS_ILOCK_SHARED); 792 - is_cow = xfs_reflink_find_cow_mapping(ip, offset, &imap, &need_alloc); 797 + is_cow = xfs_reflink_find_cow_mapping(ip, offset, &imap); 793 798 xfs_iunlock(ip, XFS_ILOCK_SHARED); 794 799 795 800 if (!is_cow) ··· 799 804 * And if the COW mapping has a delayed extent here we need to 800 805 * allocate real space for it now. 801 806 */ 802 - if (need_alloc) { 807 + if (isnullstartblock(imap.br_startblock)) { 803 808 error = xfs_iomap_write_allocate(ip, XFS_COW_FORK, offset, 804 809 &imap); 805 810 if (error) ··· 1170 1175 } 1171 1176 1172 1177 /* 1173 - * When we map a DIO buffer, we may need to pass flags to 1174 - * xfs_end_io_direct_write to tell it what kind of write IO we are doing. 1175 - * 1176 - * Note that for DIO, an IO to the highest supported file block offset (i.e. 1177 - * 2^63 - 1FSB bytes) will result in the offset + count overflowing a signed 64 1178 - * bit variable. Hence if we see this overflow, we have to assume that the IO is 1179 - * extending the file size. We won't know for sure until IO completion is run 1180 - * and the actual max write offset is communicated to the IO completion 1181 - * routine. 
1182 - */ 1183 - static void 1184 - xfs_map_direct( 1185 - struct inode *inode, 1186 - struct buffer_head *bh_result, 1187 - struct xfs_bmbt_irec *imap, 1188 - xfs_off_t offset, 1189 - bool is_cow) 1190 - { 1191 - uintptr_t *flags = (uintptr_t *)&bh_result->b_private; 1192 - xfs_off_t size = bh_result->b_size; 1193 - 1194 - trace_xfs_get_blocks_map_direct(XFS_I(inode), offset, size, 1195 - ISUNWRITTEN(imap) ? XFS_IO_UNWRITTEN : is_cow ? XFS_IO_COW : 1196 - XFS_IO_OVERWRITE, imap); 1197 - 1198 - if (ISUNWRITTEN(imap)) { 1199 - *flags |= XFS_DIO_FLAG_UNWRITTEN; 1200 - set_buffer_defer_completion(bh_result); 1201 - } else if (is_cow) { 1202 - *flags |= XFS_DIO_FLAG_COW; 1203 - set_buffer_defer_completion(bh_result); 1204 - } 1205 - if (offset + size > i_size_read(inode) || offset + size < 0) { 1206 - *flags |= XFS_DIO_FLAG_APPEND; 1207 - set_buffer_defer_completion(bh_result); 1208 - } 1209 - } 1210 - 1211 - /* 1212 1178 * If this is O_DIRECT or the mpage code calling tell them how large the mapping 1213 1179 * is, so that we can avoid repeated get_blocks calls. 1214 1180 * ··· 1209 1253 bh_result->b_size = mapping_size; 1210 1254 } 1211 1255 1212 - /* Bounce unaligned directio writes to the page cache. */ 1213 1256 static int 1214 - xfs_bounce_unaligned_dio_write( 1215 - struct xfs_inode *ip, 1216 - xfs_fileoff_t offset_fsb, 1217 - struct xfs_bmbt_irec *imap) 1218 - { 1219 - struct xfs_bmbt_irec irec; 1220 - xfs_fileoff_t delta; 1221 - bool shared; 1222 - bool x; 1223 - int error; 1224 - 1225 - irec = *imap; 1226 - if (offset_fsb > irec.br_startoff) { 1227 - delta = offset_fsb - irec.br_startoff; 1228 - irec.br_blockcount -= delta; 1229 - irec.br_startblock += delta; 1230 - irec.br_startoff = offset_fsb; 1231 - } 1232 - error = xfs_reflink_trim_around_shared(ip, &irec, &shared, &x); 1233 - if (error) 1234 - return error; 1235 - 1236 - /* 1237 - * We're here because we're trying to do a directio write to a 1238 - * region that isn't aligned to a filesystem block. 
If any part 1239 - * of the extent is shared, fall back to buffered mode to handle 1240 - * the RMW. This is done by returning -EREMCHG ("remote addr 1241 - * changed"), which is caught further up the call stack. 1242 - */ 1243 - if (shared) { 1244 - trace_xfs_reflink_bounce_dio_write(ip, imap); 1245 - return -EREMCHG; 1246 - } 1247 - return 0; 1248 - } 1249 - 1250 - STATIC int 1251 - __xfs_get_blocks( 1257 + xfs_get_blocks( 1252 1258 struct inode *inode, 1253 1259 sector_t iblock, 1254 1260 struct buffer_head *bh_result, 1255 - int create, 1256 - bool direct) 1261 + int create) 1257 1262 { 1258 1263 struct xfs_inode *ip = XFS_I(inode); 1259 1264 struct xfs_mount *mp = ip->i_mount; ··· 1225 1308 int nimaps = 1; 1226 1309 xfs_off_t offset; 1227 1310 ssize_t size; 1228 - int new = 0; 1229 - bool is_cow = false; 1230 - bool need_alloc = false; 1231 1311 1232 - BUG_ON(create && !direct); 1312 + BUG_ON(create); 1233 1313 1234 1314 if (XFS_FORCED_SHUTDOWN(mp)) 1235 1315 return -EIO; ··· 1235 1321 ASSERT(bh_result->b_size >= (1 << inode->i_blkbits)); 1236 1322 size = bh_result->b_size; 1237 1323 1238 - if (!create && offset >= i_size_read(inode)) 1324 + if (offset >= i_size_read(inode)) 1239 1325 return 0; 1240 1326 1241 1327 /* ··· 1250 1336 end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + size); 1251 1337 offset_fsb = XFS_B_TO_FSBT(mp, offset); 1252 1338 1253 - if (create && direct && xfs_is_reflink_inode(ip)) 1254 - is_cow = xfs_reflink_find_cow_mapping(ip, offset, &imap, 1255 - &need_alloc); 1256 - if (!is_cow) { 1257 - error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, 1258 - &imap, &nimaps, XFS_BMAPI_ENTIRE); 1259 - /* 1260 - * Truncate an overwrite extent if there's a pending CoW 1261 - * reservation before the end of this extent. This 1262 - * forces us to come back to get_blocks to take care of 1263 - * the CoW. 
1264 - */ 1265 - if (create && direct && nimaps && 1266 - imap.br_startblock != HOLESTARTBLOCK && 1267 - imap.br_startblock != DELAYSTARTBLOCK && 1268 - !ISUNWRITTEN(&imap)) 1269 - xfs_reflink_trim_irec_to_next_cow(ip, offset_fsb, 1270 - &imap); 1271 - } 1272 - ASSERT(!need_alloc); 1339 + error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, 1340 + &imap, &nimaps, XFS_BMAPI_ENTIRE); 1273 1341 if (error) 1274 1342 goto out_unlock; 1275 1343 1276 - /* for DAX, we convert unwritten extents directly */ 1277 - if (create && 1278 - (!nimaps || 1279 - (imap.br_startblock == HOLESTARTBLOCK || 1280 - imap.br_startblock == DELAYSTARTBLOCK) || 1281 - (IS_DAX(inode) && ISUNWRITTEN(&imap)))) { 1282 - /* 1283 - * xfs_iomap_write_direct() expects the shared lock. It 1284 - * is unlocked on return. 1285 - */ 1286 - if (lockmode == XFS_ILOCK_EXCL) 1287 - xfs_ilock_demote(ip, lockmode); 1288 - 1289 - error = xfs_iomap_write_direct(ip, offset, size, 1290 - &imap, nimaps); 1291 - if (error) 1292 - return error; 1293 - new = 1; 1294 - 1295 - trace_xfs_get_blocks_alloc(ip, offset, size, 1296 - ISUNWRITTEN(&imap) ? XFS_IO_UNWRITTEN 1297 - : XFS_IO_DELALLOC, &imap); 1298 - } else if (nimaps) { 1344 + if (nimaps) { 1299 1345 trace_xfs_get_blocks_found(ip, offset, size, 1300 1346 ISUNWRITTEN(&imap) ? 
XFS_IO_UNWRITTEN 1301 1347 : XFS_IO_OVERWRITE, &imap); ··· 1263 1389 } else { 1264 1390 trace_xfs_get_blocks_notfound(ip, offset, size); 1265 1391 goto out_unlock; 1266 - } 1267 - 1268 - if (IS_DAX(inode) && create) { 1269 - ASSERT(!ISUNWRITTEN(&imap)); 1270 - /* zeroing is not needed at a higher layer */ 1271 - new = 0; 1272 1392 } 1273 1393 1274 1394 /* trim mapping down to size requested */ ··· 1274 1406 */ 1275 1407 if (imap.br_startblock != HOLESTARTBLOCK && 1276 1408 imap.br_startblock != DELAYSTARTBLOCK && 1277 - (create || !ISUNWRITTEN(&imap))) { 1278 - if (create && direct && !is_cow) { 1279 - error = xfs_bounce_unaligned_dio_write(ip, offset_fsb, 1280 - &imap); 1281 - if (error) 1282 - return error; 1283 - } 1284 - 1409 + !ISUNWRITTEN(&imap)) 1285 1410 xfs_map_buffer(inode, bh_result, &imap, offset); 1286 - if (ISUNWRITTEN(&imap)) 1287 - set_buffer_unwritten(bh_result); 1288 - /* direct IO needs special help */ 1289 - if (create) 1290 - xfs_map_direct(inode, bh_result, &imap, offset, is_cow); 1291 - } 1292 1411 1293 1412 /* 1294 1413 * If this is a realtime file, data may be on a different device. 1295 1414 * to that pointed to from the buffer_head b_bdev currently. 1296 1415 */ 1297 1416 bh_result->b_bdev = xfs_find_bdev_for_inode(inode); 1298 - 1299 - /* 1300 - * If we previously allocated a block out beyond eof and we are now 1301 - * coming back to use it then we will need to flag it as new even if it 1302 - * has a disk address. 1303 - * 1304 - * With sub-block writes into unwritten extents we also need to mark 1305 - * the buffer as new so that the unwritten parts of the buffer gets 1306 - * correctly zeroed. 
1307 - */ 1308 - if (create && 1309 - ((!buffer_mapped(bh_result) && !buffer_uptodate(bh_result)) || 1310 - (offset >= i_size_read(inode)) || 1311 - (new || ISUNWRITTEN(&imap)))) 1312 - set_buffer_new(bh_result); 1313 - 1314 - BUG_ON(direct && imap.br_startblock == DELAYSTARTBLOCK); 1315 - 1316 1417 return 0; 1317 1418 1318 1419 out_unlock: 1319 1420 xfs_iunlock(ip, lockmode); 1320 - return error; 1321 - } 1322 - 1323 - int 1324 - xfs_get_blocks( 1325 - struct inode *inode, 1326 - sector_t iblock, 1327 - struct buffer_head *bh_result, 1328 - int create) 1329 - { 1330 - return __xfs_get_blocks(inode, iblock, bh_result, create, false); 1331 - } 1332 - 1333 - int 1334 - xfs_get_blocks_direct( 1335 - struct inode *inode, 1336 - sector_t iblock, 1337 - struct buffer_head *bh_result, 1338 - int create) 1339 - { 1340 - return __xfs_get_blocks(inode, iblock, bh_result, create, true); 1341 - } 1342 - 1343 - /* 1344 - * Complete a direct I/O write request. 1345 - * 1346 - * xfs_map_direct passes us some flags in the private data to tell us what to 1347 - * do. If no flags are set, then the write IO is an overwrite wholly within 1348 - * the existing allocated file size and so there is nothing for us to do. 1349 - * 1350 - * Note that in this case the completion can be called in interrupt context, 1351 - * whereas if we have flags set we will always be called in task context 1352 - * (i.e. from a workqueue). 
1353 - */ 1354 - int 1355 - xfs_end_io_direct_write( 1356 - struct kiocb *iocb, 1357 - loff_t offset, 1358 - ssize_t size, 1359 - void *private) 1360 - { 1361 - struct inode *inode = file_inode(iocb->ki_filp); 1362 - struct xfs_inode *ip = XFS_I(inode); 1363 - uintptr_t flags = (uintptr_t)private; 1364 - int error = 0; 1365 - 1366 - trace_xfs_end_io_direct_write(ip, offset, size); 1367 - 1368 - if (XFS_FORCED_SHUTDOWN(ip->i_mount)) 1369 - return -EIO; 1370 - 1371 - if (size <= 0) 1372 - return size; 1373 - 1374 - /* 1375 - * The flags tell us whether we are doing unwritten extent conversions 1376 - * or an append transaction that updates the on-disk file size. These 1377 - * cases are the only cases where we should *potentially* be needing 1378 - * to update the VFS inode size. 1379 - */ 1380 - if (flags == 0) { 1381 - ASSERT(offset + size <= i_size_read(inode)); 1382 - return 0; 1383 - } 1384 - 1385 - /* 1386 - * We need to update the in-core inode size here so that we don't end up 1387 - * with the on-disk inode size being outside the in-core inode size. We 1388 - * have no other method of updating EOF for AIO, so always do it here 1389 - * if necessary. 1390 - * 1391 - * We need to lock the test/set EOF update as we can be racing with 1392 - * other IO completions here to update the EOF. Failing to serialise 1393 - * here can result in EOF moving backwards and Bad Things Happen when 1394 - * that occurs. 
1395 - */ 1396 - spin_lock(&ip->i_flags_lock); 1397 - if (offset + size > i_size_read(inode)) 1398 - i_size_write(inode, offset + size); 1399 - spin_unlock(&ip->i_flags_lock); 1400 - 1401 - if (flags & XFS_DIO_FLAG_COW) 1402 - error = xfs_reflink_end_cow(ip, offset, size); 1403 - if (flags & XFS_DIO_FLAG_UNWRITTEN) { 1404 - trace_xfs_end_io_direct_write_unwritten(ip, offset, size); 1405 - 1406 - error = xfs_iomap_write_unwritten(ip, offset, size); 1407 - } 1408 - if (flags & XFS_DIO_FLAG_APPEND) { 1409 - trace_xfs_end_io_direct_write_append(ip, offset, size); 1410 - 1411 - error = xfs_setfilesize(ip, offset, size); 1412 - } 1413 - 1414 1421 return error; 1415 1422 } 1416 1423 ··· 1309 1566 struct xfs_inode *ip = XFS_I(inode); 1310 1567 1311 1568 trace_xfs_vm_bmap(XFS_I(inode)); 1312 - xfs_ilock(ip, XFS_IOLOCK_SHARED); 1313 1569 1314 1570 /* 1315 1571 * The swap code (ab-)uses ->bmap to get a block mapping and then ··· 1316 1574 * that on reflinks inodes, so we have to skip out here. And yes, 1317 1575 * 0 is the magic code for a bmap error.. 1318 1576 */ 1319 - if (xfs_is_reflink_inode(ip)) { 1320 - xfs_iunlock(ip, XFS_IOLOCK_SHARED); 1577 + if (xfs_is_reflink_inode(ip)) 1321 1578 return 0; 1322 - } 1579 + 1323 1580 filemap_write_and_wait(mapping); 1324 - xfs_iunlock(ip, XFS_IOLOCK_SHARED); 1325 1581 return generic_block_bmap(mapping, block, xfs_get_blocks); 1326 1582 } 1327 1583

-6

fs/xfs/xfs_aops.h

··· 55 55 56 56 extern const struct address_space_operations xfs_address_space_operations; 57 57 58 - int xfs_get_blocks(struct inode *inode, sector_t offset, 59 - struct buffer_head *map_bh, int create); 60 - int xfs_get_blocks_direct(struct inode *inode, sector_t offset, 61 - struct buffer_head *map_bh, int create); 62 - int xfs_end_io_direct_write(struct kiocb *iocb, loff_t offset, 63 - ssize_t size, void *private); 64 58 int xfs_setfilesize(struct xfs_inode *ip, xfs_off_t offset, size_t size); 65 59 66 60 extern void xfs_count_page_state(struct page *, int *, int *);

+2 -2

fs/xfs/xfs_attr.h

··· 112 112 *========================================================================*/ 113 113 114 114 115 - /* Return 0 on success, or -errno; other state communicated via *context */ 116 - typedef int (*put_listent_func_t)(struct xfs_attr_list_context *, int, 115 + /* void; state communicated via *context */ 116 + typedef void (*put_listent_func_t)(struct xfs_attr_list_context *, int, 117 117 unsigned char *, int, int); 118 118 119 119 typedef struct xfs_attr_list_context {

+22 -37

fs/xfs/xfs_attr_list.c

··· 74 74 xfs_attr_sf_entry_t *sfe; 75 75 xfs_inode_t *dp; 76 76 int sbsize, nsbuf, count, i; 77 - int error; 78 77 79 78 ASSERT(context != NULL); 80 79 dp = context->dp; ··· 101 102 (XFS_ISRESET_CURSOR(cursor) && 102 103 (dp->i_afp->if_bytes + sf->hdr.count * 16) < context->bufsize)) { 103 104 for (i = 0, sfe = &sf->list[0]; i < sf->hdr.count; i++) { 104 - error = context->put_listent(context, 105 - sfe->flags, 106 - sfe->nameval, 107 - (int)sfe->namelen, 108 - (int)sfe->valuelen); 109 - if (error) 110 - return error; 105 + context->put_listent(context, 106 + sfe->flags, 107 + sfe->nameval, 108 + (int)sfe->namelen, 109 + (int)sfe->valuelen); 111 110 /* 112 111 * Either search callback finished early or 113 112 * didn't fit it all in the buffer after all. ··· 190 193 cursor->hashval = sbp->hash; 191 194 cursor->offset = 0; 192 195 } 193 - error = context->put_listent(context, 194 - sbp->flags, 195 - sbp->name, 196 - sbp->namelen, 197 - sbp->valuelen); 198 - if (error) { 199 - kmem_free(sbuf); 200 - return error; 201 - } 196 + context->put_listent(context, 197 + sbp->flags, 198 + sbp->name, 199 + sbp->namelen, 200 + sbp->valuelen); 202 201 if (context->seen_enough) 203 202 break; 204 203 cursor->offset++; ··· 328 335 */ 329 336 for (;;) { 330 337 leaf = bp->b_addr; 331 - error = xfs_attr3_leaf_list_int(bp, context); 332 - if (error) { 333 - xfs_trans_brelse(NULL, bp); 334 - return error; 335 - } 338 + xfs_attr3_leaf_list_int(bp, context); 336 339 xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &leafhdr, leaf); 337 340 if (context->seen_enough || leafhdr.forw == 0) 338 341 break; ··· 345 356 /* 346 357 * Copy out attribute list entries for attr_list(), for leaf attribute lists. 
347 358 */ 348 - int 359 + void 349 360 xfs_attr3_leaf_list_int( 350 361 struct xfs_buf *bp, 351 362 struct xfs_attr_list_context *context) ··· 355 366 struct xfs_attr3_icleaf_hdr ichdr; 356 367 struct xfs_attr_leaf_entry *entries; 357 368 struct xfs_attr_leaf_entry *entry; 358 - int retval; 359 369 int i; 360 370 struct xfs_mount *mp = context->dp->i_mount; 361 371 ··· 387 399 } 388 400 if (i == ichdr.count) { 389 401 trace_xfs_attr_list_notfound(context); 390 - return 0; 402 + return; 391 403 } 392 404 } else { 393 405 entry = &entries[0]; ··· 398 410 /* 399 411 * We have found our place, start copying out the new attributes. 400 412 */ 401 - retval = 0; 402 413 for (; i < ichdr.count; entry++, i++) { 403 414 char *name; 404 415 int namelen, valuelen; ··· 426 439 valuelen = be32_to_cpu(name_rmt->valuelen); 427 440 } 428 441 429 - retval = context->put_listent(context, entry->flags, 442 + context->put_listent(context, entry->flags, 430 443 name, namelen, valuelen); 431 - if (retval) 432 - break; 433 444 if (context->seen_enough) 434 445 break; 435 446 cursor->offset++; 436 447 } 437 448 trace_xfs_attr_list_leaf_end(context); 438 - return retval; 449 + return; 439 450 } 440 451 441 452 /* ··· 452 467 if (error) 453 468 return error; 454 469 455 - error = xfs_attr3_leaf_list_int(bp, context); 470 + xfs_attr3_leaf_list_int(bp, context); 456 471 xfs_trans_brelse(NULL, bp); 457 - return error; 472 + return 0; 458 473 } 459 474 460 475 int ··· 498 513 * Take care to check values and protect against them changing later, 499 514 * we may be reading them directly out of a user buffer. 
500 515 */ 501 - STATIC int 516 + STATIC void 502 517 xfs_attr_put_listent( 503 518 xfs_attr_list_context_t *context, 504 519 int flags, ··· 521 536 */ 522 537 if (((context->flags & ATTR_SECURE) == 0) != 523 538 ((flags & XFS_ATTR_SECURE) == 0)) 524 - return 0; 539 + return; 525 540 if (((context->flags & ATTR_ROOT) == 0) != 526 541 ((flags & XFS_ATTR_ROOT) == 0)) 527 - return 0; 542 + return; 528 543 529 544 arraytop = sizeof(*alist) + 530 545 context->count * sizeof(alist->al_offset[0]); ··· 533 548 trace_xfs_attr_list_full(context); 534 549 alist->al_more = 1; 535 550 context->seen_enough = 1; 536 - return 0; 551 + return; 537 552 } 538 553 539 554 aep = (attrlist_ent_t *)&context->alist[context->firstu]; ··· 543 558 alist->al_offset[context->count++] = context->firstu; 544 559 alist->al_count = context->count; 545 560 trace_xfs_attr_list_add(context); 546 - return 0; 561 + return; 547 562 } 548 563 549 564 /*

+20 -25

fs/xfs/xfs_bmap_util.c

··· 359 359 mp = ip->i_mount; 360 360 ifp = XFS_IFORK_PTR(ip, whichfork); 361 361 if ( XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS ) { 362 - xfs_bmap_count_leaves(ifp, 0, 363 - ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t), 364 - count); 362 + xfs_bmap_count_leaves(ifp, 0, xfs_iext_count(ifp), count); 365 363 return 0; 366 364 } 367 365 ··· 424 426 ifp = XFS_IFORK_PTR(ip, whichfork); 425 427 if (!moretocome && 426 428 xfs_iext_bno_to_ext(ifp, fileblock, &lastx) && 427 - (lastx == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))-1)) 429 + (lastx == xfs_iext_count(ifp) - 1)) 428 430 out->bmv_oflags |= BMV_OF_LAST; 429 431 } 430 432 ··· 1790 1792 struct xfs_ifork tempifp, *ifp, *tifp; 1791 1793 int aforkblks = 0; 1792 1794 int taforkblks = 0; 1795 + xfs_extnum_t nextents; 1793 1796 __uint64_t tmp; 1794 1797 int error; 1795 1798 ··· 1876 1877 1877 1878 switch (ip->i_d.di_format) { 1878 1879 case XFS_DINODE_FMT_EXTENTS: 1879 - /* If the extents fit in the inode, fix the 1880 - * pointer. Otherwise it's already NULL or 1881 - * pointing to the extent. 1880 + /* 1881 + * If the extents fit in the inode, fix the pointer. Otherwise 1882 + * it's already NULL or pointing to the extent. 1882 1883 */ 1883 - if (ip->i_d.di_nextents <= XFS_INLINE_EXTS) { 1884 - ifp->if_u1.if_extents = 1885 - ifp->if_u2.if_inline_ext; 1886 - } 1884 + nextents = xfs_iext_count(&ip->i_df); 1885 + if (nextents <= XFS_INLINE_EXTS) 1886 + ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext; 1887 1887 (*src_log_flags) |= XFS_ILOG_DEXT; 1888 1888 break; 1889 1889 case XFS_DINODE_FMT_BTREE: ··· 1894 1896 1895 1897 switch (tip->i_d.di_format) { 1896 1898 case XFS_DINODE_FMT_EXTENTS: 1897 - /* If the extents fit in the inode, fix the 1898 - * pointer. Otherwise it's already NULL or 1899 - * pointing to the extent. 1899 + /* 1900 + * If the extents fit in the inode, fix the pointer. Otherwise 1901 + * it's already NULL or pointing to the extent. 
1900 1902 */ 1901 - if (tip->i_d.di_nextents <= XFS_INLINE_EXTS) { 1902 - tifp->if_u1.if_extents = 1903 - tifp->if_u2.if_inline_ext; 1904 - } 1903 + nextents = xfs_iext_count(&tip->i_df); 1904 + if (nextents <= XFS_INLINE_EXTS) 1905 + tifp->if_u1.if_extents = tifp->if_u2.if_inline_ext; 1905 1906 (*target_log_flags) |= XFS_ILOG_DEXT; 1906 1907 break; 1907 1908 case XFS_DINODE_FMT_BTREE: ··· 1935 1938 * page cache safely. Once we have done this we can take the ilocks and 1936 1939 * do the rest of the checks. 1937 1940 */ 1938 - lock_flags = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL; 1939 - xfs_lock_two_inodes(ip, tip, XFS_IOLOCK_EXCL); 1941 + lock_two_nondirectories(VFS_I(ip), VFS_I(tip)); 1942 + lock_flags = XFS_MMAPLOCK_EXCL; 1940 1943 xfs_lock_two_inodes(ip, tip, XFS_MMAPLOCK_EXCL); 1941 1944 1942 1945 /* Verify that both files have the same format */ ··· 2076 2079 trace_xfs_swap_extent_after(ip, 0); 2077 2080 trace_xfs_swap_extent_after(tip, 1); 2078 2081 2082 + out_unlock: 2079 2083 xfs_iunlock(ip, lock_flags); 2080 2084 xfs_iunlock(tip, lock_flags); 2085 + unlock_two_nondirectories(VFS_I(ip), VFS_I(tip)); 2081 2086 return error; 2082 2087 2083 2088 out_trans_cancel: 2084 2089 xfs_trans_cancel(tp); 2085 - 2086 - out_unlock: 2087 - xfs_iunlock(ip, lock_flags); 2088 - xfs_iunlock(tip, lock_flags); 2089 - return error; 2090 + goto out_unlock; 2090 2091 }

+74 -49

fs/xfs/xfs_buf.c

··· 219 219 init_completion(&bp->b_iowait); 220 220 INIT_LIST_HEAD(&bp->b_lru); 221 221 INIT_LIST_HEAD(&bp->b_list); 222 - RB_CLEAR_NODE(&bp->b_rbnode); 223 222 sema_init(&bp->b_sema, 0); /* held, no waiters */ 224 223 spin_lock_init(&bp->b_lock); 225 224 XB_SET_OWNER(bp); ··· 472 473 /* 473 474 * Finding and Reading Buffers 474 475 */ 476 + static int 477 + _xfs_buf_obj_cmp( 478 + struct rhashtable_compare_arg *arg, 479 + const void *obj) 480 + { 481 + const struct xfs_buf_map *map = arg->key; 482 + const struct xfs_buf *bp = obj; 483 + 484 + /* 485 + * The key hashing in the lookup path depends on the key being the 486 + * first element of the compare_arg, make sure to assert this. 487 + */ 488 + BUILD_BUG_ON(offsetof(struct xfs_buf_map, bm_bn) != 0); 489 + 490 + if (bp->b_bn != map->bm_bn) 491 + return 1; 492 + 493 + if (unlikely(bp->b_length != map->bm_len)) { 494 + /* 495 + * found a block number match. If the range doesn't 496 + * match, the only way this is allowed is if the buffer 497 + * in the cache is stale and the transaction that made 498 + * it stale has not yet committed. i.e. we are 499 + * reallocating a busy extent. Skip this buffer and 500 + * continue searching for an exact match. 
501 + */ 502 + ASSERT(bp->b_flags & XBF_STALE); 503 + return 1; 504 + } 505 + return 0; 506 + } 507 + 508 + static const struct rhashtable_params xfs_buf_hash_params = { 509 + .min_size = 32, /* empty AGs have minimal footprint */ 510 + .nelem_hint = 16, 511 + .key_len = sizeof(xfs_daddr_t), 512 + .key_offset = offsetof(struct xfs_buf, b_bn), 513 + .head_offset = offsetof(struct xfs_buf, b_rhash_head), 514 + .automatic_shrinking = true, 515 + .obj_cmpfn = _xfs_buf_obj_cmp, 516 + }; 517 + 518 + int 519 + xfs_buf_hash_init( 520 + struct xfs_perag *pag) 521 + { 522 + spin_lock_init(&pag->pag_buf_lock); 523 + return rhashtable_init(&pag->pag_buf_hash, &xfs_buf_hash_params); 524 + } 525 + 526 + void 527 + xfs_buf_hash_destroy( 528 + struct xfs_perag *pag) 529 + { 530 + rhashtable_destroy(&pag->pag_buf_hash); 531 + } 475 532 476 533 /* 477 534 * Look up, and creates if absent, a lockable buffer for ··· 543 488 xfs_buf_t *new_bp) 544 489 { 545 490 struct xfs_perag *pag; 546 - struct rb_node **rbp; 547 - struct rb_node *parent; 548 491 xfs_buf_t *bp; 549 - xfs_daddr_t blkno = map[0].bm_bn; 492 + struct xfs_buf_map cmap = { .bm_bn = map[0].bm_bn }; 550 493 xfs_daddr_t eofs; 551 - int numblks = 0; 552 494 int i; 553 495 554 496 for (i = 0; i < nmaps; i++) 555 - numblks += map[i].bm_len; 497 + cmap.bm_len += map[i].bm_len; 556 498 557 499 /* Check for IOs smaller than the sector size / not sector aligned */ 558 - ASSERT(!(BBTOB(numblks) < btp->bt_meta_sectorsize)); 559 - ASSERT(!(BBTOB(blkno) & (xfs_off_t)btp->bt_meta_sectormask)); 500 + ASSERT(!(BBTOB(cmap.bm_len) < btp->bt_meta_sectorsize)); 501 + ASSERT(!(BBTOB(cmap.bm_bn) & (xfs_off_t)btp->bt_meta_sectormask)); 560 502 561 503 /* 562 504 * Corrupted block numbers can get through to here, unfortunately, so we 563 505 * have to check that the buffer falls within the filesystem bounds. 
564 506 */ 565 507 eofs = XFS_FSB_TO_BB(btp->bt_mount, btp->bt_mount->m_sb.sb_dblocks); 566 - if (blkno < 0 || blkno >= eofs) { 508 + if (cmap.bm_bn < 0 || cmap.bm_bn >= eofs) { 567 509 /* 568 510 * XXX (dgc): we should really be returning -EFSCORRUPTED here, 569 511 * but none of the higher level infrastructure supports ··· 568 516 */ 569 517 xfs_alert(btp->bt_mount, 570 518 "%s: Block out of range: block 0x%llx, EOFS 0x%llx ", 571 - __func__, blkno, eofs); 519 + __func__, cmap.bm_bn, eofs); 572 520 WARN_ON(1); 573 521 return NULL; 574 522 } 575 523 576 - /* get tree root */ 577 524 pag = xfs_perag_get(btp->bt_mount, 578 - xfs_daddr_to_agno(btp->bt_mount, blkno)); 525 + xfs_daddr_to_agno(btp->bt_mount, cmap.bm_bn)); 579 526 580 - /* walk tree */ 581 527 spin_lock(&pag->pag_buf_lock); 582 - rbp = &pag->pag_buf_tree.rb_node; 583 - parent = NULL; 584 - bp = NULL; 585 - while (*rbp) { 586 - parent = *rbp; 587 - bp = rb_entry(parent, struct xfs_buf, b_rbnode); 588 - 589 - if (blkno < bp->b_bn) 590 - rbp = &(*rbp)->rb_left; 591 - else if (blkno > bp->b_bn) 592 - rbp = &(*rbp)->rb_right; 593 - else { 594 - /* 595 - * found a block number match. If the range doesn't 596 - * match, the only way this is allowed is if the buffer 597 - * in the cache is stale and the transaction that made 598 - * it stale has not yet committed. i.e. we are 599 - * reallocating a busy extent. Skip this buffer and 600 - * continue searching to the right for an exact match. 
601 - */ 602 - if (bp->b_length != numblks) { 603 - ASSERT(bp->b_flags & XBF_STALE); 604 - rbp = &(*rbp)->rb_right; 605 - continue; 606 - } 607 - atomic_inc(&bp->b_hold); 608 - goto found; 609 - } 528 + bp = rhashtable_lookup_fast(&pag->pag_buf_hash, &cmap, 529 + xfs_buf_hash_params); 530 + if (bp) { 531 + atomic_inc(&bp->b_hold); 532 + goto found; 610 533 } 611 534 612 535 /* No match found */ 613 536 if (new_bp) { 614 - rb_link_node(&new_bp->b_rbnode, parent, rbp); 615 - rb_insert_color(&new_bp->b_rbnode, &pag->pag_buf_tree); 616 537 /* the buffer keeps the perag reference until it is freed */ 617 538 new_bp->b_pag = pag; 539 + rhashtable_insert_fast(&pag->pag_buf_hash, 540 + &new_bp->b_rhash_head, 541 + xfs_buf_hash_params); 618 542 spin_unlock(&pag->pag_buf_lock); 619 543 } else { 620 544 XFS_STATS_INC(btp->bt_mount, xb_miss_locked); ··· 958 930 959 931 if (!pag) { 960 932 ASSERT(list_empty(&bp->b_lru)); 961 - ASSERT(RB_EMPTY_NODE(&bp->b_rbnode)); 962 933 if (atomic_dec_and_test(&bp->b_hold)) { 963 934 xfs_buf_ioacct_dec(bp); 964 935 xfs_buf_free(bp); 965 936 } 966 937 return; 967 938 } 968 - 969 - ASSERT(!RB_EMPTY_NODE(&bp->b_rbnode)); 970 939 971 940 ASSERT(atomic_read(&bp->b_hold) > 0); 972 941 ··· 1008 983 } 1009 984 1010 985 ASSERT(!(bp->b_flags & _XBF_DELWRI_Q)); 1011 - rb_erase(&bp->b_rbnode, &pag->pag_buf_tree); 986 + rhashtable_remove_fast(&pag->pag_buf_hash, &bp->b_rhash_head, 987 + xfs_buf_hash_params); 1012 988 spin_unlock(&pag->pag_buf_lock); 1013 989 xfs_perag_put(pag); 1014 990 freebuf = true; ··· 1737 1711 percpu_counter_destroy(&btp->bt_io_count); 1738 1712 list_lru_destroy(&btp->bt_lru); 1739 1713 1740 - if (mp->m_flags & XFS_MOUNT_BARRIER) 1741 - xfs_blkdev_issue_flush(btp); 1714 + xfs_blkdev_issue_flush(btp); 1742 1715 1743 1716 kmem_free(btp); 1744 1717 }

+2 -1

fs/xfs/xfs_buf.h

··· 71 71 { XBF_READ, "READ" }, \ 72 72 { XBF_WRITE, "WRITE" }, \ 73 73 { XBF_READ_AHEAD, "READ_AHEAD" }, \ 74 + { XBF_NO_IOACCT, "NO_IOACCT" }, \ 74 75 { XBF_ASYNC, "ASYNC" }, \ 75 76 { XBF_DONE, "DONE" }, \ 76 77 { XBF_STALE, "STALE" }, \ ··· 151 150 * which is the only bit that is touched if we hit the semaphore 152 151 * fast-path on locking. 153 152 */ 154 - struct rb_node b_rbnode; /* rbtree node */ 153 + struct rhash_head b_rhash_head; /* pag buffer hash node */ 155 154 xfs_daddr_t b_bn; /* block number of buffer */ 156 155 int b_length; /* size of buffer in BBs */ 157 156 atomic_t b_hold; /* reference count */

-2

fs/xfs/xfs_dir2_readdir.c

··· 677 677 args.dp = dp; 678 678 args.geo = dp->i_mount->m_dir_geo; 679 679 680 - xfs_ilock(dp, XFS_IOLOCK_SHARED); 681 680 if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) 682 681 rval = xfs_dir2_sf_getdents(&args, ctx); 683 682 else if ((rval = xfs_dir2_isblock(&args, &v))) ··· 685 686 rval = xfs_dir2_block_getdents(&args, ctx); 686 687 else 687 688 rval = xfs_dir2_leaf_getdents(&args, ctx, bufsize); 688 - xfs_iunlock(dp, XFS_IOLOCK_SHARED); 689 689 690 690 return rval; 691 691 }

+94 -170

fs/xfs/xfs_file.c

··· 48 48 static const struct vm_operations_struct xfs_file_vm_ops; 49 49 50 50 /* 51 - * Locking primitives for read and write IO paths to ensure we consistently use 52 - * and order the inode->i_mutex, ip->i_lock and ip->i_iolock. 53 - */ 54 - static inline void 55 - xfs_rw_ilock( 56 - struct xfs_inode *ip, 57 - int type) 58 - { 59 - if (type & XFS_IOLOCK_EXCL) 60 - inode_lock(VFS_I(ip)); 61 - xfs_ilock(ip, type); 62 - } 63 - 64 - static inline void 65 - xfs_rw_iunlock( 66 - struct xfs_inode *ip, 67 - int type) 68 - { 69 - xfs_iunlock(ip, type); 70 - if (type & XFS_IOLOCK_EXCL) 71 - inode_unlock(VFS_I(ip)); 72 - } 73 - 74 - static inline void 75 - xfs_rw_ilock_demote( 76 - struct xfs_inode *ip, 77 - int type) 78 - { 79 - xfs_ilock_demote(ip, type); 80 - if (type & XFS_IOLOCK_EXCL) 81 - inode_unlock(VFS_I(ip)); 82 - } 83 - 84 - /* 85 51 * Clear the specified ranges to zero through either the pagecache or DAX. 86 52 * Holes and unwritten extents will be left as-is as they already are zeroed. 87 53 */ ··· 149 183 150 184 xfs_iflags_clear(ip, XFS_ITRUNCATED); 151 185 152 - if (mp->m_flags & XFS_MOUNT_BARRIER) { 153 - /* 154 - * If we have an RT and/or log subvolume we need to make sure 155 - * to flush the write cache the device used for file data 156 - * first. This is to ensure newly written file data make 157 - * it to disk before logging the new inode size in case of 158 - * an extending write. 159 - */ 160 - if (XFS_IS_REALTIME_INODE(ip)) 161 - xfs_blkdev_issue_flush(mp->m_rtdev_targp); 162 - else if (mp->m_logdev_targp != mp->m_ddev_targp) 163 - xfs_blkdev_issue_flush(mp->m_ddev_targp); 164 - } 186 + /* 187 + * If we have an RT and/or log subvolume we need to make sure to flush 188 + * the write cache the device used for file data first. This is to 189 + * ensure newly written file data make it to disk before logging the new 190 + * inode size in case of an extending write. 
191 + */ 192 + if (XFS_IS_REALTIME_INODE(ip)) 193 + xfs_blkdev_issue_flush(mp->m_rtdev_targp); 194 + else if (mp->m_logdev_targp != mp->m_ddev_targp) 195 + xfs_blkdev_issue_flush(mp->m_ddev_targp); 165 196 166 197 /* 167 198 * All metadata updates are logged, which means that we just have to ··· 193 230 * an already allocated file and thus do not have any metadata to 194 231 * commit. 195 232 */ 196 - if ((mp->m_flags & XFS_MOUNT_BARRIER) && 197 - mp->m_logdev_targp == mp->m_ddev_targp && 198 - !XFS_IS_REALTIME_INODE(ip) && 199 - !log_flushed) 233 + if (!log_flushed && !XFS_IS_REALTIME_INODE(ip) && 234 + mp->m_logdev_targp == mp->m_ddev_targp) 200 235 xfs_blkdev_issue_flush(mp->m_ddev_targp); 201 236 202 237 return error; ··· 205 244 struct kiocb *iocb, 206 245 struct iov_iter *to) 207 246 { 208 - struct address_space *mapping = iocb->ki_filp->f_mapping; 209 - struct inode *inode = mapping->host; 210 - struct xfs_inode *ip = XFS_I(inode); 211 - loff_t isize = i_size_read(inode); 247 + struct xfs_inode *ip = XFS_I(file_inode(iocb->ki_filp)); 212 248 size_t count = iov_iter_count(to); 213 - loff_t end = iocb->ki_pos + count - 1; 214 - struct iov_iter data; 215 - struct xfs_buftarg *target; 216 - ssize_t ret = 0; 249 + ssize_t ret; 217 250 218 251 trace_xfs_file_direct_read(ip, count, iocb->ki_pos); 219 252 220 253 if (!count) 221 254 return 0; /* skip atime */ 222 255 223 - if (XFS_IS_REALTIME_INODE(ip)) 224 - target = ip->i_mount->m_rtdev_targp; 225 - else 226 - target = ip->i_mount->m_ddev_targp; 227 - 228 - /* DIO must be aligned to device logical sector size */ 229 - if ((iocb->ki_pos | count) & target->bt_logical_sectormask) { 230 - if (iocb->ki_pos == isize) 231 - return 0; 232 - return -EINVAL; 233 - } 234 - 235 256 file_accessed(iocb->ki_filp); 236 257 237 - xfs_rw_ilock(ip, XFS_IOLOCK_SHARED); 238 - if (mapping->nrpages) { 239 - ret = filemap_write_and_wait_range(mapping, iocb->ki_pos, end); 240 - if (ret) 241 - goto out_unlock; 258 + xfs_ilock(ip, 
XFS_IOLOCK_SHARED); 259 + ret = iomap_dio_rw(iocb, to, &xfs_iomap_ops, NULL); 260 + xfs_iunlock(ip, XFS_IOLOCK_SHARED); 242 261 243 - /* 244 - * Invalidate whole pages. This can return an error if we fail 245 - * to invalidate a page, but this should never happen on XFS. 246 - * Warn if it does fail. 247 - */ 248 - ret = invalidate_inode_pages2_range(mapping, 249 - iocb->ki_pos >> PAGE_SHIFT, end >> PAGE_SHIFT); 250 - WARN_ON_ONCE(ret); 251 - ret = 0; 252 - } 253 - 254 - data = *to; 255 - ret = __blockdev_direct_IO(iocb, inode, target->bt_bdev, &data, 256 - xfs_get_blocks_direct, NULL, NULL, 0); 257 - if (ret >= 0) { 258 - iocb->ki_pos += ret; 259 - iov_iter_advance(to, ret); 260 - } 261 - 262 - out_unlock: 263 - xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED); 264 262 return ret; 265 263 } 266 264 ··· 237 317 if (!count) 238 318 return 0; /* skip atime */ 239 319 240 - xfs_rw_ilock(ip, XFS_IOLOCK_SHARED); 320 + xfs_ilock(ip, XFS_IOLOCK_SHARED); 241 321 ret = dax_iomap_rw(iocb, to, &xfs_iomap_ops); 242 - xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED); 322 + xfs_iunlock(ip, XFS_IOLOCK_SHARED); 243 323 244 324 file_accessed(iocb->ki_filp); 245 325 return ret; ··· 255 335 256 336 trace_xfs_file_buffered_read(ip, iov_iter_count(to), iocb->ki_pos); 257 337 258 - xfs_rw_ilock(ip, XFS_IOLOCK_SHARED); 338 + xfs_ilock(ip, XFS_IOLOCK_SHARED); 259 339 ret = generic_file_read_iter(iocb, to); 260 - xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED); 340 + xfs_iunlock(ip, XFS_IOLOCK_SHARED); 261 341 262 342 return ret; 263 343 } ··· 338 418 if (error <= 0) 339 419 return error; 340 420 341 - error = xfs_break_layouts(inode, iolock, true); 421 + error = xfs_break_layouts(inode, iolock); 342 422 if (error) 343 423 return error; 344 424 345 - /* For changing security info in file_remove_privs() we need i_mutex */ 425 + /* 426 + * For changing security info in file_remove_privs() we need i_rwsem 427 + * exclusively. 
428 + */ 346 429 if (*iolock == XFS_IOLOCK_SHARED && !IS_NOSEC(inode)) { 347 - xfs_rw_iunlock(ip, *iolock); 430 + xfs_iunlock(ip, *iolock); 348 431 *iolock = XFS_IOLOCK_EXCL; 349 - xfs_rw_ilock(ip, *iolock); 432 + xfs_ilock(ip, *iolock); 350 433 goto restart; 351 434 } 352 435 /* ··· 374 451 spin_unlock(&ip->i_flags_lock); 375 452 if (!drained_dio) { 376 453 if (*iolock == XFS_IOLOCK_SHARED) { 377 - xfs_rw_iunlock(ip, *iolock); 454 + xfs_iunlock(ip, *iolock); 378 455 *iolock = XFS_IOLOCK_EXCL; 379 - xfs_rw_ilock(ip, *iolock); 456 + xfs_ilock(ip, *iolock); 380 457 iov_iter_reexpand(from, count); 381 458 } 382 459 /* ··· 419 496 return 0; 420 497 } 421 498 499 + static int 500 + xfs_dio_write_end_io( 501 + struct kiocb *iocb, 502 + ssize_t size, 503 + unsigned flags) 504 + { 505 + struct inode *inode = file_inode(iocb->ki_filp); 506 + struct xfs_inode *ip = XFS_I(inode); 507 + loff_t offset = iocb->ki_pos; 508 + bool update_size = false; 509 + int error = 0; 510 + 511 + trace_xfs_end_io_direct_write(ip, offset, size); 512 + 513 + if (XFS_FORCED_SHUTDOWN(ip->i_mount)) 514 + return -EIO; 515 + 516 + if (size <= 0) 517 + return size; 518 + 519 + /* 520 + * We need to update the in-core inode size here so that we don't end up 521 + * with the on-disk inode size being outside the in-core inode size. We 522 + * have no other method of updating EOF for AIO, so always do it here 523 + * if necessary. 524 + * 525 + * We need to lock the test/set EOF update as we can be racing with 526 + * other IO completions here to update the EOF. Failing to serialise 527 + * here can result in EOF moving backwards and Bad Things Happen when 528 + * that occurs. 
529 + */ 530 + spin_lock(&ip->i_flags_lock); 531 + if (offset + size > i_size_read(inode)) { 532 + i_size_write(inode, offset + size); 533 + update_size = true; 534 + } 535 + spin_unlock(&ip->i_flags_lock); 536 + 537 + if (flags & IOMAP_DIO_COW) { 538 + error = xfs_reflink_end_cow(ip, offset, size); 539 + if (error) 540 + return error; 541 + } 542 + 543 + if (flags & IOMAP_DIO_UNWRITTEN) 544 + error = xfs_iomap_write_unwritten(ip, offset, size); 545 + else if (update_size) 546 + error = xfs_setfilesize(ip, offset, size); 547 + 548 + return error; 549 + } 550 + 422 551 /* 423 552 * xfs_file_dio_aio_write - handle direct IO writes 424 553 * ··· 510 535 int unaligned_io = 0; 511 536 int iolock; 512 537 size_t count = iov_iter_count(from); 513 - loff_t end; 514 - struct iov_iter data; 515 - struct xfs_buftarg *target = XFS_IS_REALTIME_INODE(ip) ? 538 + struct xfs_buftarg *target = XFS_IS_REALTIME_INODE(ip) ? 516 539 mp->m_rtdev_targp : mp->m_ddev_targp; 517 540 518 541 /* DIO must be aligned to device logical sector size */ ··· 532 559 iolock = XFS_IOLOCK_SHARED; 533 560 } 534 561 535 - xfs_rw_ilock(ip, iolock); 562 + xfs_ilock(ip, iolock); 536 563 537 564 ret = xfs_file_aio_write_checks(iocb, from, &iolock); 538 565 if (ret) 539 566 goto out; 540 567 count = iov_iter_count(from); 541 - end = iocb->ki_pos + count - 1; 542 - 543 - if (mapping->nrpages) { 544 - ret = filemap_write_and_wait_range(mapping, iocb->ki_pos, end); 545 - if (ret) 546 - goto out; 547 - 548 - /* 549 - * Invalidate whole pages. This can return an error if we fail 550 - * to invalidate a page, but this should never happen on XFS. 551 - * Warn if it does fail. 
552 - */ 553 - ret = invalidate_inode_pages2_range(mapping, 554 - iocb->ki_pos >> PAGE_SHIFT, end >> PAGE_SHIFT); 555 - WARN_ON_ONCE(ret); 556 - ret = 0; 557 - } 558 568 559 569 /* 560 570 * If we are doing unaligned IO, wait for all other IO to drain, ··· 547 591 if (unaligned_io) 548 592 inode_dio_wait(inode); 549 593 else if (iolock == XFS_IOLOCK_EXCL) { 550 - xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL); 594 + xfs_ilock_demote(ip, XFS_IOLOCK_EXCL); 551 595 iolock = XFS_IOLOCK_SHARED; 552 596 } 553 597 ··· 560 604 goto out; 561 605 } 562 606 563 - data = *from; 564 - ret = __blockdev_direct_IO(iocb, inode, target->bt_bdev, &data, 565 - xfs_get_blocks_direct, xfs_end_io_direct_write, 566 - NULL, DIO_ASYNC_EXTEND); 567 - 568 - /* see generic_file_direct_write() for why this is necessary */ 569 - if (mapping->nrpages) { 570 - invalidate_inode_pages2_range(mapping, 571 - iocb->ki_pos >> PAGE_SHIFT, 572 - end >> PAGE_SHIFT); 573 - } 574 - 575 - if (ret > 0) { 576 - iocb->ki_pos += ret; 577 - iov_iter_advance(from, ret); 578 - } 607 + ret = iomap_dio_rw(iocb, from, &xfs_iomap_ops, xfs_dio_write_end_io); 579 608 out: 580 - xfs_rw_iunlock(ip, iolock); 609 + xfs_iunlock(ip, iolock); 581 610 582 611 /* 583 612 * No fallback to buffered IO on errors for XFS, direct IO will either ··· 584 643 size_t count; 585 644 loff_t pos; 586 645 587 - xfs_rw_ilock(ip, iolock); 646 + xfs_ilock(ip, iolock); 588 647 ret = xfs_file_aio_write_checks(iocb, from, &iolock); 589 648 if (ret) 590 649 goto out; ··· 593 652 count = iov_iter_count(from); 594 653 595 654 trace_xfs_file_dax_write(ip, count, pos); 596 - 597 655 ret = dax_iomap_rw(iocb, from, &xfs_iomap_ops); 598 656 if (ret > 0 && iocb->ki_pos > i_size_read(inode)) { 599 657 i_size_write(inode, iocb->ki_pos); 600 658 error = xfs_setfilesize(ip, pos, ret); 601 659 } 602 - 603 660 out: 604 - xfs_rw_iunlock(ip, iolock); 661 + xfs_iunlock(ip, iolock); 605 662 return error ? 
error : ret; 606 663 } 607 664 ··· 616 677 int enospc = 0; 617 678 int iolock = XFS_IOLOCK_EXCL; 618 679 619 - xfs_rw_ilock(ip, iolock); 680 + xfs_ilock(ip, iolock); 620 681 621 682 ret = xfs_file_aio_write_checks(iocb, from, &iolock); 622 683 if (ret) ··· 660 721 661 722 current->backing_dev_info = NULL; 662 723 out: 663 - xfs_rw_iunlock(ip, iolock); 724 + xfs_iunlock(ip, iolock); 664 725 return ret; 665 726 } 666 727 ··· 736 797 return -EOPNOTSUPP; 737 798 738 799 xfs_ilock(ip, iolock); 739 - error = xfs_break_layouts(inode, &iolock, false); 800 + error = xfs_break_layouts(inode, &iolock); 740 801 if (error) 741 802 goto out_unlock; 742 803 ··· 878 939 len, false); 879 940 } 880 941 881 - #define XFS_MAX_DEDUPE_LEN (16 * 1024 * 1024) 882 942 STATIC ssize_t 883 943 xfs_file_dedupe_range( 884 944 struct file *src_file, ··· 887 949 u64 dst_loff) 888 950 { 889 951 int error; 890 - 891 - /* 892 - * Limit the total length we will dedupe for each operation. 893 - * This is intended to bound the total time spent in this 894 - * ioctl to something sane. 895 - */ 896 - if (len > XFS_MAX_DEDUPE_LEN) 897 - len = XFS_MAX_DEDUPE_LEN; 898 952 899 953 error = xfs_reflink_remap_range(src_file, loff, dst_file, dst_loff, 900 954 len, true); ··· 1431 1501 return xfs_filemap_page_mkwrite(vma, vmf); 1432 1502 1433 1503 xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED); 1434 - if (IS_DAX(inode)) { 1435 - /* 1436 - * we do not want to trigger unwritten extent conversion on read 1437 - * faults - that is unnecessary overhead and would also require 1438 - * changes to xfs_get_blocks_direct() to map unwritten extent 1439 - * ioend for conversion on read-only mappings. 1440 - */ 1504 + if (IS_DAX(inode)) 1441 1505 ret = dax_iomap_fault(vma, vmf, &xfs_iomap_ops); 1442 - } else 1506 + else 1443 1507 ret = filemap_fault(vma, vmf); 1444 1508 xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED); 1445 1509

+22 -18

fs/xfs/xfs_icache.c

··· 70 70 ASSERT(!xfs_isiflocked(ip)); 71 71 ASSERT(ip->i_ino == 0); 72 72 73 - mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino); 74 - 75 73 /* initialise the xfs inode */ 76 74 ip->i_ino = ino; 77 75 ip->i_mount = mp; ··· 121 123 { 122 124 /* asserts to verify all state is correct here */ 123 125 ASSERT(atomic_read(&ip->i_pincount) == 0); 124 - ASSERT(!xfs_isiflocked(ip)); 125 126 XFS_STATS_DEC(ip->i_mount, vn_active); 126 127 127 128 call_rcu(&VFS_I(ip)->i_rcu, xfs_inode_free_callback); ··· 130 133 xfs_inode_free( 131 134 struct xfs_inode *ip) 132 135 { 136 + ASSERT(!xfs_isiflocked(ip)); 137 + 133 138 /* 134 139 * Because we use RCU freeing we need to ensure the inode always 135 140 * appears to be reclaimed with an invalid inode number when in the ··· 392 393 xfs_inode_clear_reclaim_tag(pag, ip->i_ino); 393 394 inode->i_state = I_NEW; 394 395 395 - ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock)); 396 - mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino); 396 + ASSERT(!rwsem_is_locked(&inode->i_rwsem)); 397 + init_rwsem(&inode->i_rwsem); 397 398 398 399 spin_unlock(&ip->i_flags_lock); 399 400 spin_unlock(&pag->pag_ici_lock); ··· 980 981 981 982 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { 982 983 xfs_iunpin_wait(ip); 984 + /* xfs_iflush_abort() drops the flush lock */ 983 985 xfs_iflush_abort(ip, false); 984 986 goto reclaim; 985 987 } ··· 989 989 goto out_ifunlock; 990 990 xfs_iunpin_wait(ip); 991 991 } 992 - if (xfs_iflags_test(ip, XFS_ISTALE)) 992 + if (xfs_iflags_test(ip, XFS_ISTALE) || xfs_inode_clean(ip)) { 993 + xfs_ifunlock(ip); 993 994 goto reclaim; 994 - if (xfs_inode_clean(ip)) 995 - goto reclaim; 995 + } 996 996 997 997 /* 998 998 * Never flush out dirty data during non-blocking reclaim, as it would ··· 1030 1030 xfs_buf_relse(bp); 1031 1031 } 1032 1032 1033 - xfs_iflock(ip); 1034 1033 reclaim: 1034 + ASSERT(!xfs_isiflocked(ip)); 1035 + 1035 1036 /* 1036 1037 * Because we use RCU freeing we need to ensure the inode always appears 
1037 1038 * to be reclaimed with an invalid inode number when in the free state. 1038 - * We do this as early as possible under the ILOCK and flush lock so 1039 - * that xfs_iflush_cluster() can be guaranteed to detect races with us 1040 - * here. By doing this, we guarantee that once xfs_iflush_cluster has 1041 - * locked both the XFS_ILOCK and the flush lock that it will see either 1042 - * a valid, flushable inode that will serialise correctly against the 1043 - * locks below, or it will see a clean (and invalid) inode that it can 1044 - * skip. 1039 + * We do this as early as possible under the ILOCK so that 1040 + * xfs_iflush_cluster() can be guaranteed to detect races with us here. 1041 + * By doing this, we guarantee that once xfs_iflush_cluster has locked 1042 + * XFS_ILOCK that it will see either a valid, flushable inode that will 1043 + * serialise correctly, or it will see a clean (and invalid) inode that 1044 + * it can skip. 1045 1045 */ 1046 1046 spin_lock(&ip->i_flags_lock); 1047 1047 ip->i_flags = XFS_IRECLAIM; 1048 1048 ip->i_ino = 0; 1049 1049 spin_unlock(&ip->i_flags_lock); 1050 1050 1051 - xfs_ifunlock(ip); 1052 1051 xfs_iunlock(ip, XFS_ILOCK_EXCL); 1053 1052 1054 1053 XFS_STATS_INC(ip->i_mount, xs_ig_reclaims); ··· 1579 1580 struct xfs_eofblocks *eofb = args; 1580 1581 bool need_iolock = true; 1581 1582 int match; 1583 + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); 1582 1584 1583 1585 ASSERT(!eofb || (eofb && eofb->eof_scan_owner != 0)); 1584 1586 1585 - if (!xfs_reflink_has_real_cow_blocks(ip)) { 1587 + /* 1588 + * Just clear the tag if we have an empty cow fork or none at all. It's 1589 + * possible the inode was fully unshared since it was originally tagged. 1590 + */ 1591 + if (!xfs_is_reflink_inode(ip) || !ifp->if_bytes) { 1586 1592 trace_xfs_inode_free_cowblocks_invalid(ip); 1587 1593 xfs_inode_clear_cowblocks_tag(ip); 1588 1594 return 0;

+1 -1

fs/xfs/xfs_icreate_item.c

··· 133 133 /* 134 134 * This is the ops vector shared by all buf log items. 135 135 */ 136 - static struct xfs_item_ops xfs_icreate_item_ops = { 136 + static const struct xfs_item_ops xfs_icreate_item_ops = { 137 137 .iop_size = xfs_icreate_item_size, 138 138 .iop_format = xfs_icreate_item_format, 139 139 .iop_pin = xfs_icreate_item_pin,

+33 -51

fs/xfs/xfs_inode.c

··· 142 142 } 143 143 144 144 /* 145 - * The xfs inode contains 3 multi-reader locks: the i_iolock the i_mmap_lock and 146 - * the i_lock. This routine allows various combinations of the locks to be 147 - * obtained. 145 + * In addition to i_rwsem in the VFS inode, the xfs inode contains 2 146 + * multi-reader locks: i_mmap_lock and the i_lock. This routine allows 147 + * various combinations of the locks to be obtained. 148 148 * 149 149 * The 3 locks should always be ordered so that the IO lock is obtained first, 150 150 * the mmap lock second and the ilock last in order to prevent deadlock. 151 151 * 152 152 * Basic locking order: 153 153 * 154 - * i_iolock -> i_mmap_lock -> page_lock -> i_ilock 154 + * i_rwsem -> i_mmap_lock -> page_lock -> i_ilock 155 155 * 156 156 * mmap_sem locking order: 157 157 * 158 - * i_iolock -> page lock -> mmap_sem 158 + * i_rwsem -> page lock -> mmap_sem 159 159 * mmap_sem -> i_mmap_lock -> page_lock 160 160 * 161 161 * The difference in mmap_sem locking order mean that we cannot hold the 162 162 * i_mmap_lock over syscall based read(2)/write(2) based IO. These IO paths can 163 163 * fault in pages during copy in/out (for buffered IO) or require the mmap_sem 164 164 * in get_user_pages() to map the user pages into the kernel address space for 165 - * direct IO. Similarly the i_iolock cannot be taken inside a page fault because 165 + * direct IO. Similarly the i_rwsem cannot be taken inside a page fault because 166 166 * page faults already hold the mmap_sem. 167 167 * 168 168 * Hence to serialise fully against both syscall and mmap based IO, we need to 169 - * take both the i_iolock and the i_mmap_lock. These locks should *only* be both 169 + * take both the i_rwsem and the i_mmap_lock. These locks should *only* be both 170 170 * taken in places where we need to invalidate the page cache in a race 171 171 * free manner (e.g. truncate, hole punch and other extent manipulation 172 172 * functions). 
··· 191 191 (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); 192 192 ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_SUBCLASS_MASK)) == 0); 193 193 194 - if (lock_flags & XFS_IOLOCK_EXCL) 195 - mrupdate_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags)); 196 - else if (lock_flags & XFS_IOLOCK_SHARED) 197 - mraccess_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags)); 194 + if (lock_flags & XFS_IOLOCK_EXCL) { 195 + down_write_nested(&VFS_I(ip)->i_rwsem, 196 + XFS_IOLOCK_DEP(lock_flags)); 197 + } else if (lock_flags & XFS_IOLOCK_SHARED) { 198 + down_read_nested(&VFS_I(ip)->i_rwsem, 199 + XFS_IOLOCK_DEP(lock_flags)); 200 + } 198 201 199 202 if (lock_flags & XFS_MMAPLOCK_EXCL) 200 203 mrupdate_nested(&ip->i_mmaplock, XFS_MMAPLOCK_DEP(lock_flags)); ··· 243 240 ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_SUBCLASS_MASK)) == 0); 244 241 245 242 if (lock_flags & XFS_IOLOCK_EXCL) { 246 - if (!mrtryupdate(&ip->i_iolock)) 243 + if (!down_write_trylock(&VFS_I(ip)->i_rwsem)) 247 244 goto out; 248 245 } else if (lock_flags & XFS_IOLOCK_SHARED) { 249 - if (!mrtryaccess(&ip->i_iolock)) 246 + if (!down_read_trylock(&VFS_I(ip)->i_rwsem)) 250 247 goto out; 251 248 } 252 249 ··· 274 271 mrunlock_shared(&ip->i_mmaplock); 275 272 out_undo_iolock: 276 273 if (lock_flags & XFS_IOLOCK_EXCL) 277 - mrunlock_excl(&ip->i_iolock); 274 + up_write(&VFS_I(ip)->i_rwsem); 278 275 else if (lock_flags & XFS_IOLOCK_SHARED) 279 - mrunlock_shared(&ip->i_iolock); 276 + up_read(&VFS_I(ip)->i_rwsem); 280 277 out: 281 278 return 0; 282 279 } ··· 313 310 ASSERT(lock_flags != 0); 314 311 315 312 if (lock_flags & XFS_IOLOCK_EXCL) 316 - mrunlock_excl(&ip->i_iolock); 313 + up_write(&VFS_I(ip)->i_rwsem); 317 314 else if (lock_flags & XFS_IOLOCK_SHARED) 318 - mrunlock_shared(&ip->i_iolock); 315 + up_read(&VFS_I(ip)->i_rwsem); 319 316 320 317 if (lock_flags & XFS_MMAPLOCK_EXCL) 321 318 mrunlock_excl(&ip->i_mmaplock); ··· 348 345 if (lock_flags & XFS_MMAPLOCK_EXCL) 349 346 mrdemote(&ip->i_mmaplock); 350 347 if (lock_flags & 
XFS_IOLOCK_EXCL) 351 - mrdemote(&ip->i_iolock); 348 + downgrade_write(&VFS_I(ip)->i_rwsem); 352 349 353 350 trace_xfs_ilock_demote(ip, lock_flags, _RET_IP_); 354 351 } ··· 373 370 374 371 if (lock_flags & (XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)) { 375 372 if (!(lock_flags & XFS_IOLOCK_SHARED)) 376 - return !!ip->i_iolock.mr_writer; 377 - return rwsem_is_locked(&ip->i_iolock.mr_lock); 373 + return !debug_locks || 374 + lockdep_is_held_type(&VFS_I(ip)->i_rwsem, 0); 375 + return rwsem_is_locked(&VFS_I(ip)->i_rwsem); 378 376 } 379 377 380 378 ASSERT(0); ··· 425 421 426 422 if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) { 427 423 ASSERT(subclass <= XFS_IOLOCK_MAX_SUBCLASS); 428 - ASSERT(xfs_lockdep_subclass_ok(subclass + 429 - XFS_IOLOCK_PARENT_VAL)); 430 424 class += subclass << XFS_IOLOCK_SHIFT; 431 - if (lock_mode & XFS_IOLOCK_PARENT) 432 - class += XFS_IOLOCK_PARENT_VAL << XFS_IOLOCK_SHIFT; 433 425 } 434 426 435 427 if (lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) { ··· 477 477 XFS_ILOCK_EXCL)); 478 478 ASSERT(!(lock_mode & (XFS_IOLOCK_SHARED | XFS_MMAPLOCK_SHARED | 479 479 XFS_ILOCK_SHARED))); 480 - ASSERT(!(lock_mode & XFS_IOLOCK_EXCL) || 481 - inodes <= XFS_IOLOCK_MAX_SUBCLASS + 1); 482 480 ASSERT(!(lock_mode & XFS_MMAPLOCK_EXCL) || 483 481 inodes <= XFS_MMAPLOCK_MAX_SUBCLASS + 1); 484 482 ASSERT(!(lock_mode & XFS_ILOCK_EXCL) || ··· 579 581 int attempts = 0; 580 582 xfs_log_item_t *lp; 581 583 582 - if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) { 583 - ASSERT(!(lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL))); 584 - ASSERT(!(lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))); 585 - } else if (lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) 584 + ASSERT(!(lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL))); 585 + if (lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) 586 586 ASSERT(!(lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))); 587 587 588 588 ASSERT(ip0->i_ino != ip1->i_ino); ··· 711 715 if (XFS_FORCED_SHUTDOWN(dp->i_mount)) 712 716 
return -EIO; 713 717 714 - xfs_ilock(dp, XFS_IOLOCK_SHARED); 715 718 error = xfs_dir_lookup(NULL, dp, name, &inum, ci_name); 716 719 if (error) 717 720 goto out_unlock; ··· 719 724 if (error) 720 725 goto out_free_name; 721 726 722 - xfs_iunlock(dp, XFS_IOLOCK_SHARED); 723 727 return 0; 724 728 725 729 out_free_name: 726 730 if (ci_name) 727 731 kmem_free(ci_name->name); 728 732 out_unlock: 729 - xfs_iunlock(dp, XFS_IOLOCK_SHARED); 730 733 *ipp = NULL; 731 734 return error; 732 735 } ··· 1208 1215 if (error) 1209 1216 goto out_release_inode; 1210 1217 1211 - xfs_ilock(dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL | 1212 - XFS_IOLOCK_PARENT | XFS_ILOCK_PARENT); 1218 + xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT); 1213 1219 unlock_dp_on_error = true; 1214 1220 1215 1221 xfs_defer_init(&dfops, &first_block); ··· 1244 1252 * the transaction cancel unlocking dp so don't do it explicitly in the 1245 1253 * error path. 1246 1254 */ 1247 - xfs_trans_ijoin(tp, dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); 1255 + xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); 1248 1256 unlock_dp_on_error = false; 1249 1257 1250 1258 error = xfs_dir_createname(tp, dp, name, ip->i_ino, ··· 1317 1325 xfs_qm_dqrele(pdqp); 1318 1326 1319 1327 if (unlock_dp_on_error) 1320 - xfs_iunlock(dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); 1328 + xfs_iunlock(dp, XFS_ILOCK_EXCL); 1321 1329 return error; 1322 1330 } 1323 1331 ··· 1458 1466 if (error) 1459 1467 goto std_return; 1460 1468 1461 - xfs_ilock(tdp, XFS_IOLOCK_EXCL | XFS_IOLOCK_PARENT); 1462 1469 xfs_lock_two_inodes(sip, tdp, XFS_ILOCK_EXCL); 1463 1470 1464 1471 xfs_trans_ijoin(tp, sip, XFS_ILOCK_EXCL); 1465 - xfs_trans_ijoin(tp, tdp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); 1472 + xfs_trans_ijoin(tp, tdp, XFS_ILOCK_EXCL); 1466 1473 1467 1474 /* 1468 1475 * If we are using project inheritance, we only allow hard link ··· 2032 2041 agi->agi_unlinked[bucket_index] = cpu_to_be32(agino); 2033 2042 offset = offsetof(xfs_agi_t, agi_unlinked) + 2034 2043 (sizeof(xfs_agino_t) * 
bucket_index); 2035 - xfs_trans_buf_set_type(tp, agibp, XFS_BLFT_AGI_BUF); 2036 2044 xfs_trans_log_buf(tp, agibp, offset, 2037 2045 (offset + sizeof(xfs_agino_t) - 1)); 2038 2046 return 0; ··· 2123 2133 agi->agi_unlinked[bucket_index] = cpu_to_be32(next_agino); 2124 2134 offset = offsetof(xfs_agi_t, agi_unlinked) + 2125 2135 (sizeof(xfs_agino_t) * bucket_index); 2126 - xfs_trans_buf_set_type(tp, agibp, XFS_BLFT_AGI_BUF); 2127 2136 xfs_trans_log_buf(tp, agibp, offset, 2128 2137 (offset + sizeof(xfs_agino_t) - 1)); 2129 2138 } else { ··· 2568 2579 goto std_return; 2569 2580 } 2570 2581 2571 - xfs_ilock(dp, XFS_IOLOCK_EXCL | XFS_IOLOCK_PARENT); 2572 2582 xfs_lock_two_inodes(dp, ip, XFS_ILOCK_EXCL); 2573 2583 2574 - xfs_trans_ijoin(tp, dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); 2584 + xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); 2575 2585 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 2576 2586 2577 2587 /* ··· 2951 2963 * whether the target directory is the same as the source 2952 2964 * directory, we can lock from 2 to 4 inodes. 2953 2965 */ 2954 - if (!new_parent) 2955 - xfs_ilock(src_dp, XFS_IOLOCK_EXCL | XFS_IOLOCK_PARENT); 2956 - else 2957 - xfs_lock_two_inodes(src_dp, target_dp, 2958 - XFS_IOLOCK_EXCL | XFS_IOLOCK_PARENT); 2959 - 2960 2966 xfs_lock_inodes(inodes, num_inodes, XFS_ILOCK_EXCL); 2961 2967 2962 2968 /* ··· 2958 2976 * we can rely on either trans_commit or trans_cancel to unlock 2959 2977 * them. 2960 2978 */ 2961 - xfs_trans_ijoin(tp, src_dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); 2979 + xfs_trans_ijoin(tp, src_dp, XFS_ILOCK_EXCL); 2962 2980 if (new_parent) 2963 - xfs_trans_ijoin(tp, target_dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); 2981 + xfs_trans_ijoin(tp, target_dp, XFS_ILOCK_EXCL); 2964 2982 xfs_trans_ijoin(tp, src_ip, XFS_ILOCK_EXCL); 2965 2983 if (target_ip) 2966 2984 xfs_trans_ijoin(tp, target_ip, XFS_ILOCK_EXCL);

+8 -10

fs/xfs/xfs_inode.h

··· 56 56 /* Transaction and locking information. */ 57 57 struct xfs_inode_log_item *i_itemp; /* logging information */ 58 58 mrlock_t i_lock; /* inode lock */ 59 - mrlock_t i_iolock; /* inode IO lock */ 60 59 mrlock_t i_mmaplock; /* inode mmap IO lock */ 61 60 atomic_t i_pincount; /* inode pin count */ 62 61 spinlock_t i_flags_lock; /* inode i_flags lock */ ··· 245 246 * Synchronize processes attempting to flush the in-core inode back to disk. 246 247 */ 247 248 249 + static inline int xfs_isiflocked(struct xfs_inode *ip) 250 + { 251 + return xfs_iflags_test(ip, XFS_IFLOCK); 252 + } 253 + 248 254 extern void __xfs_iflock(struct xfs_inode *ip); 249 255 250 256 static inline int xfs_iflock_nowait(struct xfs_inode *ip) ··· 265 261 266 262 static inline void xfs_ifunlock(struct xfs_inode *ip) 267 263 { 264 + ASSERT(xfs_isiflocked(ip)); 268 265 xfs_iflags_clear(ip, XFS_IFLOCK); 269 266 smp_mb(); 270 267 wake_up_bit(&ip->i_flags, __XFS_IFLOCK_BIT); 271 - } 272 - 273 - static inline int xfs_isiflocked(struct xfs_inode *ip) 274 - { 275 - return xfs_iflags_test(ip, XFS_IFLOCK); 276 268 } 277 269 278 270 /* ··· 332 332 * IOLOCK values 333 333 * 334 334 * 0-3 subclass value 335 - * 4-7 PARENT subclass values 335 + * 4-7 unused 336 336 * 337 337 * MMAPLOCK values 338 338 * ··· 347 347 * 348 348 */ 349 349 #define XFS_IOLOCK_SHIFT 16 350 - #define XFS_IOLOCK_PARENT_VAL 4 351 - #define XFS_IOLOCK_MAX_SUBCLASS (XFS_IOLOCK_PARENT_VAL - 1) 350 + #define XFS_IOLOCK_MAX_SUBCLASS 3 352 351 #define XFS_IOLOCK_DEP_MASK 0x000f0000 353 - #define XFS_IOLOCK_PARENT (XFS_IOLOCK_PARENT_VAL << XFS_IOLOCK_SHIFT) 354 352 355 353 #define XFS_MMAPLOCK_SHIFT 20 356 354 #define XFS_MMAPLOCK_NUMORDER 0

+2 -2

fs/xfs/xfs_inode_item.c

··· 164 164 struct xfs_bmbt_rec *p; 165 165 166 166 ASSERT(ip->i_df.if_u1.if_extents != NULL); 167 - ASSERT(ip->i_df.if_bytes / sizeof(xfs_bmbt_rec_t) > 0); 167 + ASSERT(xfs_iext_count(&ip->i_df) > 0); 168 168 169 169 p = xlog_prepare_iovec(lv, vecp, XLOG_REG_TYPE_IEXT); 170 170 data_bytes = xfs_iextents_copy(ip, p, XFS_DATA_FORK); ··· 261 261 ip->i_afp->if_bytes > 0) { 262 262 struct xfs_bmbt_rec *p; 263 263 264 - ASSERT(ip->i_afp->if_bytes / sizeof(xfs_bmbt_rec_t) == 264 + ASSERT(xfs_iext_count(ip->i_afp) == 265 265 ip->i_d.di_anextents); 266 266 ASSERT(ip->i_afp->if_u1.if_extents != NULL); 267 267

+3 -5

fs/xfs/xfs_ioctl.c

··· 639 639 return error; 640 640 641 641 xfs_ilock(ip, iolock); 642 - error = xfs_break_layouts(inode, &iolock, false); 642 + error = xfs_break_layouts(inode, &iolock); 643 643 if (error) 644 644 goto out_unlock; 645 645 ··· 910 910 if (attr) { 911 911 if (ip->i_afp) { 912 912 if (ip->i_afp->if_flags & XFS_IFEXTENTS) 913 - fa.fsx_nextents = ip->i_afp->if_bytes / 914 - sizeof(xfs_bmbt_rec_t); 913 + fa.fsx_nextents = xfs_iext_count(ip->i_afp); 915 914 else 916 915 fa.fsx_nextents = ip->i_d.di_anextents; 917 916 } else 918 917 fa.fsx_nextents = 0; 919 918 } else { 920 919 if (ip->i_df.if_flags & XFS_IFEXTENTS) 921 - fa.fsx_nextents = ip->i_df.if_bytes / 922 - sizeof(xfs_bmbt_rec_t); 920 + fa.fsx_nextents = xfs_iext_count(&ip->i_df); 923 921 else 924 922 fa.fsx_nextents = ip->i_d.di_nextents; 925 923 }

+64 -40

fs/xfs/xfs_iomap.c

··· 395 395 struct xfs_inode *ip, 396 396 loff_t offset, 397 397 loff_t count, 398 - xfs_extnum_t idx, 399 - struct xfs_bmbt_irec *prev) 398 + xfs_extnum_t idx) 400 399 { 401 400 struct xfs_mount *mp = ip->i_mount; 401 + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); 402 402 xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset); 403 + struct xfs_bmbt_irec prev; 403 404 int shift = 0; 404 405 int64_t freesp; 405 406 xfs_fsblock_t qblocks; ··· 420 419 */ 421 420 if ((mp->m_flags & XFS_MOUNT_DFLT_IOSIZE) || 422 421 XFS_ISIZE(ip) < XFS_FSB_TO_B(mp, mp->m_dalign) || 423 - idx == 0 || 424 - prev->br_startoff + prev->br_blockcount < offset_fsb) 422 + !xfs_iext_get_extent(ifp, idx - 1, &prev) || 423 + prev.br_startoff + prev.br_blockcount < offset_fsb) 425 424 return mp->m_writeio_blocks; 426 425 427 426 /* ··· 440 439 * always extends to MAXEXTLEN rather than falling short due to things 441 440 * like stripe unit/width alignment of real extents. 442 441 */ 443 - if (prev->br_blockcount <= (MAXEXTLEN >> 1)) 444 - alloc_blocks = prev->br_blockcount << 1; 442 + if (prev.br_blockcount <= (MAXEXTLEN >> 1)) 443 + alloc_blocks = prev.br_blockcount << 1; 445 444 else 446 445 alloc_blocks = XFS_B_TO_FSB(mp, offset); 447 446 if (!alloc_blocks) ··· 536 535 xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset); 537 536 xfs_fileoff_t maxbytes_fsb = 538 537 XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes); 539 - xfs_fileoff_t end_fsb, orig_end_fsb; 538 + xfs_fileoff_t end_fsb; 540 539 int error = 0, eof = 0; 541 540 struct xfs_bmbt_irec got; 542 - struct xfs_bmbt_irec prev; 543 541 xfs_extnum_t idx; 542 + xfs_fsblock_t prealloc_blocks = 0; 544 543 545 544 ASSERT(!XFS_IS_REALTIME_INODE(ip)); 546 545 ASSERT(!xfs_get_extsz_hint(ip)); ··· 564 563 goto out_unlock; 565 564 } 566 565 567 - xfs_bmap_search_extents(ip, offset_fsb, XFS_DATA_FORK, &eof, &idx, 568 - &got, &prev); 566 + eof = !xfs_iext_lookup_extent(ip, ifp, offset_fsb, &idx, &got); 569 567 if (!eof && got.br_startoff <= 
offset_fsb) { 570 568 if (xfs_is_reflink_inode(ip)) { 571 569 bool shared; ··· 595 595 * the lower level functions are updated. 596 596 */ 597 597 count = min_t(loff_t, count, 1024 * PAGE_SIZE); 598 - end_fsb = orig_end_fsb = 599 - min(XFS_B_TO_FSB(mp, offset + count), maxbytes_fsb); 598 + end_fsb = min(XFS_B_TO_FSB(mp, offset + count), maxbytes_fsb); 600 599 601 600 if (eof) { 602 - xfs_fsblock_t prealloc_blocks; 603 - 604 - prealloc_blocks = 605 - xfs_iomap_prealloc_size(ip, offset, count, idx, &prev); 601 + prealloc_blocks = xfs_iomap_prealloc_size(ip, offset, count, idx); 606 602 if (prealloc_blocks) { 607 603 xfs_extlen_t align; 608 604 xfs_off_t end_offset; 605 + xfs_fileoff_t p_end_fsb; 609 606 610 607 end_offset = XFS_WRITEIO_ALIGN(mp, offset + count - 1); 611 - end_fsb = XFS_B_TO_FSBT(mp, end_offset) + 612 - prealloc_blocks; 608 + p_end_fsb = XFS_B_TO_FSBT(mp, end_offset) + 609 + prealloc_blocks; 613 610 614 611 align = xfs_eof_alignment(ip, 0); 615 612 if (align) 616 - end_fsb = roundup_64(end_fsb, align); 613 + p_end_fsb = roundup_64(p_end_fsb, align); 617 614 618 - end_fsb = min(end_fsb, maxbytes_fsb); 619 - ASSERT(end_fsb > offset_fsb); 615 + p_end_fsb = min(p_end_fsb, maxbytes_fsb); 616 + ASSERT(p_end_fsb > offset_fsb); 617 + prealloc_blocks = p_end_fsb - end_fsb; 620 618 } 621 619 } 622 620 623 621 retry: 624 622 error = xfs_bmapi_reserve_delalloc(ip, XFS_DATA_FORK, offset_fsb, 625 - end_fsb - offset_fsb, &got, 626 - &prev, &idx, eof); 623 + end_fsb - offset_fsb, prealloc_blocks, &got, &idx, eof); 627 624 switch (error) { 628 625 case 0: 629 626 break; ··· 628 631 case -EDQUOT: 629 632 /* retry without any preallocation */ 630 633 trace_xfs_delalloc_enospc(ip, offset, count); 631 - if (end_fsb != orig_end_fsb) { 632 - end_fsb = orig_end_fsb; 634 + if (prealloc_blocks) { 635 + prealloc_blocks = 0; 633 636 goto retry; 634 637 } 635 638 /*FALLTHRU*/ 636 639 default: 637 640 goto out_unlock; 638 641 } 639 - 640 - /* 641 - * Tag the inode as speculatively 
preallocated so we can reclaim this 642 - * space on demand, if necessary. 643 - */ 644 - if (end_fsb != orig_end_fsb) 645 - xfs_inode_set_eofblocks_tag(ip); 646 642 647 643 trace_xfs_iomap_alloc(ip, offset, count, 0, &got); 648 644 done: ··· 950 960 (IS_DAX(inode) && ISUNWRITTEN(imap)); 951 961 } 952 962 963 + static inline bool need_excl_ilock(struct xfs_inode *ip, unsigned flags) 964 + { 965 + /* 966 + * COW writes will allocate delalloc space, so we need to make sure 967 + * to take the lock exclusively here. 968 + */ 969 + if (xfs_is_reflink_inode(ip) && (flags & (IOMAP_WRITE | IOMAP_ZERO))) 970 + return true; 971 + if ((flags & IOMAP_DIRECT) && (flags & IOMAP_WRITE)) 972 + return true; 973 + return false; 974 + } 975 + 953 976 static int 954 977 xfs_file_iomap_begin( 955 978 struct inode *inode, ··· 982 979 if (XFS_FORCED_SHUTDOWN(mp)) 983 980 return -EIO; 984 981 985 - if ((flags & IOMAP_WRITE) && !IS_DAX(inode) && 986 - !xfs_get_extsz_hint(ip)) { 982 + if (((flags & (IOMAP_WRITE | IOMAP_DIRECT)) == IOMAP_WRITE) && 983 + !IS_DAX(inode) && !xfs_get_extsz_hint(ip)) { 987 984 /* Reserve delalloc blocks for regular writeback. */ 988 985 return xfs_file_iomap_begin_delay(inode, offset, length, flags, 989 986 iomap); 990 987 } 991 988 992 - /* 993 - * COW writes will allocate delalloc space, so we need to make sure 994 - * to take the lock exclusively here. 
995 - */ 996 - if ((flags & (IOMAP_WRITE | IOMAP_ZERO)) && xfs_is_reflink_inode(ip)) { 989 + if (need_excl_ilock(ip, flags)) { 997 990 lockmode = XFS_ILOCK_EXCL; 998 991 xfs_ilock(ip, XFS_ILOCK_EXCL); 999 992 } else { ··· 1002 1003 offset_fsb = XFS_B_TO_FSBT(mp, offset); 1003 1004 end_fsb = XFS_B_TO_FSB(mp, offset + length); 1004 1005 1006 + if (xfs_is_reflink_inode(ip) && 1007 + (flags & IOMAP_WRITE) && (flags & IOMAP_DIRECT)) { 1008 + shared = xfs_reflink_find_cow_mapping(ip, offset, &imap); 1009 + if (shared) { 1010 + xfs_iunlock(ip, lockmode); 1011 + goto alloc_done; 1012 + } 1013 + ASSERT(!isnullstartblock(imap.br_startblock)); 1014 + } 1015 + 1005 1016 error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap, 1006 1017 &nimaps, 0); 1007 1018 if (error) 1008 1019 goto out_unlock; 1009 1020 1010 - if (flags & IOMAP_REPORT) { 1021 + if ((flags & IOMAP_REPORT) || 1022 + (xfs_is_reflink_inode(ip) && 1023 + (flags & IOMAP_WRITE) && (flags & IOMAP_DIRECT))) { 1011 1024 /* Trim the mapping to the nearest shared extent boundary. */ 1012 1025 error = xfs_reflink_trim_around_shared(ip, &imap, &shared, 1013 1026 &trimmed); 1014 1027 if (error) 1015 1028 goto out_unlock; 1029 + 1030 + /* 1031 + * We're here because we're trying to do a directio write to a 1032 + * region that isn't aligned to a filesystem block. If the 1033 + * extent is shared, fall back to buffered mode to handle the 1034 + * RMW. 1035 + */ 1036 + if (!(flags & IOMAP_REPORT) && shared) { 1037 + trace_xfs_reflink_bounce_dio_write(ip, &imap); 1038 + error = -EREMCHG; 1039 + goto out_unlock; 1040 + } 1016 1041 } 1017 1042 1018 1043 if ((flags & (IOMAP_WRITE | IOMAP_ZERO)) && xfs_is_reflink_inode(ip)) { ··· 1071 1048 if (error) 1072 1049 return error; 1073 1050 1051 + alloc_done: 1074 1052 iomap->flags = IOMAP_F_NEW; 1075 1053 trace_xfs_iomap_alloc(ip, offset, length, 0, &imap); 1076 1054 } else {

+6 -8

fs/xfs/xfs_iops.c

··· 983 983 struct xfs_inode *ip = XFS_I(d_inode(dentry)); 984 984 uint iolock = XFS_IOLOCK_EXCL; 985 985 986 - xfs_ilock(ip, iolock); 987 - error = xfs_break_layouts(d_inode(dentry), &iolock, true); 988 - if (!error) { 989 - xfs_ilock(ip, XFS_MMAPLOCK_EXCL); 990 - iolock |= XFS_MMAPLOCK_EXCL; 986 + error = xfs_break_layouts(d_inode(dentry), &iolock); 987 + if (error) 988 + return error; 991 989 992 - error = xfs_vn_setattr_size(dentry, iattr); 993 - } 994 - xfs_iunlock(ip, iolock); 990 + xfs_ilock(ip, XFS_MMAPLOCK_EXCL); 991 + error = xfs_vn_setattr_size(dentry, iattr); 992 + xfs_iunlock(ip, XFS_MMAPLOCK_EXCL); 995 993 } else { 996 994 error = xfs_vn_setattr_nonsize(dentry, iattr); 997 995 }

+1

fs/xfs/xfs_linux.h

··· 78 78 #include <linux/freezer.h> 79 79 #include <linux/list_sort.h> 80 80 #include <linux/ratelimit.h> 81 + #include <linux/rhashtable.h> 81 82 82 83 #include <asm/page.h> 83 84 #include <asm/div64.h>

+17 -24

fs/xfs/xfs_log.c

··· 1668 1668 __uint32_t crc; 1669 1669 1670 1670 /* first generate the crc for the record header ... */ 1671 - crc = xfs_start_cksum((char *)rhead, 1671 + crc = xfs_start_cksum_update((char *)rhead, 1672 1672 sizeof(struct xlog_rec_header), 1673 1673 offsetof(struct xlog_rec_header, h_crc)); 1674 1674 ··· 1862 1862 1863 1863 bp->b_io_length = BTOBB(count); 1864 1864 bp->b_fspriv = iclog; 1865 - bp->b_flags &= ~(XBF_FUA | XBF_FLUSH); 1866 - bp->b_flags |= (XBF_ASYNC | XBF_SYNCIO | XBF_WRITE); 1865 + bp->b_flags &= ~XBF_FLUSH; 1866 + bp->b_flags |= (XBF_ASYNC | XBF_SYNCIO | XBF_WRITE | XBF_FUA); 1867 1867 1868 - if (log->l_mp->m_flags & XFS_MOUNT_BARRIER) { 1869 - bp->b_flags |= XBF_FUA; 1870 - 1871 - /* 1872 - * Flush the data device before flushing the log to make 1873 - * sure all meta data written back from the AIL actually made 1874 - * it to disk before stamping the new log tail LSN into the 1875 - * log buffer. For an external log we need to issue the 1876 - * flush explicitly, and unfortunately synchronously here; 1877 - * for an internal log we can simply use the block layer 1878 - * state machine for preflushes. 1879 - */ 1880 - if (log->l_mp->m_logdev_targp != log->l_mp->m_ddev_targp) 1881 - xfs_blkdev_issue_flush(log->l_mp->m_ddev_targp); 1882 - else 1883 - bp->b_flags |= XBF_FLUSH; 1884 - } 1868 + /* 1869 + * Flush the data device before flushing the log to make sure all meta 1870 + * data written back from the AIL actually made it to disk before 1871 + * stamping the new log tail LSN into the log buffer. For an external 1872 + * log we need to issue the flush explicitly, and unfortunately 1873 + * synchronously here; for an internal log we can simply use the block 1874 + * layer state machine for preflushes. 
1875 + */ 1876 + if (log->l_mp->m_logdev_targp != log->l_mp->m_ddev_targp) 1877 + xfs_blkdev_issue_flush(log->l_mp->m_ddev_targp); 1878 + else 1879 + bp->b_flags |= XBF_FLUSH; 1885 1880 1886 1881 ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1); 1887 1882 ASSERT(XFS_BUF_ADDR(bp) + BTOBB(count) <= log->l_logBBsize); ··· 1901 1906 xfs_buf_associate_memory(bp, 1902 1907 (char *)&iclog->ic_header + count, split); 1903 1908 bp->b_fspriv = iclog; 1904 - bp->b_flags &= ~(XBF_FUA | XBF_FLUSH); 1905 - bp->b_flags |= (XBF_ASYNC | XBF_SYNCIO | XBF_WRITE); 1906 - if (log->l_mp->m_flags & XFS_MOUNT_BARRIER) 1907 - bp->b_flags |= XBF_FUA; 1909 + bp->b_flags &= ~XBF_FLUSH; 1910 + bp->b_flags |= (XBF_ASYNC | XBF_SYNCIO | XBF_WRITE | XBF_FUA); 1908 1911 1909 1912 ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1); 1910 1913 ASSERT(XFS_BUF_ADDR(bp) + BTOBB(count) <= log->l_logBBsize);

+9 -7

fs/xfs/xfs_log_recover.c

··· 2025 2025 struct xlog *log, 2026 2026 xfs_daddr_t blkno, 2027 2027 uint len, 2028 - ushort flags) 2028 + unsigned short flags) 2029 2029 { 2030 2030 struct list_head *bucket; 2031 2031 struct xfs_buf_cancel *bcp; ··· 2065 2065 struct xlog *log, 2066 2066 xfs_daddr_t blkno, 2067 2067 uint len, 2068 - ushort flags) 2068 + unsigned short flags) 2069 2069 { 2070 2070 struct xfs_buf_cancel *bcp; 2071 2071 ··· 5113 5113 struct list_head *buffer_list) 5114 5114 { 5115 5115 int error; 5116 + __le32 old_crc = rhead->h_crc; 5116 5117 __le32 crc; 5118 + 5117 5119 5118 5120 crc = xlog_cksum(log, rhead, dp, be32_to_cpu(rhead->h_len)); 5119 5121 5120 5122 /* 5121 5123 * Nothing else to do if this is a CRC verification pass. Just return 5122 5124 * if this a record with a non-zero crc. Unfortunately, mkfs always 5123 - * sets h_crc to 0 so we must consider this valid even on v5 supers. 5125 + * sets old_crc to 0 so we must consider this valid even on v5 supers. 5124 5126 * Otherwise, return EFSBADCRC on failure so the callers up the stack 5125 5127 * know precisely what failed. 5126 5128 */ 5127 5129 if (pass == XLOG_RECOVER_CRCPASS) { 5128 - if (rhead->h_crc && crc != rhead->h_crc) 5130 + if (old_crc && crc != old_crc) 5129 5131 return -EFSBADCRC; 5130 5132 return 0; 5131 5133 } ··· 5138 5136 * zero CRC check prevents warnings from being emitted when upgrading 5139 5137 * the kernel from one that does not add CRCs by default. 5140 5138 */ 5141 - if (crc != rhead->h_crc) { 5142 - if (rhead->h_crc || xfs_sb_version_hascrc(&log->l_mp->m_sb)) { 5139 + if (crc != old_crc) { 5140 + if (old_crc || xfs_sb_version_hascrc(&log->l_mp->m_sb)) { 5143 5141 xfs_alert(log->l_mp, 5144 5142 "log record CRC mismatch: found 0x%x, expected 0x%x.", 5145 - le32_to_cpu(rhead->h_crc), 5143 + le32_to_cpu(old_crc), 5146 5144 le32_to_cpu(crc)); 5147 5145 xfs_hex_dump(dp, 32); 5148 5146 }

+5 -2

fs/xfs/xfs_mount.c

··· 157 157 spin_unlock(&mp->m_perag_lock); 158 158 ASSERT(pag); 159 159 ASSERT(atomic_read(&pag->pag_ref) == 0); 160 + xfs_buf_hash_destroy(pag); 160 161 call_rcu(&pag->rcu_head, __xfs_free_perag); 161 162 } 162 163 } ··· 213 212 spin_lock_init(&pag->pag_ici_lock); 214 213 mutex_init(&pag->pag_ici_reclaim_lock); 215 214 INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC); 216 - spin_lock_init(&pag->pag_buf_lock); 217 - pag->pag_buf_tree = RB_ROOT; 215 + if (xfs_buf_hash_init(pag)) 216 + goto out_unwind; 218 217 219 218 if (radix_tree_preload(GFP_NOFS)) 220 219 goto out_unwind; ··· 240 239 return 0; 241 240 242 241 out_unwind: 242 + xfs_buf_hash_destroy(pag); 243 243 kmem_free(pag); 244 244 for (; index > first_initialised; index--) { 245 245 pag = radix_tree_delete(&mp->m_perag_tree, index); 246 + xfs_buf_hash_destroy(pag); 246 247 kmem_free(pag); 247 248 } 248 249 return error;

+5 -2

fs/xfs/xfs_mount.h

··· 393 393 unsigned long pag_ici_reclaim_cursor; /* reclaim restart point */ 394 394 395 395 /* buffer cache index */ 396 - spinlock_t pag_buf_lock; /* lock for pag_buf_tree */ 397 - struct rb_root pag_buf_tree; /* ordered tree of active buffers */ 396 + spinlock_t pag_buf_lock; /* lock for pag_buf_hash */ 397 + struct rhashtable pag_buf_hash; 398 398 399 399 /* for rcu-safe freeing */ 400 400 struct rcu_head rcu_head; ··· 423 423 return NULL; 424 424 } 425 425 } 426 + 427 + int xfs_buf_hash_init(xfs_perag_t *pag); 428 + void xfs_buf_hash_destroy(xfs_perag_t *pag); 426 429 427 430 extern void xfs_uuid_table_free(void); 428 431 extern int xfs_log_sbcount(xfs_mount_t *);

+1 -6

fs/xfs/xfs_pnfs.c

··· 32 32 int 33 33 xfs_break_layouts( 34 34 struct inode *inode, 35 - uint *iolock, 36 - bool with_imutex) 35 + uint *iolock) 37 36 { 38 37 struct xfs_inode *ip = XFS_I(inode); 39 38 int error; ··· 41 42 42 43 while ((error = break_layout(inode, false) == -EWOULDBLOCK)) { 43 44 xfs_iunlock(ip, *iolock); 44 - if (with_imutex && (*iolock & XFS_IOLOCK_EXCL)) 45 - inode_unlock(inode); 46 45 error = break_layout(inode, true); 47 46 *iolock = XFS_IOLOCK_EXCL; 48 - if (with_imutex) 49 - inode_lock(inode); 50 47 xfs_ilock(ip, *iolock); 51 48 } 52 49

+2 -2

fs/xfs/xfs_pnfs.h

··· 8 8 int xfs_fs_commit_blocks(struct inode *inode, struct iomap *maps, int nr_maps, 9 9 struct iattr *iattr); 10 10 11 - int xfs_break_layouts(struct inode *inode, uint *iolock, bool with_imutex); 11 + int xfs_break_layouts(struct inode *inode, uint *iolock); 12 12 #else 13 13 static inline int 14 - xfs_break_layouts(struct inode *inode, uint *iolock, bool with_imutex) 14 + xfs_break_layouts(struct inode *inode, uint *iolock) 15 15 { 16 16 return 0; 17 17 }

+1 -1

fs/xfs/xfs_qm.c

··· 1135 1135 return error; 1136 1136 } 1137 1137 rtblks = 0; 1138 - nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 1138 + nextents = xfs_iext_count(ifp); 1139 1139 for (idx = 0; idx < nextents; idx++) 1140 1140 rtblks += xfs_bmbt_get_blockcount(xfs_iext_get_ext(ifp, idx)); 1141 1141 *O_rtblks = (xfs_qcnt_t)rtblks;

+54 -137

fs/xfs/xfs_reflink.c

··· 243 243 struct xfs_bmbt_irec *imap, 244 244 bool *shared) 245 245 { 246 - struct xfs_bmbt_irec got, prev; 247 - xfs_fileoff_t end_fsb, orig_end_fsb; 248 - int eof = 0, error = 0; 249 - bool trimmed; 246 + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); 247 + struct xfs_bmbt_irec got; 248 + int error = 0; 249 + bool eof = false, trimmed; 250 250 xfs_extnum_t idx; 251 - xfs_extlen_t align; 252 251 253 252 /* 254 253 * Search the COW fork extent list first. This serves two purposes: ··· 257 258 * extent list is generally faster than going out to the shared extent 258 259 * tree. 259 260 */ 260 - xfs_bmap_search_extents(ip, imap->br_startoff, XFS_COW_FORK, &eof, &idx, 261 - &got, &prev); 261 + 262 + if (!xfs_iext_lookup_extent(ip, ifp, imap->br_startoff, &idx, &got)) 263 + eof = true; 262 264 if (!eof && got.br_startoff <= imap->br_startoff) { 263 265 trace_xfs_reflink_cow_found(ip, imap); 264 266 xfs_trim_extent(imap, got.br_startoff, got.br_blockcount); ··· 285 285 if (error) 286 286 return error; 287 287 288 - end_fsb = orig_end_fsb = imap->br_startoff + imap->br_blockcount; 289 - 290 - align = xfs_eof_alignment(ip, xfs_get_cowextsz_hint(ip)); 291 - if (align) 292 - end_fsb = roundup_64(end_fsb, align); 293 - 294 - retry: 295 288 error = xfs_bmapi_reserve_delalloc(ip, XFS_COW_FORK, imap->br_startoff, 296 - end_fsb - imap->br_startoff, &got, &prev, &idx, eof); 297 - switch (error) { 298 - case 0: 299 - break; 300 - case -ENOSPC: 301 - case -EDQUOT: 302 - /* retry without any preallocation */ 289 + imap->br_blockcount, 0, &got, &idx, eof); 290 + if (error == -ENOSPC || error == -EDQUOT) 303 291 trace_xfs_reflink_cow_enospc(ip, imap); 304 - if (end_fsb != orig_end_fsb) { 305 - end_fsb = orig_end_fsb; 306 - goto retry; 307 - } 308 - /*FALLTHRU*/ 309 - default: 292 + if (error) 310 293 return error; 311 - } 312 - 313 - if (end_fsb != orig_end_fsb) 314 - xfs_inode_set_cowblocks_tag(ip); 315 294 316 295 trace_xfs_reflink_cow_alloc(ip, &got); 317 296 return 0; 
··· 397 418 } 398 419 399 420 /* 400 - * Find the CoW reservation (and whether or not it needs block allocation) 401 - * for a given byte offset of a file. 421 + * Find the CoW reservation for a given byte offset of a file. 402 422 */ 403 423 bool 404 424 xfs_reflink_find_cow_mapping( 405 425 struct xfs_inode *ip, 406 426 xfs_off_t offset, 407 - struct xfs_bmbt_irec *imap, 408 - bool *need_alloc) 427 + struct xfs_bmbt_irec *imap) 409 428 { 410 - struct xfs_bmbt_irec irec; 411 - struct xfs_ifork *ifp; 412 - struct xfs_bmbt_rec_host *gotp; 413 - xfs_fileoff_t bno; 429 + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); 430 + xfs_fileoff_t offset_fsb; 431 + struct xfs_bmbt_irec got; 414 432 xfs_extnum_t idx; 415 433 416 434 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL | XFS_ILOCK_SHARED)); 417 435 ASSERT(xfs_is_reflink_inode(ip)); 418 436 419 - /* Find the extent in the CoW fork. */ 420 - ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); 421 - bno = XFS_B_TO_FSBT(ip->i_mount, offset); 422 - gotp = xfs_iext_bno_to_ext(ifp, bno, &idx); 423 - if (!gotp) 437 + offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset); 438 + if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &idx, &got)) 424 439 return false; 425 - 426 - xfs_bmbt_get_all(gotp, &irec); 427 - if (bno >= irec.br_startoff + irec.br_blockcount || 428 - bno < irec.br_startoff) 440 + if (got.br_startoff > offset_fsb) 429 441 return false; 430 442 431 443 trace_xfs_reflink_find_cow_mapping(ip, offset, 1, XFS_IO_OVERWRITE, 432 - &irec); 433 - 434 - /* If it's still delalloc, we must allocate later. */ 435 - *imap = irec; 436 - *need_alloc = !!(isnullstartblock(irec.br_startblock)); 437 - 444 + &got); 445 + *imap = got; 438 446 return true; 439 447 } 440 448 441 449 /* 442 450 * Trim an extent to end at the next CoW reservation past offset_fsb. 
443 451 */ 444 - int 452 + void 445 453 xfs_reflink_trim_irec_to_next_cow( 446 454 struct xfs_inode *ip, 447 455 xfs_fileoff_t offset_fsb, 448 456 struct xfs_bmbt_irec *imap) 449 457 { 450 - struct xfs_bmbt_irec irec; 451 - struct xfs_ifork *ifp; 452 - struct xfs_bmbt_rec_host *gotp; 458 + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); 459 + struct xfs_bmbt_irec got; 453 460 xfs_extnum_t idx; 454 461 455 462 if (!xfs_is_reflink_inode(ip)) 456 - return 0; 463 + return; 457 464 458 465 /* Find the extent in the CoW fork. */ 459 - ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); 460 - gotp = xfs_iext_bno_to_ext(ifp, offset_fsb, &idx); 461 - if (!gotp) 462 - return 0; 463 - xfs_bmbt_get_all(gotp, &irec); 466 + if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &idx, &got)) 467 + return; 464 468 465 469 /* This is the extent before; try sliding up one. */ 466 - if (irec.br_startoff < offset_fsb) { 467 - idx++; 468 - if (idx >= ifp->if_bytes / sizeof(xfs_bmbt_rec_t)) 469 - return 0; 470 - gotp = xfs_iext_get_ext(ifp, idx); 471 - xfs_bmbt_get_all(gotp, &irec); 470 + if (got.br_startoff < offset_fsb) { 471 + if (!xfs_iext_get_extent(ifp, idx + 1, &got)) 472 + return; 472 473 } 473 474 474 - if (irec.br_startoff >= imap->br_startoff + imap->br_blockcount) 475 - return 0; 475 + if (got.br_startoff >= imap->br_startoff + imap->br_blockcount) 476 + return; 476 477 477 - imap->br_blockcount = irec.br_startoff - imap->br_startoff; 478 + imap->br_blockcount = got.br_startoff - imap->br_startoff; 478 479 trace_xfs_reflink_trim_irec(ip, imap); 479 - 480 - return 0; 481 480 } 482 481 483 482 /* ··· 469 512 xfs_fileoff_t end_fsb) 470 513 { 471 514 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); 472 - struct xfs_bmbt_irec got, prev, del; 515 + struct xfs_bmbt_irec got, del; 473 516 xfs_extnum_t idx; 474 517 xfs_fsblock_t firstfsb; 475 518 struct xfs_defer_ops dfops; 476 - int error = 0, eof = 0; 519 + int error = 0; 477 520 478 521 if (!xfs_is_reflink_inode(ip)) 479 522 return 
0; 480 - 481 - xfs_bmap_search_extents(ip, offset_fsb, XFS_COW_FORK, &eof, &idx, 482 - &got, &prev); 483 - if (eof) 523 + if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &idx, &got)) 484 524 return 0; 485 525 486 526 while (got.br_startoff < end_fsb) { ··· 520 566 xfs_bmap_del_extent_cow(ip, &idx, &got, &del); 521 567 } 522 568 523 - if (++idx >= ifp->if_bytes / sizeof(struct xfs_bmbt_rec)) 569 + if (!xfs_iext_get_extent(ifp, ++idx, &got)) 524 570 break; 525 - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx), &got); 526 571 } 527 572 528 573 /* clear tag if cow fork is emptied */ ··· 591 638 xfs_off_t count) 592 639 { 593 640 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); 594 - struct xfs_bmbt_irec got, prev, del; 641 + struct xfs_bmbt_irec got, del; 595 642 struct xfs_trans *tp; 596 643 xfs_fileoff_t offset_fsb; 597 644 xfs_fileoff_t end_fsb; 598 645 xfs_fsblock_t firstfsb; 599 646 struct xfs_defer_ops dfops; 600 - int error, eof = 0; 647 + int error; 601 648 unsigned int resblks; 602 649 xfs_filblks_t rlen; 603 650 xfs_extnum_t idx; ··· 621 668 xfs_ilock(ip, XFS_ILOCK_EXCL); 622 669 xfs_trans_ijoin(tp, ip, 0); 623 670 624 - xfs_bmap_search_extents(ip, end_fsb - 1, XFS_COW_FORK, &eof, &idx, 625 - &got, &prev); 626 - 627 671 /* If there is a hole at end_fsb - 1 go to the previous extent */ 628 - if (eof || got.br_startoff > end_fsb) { 672 + if (!xfs_iext_lookup_extent(ip, ifp, end_fsb - 1, &idx, &got) || 673 + got.br_startoff > end_fsb) { 629 674 ASSERT(idx > 0); 630 - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, --idx), &got); 675 + xfs_iext_get_extent(ifp, --idx, &got); 631 676 } 632 677 633 678 /* Walk backwards until we're out of the I/O range... 
*/ ··· 673 722 error = xfs_defer_finish(&tp, &dfops, ip); 674 723 if (error) 675 724 goto out_defer; 676 - 677 725 next_extent: 678 - if (idx < 0) 726 + if (!xfs_iext_get_extent(ifp, idx, &got)) 679 727 break; 680 - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx), &got); 681 728 } 682 729 683 730 error = xfs_trans_commit(tp); ··· 1251 1302 return -EIO; 1252 1303 1253 1304 /* Lock both files against IO */ 1254 - if (same_inode) { 1255 - xfs_ilock(src, XFS_IOLOCK_EXCL); 1305 + lock_two_nondirectories(inode_in, inode_out); 1306 + if (same_inode) 1256 1307 xfs_ilock(src, XFS_MMAPLOCK_EXCL); 1257 - } else { 1258 - xfs_lock_two_inodes(src, dest, XFS_IOLOCK_EXCL); 1308 + else 1259 1309 xfs_lock_two_inodes(src, dest, XFS_MMAPLOCK_EXCL); 1260 - } 1261 1310 1262 1311 /* Don't touch certain kinds of inodes */ 1263 1312 ret = -EPERM; ··· 1292 1345 goto out_unlock; 1293 1346 } 1294 1347 1295 - if (len == 0) 1348 + /* Zero length dedupe exits immediately; reflink goes to EOF. */ 1349 + if (len == 0) { 1350 + if (is_dedupe) { 1351 + ret = 0; 1352 + goto out_unlock; 1353 + } 1296 1354 len = isize - pos_in; 1355 + } 1297 1356 1298 1357 /* Ensure offsets don't wrap and the input is inside i_size */ 1299 1358 if (pos_in + len < pos_in || pos_out + len < pos_out || ··· 1400 1447 1401 1448 out_unlock: 1402 1449 xfs_iunlock(src, XFS_MMAPLOCK_EXCL); 1403 - xfs_iunlock(src, XFS_IOLOCK_EXCL); 1404 - if (src->i_ino != dest->i_ino) { 1450 + if (!same_inode) 1405 1451 xfs_iunlock(dest, XFS_MMAPLOCK_EXCL); 1406 - xfs_iunlock(dest, XFS_IOLOCK_EXCL); 1407 - } 1452 + unlock_two_nondirectories(inode_in, inode_out); 1408 1453 if (ret) 1409 1454 trace_xfs_reflink_remap_range_error(dest, ret, _RET_IP_); 1410 1455 return ret; ··· 1647 1696 out: 1648 1697 trace_xfs_reflink_unshare_error(ip, error, _RET_IP_); 1649 1698 return error; 1650 - } 1651 - 1652 - /* 1653 - * Does this inode have any real CoW reservations? 
1654 - */ 1655 - bool 1656 - xfs_reflink_has_real_cow_blocks( 1657 - struct xfs_inode *ip) 1658 - { 1659 - struct xfs_bmbt_irec irec; 1660 - struct xfs_ifork *ifp; 1661 - struct xfs_bmbt_rec_host *gotp; 1662 - xfs_extnum_t idx; 1663 - 1664 - if (!xfs_is_reflink_inode(ip)) 1665 - return false; 1666 - 1667 - /* Go find the old extent in the CoW fork. */ 1668 - ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); 1669 - gotp = xfs_iext_bno_to_ext(ifp, 0, &idx); 1670 - while (gotp) { 1671 - xfs_bmbt_get_all(gotp, &irec); 1672 - 1673 - if (!isnullstartblock(irec.br_startblock)) 1674 - return true; 1675 - 1676 - /* Roll on... */ 1677 - idx++; 1678 - if (idx >= ifp->if_bytes / sizeof(xfs_bmbt_rec_t)) 1679 - break; 1680 - gotp = xfs_iext_get_ext(ifp, idx); 1681 - } 1682 - 1683 - return false; 1684 1699 }

+2 -4

fs/xfs/xfs_reflink.h

··· 31 31 extern int xfs_reflink_allocate_cow_range(struct xfs_inode *ip, 32 32 xfs_off_t offset, xfs_off_t count); 33 33 extern bool xfs_reflink_find_cow_mapping(struct xfs_inode *ip, xfs_off_t offset, 34 - struct xfs_bmbt_irec *imap, bool *need_alloc); 35 - extern int xfs_reflink_trim_irec_to_next_cow(struct xfs_inode *ip, 34 + struct xfs_bmbt_irec *imap); 35 + extern void xfs_reflink_trim_irec_to_next_cow(struct xfs_inode *ip, 36 36 xfs_fileoff_t offset_fsb, struct xfs_bmbt_irec *imap); 37 37 38 38 extern int xfs_reflink_cancel_cow_blocks(struct xfs_inode *ip, ··· 49 49 struct xfs_trans **tpp); 50 50 extern int xfs_reflink_unshare(struct xfs_inode *ip, xfs_off_t offset, 51 51 xfs_off_t len); 52 - 53 - extern bool xfs_reflink_has_real_cow_blocks(struct xfs_inode *ip); 54 52 55 53 #endif /* __XFS_REFLINK_H */

+5 -5

fs/xfs/xfs_stats.c

··· 80 80 } 81 81 /* extra precision counters */ 82 82 for_each_possible_cpu(i) { 83 - xs_xstrat_bytes += per_cpu_ptr(stats, i)->xs_xstrat_bytes; 84 - xs_write_bytes += per_cpu_ptr(stats, i)->xs_write_bytes; 85 - xs_read_bytes += per_cpu_ptr(stats, i)->xs_read_bytes; 83 + xs_xstrat_bytes += per_cpu_ptr(stats, i)->s.xs_xstrat_bytes; 84 + xs_write_bytes += per_cpu_ptr(stats, i)->s.xs_write_bytes; 85 + xs_read_bytes += per_cpu_ptr(stats, i)->s.xs_read_bytes; 86 86 } 87 87 88 88 len += snprintf(buf + len, PATH_MAX-len, "xpc %Lu %Lu %Lu\n", ··· 106 106 for_each_possible_cpu(c) { 107 107 preempt_disable(); 108 108 /* save vn_active, it's a universal truth! */ 109 - vn_active = per_cpu_ptr(stats, c)->vn_active; 109 + vn_active = per_cpu_ptr(stats, c)->s.vn_active; 110 110 memset(per_cpu_ptr(stats, c), 0, sizeof(*stats)); 111 - per_cpu_ptr(stats, c)->vn_active = vn_active; 111 + per_cpu_ptr(stats, c)->s.vn_active = vn_active; 112 112 preempt_enable(); 113 113 } 114 114 }

+81 -119

fs/xfs/xfs_stats.h

··· 22 22 #include <linux/percpu.h> 23 23 24 24 /* 25 + * The btree stats arrays have fixed offsets for the different stats. We 26 + * store the base index in the btree cursor via XFS_STATS_CALC_INDEX() and 27 + * that allows us to use fixed offsets into the stats array for each btree 28 + * stat. These index offsets are defined in the order they will be emitted 29 + * in the stats files, so it is possible to add new btree stat types by 30 + * appending to the enum list below. 31 + */ 32 + enum { 33 + __XBTS_lookup = 0, 34 + __XBTS_compare = 1, 35 + __XBTS_insrec = 2, 36 + __XBTS_delrec = 3, 37 + __XBTS_newroot = 4, 38 + __XBTS_killroot = 5, 39 + __XBTS_increment = 6, 40 + __XBTS_decrement = 7, 41 + __XBTS_lshift = 8, 42 + __XBTS_rshift = 9, 43 + __XBTS_split = 10, 44 + __XBTS_join = 11, 45 + __XBTS_alloc = 12, 46 + __XBTS_free = 13, 47 + __XBTS_moves = 14, 48 + 49 + __XBTS_MAX = 15, 50 + }; 51 + 52 + /* 25 53 * XFS global statistics 26 54 */ 27 - struct xfsstats { 55 + struct __xfsstats { 28 56 # define XFSSTAT_END_EXTENT_ALLOC 4 29 57 __uint32_t xs_allocx; 30 58 __uint32_t xs_allocb; ··· 145 117 __uint32_t xb_page_found; 146 118 __uint32_t xb_get_read; 147 119 /* Version 2 btree counters */ 148 - #define XFSSTAT_END_ABTB_V2 (XFSSTAT_END_BUF+15) 149 - __uint32_t xs_abtb_2_lookup; 150 - __uint32_t xs_abtb_2_compare; 151 - __uint32_t xs_abtb_2_insrec; 152 - __uint32_t xs_abtb_2_delrec; 153 - __uint32_t xs_abtb_2_newroot; 154 - __uint32_t xs_abtb_2_killroot; 155 - __uint32_t xs_abtb_2_increment; 156 - __uint32_t xs_abtb_2_decrement; 157 - __uint32_t xs_abtb_2_lshift; 158 - __uint32_t xs_abtb_2_rshift; 159 - __uint32_t xs_abtb_2_split; 160 - __uint32_t xs_abtb_2_join; 161 - __uint32_t xs_abtb_2_alloc; 162 - __uint32_t xs_abtb_2_free; 163 - __uint32_t xs_abtb_2_moves; 164 - #define XFSSTAT_END_ABTC_V2 (XFSSTAT_END_ABTB_V2+15) 165 - __uint32_t xs_abtc_2_lookup; 166 - __uint32_t xs_abtc_2_compare; 167 - __uint32_t xs_abtc_2_insrec; 168 - __uint32_t xs_abtc_2_delrec; 169 
- __uint32_t xs_abtc_2_newroot; 170 - __uint32_t xs_abtc_2_killroot; 171 - __uint32_t xs_abtc_2_increment; 172 - __uint32_t xs_abtc_2_decrement; 173 - __uint32_t xs_abtc_2_lshift; 174 - __uint32_t xs_abtc_2_rshift; 175 - __uint32_t xs_abtc_2_split; 176 - __uint32_t xs_abtc_2_join; 177 - __uint32_t xs_abtc_2_alloc; 178 - __uint32_t xs_abtc_2_free; 179 - __uint32_t xs_abtc_2_moves; 180 - #define XFSSTAT_END_BMBT_V2 (XFSSTAT_END_ABTC_V2+15) 181 - __uint32_t xs_bmbt_2_lookup; 182 - __uint32_t xs_bmbt_2_compare; 183 - __uint32_t xs_bmbt_2_insrec; 184 - __uint32_t xs_bmbt_2_delrec; 185 - __uint32_t xs_bmbt_2_newroot; 186 - __uint32_t xs_bmbt_2_killroot; 187 - __uint32_t xs_bmbt_2_increment; 188 - __uint32_t xs_bmbt_2_decrement; 189 - __uint32_t xs_bmbt_2_lshift; 190 - __uint32_t xs_bmbt_2_rshift; 191 - __uint32_t xs_bmbt_2_split; 192 - __uint32_t xs_bmbt_2_join; 193 - __uint32_t xs_bmbt_2_alloc; 194 - __uint32_t xs_bmbt_2_free; 195 - __uint32_t xs_bmbt_2_moves; 196 - #define XFSSTAT_END_IBT_V2 (XFSSTAT_END_BMBT_V2+15) 197 - __uint32_t xs_ibt_2_lookup; 198 - __uint32_t xs_ibt_2_compare; 199 - __uint32_t xs_ibt_2_insrec; 200 - __uint32_t xs_ibt_2_delrec; 201 - __uint32_t xs_ibt_2_newroot; 202 - __uint32_t xs_ibt_2_killroot; 203 - __uint32_t xs_ibt_2_increment; 204 - __uint32_t xs_ibt_2_decrement; 205 - __uint32_t xs_ibt_2_lshift; 206 - __uint32_t xs_ibt_2_rshift; 207 - __uint32_t xs_ibt_2_split; 208 - __uint32_t xs_ibt_2_join; 209 - __uint32_t xs_ibt_2_alloc; 210 - __uint32_t xs_ibt_2_free; 211 - __uint32_t xs_ibt_2_moves; 212 - #define XFSSTAT_END_FIBT_V2 (XFSSTAT_END_IBT_V2+15) 213 - __uint32_t xs_fibt_2_lookup; 214 - __uint32_t xs_fibt_2_compare; 215 - __uint32_t xs_fibt_2_insrec; 216 - __uint32_t xs_fibt_2_delrec; 217 - __uint32_t xs_fibt_2_newroot; 218 - __uint32_t xs_fibt_2_killroot; 219 - __uint32_t xs_fibt_2_increment; 220 - __uint32_t xs_fibt_2_decrement; 221 - __uint32_t xs_fibt_2_lshift; 222 - __uint32_t xs_fibt_2_rshift; 223 - __uint32_t xs_fibt_2_split; 224 - 
__uint32_t xs_fibt_2_join; 225 - __uint32_t xs_fibt_2_alloc; 226 - __uint32_t xs_fibt_2_free; 227 - __uint32_t xs_fibt_2_moves; 228 - #define XFSSTAT_END_RMAP_V2 (XFSSTAT_END_FIBT_V2+15) 229 - __uint32_t xs_rmap_2_lookup; 230 - __uint32_t xs_rmap_2_compare; 231 - __uint32_t xs_rmap_2_insrec; 232 - __uint32_t xs_rmap_2_delrec; 233 - __uint32_t xs_rmap_2_newroot; 234 - __uint32_t xs_rmap_2_killroot; 235 - __uint32_t xs_rmap_2_increment; 236 - __uint32_t xs_rmap_2_decrement; 237 - __uint32_t xs_rmap_2_lshift; 238 - __uint32_t xs_rmap_2_rshift; 239 - __uint32_t xs_rmap_2_split; 240 - __uint32_t xs_rmap_2_join; 241 - __uint32_t xs_rmap_2_alloc; 242 - __uint32_t xs_rmap_2_free; 243 - __uint32_t xs_rmap_2_moves; 244 - #define XFSSTAT_END_REFCOUNT (XFSSTAT_END_RMAP_V2 + 15) 245 - __uint32_t xs_refcbt_2_lookup; 246 - __uint32_t xs_refcbt_2_compare; 247 - __uint32_t xs_refcbt_2_insrec; 248 - __uint32_t xs_refcbt_2_delrec; 249 - __uint32_t xs_refcbt_2_newroot; 250 - __uint32_t xs_refcbt_2_killroot; 251 - __uint32_t xs_refcbt_2_increment; 252 - __uint32_t xs_refcbt_2_decrement; 253 - __uint32_t xs_refcbt_2_lshift; 254 - __uint32_t xs_refcbt_2_rshift; 255 - __uint32_t xs_refcbt_2_split; 256 - __uint32_t xs_refcbt_2_join; 257 - __uint32_t xs_refcbt_2_alloc; 258 - __uint32_t xs_refcbt_2_free; 259 - __uint32_t xs_refcbt_2_moves; 120 + #define XFSSTAT_END_ABTB_V2 (XFSSTAT_END_BUF + __XBTS_MAX) 121 + __uint32_t xs_abtb_2[__XBTS_MAX]; 122 + #define XFSSTAT_END_ABTC_V2 (XFSSTAT_END_ABTB_V2 + __XBTS_MAX) 123 + __uint32_t xs_abtc_2[__XBTS_MAX]; 124 + #define XFSSTAT_END_BMBT_V2 (XFSSTAT_END_ABTC_V2 + __XBTS_MAX) 125 + __uint32_t xs_bmbt_2[__XBTS_MAX]; 126 + #define XFSSTAT_END_IBT_V2 (XFSSTAT_END_BMBT_V2 + __XBTS_MAX) 127 + __uint32_t xs_ibt_2[__XBTS_MAX]; 128 + #define XFSSTAT_END_FIBT_V2 (XFSSTAT_END_IBT_V2 + __XBTS_MAX) 129 + __uint32_t xs_fibt_2[__XBTS_MAX]; 130 + #define XFSSTAT_END_RMAP_V2 (XFSSTAT_END_FIBT_V2 + __XBTS_MAX) 131 + __uint32_t xs_rmap_2[__XBTS_MAX]; 132 + #define 
XFSSTAT_END_REFCOUNT (XFSSTAT_END_RMAP_V2 + __XBTS_MAX) 133 + __uint32_t xs_refcbt_2[__XBTS_MAX]; 260 134 #define XFSSTAT_END_XQMSTAT (XFSSTAT_END_REFCOUNT + 6) 261 135 __uint32_t xs_qm_dqreclaims; 262 136 __uint32_t xs_qm_dqreclaim_misses; ··· 175 245 __uint64_t xs_read_bytes; 176 246 }; 177 247 248 + struct xfsstats { 249 + union { 250 + struct __xfsstats s; 251 + uint32_t a[XFSSTAT_END_XQMSTAT]; 252 + }; 253 + }; 254 + 255 + /* 256 + * simple wrapper for getting the array index of s struct member offset 257 + */ 258 + #define XFS_STATS_CALC_INDEX(member) \ 259 + (offsetof(struct __xfsstats, member) / (int)sizeof(__uint32_t)) 260 + 261 + 178 262 int xfs_stats_format(struct xfsstats __percpu *stats, char *buf); 179 263 void xfs_stats_clearall(struct xfsstats __percpu *stats); 180 264 extern struct xstats xfsstats; 181 265 182 266 #define XFS_STATS_INC(mp, v) \ 183 267 do { \ 184 - per_cpu_ptr(xfsstats.xs_stats, current_cpu())->v++; \ 185 - per_cpu_ptr(mp->m_stats.xs_stats, current_cpu())->v++; \ 268 + per_cpu_ptr(xfsstats.xs_stats, current_cpu())->s.v++; \ 269 + per_cpu_ptr(mp->m_stats.xs_stats, current_cpu())->s.v++; \ 186 270 } while (0) 187 271 188 272 #define XFS_STATS_DEC(mp, v) \ 189 273 do { \ 190 - per_cpu_ptr(xfsstats.xs_stats, current_cpu())->v--; \ 191 - per_cpu_ptr(mp->m_stats.xs_stats, current_cpu())->v--; \ 274 + per_cpu_ptr(xfsstats.xs_stats, current_cpu())->s.v--; \ 275 + per_cpu_ptr(mp->m_stats.xs_stats, current_cpu())->s.v--; \ 192 276 } while (0) 193 277 194 278 #define XFS_STATS_ADD(mp, v, inc) \ 195 279 do { \ 196 - per_cpu_ptr(xfsstats.xs_stats, current_cpu())->v += (inc); \ 197 - per_cpu_ptr(mp->m_stats.xs_stats, current_cpu())->v += (inc); \ 280 + per_cpu_ptr(xfsstats.xs_stats, current_cpu())->s.v += (inc); \ 281 + per_cpu_ptr(mp->m_stats.xs_stats, current_cpu())->s.v += (inc); \ 282 + } while (0) 283 + 284 + #define XFS_STATS_INC_OFF(mp, off) \ 285 + do { \ 286 + per_cpu_ptr(xfsstats.xs_stats, current_cpu())->a[off]++; \ 287 + 
per_cpu_ptr(mp->m_stats.xs_stats, current_cpu())->a[off]++; \ 288 + } while (0) 289 + 290 + #define XFS_STATS_DEC_OFF(mp, off) \ 291 + do { \ 292 + per_cpu_ptr(xfsstats.xs_stats, current_cpu())->a[off]; \ 293 + per_cpu_ptr(mp->m_stats.xs_stats, current_cpu())->a[off]; \ 294 + } while (0) 295 + 296 + #define XFS_STATS_ADD_OFF(mp, off, inc) \ 297 + do { \ 298 + per_cpu_ptr(xfsstats.xs_stats, current_cpu())->a[off] += (inc); \ 299 + per_cpu_ptr(mp->m_stats.xs_stats, current_cpu())->a[off] += (inc); \ 198 300 } while (0) 199 301 200 302 #if defined(CONFIG_PROC_FS)

+17 -10

fs/xfs/xfs_super.c

··· 104 104 {Opt_sysvgroups,"sysvgroups"}, /* group-ID from current process */ 105 105 {Opt_allocsize, "allocsize=%s"},/* preferred allocation size */ 106 106 {Opt_norecovery,"norecovery"}, /* don't run XFS recovery */ 107 - {Opt_barrier, "barrier"}, /* use writer barriers for log write and 108 - * unwritten extent conversion */ 109 - {Opt_nobarrier, "nobarrier"}, /* .. disable */ 110 107 {Opt_inode64, "inode64"}, /* inodes can be allocated anywhere */ 111 108 {Opt_inode32, "inode32"}, /* inode allocation limited to 112 109 * XFS_MAXINUMBER_32 */ ··· 131 134 {Opt_nodiscard, "nodiscard"}, /* Do not discard unused blocks */ 132 135 133 136 {Opt_dax, "dax"}, /* Enable direct access to bdev pages */ 137 + 138 + /* Deprecated mount options scheduled for removal */ 139 + {Opt_barrier, "barrier"}, /* use writer barriers for log write and 140 + * unwritten extent conversion */ 141 + {Opt_nobarrier, "nobarrier"}, /* .. disable */ 142 + 134 143 {Opt_err, NULL}, 135 144 }; 136 145 ··· 304 301 case Opt_nouuid: 305 302 mp->m_flags |= XFS_MOUNT_NOUUID; 306 303 break; 307 - case Opt_barrier: 308 - mp->m_flags |= XFS_MOUNT_BARRIER; 309 - break; 310 - case Opt_nobarrier: 311 - mp->m_flags &= ~XFS_MOUNT_BARRIER; 312 - break; 313 304 case Opt_ikeep: 314 305 mp->m_flags |= XFS_MOUNT_IKEEP; 315 306 break; ··· 371 374 mp->m_flags |= XFS_MOUNT_DAX; 372 375 break; 373 376 #endif 377 + case Opt_barrier: 378 + xfs_warn(mp, "%s option is deprecated, ignoring.", p); 379 + mp->m_flags |= XFS_MOUNT_BARRIER; 380 + break; 381 + case Opt_nobarrier: 382 + xfs_warn(mp, "%s option is deprecated, ignoring.", p); 383 + mp->m_flags &= ~XFS_MOUNT_BARRIER; 384 + break; 374 385 default: 375 386 xfs_warn(mp, "unknown mount option [%s].", p); 376 387 return -EINVAL; ··· 948 943 949 944 trace_xfs_destroy_inode(ip); 950 945 951 - ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock)); 946 + ASSERT(!rwsem_is_locked(&inode->i_rwsem)); 952 947 XFS_STATS_INC(ip->i_mount, vn_rele); 953 948 XFS_STATS_INC(ip->i_mount, 
vn_remove); 954 949 ··· 1243 1238 token = match_token(p, tokens, args); 1244 1239 switch (token) { 1245 1240 case Opt_barrier: 1241 + xfs_warn(mp, "%s option is deprecated, ignoring.", p); 1246 1242 mp->m_flags |= XFS_MOUNT_BARRIER; 1247 1243 break; 1248 1244 case Opt_nobarrier: 1245 + xfs_warn(mp, "%s option is deprecated, ignoring.", p); 1249 1246 mp->m_flags &= ~XFS_MOUNT_BARRIER; 1250 1247 break; 1251 1248 case Opt_inode64:

+3 -4

fs/xfs/xfs_symlink.c

··· 238 238 if (error) 239 239 goto out_release_inode; 240 240 241 - xfs_ilock(dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL | 242 - XFS_IOLOCK_PARENT | XFS_ILOCK_PARENT); 241 + xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT); 243 242 unlock_dp_on_error = true; 244 243 245 244 /* ··· 286 287 * the transaction cancel unlocking dp so don't do it explicitly in the 287 288 * error path. 288 289 */ 289 - xfs_trans_ijoin(tp, dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); 290 + xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); 290 291 unlock_dp_on_error = false; 291 292 292 293 /* ··· 411 412 xfs_qm_dqrele(pdqp); 412 413 413 414 if (unlock_dp_on_error) 414 - xfs_iunlock(dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); 415 + xfs_iunlock(dp, XFS_ILOCK_EXCL); 415 416 return error; 416 417 } 417 418

-109

fs/xfs/xfs_trace.h

··· 355 355 DEFINE_BUF_EVENT(xfs_buf_iodone); 356 356 DEFINE_BUF_EVENT(xfs_buf_submit); 357 357 DEFINE_BUF_EVENT(xfs_buf_submit_wait); 358 - DEFINE_BUF_EVENT(xfs_buf_bawrite); 359 358 DEFINE_BUF_EVENT(xfs_buf_lock); 360 359 DEFINE_BUF_EVENT(xfs_buf_lock_done); 361 360 DEFINE_BUF_EVENT(xfs_buf_trylock_fail); ··· 366 367 DEFINE_BUF_EVENT(xfs_buf_delwri_queued); 367 368 DEFINE_BUF_EVENT(xfs_buf_delwri_split); 368 369 DEFINE_BUF_EVENT(xfs_buf_get_uncached); 369 - DEFINE_BUF_EVENT(xfs_bdstrat_shut); 370 370 DEFINE_BUF_EVENT(xfs_buf_item_relse); 371 371 DEFINE_BUF_EVENT(xfs_buf_item_iodone_async); 372 372 DEFINE_BUF_EVENT(xfs_buf_error_relse); 373 373 DEFINE_BUF_EVENT(xfs_buf_wait_buftarg); 374 - DEFINE_BUF_EVENT(xfs_trans_read_buf_io); 375 374 DEFINE_BUF_EVENT(xfs_trans_read_buf_shut); 376 375 377 376 /* not really buffer traces, but the buf provides useful information */ 378 377 DEFINE_BUF_EVENT(xfs_btree_corrupt); 379 - DEFINE_BUF_EVENT(xfs_da_btree_corrupt); 380 378 DEFINE_BUF_EVENT(xfs_reset_dqcounts); 381 - DEFINE_BUF_EVENT(xfs_inode_item_push); 382 379 383 380 /* pass flags explicitly */ 384 381 DECLARE_EVENT_CLASS(xfs_buf_flags_class, ··· 536 541 DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold); 537 542 DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold_release); 538 543 DEFINE_BUF_ITEM_EVENT(xfs_trans_binval); 539 - DEFINE_BUF_ITEM_EVENT(xfs_trans_buf_ordered); 540 544 541 545 DECLARE_EVENT_CLASS(xfs_filestream_class, 542 546 TP_PROTO(struct xfs_inode *ip, xfs_agnumber_t agno), ··· 674 680 DEFINE_INODE_EVENT(xfs_dir_fsync); 675 681 DEFINE_INODE_EVENT(xfs_file_fsync); 676 682 DEFINE_INODE_EVENT(xfs_destroy_inode); 677 - DEFINE_INODE_EVENT(xfs_evict_inode); 678 683 DEFINE_INODE_EVENT(xfs_update_time); 679 684 680 685 DEFINE_INODE_EVENT(xfs_dquot_dqalloc); ··· 791 798 DEFINE_EVENT(xfs_iref_class, name, \ 792 799 TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip), \ 793 800 TP_ARGS(ip, caller_ip)) 794 - DEFINE_IREF_EVENT(xfs_ihold); 795 801 DEFINE_IREF_EVENT(xfs_irele); 796 802 
DEFINE_IREF_EVENT(xfs_inode_pin); 797 803 DEFINE_IREF_EVENT(xfs_inode_unpin); ··· 931 939 DEFINE_DQUOT_EVENT(xfs_dqget_freeing); 932 940 DEFINE_DQUOT_EVENT(xfs_dqget_dup); 933 941 DEFINE_DQUOT_EVENT(xfs_dqput); 934 - DEFINE_DQUOT_EVENT(xfs_dqput_wait); 935 942 DEFINE_DQUOT_EVENT(xfs_dqput_free); 936 943 DEFINE_DQUOT_EVENT(xfs_dqrele); 937 944 DEFINE_DQUOT_EVENT(xfs_dqflush); ··· 1806 1815 DEFINE_ATTR_EVENT(xfs_attr_sf_create); 1807 1816 DEFINE_ATTR_EVENT(xfs_attr_sf_lookup); 1808 1817 DEFINE_ATTR_EVENT(xfs_attr_sf_remove); 1809 - DEFINE_ATTR_EVENT(xfs_attr_sf_removename); 1810 1818 DEFINE_ATTR_EVENT(xfs_attr_sf_to_leaf); 1811 1819 1812 1820 DEFINE_ATTR_EVENT(xfs_attr_leaf_add); ··· 1834 1844 1835 1845 DEFINE_ATTR_EVENT(xfs_attr_node_addname); 1836 1846 DEFINE_ATTR_EVENT(xfs_attr_node_get); 1837 - DEFINE_ATTR_EVENT(xfs_attr_node_lookup); 1838 1847 DEFINE_ATTR_EVENT(xfs_attr_node_replace); 1839 1848 DEFINE_ATTR_EVENT(xfs_attr_node_removename); 1840 1849 ··· 2429 2440 2430 2441 DEFINE_DEFER_ERROR_EVENT(xfs_defer_trans_roll_error); 2431 2442 DEFINE_DEFER_ERROR_EVENT(xfs_defer_finish_error); 2432 - DEFINE_DEFER_ERROR_EVENT(xfs_defer_op_finish_error); 2433 2443 2434 2444 DEFINE_DEFER_PENDING_EVENT(xfs_defer_intake_work); 2435 2445 DEFINE_DEFER_PENDING_EVENT(xfs_defer_intake_cancel); 2436 - DEFINE_DEFER_PENDING_EVENT(xfs_defer_pending_commit); 2437 2446 DEFINE_DEFER_PENDING_EVENT(xfs_defer_pending_cancel); 2438 2447 DEFINE_DEFER_PENDING_EVENT(xfs_defer_pending_finish); 2439 2448 DEFINE_DEFER_PENDING_EVENT(xfs_defer_pending_abort); ··· 3079 3092 struct xfs_inode *dest, xfs_off_t doffset), \ 3080 3093 TP_ARGS(src, soffset, len, dest, doffset)) 3081 3094 3082 - /* two-file vfs io tracepoint class */ 3083 - DECLARE_EVENT_CLASS(xfs_double_vfs_io_class, 3084 - TP_PROTO(struct inode *src, u64 soffset, u64 len, 3085 - struct inode *dest, u64 doffset), 3086 - TP_ARGS(src, soffset, len, dest, doffset), 3087 - TP_STRUCT__entry( 3088 - __field(dev_t, dev) 3089 - __field(unsigned 
long, src_ino) 3090 - __field(loff_t, src_isize) 3091 - __field(loff_t, src_offset) 3092 - __field(size_t, len) 3093 - __field(unsigned long, dest_ino) 3094 - __field(loff_t, dest_isize) 3095 - __field(loff_t, dest_offset) 3096 - ), 3097 - TP_fast_assign( 3098 - __entry->dev = src->i_sb->s_dev; 3099 - __entry->src_ino = src->i_ino; 3100 - __entry->src_isize = i_size_read(src); 3101 - __entry->src_offset = soffset; 3102 - __entry->len = len; 3103 - __entry->dest_ino = dest->i_ino; 3104 - __entry->dest_isize = i_size_read(dest); 3105 - __entry->dest_offset = doffset; 3106 - ), 3107 - TP_printk("dev %d:%d count %zd " 3108 - "ino 0x%lx isize 0x%llx offset 0x%llx -> " 3109 - "ino 0x%lx isize 0x%llx offset 0x%llx", 3110 - MAJOR(__entry->dev), MINOR(__entry->dev), 3111 - __entry->len, 3112 - __entry->src_ino, 3113 - __entry->src_isize, 3114 - __entry->src_offset, 3115 - __entry->dest_ino, 3116 - __entry->dest_isize, 3117 - __entry->dest_offset) 3118 - ) 3119 - 3120 - #define DEFINE_DOUBLE_VFS_IO_EVENT(name) \ 3121 - DEFINE_EVENT(xfs_double_vfs_io_class, name, \ 3122 - TP_PROTO(struct inode *src, u64 soffset, u64 len, \ 3123 - struct inode *dest, u64 doffset), \ 3124 - TP_ARGS(src, soffset, len, dest, doffset)) 3125 - 3126 - /* CoW write tracepoint */ 3127 - DECLARE_EVENT_CLASS(xfs_copy_on_write_class, 3128 - TP_PROTO(struct xfs_inode *ip, xfs_fileoff_t lblk, xfs_fsblock_t pblk, 3129 - xfs_extlen_t len, xfs_fsblock_t new_pblk), 3130 - TP_ARGS(ip, lblk, pblk, len, new_pblk), 3131 - TP_STRUCT__entry( 3132 - __field(dev_t, dev) 3133 - __field(xfs_ino_t, ino) 3134 - __field(xfs_fileoff_t, lblk) 3135 - __field(xfs_fsblock_t, pblk) 3136 - __field(xfs_extlen_t, len) 3137 - __field(xfs_fsblock_t, new_pblk) 3138 - ), 3139 - TP_fast_assign( 3140 - __entry->dev = VFS_I(ip)->i_sb->s_dev; 3141 - __entry->ino = ip->i_ino; 3142 - __entry->lblk = lblk; 3143 - __entry->pblk = pblk; 3144 - __entry->len = len; 3145 - __entry->new_pblk = new_pblk; 3146 - ), 3147 - TP_printk("dev %d:%d ino 
0x%llx lblk 0x%llx pblk 0x%llx " 3148 - "len 0x%x new_pblk %llu", 3149 - MAJOR(__entry->dev), MINOR(__entry->dev), 3150 - __entry->ino, 3151 - __entry->lblk, 3152 - __entry->pblk, 3153 - __entry->len, 3154 - __entry->new_pblk) 3155 - ) 3156 - 3157 - #define DEFINE_COW_EVENT(name) \ 3158 - DEFINE_EVENT(xfs_copy_on_write_class, name, \ 3159 - TP_PROTO(struct xfs_inode *ip, xfs_fileoff_t lblk, xfs_fsblock_t pblk, \ 3160 - xfs_extlen_t len, xfs_fsblock_t new_pblk), \ 3161 - TP_ARGS(ip, lblk, pblk, len, new_pblk)) 3162 - 3163 3095 /* inode/irec events */ 3164 3096 DECLARE_EVENT_CLASS(xfs_inode_irec_class, 3165 3097 TP_PROTO(struct xfs_inode *ip, struct xfs_bmbt_irec *irec), ··· 3198 3292 DEFINE_INODE_ERROR_EVENT(xfs_reflink_remap_range_error); 3199 3293 DEFINE_INODE_ERROR_EVENT(xfs_reflink_set_inode_flag_error); 3200 3294 DEFINE_INODE_ERROR_EVENT(xfs_reflink_update_inode_size_error); 3201 - DEFINE_INODE_ERROR_EVENT(xfs_reflink_reflink_main_loop_error); 3202 - DEFINE_INODE_ERROR_EVENT(xfs_reflink_read_iomap_error); 3203 3295 DEFINE_INODE_ERROR_EVENT(xfs_reflink_remap_blocks_error); 3204 3296 DEFINE_INODE_ERROR_EVENT(xfs_reflink_remap_extent_error); 3205 3297 ··· 3206 3302 DEFINE_INODE_ERROR_EVENT(xfs_reflink_compare_extents_error); 3207 3303 3208 3304 /* ioctl tracepoints */ 3209 - DEFINE_DOUBLE_VFS_IO_EVENT(xfs_ioctl_reflink); 3210 - DEFINE_DOUBLE_VFS_IO_EVENT(xfs_ioctl_clone_range); 3211 - DEFINE_DOUBLE_VFS_IO_EVENT(xfs_ioctl_file_extent_same); 3212 3305 TRACE_EVENT(xfs_ioctl_clone, 3213 3306 TP_PROTO(struct inode *src, struct inode *dest), 3214 3307 TP_ARGS(src, dest), ··· 3235 3334 3236 3335 /* unshare tracepoints */ 3237 3336 DEFINE_SIMPLE_IO_EVENT(xfs_reflink_unshare); 3238 - DEFINE_SIMPLE_IO_EVENT(xfs_reflink_cow_eof_block); 3239 - DEFINE_PAGE_EVENT(xfs_reflink_unshare_page); 3240 3337 DEFINE_INODE_ERROR_EVENT(xfs_reflink_unshare_error); 3241 - DEFINE_INODE_ERROR_EVENT(xfs_reflink_cow_eof_block_error); 3242 - DEFINE_INODE_ERROR_EVENT(xfs_reflink_dirty_page_error); 
3243 3338 3244 3339 /* copy on write */ 3245 3340 DEFINE_INODE_IREC_EVENT(xfs_reflink_trim_around_shared); ··· 3258 3361 DEFINE_INODE_ERROR_EVENT(xfs_reflink_cancel_cow_range_error); 3259 3362 DEFINE_INODE_ERROR_EVENT(xfs_reflink_end_cow_error); 3260 3363 3261 - DEFINE_COW_EVENT(xfs_reflink_fork_buf); 3262 - DEFINE_COW_EVENT(xfs_reflink_finish_fork_buf); 3263 - DEFINE_INODE_ERROR_EVENT(xfs_reflink_fork_buf_error); 3264 - DEFINE_INODE_ERROR_EVENT(xfs_reflink_finish_fork_buf_error); 3265 3364 3266 - DEFINE_INODE_EVENT(xfs_reflink_cancel_pending_cow); 3267 3365 DEFINE_INODE_IREC_EVENT(xfs_reflink_cancel_cow); 3268 - DEFINE_INODE_ERROR_EVENT(xfs_reflink_cancel_pending_cow_error); 3269 3366 3270 3367 /* rmap swapext tracepoints */ 3271 3368 DEFINE_INODE_IREC_EVENT(xfs_swap_extent_rmap_remap);

+10 -13

fs/xfs/xfs_xattr.c

··· 130 130 NULL 131 131 }; 132 132 133 - static int 133 + static void 134 134 __xfs_xattr_put_listent( 135 135 struct xfs_attr_list_context *context, 136 136 char *prefix, ··· 148 148 if (arraytop > context->firstu) { 149 149 context->count = -1; /* insufficient space */ 150 150 context->seen_enough = 1; 151 - return 0; 151 + return; 152 152 } 153 153 offset = (char *)context->alist + context->count; 154 154 strncpy(offset, prefix, prefix_len); ··· 159 159 160 160 compute_size: 161 161 context->count += prefix_len + namelen + 1; 162 - return 0; 162 + return; 163 163 } 164 164 165 - static int 165 + static void 166 166 xfs_xattr_put_listent( 167 167 struct xfs_attr_list_context *context, 168 168 int flags, ··· 180 180 if (namelen == SGI_ACL_FILE_SIZE && 181 181 strncmp(name, SGI_ACL_FILE, 182 182 SGI_ACL_FILE_SIZE) == 0) { 183 - int ret = __xfs_xattr_put_listent( 183 + __xfs_xattr_put_listent( 184 184 context, XATTR_SYSTEM_PREFIX, 185 185 XATTR_SYSTEM_PREFIX_LEN, 186 186 XATTR_POSIX_ACL_ACCESS, 187 187 strlen(XATTR_POSIX_ACL_ACCESS)); 188 - if (ret) 189 - return ret; 190 188 } else if (namelen == SGI_ACL_DEFAULT_SIZE && 191 189 strncmp(name, SGI_ACL_DEFAULT, 192 190 SGI_ACL_DEFAULT_SIZE) == 0) { 193 - int ret = __xfs_xattr_put_listent( 191 + __xfs_xattr_put_listent( 194 192 context, XATTR_SYSTEM_PREFIX, 195 193 XATTR_SYSTEM_PREFIX_LEN, 196 194 XATTR_POSIX_ACL_DEFAULT, 197 195 strlen(XATTR_POSIX_ACL_DEFAULT)); 198 - if (ret) 199 - return ret; 200 196 } 201 197 #endif 202 198 ··· 201 205 * see them. 202 206 */ 203 207 if (!capable(CAP_SYS_ADMIN)) 204 - return 0; 208 + return; 205 209 206 210 prefix = XATTR_TRUSTED_PREFIX; 207 211 prefix_len = XATTR_TRUSTED_PREFIX_LEN; ··· 213 217 prefix_len = XATTR_USER_PREFIX_LEN; 214 218 } 215 219 216 - return __xfs_xattr_put_listent(context, prefix, prefix_len, name, 217 - namelen); 220 + __xfs_xattr_put_listent(context, prefix, prefix_len, name, 221 + namelen); 222 + return; 218 223 } 219 224 220 225 ssize_t

+11

include/linux/iomap.h

··· 50 50 #define IOMAP_ZERO (1 << 1) /* zeroing operation, may skip holes */ 51 51 #define IOMAP_REPORT (1 << 2) /* report extent status, e.g. FIEMAP */ 52 52 #define IOMAP_FAULT (1 << 3) /* mapping for page fault */ 53 + #define IOMAP_DIRECT (1 << 4) /* direct I/O */ 53 54 54 55 struct iomap_ops { 55 56 /* ··· 83 82 struct iomap_ops *ops); 84 83 int iomap_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 85 84 loff_t start, loff_t len, struct iomap_ops *ops); 85 + 86 + /* 87 + * Flags for direct I/O ->end_io: 88 + */ 89 + #define IOMAP_DIO_UNWRITTEN (1 << 0) /* covers unwritten extent(s) */ 90 + #define IOMAP_DIO_COW (1 << 1) /* covers COW extent(s) */ 91 + typedef int (iomap_dio_end_io_t)(struct kiocb *iocb, ssize_t ret, 92 + unsigned flags); 93 + ssize_t iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, 94 + struct iomap_ops *ops, iomap_dio_end_io_t end_io); 86 95 87 96 #endif /* LINUX_IOMAP_H */

+23 -2

include/linux/lockdep.h

··· 338 338 extern void lock_release(struct lockdep_map *lock, int nested, 339 339 unsigned long ip); 340 340 341 - #define lockdep_is_held(lock) lock_is_held(&(lock)->dep_map) 341 + /* 342 + * Same "read" as for lock_acquire(), except -1 means any. 343 + */ 344 + extern int lock_is_held_type(struct lockdep_map *lock, int read); 342 345 343 - extern int lock_is_held(struct lockdep_map *lock); 346 + static inline int lock_is_held(struct lockdep_map *lock) 347 + { 348 + return lock_is_held_type(lock, -1); 349 + } 350 + 351 + #define lockdep_is_held(lock) lock_is_held(&(lock)->dep_map) 352 + #define lockdep_is_held_type(lock, r) lock_is_held_type(&(lock)->dep_map, (r)) 344 353 345 354 extern void lock_set_class(struct lockdep_map *lock, const char *name, 346 355 struct lock_class_key *key, unsigned int subclass, ··· 379 370 380 371 #define lockdep_assert_held(l) do { \ 381 372 WARN_ON(debug_locks && !lockdep_is_held(l)); \ 373 + } while (0) 374 + 375 + #define lockdep_assert_held_exclusive(l) do { \ 376 + WARN_ON(debug_locks && !lockdep_is_held_type(l, 0)); \ 377 + } while (0) 378 + 379 + #define lockdep_assert_held_read(l) do { \ 380 + WARN_ON(debug_locks && !lockdep_is_held_type(l, 1)); \ 382 381 } while (0) 383 382 384 383 #define lockdep_assert_held_once(l) do { \ ··· 445 428 446 429 #define lockdep_depth(tsk) (0) 447 430 431 + #define lockdep_is_held_type(l, r) (1) 432 + 448 433 #define lockdep_assert_held(l) do { (void)(l); } while (0) 434 + #define lockdep_assert_held_exclusive(l) do { (void)(l); } while (0) 435 + #define lockdep_assert_held_read(l) do { (void)(l); } while (0) 449 436 #define lockdep_assert_held_once(l) do { (void)(l); } while (0) 450 437 451 438 #define lockdep_recursing(tsk) (0)

+12 -8

kernel/locking/lockdep.c

··· 3191 3191 return 0; 3192 3192 } 3193 3193 3194 - static int __lock_is_held(struct lockdep_map *lock); 3194 + static int __lock_is_held(struct lockdep_map *lock, int read); 3195 3195 3196 3196 /* 3197 3197 * This gets called for every mutex_lock*()/spin_lock*() operation. ··· 3332 3332 } 3333 3333 chain_key = iterate_chain_key(chain_key, class_idx); 3334 3334 3335 - if (nest_lock && !__lock_is_held(nest_lock)) 3335 + if (nest_lock && !__lock_is_held(nest_lock, -1)) 3336 3336 return print_lock_nested_lock_not_held(curr, hlock, ip); 3337 3337 3338 3338 if (!validate_chain(curr, lock, hlock, chain_head, chain_key)) ··· 3579 3579 return 1; 3580 3580 } 3581 3581 3582 - static int __lock_is_held(struct lockdep_map *lock) 3582 + static int __lock_is_held(struct lockdep_map *lock, int read) 3583 3583 { 3584 3584 struct task_struct *curr = current; 3585 3585 int i; ··· 3587 3587 for (i = 0; i < curr->lockdep_depth; i++) { 3588 3588 struct held_lock *hlock = curr->held_locks + i; 3589 3589 3590 - if (match_held_lock(hlock, lock)) 3591 - return 1; 3590 + if (match_held_lock(hlock, lock)) { 3591 + if (read == -1 || hlock->read == read) 3592 + return 1; 3593 + 3594 + return 0; 3595 + } 3592 3596 } 3593 3597 3594 3598 return 0; ··· 3776 3772 } 3777 3773 EXPORT_SYMBOL_GPL(lock_release); 3778 3774 3779 - int lock_is_held(struct lockdep_map *lock) 3775 + int lock_is_held_type(struct lockdep_map *lock, int read) 3780 3776 { 3781 3777 unsigned long flags; 3782 3778 int ret = 0; ··· 3788 3784 check_flags(flags); 3789 3785 3790 3786 current->lockdep_recursion = 1; 3791 - ret = __lock_is_held(lock); 3787 + ret = __lock_is_held(lock, read); 3792 3788 current->lockdep_recursion = 0; 3793 3789 raw_local_irq_restore(flags); 3794 3790 3795 3791 return ret; 3796 3792 } 3797 - EXPORT_SYMBOL_GPL(lock_is_held); 3793 + EXPORT_SYMBOL_GPL(lock_is_held_type); 3798 3794 3799 3795 struct pin_cookie lock_pin_lock(struct lockdep_map *lock) 3800 3796 {