Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'xfs-fixes-6.14-rc2' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux

Pull xfs bug fixes from Carlos Maiolino:
"A few fixes for XFS, but the most notable one is:

- xfs: remove xfs_buf_cache.bc_lock

which has been hit by different persons including syzbot"

* tag 'xfs-fixes-6.14-rc2' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux:
xfs: remove xfs_buf_cache.bc_lock
xfs: Add error handling for xfs_reflink_cancel_cow_range
xfs: Propagate errors from xfs_reflink_cancel_cow_range in xfs_dax_write_iomap_end
xfs: don't call remap_verify_area with sb write protection held
xfs: remove an out of data comment in _xfs_buf_alloc
xfs: fix the entry condition of exact EOF block allocation optimization

+58 -76
+7 -6
fs/xfs/libxfs/xfs_bmap.c
@@ -3563,12 +3563,12 @@
 	int			error;
 
 	/*
-	 * If there are already extents in the file, try an exact EOF block
-	 * allocation to extend the file as a contiguous extent. If that fails,
-	 * or it's the first allocation in a file, just try for a stripe aligned
-	 * allocation.
+	 * If there are already extents in the file, and xfs_bmap_adjacent() has
+	 * given a better blkno, try an exact EOF block allocation to extend the
+	 * file as a contiguous extent. If that fails, or it's the first
+	 * allocation in a file, just try for a stripe aligned allocation.
 	 */
-	if (ap->offset) {
+	if (ap->eof) {
 		xfs_extlen_t	nextminlen = 0;
 
 		/*
@@ -3736,7 +3736,8 @@
 	int			error;
 
 	ap->blkno = XFS_INO_TO_FSB(args->mp, ap->ip->i_ino);
-	xfs_bmap_adjacent(ap);
+	if (!xfs_bmap_adjacent(ap))
+		ap->eof = false;
 
 	/*
 	 * Search for an allocation group with a single extent large enough for
+17 -19
fs/xfs/xfs_buf.c
@@ -41,8 +41,7 @@
  *
  * xfs_buf_rele:
  *	b_lock
- *	  pag_buf_lock
- *	    lru_lock
+ *	  lru_lock
  *
  * xfs_buftarg_drain_rele
  *	lru_lock
@@ -219,23 +220,25 @@
 	 */
 	flags &= ~(XBF_UNMAPPED | XBF_TRYLOCK | XBF_ASYNC | XBF_READ_AHEAD);
 
-	spin_lock_init(&bp->b_lock);
+	/*
+	 * A new buffer is held and locked by the owner.  This ensures that the
+	 * buffer is owned by the caller and racing RCU lookups right after
+	 * inserting into the hash table are safe (and will have to wait for
+	 * the unlock to do anything non-trivial).
+	 */
 	bp->b_hold = 1;
+	sema_init(&bp->b_sema, 0); /* held, no waiters */
+
+	spin_lock_init(&bp->b_lock);
 	atomic_set(&bp->b_lru_ref, 1);
 	init_completion(&bp->b_iowait);
 	INIT_LIST_HEAD(&bp->b_lru);
 	INIT_LIST_HEAD(&bp->b_list);
 	INIT_LIST_HEAD(&bp->b_li_list);
-	sema_init(&bp->b_sema, 0); /* held, no waiters */
 	bp->b_target = target;
 	bp->b_mount = target->bt_mount;
 	bp->b_flags = flags;
 
-	/*
-	 * Set length and io_length to the same value initially.
-	 * I/O routines should use io_length, which will be the same in
-	 * most cases but may be reset (e.g. XFS recovery).
-	 */
 	error = xfs_buf_get_maps(bp, nmaps);
 	if (error) {
 		kmem_cache_free(xfs_buf_cache, bp);
@@ -503,6 +502,5 @@
 xfs_buf_cache_init(
 	struct xfs_buf_cache	*bch)
 {
-	spin_lock_init(&bch->bc_lock);
 	return rhashtable_init(&bch->bc_hash, &xfs_buf_hash_params);
 }
@@ -652,17 +652,20 @@
 	if (error)
 		goto out_free_buf;
 
-	spin_lock(&bch->bc_lock);
+	/* The new buffer keeps the perag reference until it is freed. */
+	new_bp->b_pag = pag;
+
+	rcu_read_lock();
 	bp = rhashtable_lookup_get_insert_fast(&bch->bc_hash,
 			&new_bp->b_rhash_head, xfs_buf_hash_params);
 	if (IS_ERR(bp)) {
+		rcu_read_unlock();
 		error = PTR_ERR(bp);
-		spin_unlock(&bch->bc_lock);
 		goto out_free_buf;
 	}
 	if (bp && xfs_buf_try_hold(bp)) {
 		/* found an existing buffer */
-		spin_unlock(&bch->bc_lock);
+		rcu_read_unlock();
 		error = xfs_buf_find_lock(bp, flags);
 		if (error)
 			xfs_buf_rele(bp);
@@ -673,9 +670,7 @@
 		*bpp = bp;
 		goto out_free_buf;
 	}
+	rcu_read_unlock();
 
-	/* The new buffer keeps the perag reference until it is freed. */
-	new_bp->b_pag = pag;
-	spin_unlock(&bch->bc_lock);
 	*bpp = new_bp;
 	return 0;
@@ -1091,7 +1090,6 @@
 	}
 
 	/* we are asked to drop the last reference */
-	spin_lock(&bch->bc_lock);
 	__xfs_buf_ioacct_dec(bp);
 	if (!(bp->b_flags & XBF_STALE) && atomic_read(&bp->b_lru_ref)) {
 		/*
@@ -1102,7 +1102,6 @@
 			bp->b_state &= ~XFS_BSTATE_DISPOSE;
 		else
 			bp->b_hold--;
-		spin_unlock(&bch->bc_lock);
 	} else {
 		bp->b_hold--;
 		/*
@@ -1119,7 +1120,6 @@
 		ASSERT(!(bp->b_flags & _XBF_DELWRI_Q));
 		rhashtable_remove_fast(&bch->bc_hash, &bp->b_rhash_head,
 				xfs_buf_hash_params);
-		spin_unlock(&bch->bc_lock);
 		if (pag)
 			xfs_perag_put(pag);
 		freebuf = true;
-1
fs/xfs/xfs_buf.h
@@ -80,7 +80,6 @@
 #define XFS_BSTATE_IN_FLIGHT	(1 << 1)	/* I/O in flight */
 
 struct xfs_buf_cache {
-	spinlock_t		bc_lock;
 	struct rhashtable	bc_hash;
 };
 
+27 -44
fs/xfs/xfs_exchrange.c
@@ -329,22 +329,6 @@
  * successfully but before locks are dropped.
  */
 
-/* Verify that we have security clearance to perform this operation. */
-static int
-xfs_exchange_range_verify_area(
-	struct xfs_exchrange	*fxr)
-{
-	int			ret;
-
-	ret = remap_verify_area(fxr->file1, fxr->file1_offset, fxr->length,
-			true);
-	if (ret)
-		return ret;
-
-	return remap_verify_area(fxr->file2, fxr->file2_offset, fxr->length,
-			true);
-}
-
 /*
  * Performs necessary checks before doing a range exchange, having stabilized
  * mutable inode attributes via i_rwsem.
@@ -355,11 +339,13 @@
 	unsigned int		alloc_unit)
 {
 	struct inode		*inode1 = file_inode(fxr->file1);
+	loff_t			size1 = i_size_read(inode1);
 	struct inode		*inode2 = file_inode(fxr->file2);
+	loff_t			size2 = i_size_read(inode2);
 	uint64_t		allocmask = alloc_unit - 1;
 	int64_t			test_len;
 	uint64_t		blen;
-	loff_t			size1, size2, tmp;
+	loff_t			tmp;
 	int			error;
 
 	/* Don't touch certain kinds of inodes */
@@ -368,22 +354,22 @@
 	if (IS_SWAPFILE(inode1) || IS_SWAPFILE(inode2))
 		return -ETXTBSY;
 
-	size1 = i_size_read(inode1);
-	size2 = i_size_read(inode2);
-
 	/* Ranges cannot start after EOF. */
 	if (fxr->file1_offset > size1 || fxr->file2_offset > size2)
 		return -EINVAL;
 
-	/*
-	 * If the caller said to exchange to EOF, we set the length of the
-	 * request large enough to cover everything to the end of both files.
-	 */
 	if (fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF) {
+		/*
+		 * If the caller said to exchange to EOF, we set the length of
+		 * the request large enough to cover everything to the end of
+		 * both files.
+		 */
 		fxr->length = max_t(int64_t, size1 - fxr->file1_offset,
 				    size2 - fxr->file2_offset);
-
-		error = xfs_exchange_range_verify_area(fxr);
-		if (error)
-			return error;
+	} else {
+		/*
+		 * Otherwise we require both ranges to end within EOF.
+		 */
+		if (fxr->file1_offset + fxr->length > size1 ||
+		    fxr->file2_offset + fxr->length > size2)
+			return -EINVAL;
 	}
 
 	/*
@@ -399,13 +386,3 @@
 	/* Ensure offsets don't wrap. */
 	if (check_add_overflow(fxr->file1_offset, fxr->length, &tmp) ||
 	    check_add_overflow(fxr->file2_offset, fxr->length, &tmp))
-		return -EINVAL;
-
-	/*
-	 * We require both ranges to end within EOF, unless we're exchanging
-	 * to EOF.
-	 */
-	if (!(fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF) &&
-	    (fxr->file1_offset + fxr->length > size1 ||
-	     fxr->file2_offset + fxr->length > size2))
 		return -EINVAL;
 
 	/*
@@ -747,6 +725,7 @@
 {
 	struct inode		*inode1 = file_inode(fxr->file1);
 	struct inode		*inode2 = file_inode(fxr->file2);
+	loff_t			check_len = fxr->length;
 	int			ret;
 
 	BUILD_BUG_ON(XFS_EXCHANGE_RANGE_ALL_FLAGS &
@@ -779,14 +758,17 @@
 		return -EBADF;
 
 	/*
-	 * If we're not exchanging to EOF, we can check the areas before
-	 * stabilizing both files' i_size.
+	 * If we're exchanging to EOF we can't calculate the length until taking
+	 * the iolock.  Pass a 0 length to remap_verify_area similar to the
+	 * FICLONE and FICLONERANGE ioctls that support cloning to EOF as well.
 	 */
-	if (!(fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF)) {
-		ret = xfs_exchange_range_verify_area(fxr);
-		if (ret)
-			return ret;
-	}
+	if (fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF)
+		check_len = 0;
+	ret = remap_verify_area(fxr->file1, fxr->file1_offset, check_len, true);
+	if (ret)
+		return ret;
+	ret = remap_verify_area(fxr->file2, fxr->file2_offset, check_len, true);
+	if (ret)
+		return ret;
 
 	/* Update cmtime if the fd/inode don't forbid it. */
 	if (!(fxr->file1->f_mode & FMODE_NOCMTIME) && !IS_NOCMTIME(inode1))
+5 -2
fs/xfs/xfs_inode.c
@@ -1404,8 +1404,11 @@
 		goto out;
 
 	/* Try to clean out the cow blocks if there are any. */
-	if (xfs_inode_has_cow_data(ip))
-		xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, true);
+	if (xfs_inode_has_cow_data(ip)) {
+		error = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, true);
+		if (error)
+			goto out;
+	}
 
 	if (VFS_I(ip)->i_nlink != 0) {
 		/*
+2 -4
fs/xfs/xfs_iomap.c
@@ -976,10 +976,8 @@
 	if (!xfs_is_cow_inode(ip))
 		return 0;
 
-	if (!written) {
-		xfs_reflink_cancel_cow_range(ip, pos, length, true);
-		return 0;
-	}
+	if (!written)
+		return xfs_reflink_cancel_cow_range(ip, pos, length, true);
 
 	return xfs_reflink_end_cow(ip, pos, written);
 }