Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'fallocate-insert-range' into for-next

+497 -92
+6 -2
fs/open.c
··· 231 231 return -EINVAL; 232 232 233 233 /* Return error if mode is not supported */ 234 - if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | 235 - FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE)) 234 + if (mode & ~FALLOC_FL_SUPPORTED_MASK) 236 235 return -EOPNOTSUPP; 237 236 238 237 /* Punch hole and zero range are mutually exclusive */ ··· 247 248 /* Collapse range should only be used exclusively. */ 248 249 if ((mode & FALLOC_FL_COLLAPSE_RANGE) && 249 250 (mode & ~FALLOC_FL_COLLAPSE_RANGE)) 251 + return -EINVAL; 252 + 253 + /* Insert range should only be used exclusively. */ 254 + if ((mode & FALLOC_FL_INSERT_RANGE) && 255 + (mode & ~FALLOC_FL_INSERT_RANGE)) 250 256 return -EINVAL; 251 257 252 258 if (!(file->f_mode & FMODE_WRITE))
+320 -50
fs/xfs/libxfs/xfs_bmap.c
··· 5486 5486 int *current_ext, 5487 5487 struct xfs_bmbt_rec_host *gotp, 5488 5488 struct xfs_btree_cur *cur, 5489 - int *logflags) 5489 + int *logflags, 5490 + enum shift_direction direction) 5490 5491 { 5491 5492 struct xfs_ifork *ifp; 5492 5493 struct xfs_mount *mp; 5493 5494 xfs_fileoff_t startoff; 5494 - struct xfs_bmbt_rec_host *leftp; 5495 + struct xfs_bmbt_rec_host *adj_irecp; 5495 5496 struct xfs_bmbt_irec got; 5496 - struct xfs_bmbt_irec left; 5497 + struct xfs_bmbt_irec adj_irec; 5497 5498 int error; 5498 5499 int i; 5500 + int total_extents; 5499 5501 5500 5502 mp = ip->i_mount; 5501 5503 ifp = XFS_IFORK_PTR(ip, whichfork); 5504 + total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t); 5502 5505 5503 5506 xfs_bmbt_get_all(gotp, &got); 5504 - startoff = got.br_startoff - offset_shift_fsb; 5505 5507 5506 5508 /* delalloc extents should be prevented by caller */ 5507 5509 XFS_WANT_CORRUPTED_RETURN(mp, !isnullstartblock(got.br_startblock)); 5508 5510 5509 - /* 5510 - * Check for merge if we've got an extent to the left, otherwise make 5511 - * sure there's enough room at the start of the file for the shift. 5512 - */ 5513 - if (*current_ext) { 5514 - /* grab the left extent and check for a large enough hole */ 5515 - leftp = xfs_iext_get_ext(ifp, *current_ext - 1); 5516 - xfs_bmbt_get_all(leftp, &left); 5511 + if (direction == SHIFT_LEFT) { 5512 + startoff = got.br_startoff - offset_shift_fsb; 5517 5513 5518 - if (startoff < left.br_startoff + left.br_blockcount) 5514 + /* 5515 + * Check for merge if we've got an extent to the left, 5516 + * otherwise make sure there's enough room at the start 5517 + * of the file for the shift. 5518 + */ 5519 + if (!*current_ext) { 5520 + if (got.br_startoff < offset_shift_fsb) 5521 + return -EINVAL; 5522 + goto update_current_ext; 5523 + } 5524 + /* 5525 + * grab the left extent and check for a large 5526 + * enough hole. 
5527 + */ 5528 + adj_irecp = xfs_iext_get_ext(ifp, *current_ext - 1); 5529 + xfs_bmbt_get_all(adj_irecp, &adj_irec); 5530 + 5531 + if (startoff < 5532 + adj_irec.br_startoff + adj_irec.br_blockcount) 5519 5533 return -EINVAL; 5520 5534 5521 5535 /* check whether to merge the extent or shift it down */ 5522 - if (xfs_bmse_can_merge(&left, &got, offset_shift_fsb)) { 5536 + if (xfs_bmse_can_merge(&adj_irec, &got, 5537 + offset_shift_fsb)) { 5523 5538 return xfs_bmse_merge(ip, whichfork, offset_shift_fsb, 5524 - *current_ext, gotp, leftp, cur, 5525 - logflags); 5539 + *current_ext, gotp, adj_irecp, 5540 + cur, logflags); 5526 5541 } 5527 - } else if (got.br_startoff < offset_shift_fsb) 5528 - return -EINVAL; 5529 - 5542 + } else { 5543 + startoff = got.br_startoff + offset_shift_fsb; 5544 + /* nothing to move if this is the last extent */ 5545 + if (*current_ext >= (total_extents - 1)) 5546 + goto update_current_ext; 5547 + /* 5548 + * If this is not the last extent in the file, make sure there 5549 + * is enough room between current extent and next extent for 5550 + * accommodating the shift. 5551 + */ 5552 + adj_irecp = xfs_iext_get_ext(ifp, *current_ext + 1); 5553 + xfs_bmbt_get_all(adj_irecp, &adj_irec); 5554 + if (startoff + got.br_blockcount > adj_irec.br_startoff) 5555 + return -EINVAL; 5556 + /* 5557 + * Unlike a left shift (which involves a hole punch), 5558 + * a right shift does not modify extent neighbors 5559 + * in any way. We should never find mergeable extents 5560 + * in this scenario. Check anyways and warn if we 5561 + * encounter two extents that could be one. 5562 + */ 5563 + if (xfs_bmse_can_merge(&got, &adj_irec, offset_shift_fsb)) 5564 + WARN_ON_ONCE(1); 5565 + } 5530 5566 /* 5531 5567 * Increment the extent index for the next iteration, update the start 5532 5568 * offset of the in-core extent and update the btree if applicable. 
5533 5569 */ 5534 - (*current_ext)++; 5570 + update_current_ext: 5571 + if (direction == SHIFT_LEFT) 5572 + (*current_ext)++; 5573 + else 5574 + (*current_ext)--; 5535 5575 xfs_bmbt_set_startoff(gotp, startoff); 5536 5576 *logflags |= XFS_ILOG_CORE; 5537 5577 if (!cur) { ··· 5587 5547 5588 5548 got.br_startoff = startoff; 5589 5549 return xfs_bmbt_update(cur, got.br_startoff, got.br_startblock, 5590 - got.br_blockcount, got.br_state); 5550 + got.br_blockcount, got.br_state); 5591 5551 } 5592 5552 5593 5553 /* 5594 - * Shift extent records to the left to cover a hole. 5554 + * Shift extent records to the left/right to cover/create a hole. 5595 5555 * 5596 5556 * The maximum number of extents to be shifted in a single operation is 5597 - * @num_exts. @start_fsb specifies the file offset to start the shift and the 5557 + * @num_exts. @stop_fsb specifies the file offset at which to stop shift and the 5598 5558 * file offset where we've left off is returned in @next_fsb. @offset_shift_fsb 5599 5559 * is the length by which each extent is shifted. 
If there is no hole to shift 5600 5560 * the extents into, this will be considered invalid operation and we abort ··· 5604 5564 xfs_bmap_shift_extents( 5605 5565 struct xfs_trans *tp, 5606 5566 struct xfs_inode *ip, 5607 - xfs_fileoff_t start_fsb, 5567 + xfs_fileoff_t *next_fsb, 5608 5568 xfs_fileoff_t offset_shift_fsb, 5609 5569 int *done, 5610 - xfs_fileoff_t *next_fsb, 5570 + xfs_fileoff_t stop_fsb, 5611 5571 xfs_fsblock_t *firstblock, 5612 5572 struct xfs_bmap_free *flist, 5573 + enum shift_direction direction, 5613 5574 int num_exts) 5614 5575 { 5615 5576 struct xfs_btree_cur *cur = NULL; ··· 5620 5579 struct xfs_ifork *ifp; 5621 5580 xfs_extnum_t nexts = 0; 5622 5581 xfs_extnum_t current_ext; 5582 + xfs_extnum_t total_extents; 5583 + xfs_extnum_t stop_extent; 5623 5584 int error = 0; 5624 5585 int whichfork = XFS_DATA_FORK; 5625 5586 int logflags = 0; 5626 - int total_extents; 5627 5587 5628 5588 if (unlikely(XFS_TEST_ERROR( 5629 5589 (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS && ··· 5640 5598 5641 5599 ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); 5642 5600 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 5601 + ASSERT(direction == SHIFT_LEFT || direction == SHIFT_RIGHT); 5602 + ASSERT(*next_fsb != NULLFSBLOCK || direction == SHIFT_RIGHT); 5643 5603 5644 5604 ifp = XFS_IFORK_PTR(ip, whichfork); 5645 5605 if (!(ifp->if_flags & XFS_IFEXTENTS)) { ··· 5659 5615 } 5660 5616 5661 5617 /* 5662 - * Look up the extent index for the fsb where we start shifting. We can 5663 - * henceforth iterate with current_ext as extent list changes are locked 5664 - * out via ilock. 5665 - * 5666 - * gotp can be null in 2 cases: 1) if there are no extents or 2) 5667 - * start_fsb lies in a hole beyond which there are no extents. Either 5668 - * way, we are done. 
5669 - */ 5670 - gotp = xfs_iext_bno_to_ext(ifp, start_fsb, &current_ext); 5671 - if (!gotp) { 5672 - *done = 1; 5673 - goto del_cursor; 5674 - } 5675 - 5676 - /* 5677 5618 * There may be delalloc extents in the data fork before the range we 5678 5619 * are collapsing out, so we cannot use the count of real extents here. 5679 5620 * Instead we have to calculate it from the incore fork. 5680 5621 */ 5681 5622 total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t); 5682 - while (nexts++ < num_exts && current_ext < total_extents) { 5623 + if (total_extents == 0) { 5624 + *done = 1; 5625 + goto del_cursor; 5626 + } 5627 + 5628 + /* 5629 + * In case of first right shift, we need to initialize next_fsb 5630 + */ 5631 + if (*next_fsb == NULLFSBLOCK) { 5632 + gotp = xfs_iext_get_ext(ifp, total_extents - 1); 5633 + xfs_bmbt_get_all(gotp, &got); 5634 + *next_fsb = got.br_startoff; 5635 + if (stop_fsb > *next_fsb) { 5636 + *done = 1; 5637 + goto del_cursor; 5638 + } 5639 + } 5640 + 5641 + /* Lookup the extent index at which we have to stop */ 5642 + if (direction == SHIFT_RIGHT) { 5643 + gotp = xfs_iext_bno_to_ext(ifp, stop_fsb, &stop_extent); 5644 + /* Make stop_extent exclusive of shift range */ 5645 + stop_extent--; 5646 + } else 5647 + stop_extent = total_extents; 5648 + 5649 + /* 5650 + * Look up the extent index for the fsb where we start shifting. We can 5651 + * henceforth iterate with current_ext as extent list changes are locked 5652 + * out via ilock. 5653 + * 5654 + * gotp can be null in 2 cases: 1) if there are no extents or 2) 5655 + * *next_fsb lies in a hole beyond which there are no extents. Either 5656 + * way, we are done. 
5657 + */ 5658 + gotp = xfs_iext_bno_to_ext(ifp, *next_fsb, &current_ext); 5659 + if (!gotp) { 5660 + *done = 1; 5661 + goto del_cursor; 5662 + } 5663 + 5664 + /* some sanity checking before we finally start shifting extents */ 5665 + if ((direction == SHIFT_LEFT && current_ext >= stop_extent) || 5666 + (direction == SHIFT_RIGHT && current_ext <= stop_extent)) { 5667 + error = -EIO; 5668 + goto del_cursor; 5669 + } 5670 + 5671 + while (nexts++ < num_exts) { 5683 5672 error = xfs_bmse_shift_one(ip, whichfork, offset_shift_fsb, 5684 - &current_ext, gotp, cur, &logflags); 5673 + &current_ext, gotp, cur, &logflags, 5674 + direction); 5685 5675 if (error) 5686 5676 goto del_cursor; 5677 + /* 5678 + * If there was an extent merge during the shift, the extent 5679 + * count can change. Update the total and grab the next record. 5680 + */ 5681 + if (direction == SHIFT_LEFT) { 5682 + total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t); 5683 + stop_extent = total_extents; 5684 + } 5687 5685 5688 - /* update total extent count and grab the next record */ 5689 - total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t); 5690 - if (current_ext >= total_extents) 5686 + if (current_ext == stop_extent) { 5687 + *done = 1; 5688 + *next_fsb = NULLFSBLOCK; 5691 5689 break; 5690 + } 5692 5691 gotp = xfs_iext_get_ext(ifp, current_ext); 5693 5692 } 5694 5693 5695 - /* Check if we are done */ 5696 - if (current_ext == total_extents) { 5697 - *done = 1; 5698 - } else if (next_fsb) { 5694 + if (!*done) { 5699 5695 xfs_bmbt_get_all(gotp, &got); 5700 5696 *next_fsb = got.br_startoff; 5701 5697 } ··· 5748 5664 if (logflags) 5749 5665 xfs_trans_log_inode(tp, ip, logflags); 5750 5666 5667 + return error; 5668 + } 5669 + 5670 + /* 5671 + * Splits an extent into two extents at split_fsb block such that it is 5672 + * the first block of the current_ext. @current_ext is a target extent 5673 + * to be split. @split_fsb is a block where the extent is split. 
5674 + * If split_fsb lies in a hole or the first block of extents, just return 0. 5675 + */ 5676 + STATIC int 5677 + xfs_bmap_split_extent_at( 5678 + struct xfs_trans *tp, 5679 + struct xfs_inode *ip, 5680 + xfs_fileoff_t split_fsb, 5681 + xfs_fsblock_t *firstfsb, 5682 + struct xfs_bmap_free *free_list) 5683 + { 5684 + int whichfork = XFS_DATA_FORK; 5685 + struct xfs_btree_cur *cur = NULL; 5686 + struct xfs_bmbt_rec_host *gotp; 5687 + struct xfs_bmbt_irec got; 5688 + struct xfs_bmbt_irec new; /* split extent */ 5689 + struct xfs_mount *mp = ip->i_mount; 5690 + struct xfs_ifork *ifp; 5691 + xfs_fsblock_t gotblkcnt; /* new block count for got */ 5692 + xfs_extnum_t current_ext; 5693 + int error = 0; 5694 + int logflags = 0; 5695 + int i = 0; 5696 + 5697 + if (unlikely(XFS_TEST_ERROR( 5698 + (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS && 5699 + XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE), 5700 + mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) { 5701 + XFS_ERROR_REPORT("xfs_bmap_split_extent_at", 5702 + XFS_ERRLEVEL_LOW, mp); 5703 + return -EFSCORRUPTED; 5704 + } 5705 + 5706 + if (XFS_FORCED_SHUTDOWN(mp)) 5707 + return -EIO; 5708 + 5709 + ifp = XFS_IFORK_PTR(ip, whichfork); 5710 + if (!(ifp->if_flags & XFS_IFEXTENTS)) { 5711 + /* Read in all the extents */ 5712 + error = xfs_iread_extents(tp, ip, whichfork); 5713 + if (error) 5714 + return error; 5715 + } 5716 + 5717 + /* 5718 + * gotp can be null in 2 cases: 1) if there are no extents 5719 + * or 2) split_fsb lies in a hole beyond which there are 5720 + * no extents. Either way, we are done. 5721 + */ 5722 + gotp = xfs_iext_bno_to_ext(ifp, split_fsb, &current_ext); 5723 + if (!gotp) 5724 + return 0; 5725 + 5726 + xfs_bmbt_get_all(gotp, &got); 5727 + 5728 + /* 5729 + * Check split_fsb lies in a hole or the start boundary offset 5730 + * of the extent. 
5731 + */ 5732 + if (got.br_startoff >= split_fsb) 5733 + return 0; 5734 + 5735 + gotblkcnt = split_fsb - got.br_startoff; 5736 + new.br_startoff = split_fsb; 5737 + new.br_startblock = got.br_startblock + gotblkcnt; 5738 + new.br_blockcount = got.br_blockcount - gotblkcnt; 5739 + new.br_state = got.br_state; 5740 + 5741 + if (ifp->if_flags & XFS_IFBROOT) { 5742 + cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork); 5743 + cur->bc_private.b.firstblock = *firstfsb; 5744 + cur->bc_private.b.flist = free_list; 5745 + cur->bc_private.b.flags = 0; 5746 + error = xfs_bmbt_lookup_eq(cur, got.br_startoff, 5747 + got.br_startblock, 5748 + got.br_blockcount, 5749 + &i); 5750 + if (error) 5751 + goto del_cursor; 5752 + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, del_cursor); 5753 + } 5754 + 5755 + xfs_bmbt_set_blockcount(gotp, gotblkcnt); 5756 + got.br_blockcount = gotblkcnt; 5757 + 5758 + logflags = XFS_ILOG_CORE; 5759 + if (cur) { 5760 + error = xfs_bmbt_update(cur, got.br_startoff, 5761 + got.br_startblock, 5762 + got.br_blockcount, 5763 + got.br_state); 5764 + if (error) 5765 + goto del_cursor; 5766 + } else 5767 + logflags |= XFS_ILOG_DEXT; 5768 + 5769 + /* Add new extent */ 5770 + current_ext++; 5771 + xfs_iext_insert(ip, current_ext, 1, &new, 0); 5772 + XFS_IFORK_NEXT_SET(ip, whichfork, 5773 + XFS_IFORK_NEXTENTS(ip, whichfork) + 1); 5774 + 5775 + if (cur) { 5776 + error = xfs_bmbt_lookup_eq(cur, new.br_startoff, 5777 + new.br_startblock, new.br_blockcount, 5778 + &i); 5779 + if (error) 5780 + goto del_cursor; 5781 + XFS_WANT_CORRUPTED_GOTO(mp, i == 0, del_cursor); 5782 + cur->bc_rec.b.br_state = new.br_state; 5783 + 5784 + error = xfs_btree_insert(cur, &i); 5785 + if (error) 5786 + goto del_cursor; 5787 + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, del_cursor); 5788 + } 5789 + 5790 + /* 5791 + * Convert to a btree if necessary. 
5792 + */ 5793 + if (xfs_bmap_needs_btree(ip, whichfork)) { 5794 + int tmp_logflags; /* partial log flag return val */ 5795 + 5796 + ASSERT(cur == NULL); 5797 + error = xfs_bmap_extents_to_btree(tp, ip, firstfsb, free_list, 5798 + &cur, 0, &tmp_logflags, whichfork); 5799 + logflags |= tmp_logflags; 5800 + } 5801 + 5802 + del_cursor: 5803 + if (cur) { 5804 + cur->bc_private.b.allocated = 0; 5805 + xfs_btree_del_cursor(cur, 5806 + error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); 5807 + } 5808 + 5809 + if (logflags) 5810 + xfs_trans_log_inode(tp, ip, logflags); 5811 + return error; 5812 + } 5813 + 5814 + int 5815 + xfs_bmap_split_extent( 5816 + struct xfs_inode *ip, 5817 + xfs_fileoff_t split_fsb) 5818 + { 5819 + struct xfs_mount *mp = ip->i_mount; 5820 + struct xfs_trans *tp; 5821 + struct xfs_bmap_free free_list; 5822 + xfs_fsblock_t firstfsb; 5823 + int committed; 5824 + int error; 5825 + 5826 + tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); 5827 + error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, 5828 + XFS_DIOSTRAT_SPACE_RES(mp, 0), 0); 5829 + if (error) { 5830 + xfs_trans_cancel(tp, 0); 5831 + return error; 5832 + } 5833 + 5834 + xfs_ilock(ip, XFS_ILOCK_EXCL); 5835 + xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 5836 + 5837 + xfs_bmap_init(&free_list, &firstfsb); 5838 + 5839 + error = xfs_bmap_split_extent_at(tp, ip, split_fsb, 5840 + &firstfsb, &free_list); 5841 + if (error) 5842 + goto out; 5843 + 5844 + error = xfs_bmap_finish(&tp, &free_list, &committed); 5845 + if (error) 5846 + goto out; 5847 + 5848 + return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 5849 + 5850 + 5851 + out: 5852 + xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); 5751 5853 return error; 5752 5854 }
+10 -3
fs/xfs/libxfs/xfs_bmap.h
··· 166 166 */ 167 167 #define XFS_BMAP_MAX_SHIFT_EXTENTS 1 168 168 169 + enum shift_direction { 170 + SHIFT_LEFT = 0, 171 + SHIFT_RIGHT, 172 + }; 173 + 169 174 #ifdef DEBUG 170 175 void xfs_bmap_trace_exlist(struct xfs_inode *ip, xfs_extnum_t cnt, 171 176 int whichfork, unsigned long caller_ip); ··· 216 211 xfs_extnum_t num); 217 212 uint xfs_default_attroffset(struct xfs_inode *ip); 218 213 int xfs_bmap_shift_extents(struct xfs_trans *tp, struct xfs_inode *ip, 219 - xfs_fileoff_t start_fsb, xfs_fileoff_t offset_shift_fsb, 220 - int *done, xfs_fileoff_t *next_fsb, xfs_fsblock_t *firstblock, 221 - struct xfs_bmap_free *flist, int num_exts); 214 + xfs_fileoff_t *next_fsb, xfs_fileoff_t offset_shift_fsb, 215 + int *done, xfs_fileoff_t stop_fsb, xfs_fsblock_t *firstblock, 216 + struct xfs_bmap_free *flist, enum shift_direction direction, 217 + int num_exts); 218 + int xfs_bmap_split_extent(struct xfs_inode *ip, xfs_fileoff_t split_offset); 222 219 223 220 #endif /* __XFS_BMAP_H__ */
+96 -35
fs/xfs/xfs_bmap_util.c
··· 1376 1376 } 1377 1377 1378 1378 /* 1379 - * xfs_collapse_file_space() 1380 - * This routine frees disk space and shift extent for the given file. 1381 - * The first thing we do is to free data blocks in the specified range 1382 - * by calling xfs_free_file_space(). It would also sync dirty data 1383 - * and invalidate page cache over the region on which collapse range 1384 - * is working. And Shift extent records to the left to cover a hole. 1385 - * RETURNS: 1386 - * 0 on success 1387 - * errno on error 1388 - * 1379 + * @next_fsb will keep track of the extent currently undergoing shift. 1380 + * @stop_fsb will keep track of the extent at which we have to stop. 1381 + * If we are shifting left, we will start with block (offset + len) and 1382 + * shift each extent till last extent. 1383 + * If we are shifting right, we will start with last extent inside file space 1384 + * and continue until we reach the block corresponding to offset. 1389 1385 */ 1390 1386 int 1391 - xfs_collapse_file_space( 1392 - struct xfs_inode *ip, 1393 - xfs_off_t offset, 1394 - xfs_off_t len) 1387 + xfs_shift_file_space( 1388 + struct xfs_inode *ip, 1389 + xfs_off_t offset, 1390 + xfs_off_t len, 1391 + enum shift_direction direction) 1395 1392 { 1396 1393 int done = 0; 1397 1394 struct xfs_mount *mp = ip->i_mount; ··· 1397 1400 struct xfs_bmap_free free_list; 1398 1401 xfs_fsblock_t first_block; 1399 1402 int committed; 1400 - xfs_fileoff_t start_fsb; 1403 + xfs_fileoff_t stop_fsb; 1401 1404 xfs_fileoff_t next_fsb; 1402 1405 xfs_fileoff_t shift_fsb; 1403 1406 1404 - ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); 1407 + ASSERT(direction == SHIFT_LEFT || direction == SHIFT_RIGHT); 1405 1408 1406 - trace_xfs_collapse_file_space(ip); 1409 + if (direction == SHIFT_LEFT) { 1410 + next_fsb = XFS_B_TO_FSB(mp, offset + len); 1411 + stop_fsb = XFS_B_TO_FSB(mp, VFS_I(ip)->i_size); 1412 + } else { 1413 + /* 1414 + * If right shift, delegate the work of initialization of 1415 + * next_fsb to 
xfs_bmap_shift_extent as it has ilock held. 1416 + */ 1417 + next_fsb = NULLFSBLOCK; 1418 + stop_fsb = XFS_B_TO_FSB(mp, offset); 1419 + } 1407 1420 1408 - next_fsb = XFS_B_TO_FSB(mp, offset + len); 1409 1421 shift_fsb = XFS_B_TO_FSB(mp, len); 1410 - 1411 - error = xfs_free_file_space(ip, offset, len); 1412 - if (error) 1413 - return error; 1414 1422 1415 1423 /* 1416 1424 * Trim eofblocks to avoid shifting uninitialized post-eof preallocation ··· 1429 1427 1430 1428 /* 1431 1429 * Writeback and invalidate cache for the remainder of the file as we're 1432 - * about to shift down every extent from the collapse range to EOF. The 1433 - * free of the collapse range above might have already done some of 1434 - * this, but we shouldn't rely on it to do anything outside of the range 1435 - * that was freed. 1430 + * about to shift down every extent from offset to EOF. 1436 1431 */ 1437 1432 error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, 1438 - offset + len, -1); 1433 + offset, -1); 1439 1434 if (error) 1440 1435 return error; 1441 1436 error = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping, 1442 - (offset + len) >> PAGE_CACHE_SHIFT, -1); 1437 + offset >> PAGE_CACHE_SHIFT, -1); 1443 1438 if (error) 1444 1439 return error; 1440 + 1441 + /* 1442 + * The extent shiting code works on extent granularity. So, if 1443 + * stop_fsb is not the starting block of extent, we need to split 1444 + * the extent at stop_fsb. 
1445 + */ 1446 + if (direction == SHIFT_RIGHT) { 1447 + error = xfs_bmap_split_extent(ip, stop_fsb); 1448 + if (error) 1449 + return error; 1450 + } 1445 1451 1446 1452 while (!error && !done) { 1447 1453 tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); ··· 1474 1464 if (error) 1475 1465 goto out; 1476 1466 1477 - xfs_trans_ijoin(tp, ip, 0); 1467 + xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 1478 1468 1479 1469 xfs_bmap_init(&free_list, &first_block); 1480 1470 ··· 1482 1472 * We are using the write transaction in which max 2 bmbt 1483 1473 * updates are allowed 1484 1474 */ 1485 - start_fsb = next_fsb; 1486 - error = xfs_bmap_shift_extents(tp, ip, start_fsb, shift_fsb, 1487 - &done, &next_fsb, &first_block, &free_list, 1488 - XFS_BMAP_MAX_SHIFT_EXTENTS); 1475 + error = xfs_bmap_shift_extents(tp, ip, &next_fsb, shift_fsb, 1476 + &done, stop_fsb, &first_block, &free_list, 1477 + direction, XFS_BMAP_MAX_SHIFT_EXTENTS); 1489 1478 if (error) 1490 1479 goto out; 1491 1480 ··· 1493 1484 goto out; 1494 1485 1495 1486 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 1496 - xfs_iunlock(ip, XFS_ILOCK_EXCL); 1497 1487 } 1498 1488 1499 1489 return error; 1500 1490 1501 1491 out: 1502 1492 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); 1503 - xfs_iunlock(ip, XFS_ILOCK_EXCL); 1504 1493 return error; 1494 + } 1495 + 1496 + /* 1497 + * xfs_collapse_file_space() 1498 + * This routine frees disk space and shift extent for the given file. 1499 + * The first thing we do is to free data blocks in the specified range 1500 + * by calling xfs_free_file_space(). It would also sync dirty data 1501 + * and invalidate page cache over the region on which collapse range 1502 + * is working. And Shift extent records to the left to cover a hole. 
1503 + * RETURNS: 1504 + * 0 on success 1505 + * errno on error 1506 + * 1507 + */ 1508 + int 1509 + xfs_collapse_file_space( 1510 + struct xfs_inode *ip, 1511 + xfs_off_t offset, 1512 + xfs_off_t len) 1513 + { 1514 + int error; 1515 + 1516 + ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); 1517 + trace_xfs_collapse_file_space(ip); 1518 + 1519 + error = xfs_free_file_space(ip, offset, len); 1520 + if (error) 1521 + return error; 1522 + 1523 + return xfs_shift_file_space(ip, offset, len, SHIFT_LEFT); 1524 + } 1525 + 1526 + /* 1527 + * xfs_insert_file_space() 1528 + * This routine creates hole space by shifting extents for the given file. 1529 + * The first thing we do is to sync dirty data and invalidate page cache 1530 + * over the region on which insert range is working. And split an extent 1531 + * to two extents at given offset by calling xfs_bmap_split_extent. 1532 + * And shift all extent records which are lying between [offset, 1533 + * last allocated extent] to the right to reserve hole range. 1534 + * RETURNS: 1535 + * 0 on success 1536 + * errno on error 1537 + */ 1538 + int 1539 + xfs_insert_file_space( 1540 + struct xfs_inode *ip, 1541 + loff_t offset, 1542 + loff_t len) 1543 + { 1544 + ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); 1545 + trace_xfs_insert_file_space(ip); 1546 + 1547 + return xfs_shift_file_space(ip, offset, len, SHIFT_RIGHT); 1505 1548 } 1506 1549 1507 1550 /*
+2
fs/xfs/xfs_bmap_util.h
··· 63 63 xfs_off_t len); 64 64 int xfs_collapse_file_space(struct xfs_inode *, xfs_off_t offset, 65 65 xfs_off_t len); 66 + int xfs_insert_file_space(struct xfs_inode *, xfs_off_t offset, 67 + xfs_off_t len); 66 68 67 69 /* EOF block manipulation functions */ 68 70 bool xfs_can_free_eofblocks(struct xfs_inode *ip, bool force);
+39 -2
fs/xfs/xfs_file.c
··· 822 822 return ret; 823 823 } 824 824 825 + #define XFS_FALLOC_FL_SUPPORTED \ 826 + (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | \ 827 + FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE | \ 828 + FALLOC_FL_INSERT_RANGE) 829 + 825 830 STATIC long 826 831 xfs_file_fallocate( 827 832 struct file *file, ··· 840 835 enum xfs_prealloc_flags flags = 0; 841 836 uint iolock = XFS_IOLOCK_EXCL; 842 837 loff_t new_size = 0; 838 + bool do_file_insert = 0; 843 839 844 840 if (!S_ISREG(inode->i_mode)) 845 841 return -EINVAL; 846 - if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | 847 - FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE)) 842 + if (mode & ~XFS_FALLOC_FL_SUPPORTED) 848 843 return -EOPNOTSUPP; 849 844 850 845 xfs_ilock(ip, iolock); ··· 881 876 error = xfs_collapse_file_space(ip, offset, len); 882 877 if (error) 883 878 goto out_unlock; 879 + } else if (mode & FALLOC_FL_INSERT_RANGE) { 880 + unsigned blksize_mask = (1 << inode->i_blkbits) - 1; 881 + 882 + new_size = i_size_read(inode) + len; 883 + if (offset & blksize_mask || len & blksize_mask) { 884 + error = -EINVAL; 885 + goto out_unlock; 886 + } 887 + 888 + /* check the new inode size does not wrap through zero */ 889 + if (new_size > inode->i_sb->s_maxbytes) { 890 + error = -EFBIG; 891 + goto out_unlock; 892 + } 893 + 894 + /* Offset should be less than i_size */ 895 + if (offset >= i_size_read(inode)) { 896 + error = -EINVAL; 897 + goto out_unlock; 898 + } 899 + do_file_insert = 1; 884 900 } else { 885 901 flags |= XFS_PREALLOC_SET; 886 902 ··· 936 910 iattr.ia_valid = ATTR_SIZE; 937 911 iattr.ia_size = new_size; 938 912 error = xfs_setattr_size(ip, &iattr); 913 + if (error) 914 + goto out_unlock; 939 915 } 916 + 917 + /* 918 + * Perform hole insertion now that the file size has been 919 + * updated so that if we crash during the operation we don't 920 + * leave shifted extents past EOF and hence losing access to 921 + * the data that is contained within them. 
922 + */ 923 + if (do_file_insert) 924 + error = xfs_insert_file_space(ip, offset, len); 940 925 941 926 out_unlock: 942 927 xfs_iunlock(ip, iolock);
+1
fs/xfs/xfs_trace.h
··· 664 664 DEFINE_INODE_EVENT(xfs_free_file_space); 665 665 DEFINE_INODE_EVENT(xfs_zero_file_space); 666 666 DEFINE_INODE_EVENT(xfs_collapse_file_space); 667 + DEFINE_INODE_EVENT(xfs_insert_file_space); 667 668 DEFINE_INODE_EVENT(xfs_readdir); 668 669 #ifdef CONFIG_XFS_POSIX_ACL 669 670 DEFINE_INODE_EVENT(xfs_get_acl);
+6
include/linux/falloc.h
··· 21 21 #define FS_IOC_RESVSP _IOW('X', 40, struct space_resv) 22 22 #define FS_IOC_RESVSP64 _IOW('X', 42, struct space_resv) 23 23 24 + #define FALLOC_FL_SUPPORTED_MASK (FALLOC_FL_KEEP_SIZE | \ 25 + FALLOC_FL_PUNCH_HOLE | \ 26 + FALLOC_FL_COLLAPSE_RANGE | \ 27 + FALLOC_FL_ZERO_RANGE | \ 28 + FALLOC_FL_INSERT_RANGE) 29 + 24 30 #endif /* _FALLOC_H_ */
+17
include/uapi/linux/falloc.h
··· 41 41 */ 42 42 #define FALLOC_FL_ZERO_RANGE 0x10 43 43 44 + /* 45 + * FALLOC_FL_INSERT_RANGE is used to insert space within the file size without 46 + * overwriting any existing data. The contents of the file beyond offset are 47 + * shifted towards right by len bytes to create a hole. As such, this 48 + * operation will increase the size of the file by len bytes. 49 + * 50 + * Different filesystems may implement different limitations on the granularity 51 + * of the operation. Most will limit operations to filesystem block size 52 + * boundaries, but this boundary may be larger or smaller depending on 53 + * the filesystem and/or the configuration of the filesystem or file. 54 + * 55 + * Attempting to insert space using this flag at OR beyond the end of 56 + * the file is considered an illegal operation - just use ftruncate(2) or 57 + * fallocate(2) with mode 0 for such types of operations. 58 + */ 59 + #define FALLOC_FL_INSERT_RANGE 0x20 60 + 44 61 #endif /* _UAPI_FALLOC_H_ */