···40#include "xfs_rw.h"41#include "xfs_iomap.h"42#include <linux/mpage.h>043#include <linux/writeback.h>4445STATIC void xfs_count_page_state(struct page *, int *, int *, int *);46-STATIC void xfs_convert_page(struct inode *, struct page *, xfs_iomap_t *,47- struct writeback_control *wbc, void *, int, int);4849#if defined(XFS_RW_TRACE)50void···54 int mask)55{56 xfs_inode_t *ip;57- bhv_desc_t *bdp;58 vnode_t *vp = LINVFS_GET_VP(inode);59 loff_t isize = i_size_read(inode);60- loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;61 int delalloc = -1, unmapped = -1, unwritten = -1;6263 if (page_has_buffers(page))64 xfs_count_page_state(page, &delalloc, &unmapped, &unwritten);6566- bdp = vn_bhv_lookup(VN_BHV_HEAD(vp), &xfs_vnodeops);67- ip = XFS_BHVTOI(bdp);68 if (!ip->i_rwtrace)69 return;70···100 queue_work(xfsdatad_workqueue, &ioend->io_work);101}102000000103STATIC void104xfs_destroy_ioend(105 xfs_ioend_t *ioend)106{0000000107 vn_iowake(ioend->io_vnode);108 mempool_free(ioend, xfs_ioend_pool);109}110111/*0000000000000000000000000000112 * Issue transactions to convert a buffer range from unwritten113 * to written extents.114 */···161 vnode_t *vp = ioend->io_vnode;162 xfs_off_t offset = ioend->io_offset;163 size_t size = ioend->io_size;164- struct buffer_head *bh, *next;165 int error;166167 if (ioend->io_uptodate)168 VOP_BMAP(vp, offset, size, BMAPI_UNWRITTEN, NULL, NULL, error);169-170- /* ioend->io_buffer_head is only non-NULL for buffered I/O */171- for (bh = ioend->io_buffer_head; bh; bh = next) {172- next = bh->b_private;173-174- bh->b_end_io = NULL;175- clear_buffer_unwritten(bh);176- end_buffer_async_write(bh, ioend->io_uptodate);177- }178-179 xfs_destroy_ioend(ioend);180}181···176 */177STATIC xfs_ioend_t *178xfs_alloc_ioend(179- struct inode *inode)0180{181 xfs_ioend_t *ioend;182···190 */191 atomic_set(&ioend->io_remaining, 1);192 ioend->io_uptodate = 1; /* cleared if any I/O fails */00193 ioend->io_vnode = LINVFS_GET_VP(inode);194 ioend->io_buffer_head = NULL;0195 atomic_inc(&ioend->io_vnode->v_iocount);196 ioend->io_offset = 0;197 ioend->io_size = 0;198199- INIT_WORK(&ioend->io_work, xfs_end_bio_unwritten, ioend);00000200201 return ioend;202-}203-204-void205-linvfs_unwritten_done(206- struct buffer_head *bh,207- int uptodate)208-{209- xfs_ioend_t *ioend = bh->b_private;210- static spinlock_t unwritten_done_lock = SPIN_LOCK_UNLOCKED;211- unsigned long flags;212-213- ASSERT(buffer_unwritten(bh));214- bh->b_end_io = NULL;215-216- if (!uptodate)217- ioend->io_uptodate = 0;218-219- /*220- * Deep magic here. 
We reuse b_private in the buffer_heads to build221- * a chain for completing the I/O from user context after we've issued222- * a transaction to convert the unwritten extent.223- */224- spin_lock_irqsave(&unwritten_done_lock, flags);225- bh->b_private = ioend->io_buffer_head;226- ioend->io_buffer_head = bh;227- spin_unlock_irqrestore(&unwritten_done_lock, flags);228-229- xfs_finish_ioend(ioend);230}231232STATIC int···226 return -error;227}228229-/*230- * Finds the corresponding mapping in block @map array of the231- * given @offset within a @page.232- */233-STATIC xfs_iomap_t *234-xfs_offset_to_map(235- struct page *page,236 xfs_iomap_t *iomapp,237- unsigned long offset)238{239- loff_t full_offset; /* offset from start of file */00240241- ASSERT(offset < PAGE_CACHE_SIZE);000000000242243- full_offset = page->index; /* NB: using 64bit number */244- full_offset <<= PAGE_CACHE_SHIFT; /* offset from file start */245- full_offset += offset; /* offset from page start */246247- if (full_offset < iomapp->iomap_offset)248- return NULL;249- if (iomapp->iomap_offset + (iomapp->iomap_bsize -1) >= full_offset)250- return iomapp;251- return NULL;00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000252}253254STATIC void255xfs_map_at_offset(256- struct page *page,257 struct buffer_head *bh,258- unsigned long offset,259 int block_bits,260 xfs_iomap_t *iomapp)261{262 xfs_daddr_t bn;263- loff_t delta;264 int sector_shift;265266 ASSERT(!(iomapp->iomap_flags & IOMAP_HOLE));267 ASSERT(!(iomapp->iomap_flags & IOMAP_DELAY));268 ASSERT(iomapp->iomap_bn != IOMAP_DADDR_NULL);269270- delta = page->index;271- delta <<= PAGE_CACHE_SHIFT;272- delta += offset;273- delta -= iomapp->iomap_offset;274- delta >>= block_bits;275-276 sector_shift = block_bits - BBSHIFT;277- bn = iomapp->iomap_bn >> sector_shift;278- bn += delta;279- BUG_ON(!bn && !(iomapp->iomap_flags & IOMAP_REALTIME));0280 ASSERT((bn << sector_shift) >= iomapp->iomap_bn);281282 lock_buffer(bh);283 bh->b_blocknr = bn;284- bh->b_bdev = iomapp->iomap_target->pbr_bdev;285 set_buffer_mapped(bh);286 clear_buffer_delay(bh);0287}288289/*290- * Look for a page at index which is unlocked and contains our291- * unwritten extent flagged buffers at its head. 
Returns page292- * locked and with an extra reference count, and length of the293- * unwritten extent component on this page that we can write,294- * in units of filesystem blocks.295- */296-STATIC struct page *297-xfs_probe_unwritten_page(298- struct address_space *mapping,299- pgoff_t index,300- xfs_iomap_t *iomapp,301- xfs_ioend_t *ioend,302- unsigned long max_offset,303- unsigned long *fsbs,304- unsigned int bbits)305-{306- struct page *page;307-308- page = find_trylock_page(mapping, index);309- if (!page)310- return NULL;311- if (PageWriteback(page))312- goto out;313-314- if (page->mapping && page_has_buffers(page)) {315- struct buffer_head *bh, *head;316- unsigned long p_offset = 0;317-318- *fsbs = 0;319- bh = head = page_buffers(page);320- do {321- if (!buffer_unwritten(bh) || !buffer_uptodate(bh))322- break;323- if (!xfs_offset_to_map(page, iomapp, p_offset))324- break;325- if (p_offset >= max_offset)326- break;327- xfs_map_at_offset(page, bh, p_offset, bbits, iomapp);328- set_buffer_unwritten_io(bh);329- bh->b_private = ioend;330- p_offset += bh->b_size;331- (*fsbs)++;332- } while ((bh = bh->b_this_page) != head);333-334- if (p_offset)335- return page;336- }337-338-out:339- unlock_page(page);340- return NULL;341-}342-343-/*344- * Look for a page at index which is unlocked and not mapped345- * yet - clustering for mmap write case.346 */347STATIC unsigned int348-xfs_probe_unmapped_page(349- struct address_space *mapping,350- pgoff_t index,351- unsigned int pg_offset)352{353- struct page *page;354 int ret = 0;355356- page = find_trylock_page(mapping, index);357- if (!page)358- return 0;359 if (PageWriteback(page))360- goto out;361362 if (page->mapping && PageDirty(page)) {363 if (page_has_buffers(page)) {···487488 bh = head = page_buffers(page);489 do {490- if (buffer_mapped(bh) || !buffer_uptodate(bh))00491 break;492 ret += bh->b_size;493 if (ret >= pg_offset)494 break;495 } while ((bh = bh->b_this_page) != head);496 } else497- ret = PAGE_CACHE_SIZE;498 }499500-out:501- unlock_page(page);502 return ret;503}504505-STATIC unsigned int506-xfs_probe_unmapped_cluster(507 struct inode *inode,508 struct page *startpage,509 struct buffer_head *bh,510- struct buffer_head *head)0511{0512 pgoff_t tindex, tlast, tloff;513- unsigned int pg_offset, len, total = 0;514- struct address_space *mapping = inode->i_mapping;515516 /* First sum forwards in this page */517 do {518- if (buffer_mapped(bh))519- break;520 total += bh->b_size;521 } while ((bh = bh->b_this_page) != head);522523- /* If we reached the end of the page, sum forwards in524- * following pages.525- */526- if (bh == head) {527- tlast = i_size_read(inode) >> PAGE_CACHE_SHIFT;528- /* Prune this back to avoid pathological behavior */529- tloff = min(tlast, startpage->index + 64);530- for (tindex = startpage->index + 1; tindex < tloff; tindex++) {531- len = xfs_probe_unmapped_page(mapping, tindex,532- PAGE_CACHE_SIZE);533- if (!len)534- return total;00000000000000000000000000535 total += len;0536 }537- if (tindex == tlast &&538- (pg_offset = i_size_read(inode) & (PAGE_CACHE_SIZE - 1))) {539- total += xfs_probe_unmapped_page(mapping,540- tindex, pg_offset);541- }542 }0543 return total;544}545546/*547- * Probe for a given page (index) in the inode and test if it is delayed548- * and without unwritten buffers. 
Returns page locked and with an extra549- * reference count.550 */551-STATIC struct page *552-xfs_probe_delalloc_page(553- struct inode *inode,554- pgoff_t index)555{556- struct page *page;557-558- page = find_trylock_page(inode->i_mapping, index);559- if (!page)560- return NULL;561 if (PageWriteback(page))562- goto out;563564 if (page->mapping && page_has_buffers(page)) {565 struct buffer_head *bh, *head;···589590 bh = head = page_buffers(page);591 do {592- if (buffer_unwritten(bh)) {593- acceptable = 0;00000594 break;595- } else if (buffer_delay(bh)) {596- acceptable = 1;597- }598 } while ((bh = bh->b_this_page) != head);599600 if (acceptable)601- return page;602 }603604-out:605- unlock_page(page);606- return NULL;607-}608-609-STATIC int610-xfs_map_unwritten(611- struct inode *inode,612- struct page *start_page,613- struct buffer_head *head,614- struct buffer_head *curr,615- unsigned long p_offset,616- int block_bits,617- xfs_iomap_t *iomapp,618- struct writeback_control *wbc,619- int startio,620- int all_bh)621-{622- struct buffer_head *bh = curr;623- xfs_iomap_t *tmp;624- xfs_ioend_t *ioend;625- loff_t offset;626- unsigned long nblocks = 0;627-628- offset = start_page->index;629- offset <<= PAGE_CACHE_SHIFT;630- offset += p_offset;631-632- ioend = xfs_alloc_ioend(inode);633-634- /* First map forwards in the page consecutive buffers635- * covering this unwritten extent636- */637- do {638- if (!buffer_unwritten(bh))639- break;640- tmp = xfs_offset_to_map(start_page, iomapp, p_offset);641- if (!tmp)642- break;643- xfs_map_at_offset(start_page, bh, p_offset, block_bits, iomapp);644- set_buffer_unwritten_io(bh);645- bh->b_private = ioend;646- p_offset += bh->b_size;647- nblocks++;648- } while ((bh = bh->b_this_page) != head);649-650- atomic_add(nblocks, &ioend->io_remaining);651-652- /* If we reached the end of the page, map forwards in any653- * following pages which are also covered by this extent.654- */655- if (bh == head) {656- struct address_space *mapping = inode->i_mapping;657- pgoff_t tindex, tloff, tlast;658- unsigned long bs;659- unsigned int pg_offset, bbits = inode->i_blkbits;660- struct page *page;661-662- tlast = i_size_read(inode) >> PAGE_CACHE_SHIFT;663- tloff = (iomapp->iomap_offset + iomapp->iomap_bsize) >> PAGE_CACHE_SHIFT;664- tloff = min(tlast, tloff);665- for (tindex = start_page->index + 1; tindex < tloff; tindex++) {666- page = xfs_probe_unwritten_page(mapping,667- tindex, iomapp, ioend,668- PAGE_CACHE_SIZE, &bs, bbits);669- if (!page)670- break;671- nblocks += bs;672- atomic_add(bs, &ioend->io_remaining);673- xfs_convert_page(inode, page, iomapp, wbc, ioend,674- startio, all_bh);675- /* stop if converting the next page might add676- * enough blocks that the corresponding byte677- * count won't fit in our ulong page buf length */678- if (nblocks >= ((ULONG_MAX - PAGE_SIZE) >> block_bits))679- goto enough;680- }681-682- if (tindex == tlast &&683- (pg_offset = (i_size_read(inode) & (PAGE_CACHE_SIZE - 1)))) {684- page = xfs_probe_unwritten_page(mapping,685- tindex, iomapp, ioend,686- pg_offset, &bs, bbits);687- if (page) {688- nblocks += bs;689- atomic_add(bs, &ioend->io_remaining);690- xfs_convert_page(inode, page, iomapp, wbc, ioend,691- startio, all_bh);692- if (nblocks >= ((ULONG_MAX - PAGE_SIZE) >> block_bits))693- goto enough;694- }695- }696- }697-698-enough:699- ioend->io_size = (xfs_off_t)nblocks << block_bits;700- ioend->io_offset = offset;701- xfs_finish_ioend(ioend);702 return 0;703-}704-705-STATIC void706-xfs_submit_page(707- struct page *page,708- struct 
writeback_control *wbc,709- struct buffer_head *bh_arr[],710- int bh_count,711- int probed_page,712- int clear_dirty)713-{714- struct buffer_head *bh;715- int i;716-717- BUG_ON(PageWriteback(page));718- if (bh_count)719- set_page_writeback(page);720- if (clear_dirty)721- clear_page_dirty(page);722- unlock_page(page);723-724- if (bh_count) {725- for (i = 0; i < bh_count; i++) {726- bh = bh_arr[i];727- mark_buffer_async_write(bh);728- if (buffer_unwritten(bh))729- set_buffer_unwritten_io(bh);730- set_buffer_uptodate(bh);731- clear_buffer_dirty(bh);732- }733-734- for (i = 0; i < bh_count; i++)735- submit_bh(WRITE, bh_arr[i]);736-737- if (probed_page && clear_dirty)738- wbc->nr_to_write--; /* Wrote an "extra" page */739- }740}741742/*···612 * delalloc/unwritten pages only, for the original page it is possible613 * that the page has no mapping at all.614 */615-STATIC void616xfs_convert_page(617 struct inode *inode,618 struct page *page,619- xfs_iomap_t *iomapp,00620 struct writeback_control *wbc,621- void *private,622 int startio,623 int all_bh)624{625- struct buffer_head *bh_arr[MAX_BUF_PER_PAGE], *bh, *head;626- xfs_iomap_t *mp = iomapp, *tmp;627- unsigned long offset, end_offset;628- int index = 0;629 int bbits = inode->i_blkbits;630 int len, page_dirty;00631632- end_offset = (i_size_read(inode) & (PAGE_CACHE_SIZE - 1));000000000633634 /*635 * page_dirty is initially a count of buffers on the page before636 * EOF and is decrememted as we move each into a cleanable state.000000000637 */638- len = 1 << inode->i_blkbits;639- end_offset = max(end_offset, PAGE_CACHE_SIZE);640- end_offset = roundup(end_offset, len);641- page_dirty = end_offset / len;642643- offset = 0;00000644 bh = head = page_buffers(page);645 do {646 if (offset >= end_offset)647 break;648- if (!(PageUptodate(page) || buffer_uptodate(bh)))649- continue;650- if (buffer_mapped(bh) && all_bh &&651- !(buffer_unwritten(bh) || buffer_delay(bh))) {652- if (startio) {653- lock_buffer(bh);654- bh_arr[index++] = bh;655- page_dirty--;656- }657 continue;658 }659- tmp = xfs_offset_to_map(page, mp, offset);660- if (!tmp)661- continue;662- ASSERT(!(tmp->iomap_flags & IOMAP_HOLE));663- ASSERT(!(tmp->iomap_flags & IOMAP_DELAY));664665- /* If this is a new unwritten extent buffer (i.e. one666- * that we haven't passed in private data for, we must667- * now map this buffer too.668- */669- if (buffer_unwritten(bh) && !bh->b_end_io) {670- ASSERT(tmp->iomap_flags & IOMAP_UNWRITTEN);671- xfs_map_unwritten(inode, page, head, bh, offset,672- bbits, tmp, wbc, startio, all_bh);673- } else if (! 
(buffer_unwritten(bh) && buffer_locked(bh))) {674- xfs_map_at_offset(page, bh, offset, bbits, tmp);675- if (buffer_unwritten(bh)) {676- set_buffer_unwritten_io(bh);677- bh->b_private = private;678- ASSERT(private);000000000000000000000679 }680 }681- if (startio) {682- bh_arr[index++] = bh;683- } else {684- set_buffer_dirty(bh);685- unlock_buffer(bh);686- mark_buffer_dirty(bh);687- }688- page_dirty--;689 } while (offset += len, (bh = bh->b_this_page) != head);690691- if (startio && index) {692- xfs_submit_page(page, wbc, bh_arr, index, 1, !page_dirty);693- } else {694- unlock_page(page);000000000000695 }000000696}697698/*···750 struct inode *inode,751 pgoff_t tindex,752 xfs_iomap_t *iomapp,0753 struct writeback_control *wbc,754 int startio,755 int all_bh,756 pgoff_t tlast)757{758- struct page *page;0759760- for (; tindex <= tlast; tindex++) {761- page = xfs_probe_delalloc_page(inode, tindex);762- if (!page)00763 break;764- xfs_convert_page(inode, page, iomapp, wbc, NULL,765- startio, all_bh);00000000766 }767}768···805 int startio,806 int unmapped) /* also implies page uptodate */807{808- struct buffer_head *bh_arr[MAX_BUF_PER_PAGE], *bh, *head;809- xfs_iomap_t *iomp, iomap;0810 loff_t offset;811 unsigned long p_offset = 0;0812 __uint64_t end_offset;813 pgoff_t end_index, last_index, tlast;814- int len, err, i, cnt = 0, uptodate = 1;815- int flags;816- int page_dirty;0817818 /* wait for other IO threads? */819- flags = (startio && wbc->sync_mode != WB_SYNC_NONE) ? 0 : BMAPI_TRYLOCK;0820821 /* Is this page beyond the end of the file? */822 offset = i_size_read(inode);···835 }836 }837838- end_offset = min_t(unsigned long long,839- (loff_t)(page->index + 1) << PAGE_CACHE_SHIFT, offset);840- offset = (loff_t)page->index << PAGE_CACHE_SHIFT;841-842 /*843 * page_dirty is initially a count of buffers on the page before844 * EOF and is decrememted as we move each into a cleanable state.845- */00000000000846 len = 1 << inode->i_blkbits;847- p_offset = max(p_offset, PAGE_CACHE_SIZE);848- p_offset = roundup(p_offset, len);0849 page_dirty = p_offset / len;850851- iomp = NULL;852- p_offset = 0;853 bh = head = page_buffers(page);00000854855 do {856 if (offset >= end_offset)857 break;858 if (!buffer_uptodate(bh))859 uptodate = 0;860- if (!(PageUptodate(page) || buffer_uptodate(bh)) && !startio)00000861 continue;862-863- if (iomp) {864- iomp = xfs_offset_to_map(page, &iomap, p_offset);865 }000866867 /*868 * First case, map an unwritten extent and prepare for869 * extent state conversion transaction on completion.870- */871- if (buffer_unwritten(bh)) {872- if (!startio)873- continue;874- if (!iomp) {875- err = xfs_map_blocks(inode, offset, len, &iomap,876- BMAPI_WRITE|BMAPI_IGNSTATE);877- if (err) {878- goto error;879- }880- iomp = xfs_offset_to_map(page, &iomap,881- p_offset);882- }883- if (iomp) {884- if (!bh->b_end_io) {885- err = xfs_map_unwritten(inode, page,886- head, bh, p_offset,887- inode->i_blkbits, iomp,888- wbc, startio, unmapped);889- if (err) {890- goto error;891- }892- } else {893- set_bit(BH_Lock, &bh->b_state);894- }895- BUG_ON(!buffer_locked(bh));896- bh_arr[cnt++] = bh;897- page_dirty--;898- }899- /*900 * Second case, allocate space for a delalloc buffer.901 * We can return EAGAIN here in the release page case.902- */903- } else if (buffer_delay(bh)) {904- if (!iomp) {905- err = xfs_map_blocks(inode, offset, len, &iomap,906- BMAPI_ALLOCATE | flags);907- if (err) {908- goto error;909- }910- iomp = xfs_offset_to_map(page, &iomap,911- p_offset);00000000000000912 }913- if (iomp) {914- 
xfs_map_at_offset(page, bh, p_offset,915- inode->i_blkbits, iomp);000000000000000916 if (startio) {917- bh_arr[cnt++] = bh;00918 } else {919 set_buffer_dirty(bh);920 unlock_buffer(bh);921 mark_buffer_dirty(bh);922 }923 page_dirty--;000000000000000000000000000000924 }925 } else if ((buffer_uptodate(bh) || PageUptodate(page)) &&926 (unmapped || startio)) {927-928- if (!buffer_mapped(bh)) {929- int size;930-931- /*932- * Getting here implies an unmapped buffer933- * was found, and we are in a path where we934- * need to write the whole page out.935- */936- if (!iomp) {937- size = xfs_probe_unmapped_cluster(938- inode, page, bh, head);939- err = xfs_map_blocks(inode, offset,940- size, &iomap,941- BMAPI_WRITE|BMAPI_MMAP);942- if (err) {943- goto error;944- }945- iomp = xfs_offset_to_map(page, &iomap,946- p_offset);947- }948- if (iomp) {949- xfs_map_at_offset(page,950- bh, p_offset,951- inode->i_blkbits, iomp);952- if (startio) {953- bh_arr[cnt++] = bh;954- } else {955- set_buffer_dirty(bh);956- unlock_buffer(bh);957- mark_buffer_dirty(bh);958- }959- page_dirty--;960- }961- } else if (startio) {962- if (buffer_uptodate(bh) &&963- !test_and_set_bit(BH_Lock, &bh->b_state)) {964- bh_arr[cnt++] = bh;965- page_dirty--;966- }967- }968 }969- } while (offset += len, p_offset += len,970- ((bh = bh->b_this_page) != head));000971972 if (uptodate && bh == head)973 SetPageUptodate(page);974975- if (startio) {976- xfs_submit_page(page, wbc, bh_arr, cnt, 0, !page_dirty);977- }978979- if (iomp) {980- offset = (iomp->iomap_offset + iomp->iomap_bsize - 1) >>981 PAGE_CACHE_SHIFT;982 tlast = min_t(pgoff_t, offset, last_index);983- xfs_cluster_write(inode, page->index + 1, iomp, wbc,984- startio, unmapped, tlast);985 }000986987 return page_dirty;988989error:990- for (i = 0; i < cnt; i++) {991- unlock_buffer(bh_arr[i]);992- }993994 /*995 * If it's delalloc and we have nowhere to put it,···1009 * us to try again.1010 */1011 if (err != -EAGAIN) {1012- if (!unmapped) {1013 block_invalidatepage(page, 0);1014- }1015 ClearPageUptodate(page);1016 }1017 return err;···1074 }10751076 /* If this is a realtime file, data might be on a new device */1077- bh_result->b_bdev = iomap.iomap_target->pbr_bdev;10781079 /* If we previously allocated a block out beyond eof and1080 * we are now coming back to use it then we will need to···1186 if (error)1187 return -error;11881189- iocb->private = xfs_alloc_ioend(inode);11901191 ret = blockdev_direct_IO_own_locking(rw, iocb, inode,1192- iomap.iomap_target->pbr_bdev,1193 iov, offset, nr_segs,1194 linvfs_get_blocks_direct,1195 linvfs_end_io_direct);
···40#include "xfs_rw.h"41#include "xfs_iomap.h"42#include <linux/mpage.h>43+#include <linux/pagevec.h>44#include <linux/writeback.h>4546STATIC void xfs_count_page_state(struct page *, int *, int *, int *);004748#if defined(XFS_RW_TRACE)49void···55 int mask)56{57 xfs_inode_t *ip;058 vnode_t *vp = LINVFS_GET_VP(inode);59 loff_t isize = i_size_read(inode);60+ loff_t offset = page_offset(page);61 int delalloc = -1, unmapped = -1, unwritten = -1;6263 if (page_has_buffers(page))64 xfs_count_page_state(page, &delalloc, &unmapped, &unwritten);6566+ ip = xfs_vtoi(vp);067 if (!ip->i_rwtrace)68 return;69···103 queue_work(xfsdatad_workqueue, &ioend->io_work);104}105106+/*107+ * We're now finished for good with this ioend structure.108+ * Update the page state via the associated buffer_heads,109+ * release holds on the inode and bio, and finally free110+ * up memory. Do not use the ioend after this.111+ */112STATIC void113xfs_destroy_ioend(114 xfs_ioend_t *ioend)115{116+ struct buffer_head *bh, *next;117+118+ for (bh = ioend->io_buffer_head; bh; bh = next) {119+ next = bh->b_private;120+ bh->b_end_io(bh, ioend->io_uptodate);121+ }122+123 vn_iowake(ioend->io_vnode);124 mempool_free(ioend, xfs_ioend_pool);125}126127/*128+ * Buffered IO write completion for delayed allocate extents.129+ * TODO: Update ondisk isize now that we know the file data130+ * has been flushed (i.e. the notorious "NULL file" problem).131+ */132+STATIC void133+xfs_end_bio_delalloc(134+ void *data)135+{136+ xfs_ioend_t *ioend = data;137+138+ xfs_destroy_ioend(ioend);139+}140+141+/*142+ * Buffered IO write completion for regular, written extents.143+ */144+STATIC void145+xfs_end_bio_written(146+ void *data)147+{148+ xfs_ioend_t *ioend = data;149+150+ xfs_destroy_ioend(ioend);151+}152+153+/*154+ * IO write completion for unwritten extents.155+ *156 * Issue transactions to convert a buffer range from unwritten157 * to written extents.158 */···123 vnode_t *vp = ioend->io_vnode;124 xfs_off_t offset = ioend->io_offset;125 size_t size = ioend->io_size;0126 int error;127128 if (ioend->io_uptodate)129 VOP_BMAP(vp, offset, size, BMAPI_UNWRITTEN, NULL, NULL, error);0000000000130 xfs_destroy_ioend(ioend);131}132···149 */150STATIC xfs_ioend_t *151xfs_alloc_ioend(152+ struct inode *inode,153+ unsigned int type)154{155 xfs_ioend_t *ioend;156···162 */163 atomic_set(&ioend->io_remaining, 1);164 ioend->io_uptodate = 1; /* cleared if any I/O fails */165+ ioend->io_list = NULL;166+ ioend->io_type = type;167 ioend->io_vnode = LINVFS_GET_VP(inode);168 ioend->io_buffer_head = NULL;169+ ioend->io_buffer_tail = NULL;170 atomic_inc(&ioend->io_vnode->v_iocount);171 ioend->io_offset = 0;172 ioend->io_size = 0;173174+ if (type == IOMAP_UNWRITTEN)175+ INIT_WORK(&ioend->io_work, xfs_end_bio_unwritten, ioend);176+ else if (type == IOMAP_DELAY)177+ INIT_WORK(&ioend->io_work, xfs_end_bio_delalloc, ioend);178+ else179+ INIT_WORK(&ioend->io_work, xfs_end_bio_written, ioend);180181 return ioend;0000000000000000000000000000182}183184STATIC int···218 return -error;219}220221+STATIC inline int222+xfs_iomap_valid(00000223 xfs_iomap_t *iomapp,224+ loff_t offset)225{226+ return offset >= iomapp->iomap_offset &&227+ offset < iomapp->iomap_offset + iomapp->iomap_bsize;228+}229230+/*231+ * BIO completion handler for buffered IO.232+ */233+STATIC int234+xfs_end_bio(235+ struct bio *bio,236+ unsigned int bytes_done,237+ int error)238+{239+ xfs_ioend_t *ioend = bio->bi_private;240241+ if (bio->bi_size)242+ return 1;0243244+ ASSERT(ioend);245+ ASSERT(atomic_read(&bio->bi_cnt) >= 
1);246+247+ /* Toss bio and pass work off to an xfsdatad thread */248+ if (!test_bit(BIO_UPTODATE, &bio->bi_flags))249+ ioend->io_uptodate = 0;250+ bio->bi_private = NULL;251+ bio->bi_end_io = NULL;252+253+ bio_put(bio);254+ xfs_finish_ioend(ioend);255+ return 0;256+}257+258+STATIC void259+xfs_submit_ioend_bio(260+ xfs_ioend_t *ioend,261+ struct bio *bio)262+{263+ atomic_inc(&ioend->io_remaining);264+265+ bio->bi_private = ioend;266+ bio->bi_end_io = xfs_end_bio;267+268+ submit_bio(WRITE, bio);269+ ASSERT(!bio_flagged(bio, BIO_EOPNOTSUPP));270+ bio_put(bio);271+}272+273+STATIC struct bio *274+xfs_alloc_ioend_bio(275+ struct buffer_head *bh)276+{277+ struct bio *bio;278+ int nvecs = bio_get_nr_vecs(bh->b_bdev);279+280+ do {281+ bio = bio_alloc(GFP_NOIO, nvecs);282+ nvecs >>= 1;283+ } while (!bio);284+285+ ASSERT(bio->bi_private == NULL);286+ bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9);287+ bio->bi_bdev = bh->b_bdev;288+ bio_get(bio);289+ return bio;290+}291+292+STATIC void293+xfs_start_buffer_writeback(294+ struct buffer_head *bh)295+{296+ ASSERT(buffer_mapped(bh));297+ ASSERT(buffer_locked(bh));298+ ASSERT(!buffer_delay(bh));299+ ASSERT(!buffer_unwritten(bh));300+301+ mark_buffer_async_write(bh);302+ set_buffer_uptodate(bh);303+ clear_buffer_dirty(bh);304+}305+306+STATIC void307+xfs_start_page_writeback(308+ struct page *page,309+ struct writeback_control *wbc,310+ int clear_dirty,311+ int buffers)312+{313+ ASSERT(PageLocked(page));314+ ASSERT(!PageWriteback(page));315+ set_page_writeback(page);316+ if (clear_dirty)317+ clear_page_dirty(page);318+ unlock_page(page);319+ if (!buffers) {320+ end_page_writeback(page);321+ wbc->pages_skipped++; /* We didn't write this page */322+ }323+}324+325+static inline int bio_add_buffer(struct bio *bio, struct buffer_head *bh)326+{327+ return bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));328+}329+330+/*331+ * Submit all of the bios for all of the ioends we have saved up,332+ * covering the initial writepage page and also any probed pages.333+ */334+STATIC void335+xfs_submit_ioend(336+ xfs_ioend_t *ioend)337+{338+ xfs_ioend_t *next;339+ struct buffer_head *bh;340+ struct bio *bio;341+ sector_t lastblock = 0;342+343+ do {344+ next = ioend->io_list;345+ bio = NULL;346+347+ for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) {348+ xfs_start_buffer_writeback(bh);349+350+ if (!bio) {351+ retry:352+ bio = xfs_alloc_ioend_bio(bh);353+ } else if (bh->b_blocknr != lastblock + 1) {354+ xfs_submit_ioend_bio(ioend, bio);355+ goto retry;356+ }357+358+ if (bio_add_buffer(bio, bh) != bh->b_size) {359+ xfs_submit_ioend_bio(ioend, bio);360+ goto retry;361+ }362+363+ lastblock = bh->b_blocknr;364+ }365+ if (bio)366+ xfs_submit_ioend_bio(ioend, bio);367+ xfs_finish_ioend(ioend);368+ } while ((ioend = next) != NULL);369+}370+371+/*372+ * Cancel submission of all buffer_heads so far in this endio.373+ * Toss the endio too. 
Only ever called for the initial page374+ * in a writepage request, so only ever one page.375+ */376+STATIC void377+xfs_cancel_ioend(378+ xfs_ioend_t *ioend)379+{380+ xfs_ioend_t *next;381+ struct buffer_head *bh, *next_bh;382+383+ do {384+ next = ioend->io_list;385+ bh = ioend->io_buffer_head;386+ do {387+ next_bh = bh->b_private;388+ clear_buffer_async_write(bh);389+ unlock_buffer(bh);390+ } while ((bh = next_bh) != NULL);391+392+ vn_iowake(ioend->io_vnode);393+ mempool_free(ioend, xfs_ioend_pool);394+ } while ((ioend = next) != NULL);395+}396+397+/*398+ * Test to see if we've been building up a completion structure for399+ * earlier buffers -- if so, we try to append to this ioend if we400+ * can, otherwise we finish off any current ioend and start another.401+ * Return true if we've finished the given ioend.402+ */403+STATIC void404+xfs_add_to_ioend(405+ struct inode *inode,406+ struct buffer_head *bh,407+ xfs_off_t offset,408+ unsigned int type,409+ xfs_ioend_t **result,410+ int need_ioend)411+{412+ xfs_ioend_t *ioend = *result;413+414+ if (!ioend || need_ioend || type != ioend->io_type) {415+ xfs_ioend_t *previous = *result;416+417+ ioend = xfs_alloc_ioend(inode, type);418+ ioend->io_offset = offset;419+ ioend->io_buffer_head = bh;420+ ioend->io_buffer_tail = bh;421+ if (previous)422+ previous->io_list = ioend;423+ *result = ioend;424+ } else {425+ ioend->io_buffer_tail->b_private = bh;426+ ioend->io_buffer_tail = bh;427+ }428+429+ bh->b_private = NULL;430+ ioend->io_size += bh->b_size;431}432433STATIC void434xfs_map_at_offset(0435 struct buffer_head *bh,436+ loff_t offset,437 int block_bits,438 xfs_iomap_t *iomapp)439{440 xfs_daddr_t bn;0441 int sector_shift;442443 ASSERT(!(iomapp->iomap_flags & IOMAP_HOLE));444 ASSERT(!(iomapp->iomap_flags & IOMAP_DELAY));445 ASSERT(iomapp->iomap_bn != IOMAP_DADDR_NULL);446000000447 sector_shift = block_bits - BBSHIFT;448+ bn = (iomapp->iomap_bn >> sector_shift) +449+ ((offset - iomapp->iomap_offset) >> block_bits);450+451+ ASSERT(bn || (iomapp->iomap_flags & IOMAP_REALTIME));452 ASSERT((bn << sector_shift) >= iomapp->iomap_bn);453454 lock_buffer(bh);455 bh->b_blocknr = bn;456+ bh->b_bdev = iomapp->iomap_target->bt_bdev;457 set_buffer_mapped(bh);458 clear_buffer_delay(bh);459+ clear_buffer_unwritten(bh);460}461462/*463+ * Look for a page at index that is suitable for clustering.0000000000000000000000000000000000000000000000000000000464 */465STATIC unsigned int466+xfs_probe_page(467+ struct page *page,468+ unsigned int pg_offset,469+ int mapped)470{0471 int ret = 0;472000473 if (PageWriteback(page))474+ return 0;475476 if (page->mapping && PageDirty(page)) {477 if (page_has_buffers(page)) {···357358 bh = head = page_buffers(page);359 do {360+ if (!buffer_uptodate(bh))361+ break;362+ if (mapped != buffer_mapped(bh))363 break;364 ret += bh->b_size;365 if (ret >= pg_offset)366 break;367 } while ((bh = bh->b_this_page) != head);368 } else369+ ret = mapped ? 
0 : PAGE_CACHE_SIZE;370 }37100372 return ret;373}374375+STATIC size_t376+xfs_probe_cluster(377 struct inode *inode,378 struct page *startpage,379 struct buffer_head *bh,380+ struct buffer_head *head,381+ int mapped)382{383+ struct pagevec pvec;384 pgoff_t tindex, tlast, tloff;385+ size_t total = 0;386+ int done = 0, i;387388 /* First sum forwards in this page */389 do {390+ if (mapped != buffer_mapped(bh))391+ return total;392 total += bh->b_size;393 } while ((bh = bh->b_this_page) != head);394395+ /* if we reached the end of the page, sum forwards in following pages */396+ tlast = i_size_read(inode) >> PAGE_CACHE_SHIFT;397+ tindex = startpage->index + 1;398+399+ /* Prune this back to avoid pathological behavior */400+ tloff = min(tlast, startpage->index + 64);401+402+ pagevec_init(&pvec, 0);403+ while (!done && tindex <= tloff) {404+ unsigned len = min_t(pgoff_t, PAGEVEC_SIZE, tlast - tindex + 1);405+406+ if (!pagevec_lookup(&pvec, inode->i_mapping, tindex, len))407+ break;408+409+ for (i = 0; i < pagevec_count(&pvec); i++) {410+ struct page *page = pvec.pages[i];411+ size_t pg_offset, len = 0;412+413+ if (tindex == tlast) {414+ pg_offset =415+ i_size_read(inode) & (PAGE_CACHE_SIZE - 1);416+ if (!pg_offset) {417+ done = 1;418+ break;419+ }420+ } else421+ pg_offset = PAGE_CACHE_SIZE;422+423+ if (page->index == tindex && !TestSetPageLocked(page)) {424+ len = xfs_probe_page(page, pg_offset, mapped);425+ unlock_page(page);426+ }427+428+ if (!len) {429+ done = 1;430+ break;431+ }432+433 total += len;434+ tindex++;435 }436+437+ pagevec_release(&pvec);438+ cond_resched();00439 }440+441 return total;442}443444/*445+ * Test if a given page is suitable for writing as part of an unwritten446+ * or delayed allocate extent.0447 */448+STATIC int449+xfs_is_delayed_page(450+ struct page *page,451+ unsigned int type)452{00000453 if (PageWriteback(page))454+ return 0;455456 if (page->mapping && page_has_buffers(page)) {457 struct buffer_head *bh, *head;···437438 bh = head = page_buffers(page);439 do {440+ if (buffer_unwritten(bh))441+ acceptable = (type == IOMAP_UNWRITTEN);442+ else if (buffer_delay(bh))443+ acceptable = (type == IOMAP_DELAY);444+ else if (buffer_mapped(bh))445+ acceptable = (type == 0);446+ else447 break;000448 } while ((bh = bh->b_this_page) != head);449450 if (acceptable)451+ return 1;452 }45300000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000454 return 0;0000000000000000000000000000000000000455}456457/*···593 * delalloc/unwritten pages only, for the original page it is possible594 * that the page has no mapping at all.595 */596+STATIC int597xfs_convert_page(598 struct inode *inode,599 struct page *page,600+ loff_t tindex,601+ xfs_iomap_t *mp,602+ xfs_ioend_t **ioendp,603 struct writeback_control *wbc,0604 int startio,605 int all_bh)606{607+ struct buffer_head *bh, *head;608+ xfs_off_t end_offset;609+ unsigned long p_offset;610+ unsigned int type;611 int bbits = inode->i_blkbits;612 int len, page_dirty;613+ int count = 0, done = 0, uptodate = 1;614+ xfs_off_t offset = page_offset(page);615616+ if (page->index != tindex)617+ goto fail;618+ if (TestSetPageLocked(page))619+ goto fail;620+ if (PageWriteback(page))621+ goto fail_unlock_page;622+ if (page->mapping != inode->i_mapping)623+ goto fail_unlock_page;624+ if (!xfs_is_delayed_page(page, (*ioendp)->io_type))625+ goto fail_unlock_page;626627 /*628 * page_dirty is initially a count of buffers on the page before629 * EOF and is decrememted as we move each into a cleanable state.630+ *631+ * 
Derivation:632+ *633+ * End offset is the highest offset that this page should represent.634+ * If we are on the last page, (end_offset & (PAGE_CACHE_SIZE - 1))635+ * will evaluate non-zero and be less than PAGE_CACHE_SIZE and636+ * hence give us the correct page_dirty count. On any other page,637+ * it will be zero and in that case we need page_dirty to be the638+ * count of buffers on the page.639 */640+ end_offset = min_t(unsigned long long,641+ (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT,642+ i_size_read(inode));0643644+ len = 1 << inode->i_blkbits;645+ p_offset = min_t(unsigned long, end_offset & (PAGE_CACHE_SIZE - 1),646+ PAGE_CACHE_SIZE);647+ p_offset = p_offset ? roundup(p_offset, len) : PAGE_CACHE_SIZE;648+ page_dirty = p_offset / len;649+650 bh = head = page_buffers(page);651 do {652 if (offset >= end_offset)653 break;654+ if (!buffer_uptodate(bh))655+ uptodate = 0;656+ if (!(PageUptodate(page) || buffer_uptodate(bh))) {657+ done = 1;00000658 continue;659 }00000660661+ if (buffer_unwritten(bh) || buffer_delay(bh)) {662+ if (buffer_unwritten(bh))663+ type = IOMAP_UNWRITTEN;664+ else665+ type = IOMAP_DELAY;666+667+ if (!xfs_iomap_valid(mp, offset)) {668+ done = 1;669+ continue;670+ }671+672+ ASSERT(!(mp->iomap_flags & IOMAP_HOLE));673+ ASSERT(!(mp->iomap_flags & IOMAP_DELAY));674+675+ xfs_map_at_offset(bh, offset, bbits, mp);676+ if (startio) {677+ xfs_add_to_ioend(inode, bh, offset,678+ type, ioendp, done);679+ } else {680+ set_buffer_dirty(bh);681+ unlock_buffer(bh);682+ mark_buffer_dirty(bh);683+ }684+ page_dirty--;685+ count++;686+ } else {687+ type = 0;688+ if (buffer_mapped(bh) && all_bh && startio) {689+ lock_buffer(bh);690+ xfs_add_to_ioend(inode, bh, offset,691+ type, ioendp, done);692+ count++;693+ page_dirty--;694+ } else {695+ done = 1;696 }697 }00000000698 } while (offset += len, (bh = bh->b_this_page) != head);699700+ if (uptodate && bh == head)701+ SetPageUptodate(page);702+703+ if (startio) {704+ if (count) {705+ struct backing_dev_info *bdi;706+707+ bdi = inode->i_mapping->backing_dev_info;708+ if (bdi_write_congested(bdi)) {709+ wbc->encountered_congestion = 1;710+ done = 1;711+ } else if (--wbc->nr_to_write <= 0) {712+ done = 1;713+ }714+ }715+ xfs_start_page_writeback(page, wbc, !page_dirty, count);716 }717+718+ return done;719+ fail_unlock_page:720+ unlock_page(page);721+ fail:722+ return 1;723}724725/*···685 struct inode *inode,686 pgoff_t tindex,687 xfs_iomap_t *iomapp,688+ xfs_ioend_t **ioendp,689 struct writeback_control *wbc,690 int startio,691 int all_bh,692 pgoff_t tlast)693{694+ struct pagevec pvec;695+ int done = 0, i;696697+ pagevec_init(&pvec, 0);698+ while (!done && tindex <= tlast) {699+ unsigned len = min_t(pgoff_t, PAGEVEC_SIZE, tlast - tindex + 1);700+701+ if (!pagevec_lookup(&pvec, inode->i_mapping, tindex, len))702 break;703+704+ for (i = 0; i < pagevec_count(&pvec); i++) {705+ done = xfs_convert_page(inode, pvec.pages[i], tindex++,706+ iomapp, ioendp, wbc, startio, all_bh);707+ if (done)708+ break;709+ }710+711+ pagevec_release(&pvec);712+ cond_resched();713 }714}715···728 int startio,729 int unmapped) /* also implies page uptodate */730{731+ struct buffer_head *bh, *head;732+ xfs_iomap_t iomap;733+ xfs_ioend_t *ioend = NULL, *iohead = NULL;734 loff_t offset;735 unsigned long p_offset = 0;736+ unsigned int type;737 __uint64_t end_offset;738 pgoff_t end_index, last_index, tlast;739+ ssize_t size, len;740+ int flags, err, iomap_valid = 0, uptodate = 1;741+ int page_dirty, count = 0, trylock_flag = 0;742+ int all_bh = unmapped;743744 /* 
wait for other IO threads? */745+ if (startio && (wbc->sync_mode == WB_SYNC_NONE && wbc->nonblocking))746+ trylock_flag |= BMAPI_TRYLOCK;747748 /* Is this page beyond the end of the file? */749 offset = i_size_read(inode);···754 }755 }7560000757 /*758 * page_dirty is initially a count of buffers on the page before759 * EOF and is decrememted as we move each into a cleanable state.760+ *761+ * Derivation:762+ *763+ * End offset is the highest offset that this page should represent.764+ * If we are on the last page, (end_offset & (PAGE_CACHE_SIZE - 1))765+ * will evaluate non-zero and be less than PAGE_CACHE_SIZE and766+ * hence give us the correct page_dirty count. On any other page,767+ * it will be zero and in that case we need page_dirty to be the768+ * count of buffers on the page.769+ */770+ end_offset = min_t(unsigned long long,771+ (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT, offset);772 len = 1 << inode->i_blkbits;773+ p_offset = min_t(unsigned long, end_offset & (PAGE_CACHE_SIZE - 1),774+ PAGE_CACHE_SIZE);775+ p_offset = p_offset ? roundup(p_offset, len) : PAGE_CACHE_SIZE;776 page_dirty = p_offset / len;77700778 bh = head = page_buffers(page);779+ offset = page_offset(page);780+ flags = -1;781+ type = 0;782+783+ /* TODO: cleanup count and page_dirty */784785 do {786 if (offset >= end_offset)787 break;788 if (!buffer_uptodate(bh))789 uptodate = 0;790+ if (!(PageUptodate(page) || buffer_uptodate(bh)) && !startio) {791+ /*792+ * the iomap is actually still valid, but the ioend793+ * isn't. shouldn't happen too often.794+ */795+ iomap_valid = 0;796 continue;000797 }798+799+ if (iomap_valid)800+ iomap_valid = xfs_iomap_valid(&iomap, offset);801802 /*803 * First case, map an unwritten extent and prepare for804 * extent state conversion transaction on completion.805+ *00000000000000000000000000000806 * Second case, allocate space for a delalloc buffer.807 * We can return EAGAIN here in the release page case.808+ *809+ * Third case, an unmapped buffer was found, and we are810+ * in a path where we need to write the whole page out.811+ */812+ if (buffer_unwritten(bh) || buffer_delay(bh) ||813+ ((buffer_uptodate(bh) || PageUptodate(page)) &&814+ !buffer_mapped(bh) && (unmapped || startio))) {815+ /*816+ * Make sure we don't use a read-only iomap817+ */818+ if (flags == BMAPI_READ)819+ iomap_valid = 0;820+821+ if (buffer_unwritten(bh)) {822+ type = IOMAP_UNWRITTEN;823+ flags = BMAPI_WRITE|BMAPI_IGNSTATE;824+ } else if (buffer_delay(bh)) {825+ type = IOMAP_DELAY;826+ flags = BMAPI_ALLOCATE;827+ if (!startio)828+ flags |= trylock_flag;829+ } else {830+ type = IOMAP_NEW;831+ flags = BMAPI_WRITE|BMAPI_MMAP;832 }833+834+ if (!iomap_valid) {835+ if (type == IOMAP_NEW) {836+ size = xfs_probe_cluster(inode,837+ page, bh, head, 0);838+ } else {839+ size = len;840+ }841+842+ err = xfs_map_blocks(inode, offset, size,843+ &iomap, flags);844+ if (err)845+ goto error;846+ iomap_valid = xfs_iomap_valid(&iomap, offset);847+ }848+ if (iomap_valid) {849+ xfs_map_at_offset(bh, offset,850+ inode->i_blkbits, &iomap);851 if (startio) {852+ xfs_add_to_ioend(inode, bh, offset,853+ type, &ioend,854+ !iomap_valid);855 } else {856 set_buffer_dirty(bh);857 unlock_buffer(bh);858 mark_buffer_dirty(bh);859 }860 page_dirty--;861+ count++;862+ }863+ } else if (buffer_uptodate(bh) && startio) {864+ /*865+ * we got here because the buffer is already mapped.866+ * That means it must already have extents allocated867+ * underneath it. 
Map the extent by reading it.868+ */869+ if (!iomap_valid || type != 0) {870+ flags = BMAPI_READ;871+ size = xfs_probe_cluster(inode, page, bh,872+ head, 1);873+ err = xfs_map_blocks(inode, offset, size,874+ &iomap, flags);875+ if (err)876+ goto error;877+ iomap_valid = xfs_iomap_valid(&iomap, offset);878+ }879+880+ type = 0;881+ if (!test_and_set_bit(BH_Lock, &bh->b_state)) {882+ ASSERT(buffer_mapped(bh));883+ if (iomap_valid)884+ all_bh = 1;885+ xfs_add_to_ioend(inode, bh, offset, type,886+ &ioend, !iomap_valid);887+ page_dirty--;888+ count++;889+ } else {890+ iomap_valid = 0;891 }892 } else if ((buffer_uptodate(bh) || PageUptodate(page)) &&893 (unmapped || startio)) {894+ iomap_valid = 0;0000000000000000000000000000000000000000895 }896+897+ if (!iohead)898+ iohead = ioend;899+900+ } while (offset += len, ((bh = bh->b_this_page) != head));901902 if (uptodate && bh == head)903 SetPageUptodate(page);904905+ if (startio)906+ xfs_start_page_writeback(page, wbc, 1, count);0907908+ if (ioend && iomap_valid) {909+ offset = (iomap.iomap_offset + iomap.iomap_bsize - 1) >>910 PAGE_CACHE_SHIFT;911 tlast = min_t(pgoff_t, offset, last_index);912+ xfs_cluster_write(inode, page->index + 1, &iomap, &ioend,913+ wbc, startio, all_bh, tlast);914 }915+916+ if (iohead)917+ xfs_submit_ioend(iohead);918919 return page_dirty;920921error:922+ if (iohead)923+ xfs_cancel_ioend(iohead);0924925 /*926 * If it's delalloc and we have nowhere to put it,···916 * us to try again.917 */918 if (err != -EAGAIN) {919+ if (!unmapped)920 block_invalidatepage(page, 0);0921 ClearPageUptodate(page);922 }923 return err;···982 }983984 /* If this is a realtime file, data might be on a new device */985+ bh_result->b_bdev = iomap.iomap_target->bt_bdev;986987 /* If we previously allocated a block out beyond eof and988 * we are now coming back to use it then we will need to···1094 if (error)1095 return -error;10961097+ iocb->private = xfs_alloc_ioend(inode, IOMAP_UNWRITTEN);10981099 ret = blockdev_direct_IO_own_locking(rw, iocb, inode,1100+ iomap.iomap_target->bt_bdev,1101 iov, offset, nr_segs,1102 linvfs_get_blocks_direct,1103 linvfs_end_io_direct);
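The page_dirty derivation spelled out in the comments above (in xfs_convert_page and xfs_page_state_convert) boils down to a little end-offset arithmetic. Below is a standalone, userspace-only sketch of just that calculation; it assumes 4 KiB pages and a hypothetical page_dirty_count() helper, and is an illustration of the arithmetic, not part of the patch.

/* Standalone illustration of the page_dirty derivation above.
 * Assumptions: 4 KiB pages; the helper name is hypothetical. */
#include <stdio.h>

#define PAGE_CACHE_SHIFT	12
#define PAGE_CACHE_SIZE		(1UL << PAGE_CACHE_SHIFT)

/* Count of buffers on the page that sit before EOF. */
static int page_dirty_count(unsigned long page_index, unsigned long long isize,
			    int blkbits)
{
	unsigned long long end_offset;
	unsigned long p_offset, len = 1UL << blkbits;

	/* Highest file offset this page should represent. */
	end_offset = (unsigned long long)(page_index + 1) << PAGE_CACHE_SHIFT;
	if (end_offset > isize)
		end_offset = isize;

	/*
	 * On the last page this is non-zero and less than PAGE_CACHE_SIZE;
	 * on any other page it is zero and we want a full page's worth.
	 */
	p_offset = end_offset & (PAGE_CACHE_SIZE - 1);
	p_offset = p_offset ? ((p_offset + len - 1) / len) * len : PAGE_CACHE_SIZE;

	return p_offset / len;
}

int main(void)
{
	/* 10000-byte file, 512-byte blocks: page 2 covers bytes 8192..9999 */
	printf("%d\n", page_dirty_count(2, 10000, 9));	/* 4 blocks before EOF */
	printf("%d\n", page_dirty_count(1, 10000, 9));	/* full page: 8 */
	return 0;
}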
fs/xfs/linux-2.6/xfs_aops.h: +10
···
 typedef void (*xfs_ioend_func_t)(void *);
 
+/*
+ * xfs_ioend struct manages large extent writes for XFS.
+ * It can manage several multi-page bio's at once.
+ */
 typedef struct xfs_ioend {
+	struct xfs_ioend	*io_list;	/* next ioend in chain */
+	unsigned int		io_type;	/* delalloc / unwritten */
 	unsigned int		io_uptodate;	/* I/O status register */
 	atomic_t		io_remaining;	/* hold count */
 	struct vnode		*io_vnode;	/* file being written to */
 	struct buffer_head	*io_buffer_head;/* buffer linked list head */
+	struct buffer_head	*io_buffer_tail;/* buffer linked list tail */
 	size_t			io_size;	/* size of the extent */
 	xfs_off_t		io_offset;	/* offset in the file */
 	struct work_struct	io_work;	/* xfsdatad work queue */
 } xfs_ioend_t;
+
+extern struct address_space_operations linvfs_aops;
+extern int linvfs_get_block(struct inode *, sector_t, struct buffer_head *, int);
 
 #endif	/* __XFS_IOPS_H__ */
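The two new list fields above are what let the rewritten aops code batch buffers: buffer_heads are chained through b_private from io_buffer_head to io_buffer_tail, and ioends of differing types are chained through io_list. The sketch below is a standalone userspace model of that append logic, with stand-in struct definitions and a fixed-size pool instead of the mempool; the names echo the patch but none of this is kernel code.

/* Userspace model of ioend/buffer chaining. Types are stand-ins. */
#include <stddef.h>
#include <stdio.h>

struct buf {				/* stand-in for struct buffer_head */
	size_t		size;
	struct buf	*private;	/* next buffer in the ioend chain */
};

struct ioend {				/* stand-in for xfs_ioend_t */
	struct ioend	*io_list;		/* next ioend in chain */
	unsigned int	io_type;
	struct buf	*io_buffer_head;	/* first buffer */
	struct buf	*io_buffer_tail;	/* last buffer, O(1) append */
	size_t		io_size;
};

/* Append bh to *result, or start a new ioend when the type changes. */
static void add_to_ioend(struct buf *bh, unsigned int type,
			 struct ioend *pool, int *used, struct ioend **result)
{
	struct ioend *ioend = *result;

	if (!ioend || type != ioend->io_type) {
		struct ioend *prev = ioend;

		ioend = &pool[(*used)++];
		ioend->io_type = type;
		ioend->io_buffer_head = ioend->io_buffer_tail = bh;
		if (prev)
			prev->io_list = ioend;
		*result = ioend;
	} else {
		ioend->io_buffer_tail->private = bh;
		ioend->io_buffer_tail = bh;
	}
	bh->private = NULL;
	ioend->io_size += bh->size;
}

int main(void)
{
	struct buf bufs[4] = { {512}, {512}, {512}, {512} };
	struct ioend pool[4] = { 0 };
	struct ioend *head = NULL, *cur = NULL;
	int used = 0, types[4] = { 1, 1, 2, 2 };	/* e.g. unwritten, delalloc */

	for (int i = 0; i < 4; i++) {
		add_to_ioend(&bufs[i], types[i], pool, &used, &cur);
		if (!head)
			head = cur;
	}
	for (struct ioend *io = head; io; io = io->io_list)
		printf("ioend type %u, %zu bytes\n", io->io_type, io->io_size);
	return 0;
}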
fs/xfs/linux-2.6/xfs_buf.c: +661, -702
···31#include <linux/kthread.h>32#include "xfs_linux.h"3334-STATIC kmem_cache_t *pagebuf_zone;35-STATIC kmem_shaker_t pagebuf_shake;036STATIC int xfsbufd_wakeup(int, gfp_t);37-STATIC void pagebuf_delwri_queue(xfs_buf_t *, int);3839STATIC struct workqueue_struct *xfslogd_workqueue;40struct workqueue_struct *xfsdatad_workqueue;4142-#ifdef PAGEBUF_TRACE43void44-pagebuf_trace(45- xfs_buf_t *pb,46 char *id,47 void *data,48 void *ra)49{50- ktrace_enter(pagebuf_trace_buf,51- pb, id,52- (void *)(unsigned long)pb->pb_flags,53- (void *)(unsigned long)pb->pb_hold.counter,54- (void *)(unsigned long)pb->pb_sema.count.counter,55 (void *)current,56 data, ra,57- (void *)(unsigned long)((pb->pb_file_offset>>32) & 0xffffffff),58- (void *)(unsigned long)(pb->pb_file_offset & 0xffffffff),59- (void *)(unsigned long)pb->pb_buffer_length,60 NULL, NULL, NULL, NULL, NULL);61}62-ktrace_t *pagebuf_trace_buf;63-#define PAGEBUF_TRACE_SIZE 409664-#define PB_TRACE(pb, id, data) \65- pagebuf_trace(pb, id, (void *)data, (void *)__builtin_return_address(0))66#else67-#define PB_TRACE(pb, id, data) do { } while (0)68#endif6970-#ifdef PAGEBUF_LOCK_TRACKING71-# define PB_SET_OWNER(pb) ((pb)->pb_last_holder = current->pid)72-# define PB_CLEAR_OWNER(pb) ((pb)->pb_last_holder = -1)73-# define PB_GET_OWNER(pb) ((pb)->pb_last_holder)74#else75-# define PB_SET_OWNER(pb) do { } while (0)76-# define PB_CLEAR_OWNER(pb) do { } while (0)77-# define PB_GET_OWNER(pb) do { } while (0)78#endif7980-#define pb_to_gfp(flags) \81- ((((flags) & PBF_READ_AHEAD) ? __GFP_NORETRY : \82- ((flags) & PBF_DONT_BLOCK) ? GFP_NOFS : GFP_KERNEL) | __GFP_NOWARN)8384-#define pb_to_km(flags) \85- (((flags) & PBF_DONT_BLOCK) ? KM_NOFS : KM_SLEEP)8687-#define pagebuf_allocate(flags) \88- kmem_zone_alloc(pagebuf_zone, pb_to_km(flags))89-#define pagebuf_deallocate(pb) \90- kmem_zone_free(pagebuf_zone, (pb));9192/*93- * Page Region interfaces.94 *95- * For pages in filesystems where the blocksize is smaller than the96- * pagesize, we use the page->private field (long) to hold a bitmap97- * of uptodate regions within the page.98 *99- * Each such region is "bytes per page / bits per long" bytes long.100 *101- * NBPPR == number-of-bytes-per-page-region102- * BTOPR == bytes-to-page-region (rounded up)103- * BTOPRT == bytes-to-page-region-truncated (rounded down)104 */105#if (BITS_PER_LONG == 32)106#define PRSHIFT (PAGE_CACHE_SHIFT - 5) /* (32 == 1<<5) */···160}161162/*163- * Mapping of multi-page buffers into contiguous virtual space164 */165166typedef struct a_list {···173STATIC DEFINE_SPINLOCK(as_lock);174175/*176- * Try to batch vunmaps because they are costly.177 */178STATIC void179free_address(···216}217218/*219- * Internal pagebuf object manipulation220 */221222STATIC void223-_pagebuf_initialize(224- xfs_buf_t *pb,225 xfs_buftarg_t *target,226- loff_t range_base,227 size_t range_length,228- page_buf_flags_t flags)229{230 /*231- * We don't want certain flags to appear in pb->pb_flags.232 */233- flags &= ~(PBF_LOCK|PBF_MAPPED|PBF_DONT_BLOCK|PBF_READ_AHEAD);234235- memset(pb, 0, sizeof(xfs_buf_t));236- atomic_set(&pb->pb_hold, 1);237- init_MUTEX_LOCKED(&pb->pb_iodonesema);238- INIT_LIST_HEAD(&pb->pb_list);239- INIT_LIST_HEAD(&pb->pb_hash_list);240- init_MUTEX_LOCKED(&pb->pb_sema); /* held, no waiters */241- PB_SET_OWNER(pb);242- pb->pb_target = target;243- pb->pb_file_offset = range_base;244 /*245 * Set buffer_length and count_desired to the same value initially.246 * I/O routines should use count_desired, which will be the same in247 * most cases but may be reset 
(e.g. XFS recovery).248 */249- pb->pb_buffer_length = pb->pb_count_desired = range_length;250- pb->pb_flags = flags;251- pb->pb_bn = XFS_BUF_DADDR_NULL;252- atomic_set(&pb->pb_pin_count, 0);253- init_waitqueue_head(&pb->pb_waiters);254255- XFS_STATS_INC(pb_create);256- PB_TRACE(pb, "initialize", target);257}258259/*260- * Allocate a page array capable of holding a specified number261- * of pages, and point the page buf at it.262 */263STATIC int264-_pagebuf_get_pages(265- xfs_buf_t *pb,266 int page_count,267- page_buf_flags_t flags)268{269 /* Make sure that we have a page list */270- if (pb->pb_pages == NULL) {271- pb->pb_offset = page_buf_poff(pb->pb_file_offset);272- pb->pb_page_count = page_count;273- if (page_count <= PB_PAGES) {274- pb->pb_pages = pb->pb_page_array;275 } else {276- pb->pb_pages = kmem_alloc(sizeof(struct page *) *277- page_count, pb_to_km(flags));278- if (pb->pb_pages == NULL)279 return -ENOMEM;280 }281- memset(pb->pb_pages, 0, sizeof(struct page *) * page_count);282 }283 return 0;284}285286/*287- * Frees pb_pages if it was malloced.288 */289STATIC void290-_pagebuf_free_pages(291 xfs_buf_t *bp)292{293- if (bp->pb_pages != bp->pb_page_array) {294- kmem_free(bp->pb_pages,295- bp->pb_page_count * sizeof(struct page *));296 }297}298···300 * Releases the specified buffer.301 *302 * The modification state of any associated pages is left unchanged.303- * The buffer most not be on any hash - use pagebuf_rele instead for304 * hashed and refcounted buffers305 */306void307-pagebuf_free(308 xfs_buf_t *bp)309{310- PB_TRACE(bp, "free", 0);311312- ASSERT(list_empty(&bp->pb_hash_list));313314- if (bp->pb_flags & _PBF_PAGE_CACHE) {315 uint i;316317- if ((bp->pb_flags & PBF_MAPPED) && (bp->pb_page_count > 1))318- free_address(bp->pb_addr - bp->pb_offset);319320- for (i = 0; i < bp->pb_page_count; i++)321- page_cache_release(bp->pb_pages[i]);322- _pagebuf_free_pages(bp);323- } else if (bp->pb_flags & _PBF_KMEM_ALLOC) {324 /*325- * XXX(hch): bp->pb_count_desired might be incorrect (see326- * pagebuf_associate_memory for details), but fortunately327 * the Linux version of kmem_free ignores the len argument..328 */329- kmem_free(bp->pb_addr, bp->pb_count_desired);330- _pagebuf_free_pages(bp);331 }332333- pagebuf_deallocate(bp);334}335336/*337 * Finds all pages for buffer in question and builds it's page list.338 */339STATIC int340-_pagebuf_lookup_pages(341 xfs_buf_t *bp,342 uint flags)343{344- struct address_space *mapping = bp->pb_target->pbr_mapping;345- size_t blocksize = bp->pb_target->pbr_bsize;346- size_t size = bp->pb_count_desired;347 size_t nbytes, offset;348- gfp_t gfp_mask = pb_to_gfp(flags);349 unsigned short page_count, i;350 pgoff_t first;351- loff_t end;352 int error;353354- end = bp->pb_file_offset + bp->pb_buffer_length;355- page_count = page_buf_btoc(end) - page_buf_btoct(bp->pb_file_offset);356357- error = _pagebuf_get_pages(bp, page_count, flags);358 if (unlikely(error))359 return error;360- bp->pb_flags |= _PBF_PAGE_CACHE;361362- offset = bp->pb_offset;363- first = bp->pb_file_offset >> PAGE_CACHE_SHIFT;364365- for (i = 0; i < bp->pb_page_count; i++) {366 struct page *page;367 uint retries = 0;368369 retry:370 page = find_or_create_page(mapping, first + i, gfp_mask);371 if (unlikely(page == NULL)) {372- if (flags & PBF_READ_AHEAD) {373- bp->pb_page_count = i;374- for (i = 0; i < bp->pb_page_count; i++)375- unlock_page(bp->pb_pages[i]);376 return -ENOMEM;377 }378···388 "deadlock in %s (mode:0x%x)\n",389 __FUNCTION__, gfp_mask);390391- XFS_STATS_INC(pb_page_retries);392 
xfsbufd_wakeup(0, gfp_mask);393 blk_congestion_wait(WRITE, HZ/50);394 goto retry;395 }396397- XFS_STATS_INC(pb_page_found);398399 nbytes = min_t(size_t, size, PAGE_CACHE_SIZE - offset);400 size -= nbytes;···402 if (!PageUptodate(page)) {403 page_count--;404 if (blocksize >= PAGE_CACHE_SIZE) {405- if (flags & PBF_READ)406- bp->pb_locked = 1;407 } else if (!PagePrivate(page)) {408 if (test_page_region(page, offset, nbytes))409 page_count++;410 }411 }412413- bp->pb_pages[i] = page;414 offset = 0;415 }416417- if (!bp->pb_locked) {418- for (i = 0; i < bp->pb_page_count; i++)419- unlock_page(bp->pb_pages[i]);420 }421422- if (page_count == bp->pb_page_count)423- bp->pb_flags |= PBF_DONE;424425- PB_TRACE(bp, "lookup_pages", (long)page_count);426 return error;427}428···430 * Map buffer into kernel address-space if nessecary.431 */432STATIC int433-_pagebuf_map_pages(434 xfs_buf_t *bp,435 uint flags)436{437 /* A single page buffer is always mappable */438- if (bp->pb_page_count == 1) {439- bp->pb_addr = page_address(bp->pb_pages[0]) + bp->pb_offset;440- bp->pb_flags |= PBF_MAPPED;441- } else if (flags & PBF_MAPPED) {442 if (as_list_len > 64)443 purge_addresses();444- bp->pb_addr = vmap(bp->pb_pages, bp->pb_page_count,445- VM_MAP, PAGE_KERNEL);446- if (unlikely(bp->pb_addr == NULL))447 return -ENOMEM;448- bp->pb_addr += bp->pb_offset;449- bp->pb_flags |= PBF_MAPPED;450 }451452 return 0;···457 */458459/*460- * _pagebuf_find461- *462- * Looks up, and creates if absent, a lockable buffer for463 * a given range of an inode. The buffer is returned464 * locked. If other overlapping buffers exist, they are465 * released before the new buffer is created and locked,···465 * are unlocked. No I/O is implied by this call.466 */467xfs_buf_t *468-_pagebuf_find(469 xfs_buftarg_t *btp, /* block device target */470- loff_t ioff, /* starting offset of range */471 size_t isize, /* length of range */472- page_buf_flags_t flags, /* PBF_TRYLOCK */473- xfs_buf_t *new_pb)/* newly allocated buffer */474{475- loff_t range_base;476 size_t range_length;477 xfs_bufhash_t *hash;478- xfs_buf_t *pb, *n;479480 range_base = (ioff << BBSHIFT);481 range_length = (isize << BBSHIFT);482483 /* Check for IOs smaller than the sector size / not sector aligned */484- ASSERT(!(range_length < (1 << btp->pbr_sshift)));485- ASSERT(!(range_base & (loff_t)btp->pbr_smask));486487 hash = &btp->bt_hash[hash_long((unsigned long)ioff, btp->bt_hashshift)];488489 spin_lock(&hash->bh_lock);490491- list_for_each_entry_safe(pb, n, &hash->bh_list, pb_hash_list) {492- ASSERT(btp == pb->pb_target);493- if (pb->pb_file_offset == range_base &&494- pb->pb_buffer_length == range_length) {495 /*496- * If we look at something bring it to the497 * front of the list for next time.498 */499- atomic_inc(&pb->pb_hold);500- list_move(&pb->pb_hash_list, &hash->bh_list);501 goto found;502 }503 }504505 /* No match found */506- if (new_pb) {507- _pagebuf_initialize(new_pb, btp, range_base,508 range_length, flags);509- new_pb->pb_hash = hash;510- list_add(&new_pb->pb_hash_list, &hash->bh_list);511 } else {512- XFS_STATS_INC(pb_miss_locked);513 }514515 spin_unlock(&hash->bh_lock);516- return new_pb;517518found:519 spin_unlock(&hash->bh_lock);···522 * if this does not work then we need to drop the523 * spinlock and do a hard attempt on the semaphore.524 */525- if (down_trylock(&pb->pb_sema)) {526- if (!(flags & PBF_TRYLOCK)) {527 /* wait for buffer ownership */528- PB_TRACE(pb, "get_lock", 0);529- pagebuf_lock(pb);530- XFS_STATS_INC(pb_get_locked_waited);531 } else {532 /* We 
asked for a trylock and failed, no need533 * to look at file offset and length here, we534- * know that this pagebuf at least overlaps our535- * pagebuf and is locked, therefore our buffer536- * either does not exist, or is this buffer537 */538-539- pagebuf_rele(pb);540- XFS_STATS_INC(pb_busy_locked);541- return (NULL);542 }543 } else {544 /* trylock worked */545- PB_SET_OWNER(pb);546 }547548- if (pb->pb_flags & PBF_STALE) {549- ASSERT((pb->pb_flags & _PBF_DELWRI_Q) == 0);550- pb->pb_flags &= PBF_MAPPED;551 }552- PB_TRACE(pb, "got_lock", 0);553- XFS_STATS_INC(pb_get_locked);554- return (pb);555}556557/*558- * xfs_buf_get_flags assembles a buffer covering the specified range.559- *560 * Storage in memory for all portions of the buffer will be allocated,561 * although backing storage may not be.562 */563xfs_buf_t *564-xfs_buf_get_flags( /* allocate a buffer */565 xfs_buftarg_t *target,/* target for buffer */566- loff_t ioff, /* starting offset of range */567 size_t isize, /* length of range */568- page_buf_flags_t flags) /* PBF_TRYLOCK */569{570- xfs_buf_t *pb, *new_pb;571 int error = 0, i;572573- new_pb = pagebuf_allocate(flags);574- if (unlikely(!new_pb))575 return NULL;576577- pb = _pagebuf_find(target, ioff, isize, flags, new_pb);578- if (pb == new_pb) {579- error = _pagebuf_lookup_pages(pb, flags);580 if (error)581 goto no_buffer;582 } else {583- pagebuf_deallocate(new_pb);584- if (unlikely(pb == NULL))585 return NULL;586 }587588- for (i = 0; i < pb->pb_page_count; i++)589- mark_page_accessed(pb->pb_pages[i]);590591- if (!(pb->pb_flags & PBF_MAPPED)) {592- error = _pagebuf_map_pages(pb, flags);593 if (unlikely(error)) {594 printk(KERN_WARNING "%s: failed to map pages\n",595 __FUNCTION__);···595 }596 }597598- XFS_STATS_INC(pb_get);599600 /*601 * Always fill in the block number now, the mapped cases can do602 * their own overlay of this later.603 */604- pb->pb_bn = ioff;605- pb->pb_count_desired = pb->pb_buffer_length;606607- PB_TRACE(pb, "get", (unsigned long)flags);608- return pb;609610 no_buffer:611- if (flags & (PBF_LOCK | PBF_TRYLOCK))612- pagebuf_unlock(pb);613- pagebuf_rele(pb);614 return NULL;615}616617xfs_buf_t *618xfs_buf_read_flags(619 xfs_buftarg_t *target,620- loff_t ioff,621 size_t isize,622- page_buf_flags_t flags)623{624- xfs_buf_t *pb;625626- flags |= PBF_READ;627628- pb = xfs_buf_get_flags(target, ioff, isize, flags);629- if (pb) {630- if (!XFS_BUF_ISDONE(pb)) {631- PB_TRACE(pb, "read", (unsigned long)flags);632- XFS_STATS_INC(pb_get_read);633- pagebuf_iostart(pb, flags);634- } else if (flags & PBF_ASYNC) {635- PB_TRACE(pb, "read_async", (unsigned long)flags);636 /*637 * Read ahead call which is already satisfied,638 * drop the buffer639 */640 goto no_buffer;641 } else {642- PB_TRACE(pb, "read_done", (unsigned long)flags);643 /* We do not want read in the flags */644- pb->pb_flags &= ~PBF_READ;645 }646 }647648- return pb;649650 no_buffer:651- if (flags & (PBF_LOCK | PBF_TRYLOCK))652- pagebuf_unlock(pb);653- pagebuf_rele(pb);654 return NULL;655}656657/*658- * If we are not low on memory then do the readahead in a deadlock659- * safe manner.660 */661void662-pagebuf_readahead(663 xfs_buftarg_t *target,664- loff_t ioff,665 size_t isize,666- page_buf_flags_t flags)667{668 struct backing_dev_info *bdi;669670- bdi = target->pbr_mapping->backing_dev_info;671 if (bdi_read_congested(bdi))672 return;673674- flags |= (PBF_TRYLOCK|PBF_ASYNC|PBF_READ_AHEAD);675 xfs_buf_read_flags(target, ioff, isize, flags);676}677678xfs_buf_t *679-pagebuf_get_empty(680 size_t len,681 xfs_buftarg_t 
*target)682{683- xfs_buf_t *pb;684685- pb = pagebuf_allocate(0);686- if (pb)687- _pagebuf_initialize(pb, target, 0, len, 0);688- return pb;689}690691static inline struct page *···701}702703int704-pagebuf_associate_memory(705- xfs_buf_t *pb,706 void *mem,707 size_t len)708{···719 page_count++;720721 /* Free any previous set of page pointers */722- if (pb->pb_pages)723- _pagebuf_free_pages(pb);724725- pb->pb_pages = NULL;726- pb->pb_addr = mem;727728- rval = _pagebuf_get_pages(pb, page_count, 0);729 if (rval)730 return rval;731732- pb->pb_offset = offset;733 ptr = (size_t) mem & PAGE_CACHE_MASK;734 end = PAGE_CACHE_ALIGN((size_t) mem + len);735 end_cur = end;736 /* set up first page */737- pb->pb_pages[0] = mem_to_page(mem);738739 ptr += PAGE_CACHE_SIZE;740- pb->pb_page_count = ++i;741 while (ptr < end) {742- pb->pb_pages[i] = mem_to_page((void *)ptr);743- pb->pb_page_count = ++i;744 ptr += PAGE_CACHE_SIZE;745 }746- pb->pb_locked = 0;747748- pb->pb_count_desired = pb->pb_buffer_length = len;749- pb->pb_flags |= PBF_MAPPED;750751 return 0;752}753754xfs_buf_t *755-pagebuf_get_no_daddr(756 size_t len,757 xfs_buftarg_t *target)758{···761 void *data;762 int error;763764- bp = pagebuf_allocate(0);765 if (unlikely(bp == NULL))766 goto fail;767- _pagebuf_initialize(bp, target, 0, len, 0);768769 try_again:770 data = kmem_alloc(malloc_len, KM_SLEEP | KM_MAYFAIL);···773774 /* check whether alignment matches.. */775 if ((__psunsigned_t)data !=776- ((__psunsigned_t)data & ~target->pbr_smask)) {777 /* .. else double the size and try again */778 kmem_free(data, malloc_len);779 malloc_len <<= 1;780 goto try_again;781 }782783- error = pagebuf_associate_memory(bp, data, len);784 if (error)785 goto fail_free_mem;786- bp->pb_flags |= _PBF_KMEM_ALLOC;787788- pagebuf_unlock(bp);789790- PB_TRACE(bp, "no_daddr", data);791 return bp;792 fail_free_mem:793 kmem_free(data, malloc_len);794 fail_free_buf:795- pagebuf_free(bp);796 fail:797 return NULL;798}799800/*801- * pagebuf_hold802- *803 * Increment reference count on buffer, to hold the buffer concurrently804 * with another thread which may release (free) the buffer asynchronously.805- *806 * Must hold the buffer already to call this function.807 */808void809-pagebuf_hold(810- xfs_buf_t *pb)811{812- atomic_inc(&pb->pb_hold);813- PB_TRACE(pb, "hold", 0);814}815816/*817- * pagebuf_rele818- *819- * pagebuf_rele releases a hold on the specified buffer. 
If the820- * the hold count is 1, pagebuf_rele calls pagebuf_free.821 */822void823-pagebuf_rele(824- xfs_buf_t *pb)825{826- xfs_bufhash_t *hash = pb->pb_hash;827828- PB_TRACE(pb, "rele", pb->pb_relse);829830- if (atomic_dec_and_lock(&pb->pb_hold, &hash->bh_lock)) {831- if (pb->pb_relse) {832- atomic_inc(&pb->pb_hold);833 spin_unlock(&hash->bh_lock);834- (*(pb->pb_relse)) (pb);835- } else if (pb->pb_flags & PBF_FS_MANAGED) {836 spin_unlock(&hash->bh_lock);837 } else {838- ASSERT(!(pb->pb_flags & (PBF_DELWRI|_PBF_DELWRI_Q)));839- list_del_init(&pb->pb_hash_list);840 spin_unlock(&hash->bh_lock);841- pagebuf_free(pb);842 }843 } else {844 /*845 * Catch reference count leaks846 */847- ASSERT(atomic_read(&pb->pb_hold) >= 0);848 }849}850···855 */856857/*858- * pagebuf_cond_lock859- *860- * pagebuf_cond_lock locks a buffer object, if it is not already locked.861- * Note that this in no way862- * locks the underlying pages, so it is only useful for synchronizing863- * concurrent use of page buffer objects, not for synchronizing independent864- * access to the underlying pages.865 */866int867-pagebuf_cond_lock( /* lock buffer, if not locked */868- /* returns -EBUSY if locked) */869- xfs_buf_t *pb)870{871 int locked;872873- locked = down_trylock(&pb->pb_sema) == 0;874 if (locked) {875- PB_SET_OWNER(pb);876 }877- PB_TRACE(pb, "cond_lock", (long)locked);878- return(locked ? 0 : -EBUSY);879}880881#if defined(DEBUG) || defined(XFS_BLI_TRACE)882-/*883- * pagebuf_lock_value884- *885- * Return lock value for a pagebuf886- */887int888-pagebuf_lock_value(889- xfs_buf_t *pb)890{891- return(atomic_read(&pb->pb_sema.count));892}893#endif894895/*896- * pagebuf_lock897- *898- * pagebuf_lock locks a buffer object. Note that this in no way899- * locks the underlying pages, so it is only useful for synchronizing900- * concurrent use of page buffer objects, not for synchronizing independent901- * access to the underlying pages.902 */903-int904-pagebuf_lock(905- xfs_buf_t *pb)906{907- PB_TRACE(pb, "lock", 0);908- if (atomic_read(&pb->pb_io_remaining))909- blk_run_address_space(pb->pb_target->pbr_mapping);910- down(&pb->pb_sema);911- PB_SET_OWNER(pb);912- PB_TRACE(pb, "locked", 0);913- return 0;914}915916/*917- * pagebuf_unlock918- *919- * pagebuf_unlock releases the lock on the buffer object created by920- * pagebuf_lock or pagebuf_cond_lock (not any pinning of underlying pages921- * created by pagebuf_pin).922- *923 * If the buffer is marked delwri but is not queued, do so before we924- * unlock the buffer as we need to set flags correctly. We also need to925 * take a reference for the delwri queue because the unlocker is going to926 * drop their's and they don't know we just queued it.927 */928void929-pagebuf_unlock( /* unlock buffer */930- xfs_buf_t *pb) /* buffer to unlock */931{932- if ((pb->pb_flags & (PBF_DELWRI|_PBF_DELWRI_Q)) == PBF_DELWRI) {933- atomic_inc(&pb->pb_hold);934- pb->pb_flags |= PBF_ASYNC;935- pagebuf_delwri_queue(pb, 0);936 }937938- PB_CLEAR_OWNER(pb);939- up(&pb->pb_sema);940- PB_TRACE(pb, "unlock", 0);941}942943944/*945 * Pinning Buffer Storage in Memory946- */947-948-/*949- * pagebuf_pin950- *951- * pagebuf_pin locks all of the memory represented by a buffer in952- * memory. Multiple calls to pagebuf_pin and pagebuf_unpin, for953- * the same or different buffers affecting a given page, will954- * properly count the number of outstanding "pin" requests. 
The955- * buffer may be released after the pagebuf_pin and a different956- * buffer used when calling pagebuf_unpin, if desired.957- * pagebuf_pin should be used by the file system when it wants be958- * assured that no attempt will be made to force the affected959- * memory to disk. It does not assure that a given logical page960- * will not be moved to a different physical page.961 */962void963-pagebuf_pin(964- xfs_buf_t *pb)965{966- atomic_inc(&pb->pb_pin_count);967- PB_TRACE(pb, "pin", (long)pb->pb_pin_count.counter);968}969970-/*971- * pagebuf_unpin972- *973- * pagebuf_unpin reverses the locking of memory performed by974- * pagebuf_pin. Note that both functions affected the logical975- * pages associated with the buffer, not the buffer itself.976- */977void978-pagebuf_unpin(979- xfs_buf_t *pb)980{981- if (atomic_dec_and_test(&pb->pb_pin_count)) {982- wake_up_all(&pb->pb_waiters);983- }984- PB_TRACE(pb, "unpin", (long)pb->pb_pin_count.counter);985}986987int988-pagebuf_ispin(989- xfs_buf_t *pb)990{991- return atomic_read(&pb->pb_pin_count);992}993994-/*995- * pagebuf_wait_unpin996- *997- * pagebuf_wait_unpin waits until all of the memory associated998- * with the buffer is not longer locked in memory. It returns999- * immediately if none of the affected pages are locked.1000- */1001-static inline void1002-_pagebuf_wait_unpin(1003- xfs_buf_t *pb)1004{1005 DECLARE_WAITQUEUE (wait, current);10061007- if (atomic_read(&pb->pb_pin_count) == 0)1008 return;10091010- add_wait_queue(&pb->pb_waiters, &wait);1011 for (;;) {1012 set_current_state(TASK_UNINTERRUPTIBLE);1013- if (atomic_read(&pb->pb_pin_count) == 0)1014 break;1015- if (atomic_read(&pb->pb_io_remaining))1016- blk_run_address_space(pb->pb_target->pbr_mapping);1017 schedule();1018 }1019- remove_wait_queue(&pb->pb_waiters, &wait);1020 set_current_state(TASK_RUNNING);1021}1022···978 * Buffer Utility Routines979 */980981-/*982- * pagebuf_iodone983- *984- * pagebuf_iodone marks a buffer for which I/O is in progress985- * done with respect to that I/O. 
The pb_iodone routine, if986- * present, will be called as a side-effect.987- */988STATIC void989-pagebuf_iodone_work(990 void *v)991{992 xfs_buf_t *bp = (xfs_buf_t *)v;993994- if (bp->pb_iodone)995- (*(bp->pb_iodone))(bp);996- else if (bp->pb_flags & PBF_ASYNC)997 xfs_buf_relse(bp);998}9991000void1001-pagebuf_iodone(1002- xfs_buf_t *pb,1003 int schedule)1004{1005- pb->pb_flags &= ~(PBF_READ | PBF_WRITE);1006- if (pb->pb_error == 0)1007- pb->pb_flags |= PBF_DONE;10081009- PB_TRACE(pb, "iodone", pb->pb_iodone);10101011- if ((pb->pb_iodone) || (pb->pb_flags & PBF_ASYNC)) {1012 if (schedule) {1013- INIT_WORK(&pb->pb_iodone_work, pagebuf_iodone_work, pb);1014- queue_work(xfslogd_workqueue, &pb->pb_iodone_work);1015 } else {1016- pagebuf_iodone_work(pb);1017 }1018 } else {1019- up(&pb->pb_iodonesema);1020 }1021}10221023-/*1024- * pagebuf_ioerror1025- *1026- * pagebuf_ioerror sets the error code for a buffer.1027- */1028void1029-pagebuf_ioerror( /* mark/clear buffer error flag */1030- xfs_buf_t *pb, /* buffer to mark */1031- int error) /* error to store (0 if none) */1032{1033 ASSERT(error >= 0 && error <= 0xffff);1034- pb->pb_error = (unsigned short)error;1035- PB_TRACE(pb, "ioerror", (unsigned long)error);1036}10371038/*1039- * pagebuf_iostart1040- *1041- * pagebuf_iostart initiates I/O on a buffer, based on the flags supplied.1042- * If necessary, it will arrange for any disk space allocation required,1043- * and it will break up the request if the block mappings require it.1044- * The pb_iodone routine in the buffer supplied will only be called1045 * when all of the subsidiary I/O requests, if any, have been completed.1046- * pagebuf_iostart calls the pagebuf_ioinitiate routine or1047- * pagebuf_iorequest, if the former routine is not defined, to start1048- * the I/O on a given low-level request.1049 */1050int1051-pagebuf_iostart( /* start I/O on a buffer */1052- xfs_buf_t *pb, /* buffer to start */1053- page_buf_flags_t flags) /* PBF_LOCK, PBF_ASYNC, PBF_READ, */1054- /* PBF_WRITE, PBF_DELWRI, */1055- /* PBF_DONT_BLOCK */1056{1057 int status = 0;10581059- PB_TRACE(pb, "iostart", (unsigned long)flags);10601061- if (flags & PBF_DELWRI) {1062- pb->pb_flags &= ~(PBF_READ | PBF_WRITE | PBF_ASYNC);1063- pb->pb_flags |= flags & (PBF_DELWRI | PBF_ASYNC);1064- pagebuf_delwri_queue(pb, 1);1065 return status;1066 }10671068- pb->pb_flags &= ~(PBF_READ | PBF_WRITE | PBF_ASYNC | PBF_DELWRI | \1069- PBF_READ_AHEAD | _PBF_RUN_QUEUES);1070- pb->pb_flags |= flags & (PBF_READ | PBF_WRITE | PBF_ASYNC | \1071- PBF_READ_AHEAD | _PBF_RUN_QUEUES);10721073- BUG_ON(pb->pb_bn == XFS_BUF_DADDR_NULL);10741075 /* For writes allow an alternate strategy routine to precede1076 * the actual I/O request (which may not be issued at all in1077 * a shutdown situation, for example).1078 */1079- status = (flags & PBF_WRITE) ?1080- pagebuf_iostrategy(pb) : pagebuf_iorequest(pb);10811082 /* Wait for I/O if we are not an async request.1083 * Note: async I/O request completion will release the buffer,1084 * and that can already be done by this point. 
So using the1085 * buffer pointer from here on, after async I/O, is invalid.1086 */1087- if (!status && !(flags & PBF_ASYNC))1088- status = pagebuf_iowait(pb);10891090 return status;1091}10921093-/*1094- * Helper routine for pagebuf_iorequest1095- */1096-1097STATIC __inline__ int1098-_pagebuf_iolocked(1099- xfs_buf_t *pb)1100{1101- ASSERT(pb->pb_flags & (PBF_READ|PBF_WRITE));1102- if (pb->pb_flags & PBF_READ)1103- return pb->pb_locked;1104 return 0;1105}11061107STATIC __inline__ void1108-_pagebuf_iodone(1109- xfs_buf_t *pb,1110 int schedule)1111{1112- if (atomic_dec_and_test(&pb->pb_io_remaining) == 1) {1113- pb->pb_locked = 0;1114- pagebuf_iodone(pb, schedule);1115 }1116}11171118STATIC int1119-bio_end_io_pagebuf(1120 struct bio *bio,1121 unsigned int bytes_done,1122 int error)1123{1124- xfs_buf_t *pb = (xfs_buf_t *)bio->bi_private;1125- unsigned int blocksize = pb->pb_target->pbr_bsize;1126 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;11271128 if (bio->bi_size)1129 return 1;11301131 if (!test_bit(BIO_UPTODATE, &bio->bi_flags))1132- pb->pb_error = EIO;11331134 do {1135 struct page *page = bvec->bv_page;11361137- if (unlikely(pb->pb_error)) {1138- if (pb->pb_flags & PBF_READ)1139 ClearPageUptodate(page);1140 SetPageError(page);1141- } else if (blocksize == PAGE_CACHE_SIZE) {1142 SetPageUptodate(page);1143 } else if (!PagePrivate(page) &&1144- (pb->pb_flags & _PBF_PAGE_CACHE)) {1145 set_page_region(page, bvec->bv_offset, bvec->bv_len);1146 }11471148 if (--bvec >= bio->bi_io_vec)1149 prefetchw(&bvec->bv_page->flags);11501151- if (_pagebuf_iolocked(pb)) {1152 unlock_page(page);1153 }1154 } while (bvec >= bio->bi_io_vec);11551156- _pagebuf_iodone(pb, 1);1157 bio_put(bio);1158 return 0;1159}11601161STATIC void1162-_pagebuf_ioapply(1163- xfs_buf_t *pb)1164{1165 int i, rw, map_i, total_nr_pages, nr_pages;1166 struct bio *bio;1167- int offset = pb->pb_offset;1168- int size = pb->pb_count_desired;1169- sector_t sector = pb->pb_bn;1170- unsigned int blocksize = pb->pb_target->pbr_bsize;1171- int locking = _pagebuf_iolocked(pb);11721173- total_nr_pages = pb->pb_page_count;1174 map_i = 0;11751176- if (pb->pb_flags & _PBF_RUN_QUEUES) {1177- pb->pb_flags &= ~_PBF_RUN_QUEUES;1178- rw = (pb->pb_flags & PBF_READ) ? READ_SYNC : WRITE_SYNC;1179 } else {1180- rw = (pb->pb_flags & PBF_READ) ? READ : WRITE;1181 }11821183- if (pb->pb_flags & PBF_ORDERED) {1184- ASSERT(!(pb->pb_flags & PBF_READ));1185 rw = WRITE_BARRIER;1186 }11871188- /* Special code path for reading a sub page size pagebuf in --1189 * we populate up the whole page, and hence the other metadata1190 * in the same page. 
This optimization is only valid when the1191- * filesystem block size and the page size are equal.1192 */1193- if ((pb->pb_buffer_length < PAGE_CACHE_SIZE) &&1194- (pb->pb_flags & PBF_READ) && locking &&1195- (blocksize == PAGE_CACHE_SIZE)) {1196 bio = bio_alloc(GFP_NOIO, 1);11971198- bio->bi_bdev = pb->pb_target->pbr_bdev;1199 bio->bi_sector = sector - (offset >> BBSHIFT);1200- bio->bi_end_io = bio_end_io_pagebuf;1201- bio->bi_private = pb;12021203- bio_add_page(bio, pb->pb_pages[0], PAGE_CACHE_SIZE, 0);1204 size = 0;12051206- atomic_inc(&pb->pb_io_remaining);12071208 goto submit_io;1209 }12101211 /* Lock down the pages which we need to for the request */1212- if (locking && (pb->pb_flags & PBF_WRITE) && (pb->pb_locked == 0)) {1213 for (i = 0; size; i++) {1214 int nbytes = PAGE_CACHE_SIZE - offset;1215- struct page *page = pb->pb_pages[i];12161217 if (nbytes > size)1218 nbytes = size;···1197 size -= nbytes;1198 offset = 0;1199 }1200- offset = pb->pb_offset;1201- size = pb->pb_count_desired;1202 }12031204next_chunk:1205- atomic_inc(&pb->pb_io_remaining);1206 nr_pages = BIO_MAX_SECTORS >> (PAGE_SHIFT - BBSHIFT);1207 if (nr_pages > total_nr_pages)1208 nr_pages = total_nr_pages;12091210 bio = bio_alloc(GFP_NOIO, nr_pages);1211- bio->bi_bdev = pb->pb_target->pbr_bdev;1212 bio->bi_sector = sector;1213- bio->bi_end_io = bio_end_io_pagebuf;1214- bio->bi_private = pb;12151216 for (; size && nr_pages; nr_pages--, map_i++) {1217- int nbytes = PAGE_CACHE_SIZE - offset;12181219 if (nbytes > size)1220 nbytes = size;12211222- if (bio_add_page(bio, pb->pb_pages[map_i],1223- nbytes, offset) < nbytes)1224 break;12251226 offset = 0;···1236 goto next_chunk;1237 } else {1238 bio_put(bio);1239- pagebuf_ioerror(pb, EIO);1240 }1241}12421243-/*1244- * pagebuf_iorequest -- the core I/O request routine.1245- */1246int1247-pagebuf_iorequest( /* start real I/O */1248- xfs_buf_t *pb) /* buffer to convey to device */1249{1250- PB_TRACE(pb, "iorequest", 0);12511252- if (pb->pb_flags & PBF_DELWRI) {1253- pagebuf_delwri_queue(pb, 1);1254 return 0;1255 }12561257- if (pb->pb_flags & PBF_WRITE) {1258- _pagebuf_wait_unpin(pb);1259 }12601261- pagebuf_hold(pb);12621263 /* Set the count to 1 initially, this will stop an I/O1264 * completion callout which happens before we have started1265- * all the I/O from calling pagebuf_iodone too early.1266 */1267- atomic_set(&pb->pb_io_remaining, 1);1268- _pagebuf_ioapply(pb);1269- _pagebuf_iodone(pb, 0);12701271- pagebuf_rele(pb);1272 return 0;1273}12741275/*1276- * pagebuf_iowait1277- *1278- * pagebuf_iowait waits for I/O to complete on the buffer supplied.1279- * It returns immediately if no I/O is pending. 
In any case, it returns1280- * the error code, if any, or 0 if there is no error.1281 */1282int1283-pagebuf_iowait(1284- xfs_buf_t *pb)1285{1286- PB_TRACE(pb, "iowait", 0);1287- if (atomic_read(&pb->pb_io_remaining))1288- blk_run_address_space(pb->pb_target->pbr_mapping);1289- down(&pb->pb_iodonesema);1290- PB_TRACE(pb, "iowaited", (long)pb->pb_error);1291- return pb->pb_error;1292}12931294-caddr_t1295-pagebuf_offset(1296- xfs_buf_t *pb,1297 size_t offset)1298{1299 struct page *page;13001301- offset += pb->pb_offset;013021303- page = pb->pb_pages[offset >> PAGE_CACHE_SHIFT];1304- return (caddr_t) page_address(page) + (offset & (PAGE_CACHE_SIZE - 1));01305}13061307/*1308- * pagebuf_iomove1309- *1310 * Move data into or out of a buffer.1311 */1312void1313-pagebuf_iomove(1314- xfs_buf_t *pb, /* buffer to process */1315 size_t boff, /* starting buffer offset */1316 size_t bsize, /* length to copy */1317 caddr_t data, /* data address */1318- page_buf_rw_t mode) /* read/write flag */1319{1320 size_t bend, cpoff, csize;1321 struct page *page;13221323 bend = boff + bsize;1324 while (boff < bend) {1325- page = pb->pb_pages[page_buf_btoct(boff + pb->pb_offset)];1326- cpoff = page_buf_poff(boff + pb->pb_offset);1327 csize = min_t(size_t,1328- PAGE_CACHE_SIZE-cpoff, pb->pb_count_desired-boff);13291330 ASSERT(((csize + cpoff) <= PAGE_CACHE_SIZE));13311332 switch (mode) {1333- case PBRW_ZERO:1334 memset(page_address(page) + cpoff, 0, csize);1335 break;1336- case PBRW_READ:1337 memcpy(data, page_address(page) + cpoff, csize);1338 break;1339- case PBRW_WRITE:1340 memcpy(page_address(page) + cpoff, data, csize);1341 }1342···1341}13421343/*1344- * Handling of buftargs.1345 */13461347/*1348- * Wait for any bufs with callbacks that have been submitted but1349- * have not yet returned... walk the hash list for the target.1350 */1351void1352xfs_wait_buftarg(···1360 hash = &btp->bt_hash[i];1361again:1362 spin_lock(&hash->bh_lock);1363- list_for_each_entry_safe(bp, n, &hash->bh_list, pb_hash_list) {1364- ASSERT(btp == bp->pb_target);1365- if (!(bp->pb_flags & PBF_FS_MANAGED)) {1366 spin_unlock(&hash->bh_lock);1367 /*1368 * Catch superblock reference count leaks1369 * immediately1370 */1371- BUG_ON(bp->pb_bn == 0);1372 delay(100);1373 goto again;1374 }···1378}13791380/*1381- * Allocate buffer hash table for a given target.1382- * For devices containing metadata (i.e. not the log/realtime devices)1383- * we need to allocate a much larger hash table.1384 */1385STATIC void1386xfs_alloc_bufhash(···1403xfs_free_bufhash(1404 xfs_buftarg_t *btp)1405{1406- kmem_free(btp->bt_hash,1407- (1 << btp->bt_hashshift) * sizeof(xfs_bufhash_t));1408 btp->bt_hash = NULL;0000000000000000000000001409}14101411void···1438{1439 xfs_flush_buftarg(btp, 1);1440 if (external)1441- xfs_blkdev_put(btp->pbr_bdev);1442 xfs_free_bufhash(btp);1443- iput(btp->pbr_mapping->host);00000001444 kmem_free(btp, sizeof(*btp));1445}1446···1458 unsigned int sectorsize,1459 int verbose)1460{1461- btp->pbr_bsize = blocksize;1462- btp->pbr_sshift = ffs(sectorsize) - 1;1463- btp->pbr_smask = sectorsize - 1;14641465- if (set_blocksize(btp->pbr_bdev, sectorsize)) {1466 printk(KERN_WARNING1467 "XFS: Cannot set_blocksize to %u on device %s\n",1468 sectorsize, XFS_BUFTARG_NAME(btp));···1482}14831484/*1485-* When allocating the initial buffer target we have not yet1486-* read in the superblock, so don't know what sized sectors1487-* are being used is at this early stage. 
Play safe.1488-*/1489STATIC int1490xfs_setsize_buftarg_early(1491 xfs_buftarg_t *btp,···1533 mapping->a_ops = &mapping_aops;1534 mapping->backing_dev_info = bdi;1535 mapping_set_gfp_mask(mapping, GFP_NOFS);1536- btp->pbr_mapping = mapping;1537 return 0;000000000000000000001538}15391540xfs_buftarg_t *···15661567 btp = kmem_zalloc(sizeof(*btp), KM_SLEEP);15681569- btp->pbr_dev = bdev->bd_dev;1570- btp->pbr_bdev = bdev;1571 if (xfs_setsize_buftarg_early(btp, bdev))1572 goto error;1573 if (xfs_mapping_buftarg(btp, bdev))001574 goto error;1575 xfs_alloc_bufhash(btp, external);1576 return btp;···158415851586/*1587- * Pagebuf delayed write buffer handling1588 */1589-1590-STATIC LIST_HEAD(pbd_delwrite_queue);1591-STATIC DEFINE_SPINLOCK(pbd_delwrite_lock);1592-1593STATIC void1594-pagebuf_delwri_queue(1595- xfs_buf_t *pb,1596 int unlock)1597{1598- PB_TRACE(pb, "delwri_q", (long)unlock);1599- ASSERT((pb->pb_flags & (PBF_DELWRI|PBF_ASYNC)) ==1600- (PBF_DELWRI|PBF_ASYNC));16011602- spin_lock(&pbd_delwrite_lock);0001603 /* If already in the queue, dequeue and place at tail */1604- if (!list_empty(&pb->pb_list)) {1605- ASSERT(pb->pb_flags & _PBF_DELWRI_Q);1606- if (unlock) {1607- atomic_dec(&pb->pb_hold);1608- }1609- list_del(&pb->pb_list);1610 }16111612- pb->pb_flags |= _PBF_DELWRI_Q;1613- list_add_tail(&pb->pb_list, &pbd_delwrite_queue);1614- pb->pb_queuetime = jiffies;1615- spin_unlock(&pbd_delwrite_lock);16161617 if (unlock)1618- pagebuf_unlock(pb);1619}16201621void1622-pagebuf_delwri_dequeue(1623- xfs_buf_t *pb)1624{01625 int dequeued = 0;16261627- spin_lock(&pbd_delwrite_lock);1628- if ((pb->pb_flags & PBF_DELWRI) && !list_empty(&pb->pb_list)) {1629- ASSERT(pb->pb_flags & _PBF_DELWRI_Q);1630- list_del_init(&pb->pb_list);1631 dequeued = 1;1632 }1633- pb->pb_flags &= ~(PBF_DELWRI|_PBF_DELWRI_Q);1634- spin_unlock(&pbd_delwrite_lock);16351636 if (dequeued)1637- pagebuf_rele(pb);16381639- PB_TRACE(pb, "delwri_dq", (long)dequeued);1640}16411642STATIC void1643-pagebuf_runall_queues(1644 struct workqueue_struct *queue)1645{1646 flush_workqueue(queue);1647}1648-1649-/* Defines for pagebuf daemon */1650-STATIC struct task_struct *xfsbufd_task;1651-STATIC int xfsbufd_force_flush;1652-STATIC int xfsbufd_force_sleep;16531654STATIC int1655xfsbufd_wakeup(1656 int priority,1657 gfp_t mask)1658{1659- if (xfsbufd_force_sleep)1660- return 0;1661- xfsbufd_force_flush = 1;1662- barrier();1663- wake_up_process(xfsbufd_task);000001664 return 0;1665}1666···1668{1669 struct list_head tmp;1670 unsigned long age;1671- xfs_buftarg_t *target;1672- xfs_buf_t *pb, *n;0016731674 current->flags |= PF_MEMALLOC;16751676 INIT_LIST_HEAD(&tmp);1677 do {1678 if (unlikely(freezing(current))) {1679- xfsbufd_force_sleep = 1;1680 refrigerator();1681 } else {1682- xfsbufd_force_sleep = 0;1683 }16841685 schedule_timeout_interruptible(1686 xfs_buf_timer_centisecs * msecs_to_jiffies(10));16871688 age = xfs_buf_age_centisecs * msecs_to_jiffies(10);1689- spin_lock(&pbd_delwrite_lock);1690- list_for_each_entry_safe(pb, n, &pbd_delwrite_queue, pb_list) {1691- PB_TRACE(pb, "walkq1", (long)pagebuf_ispin(pb));1692- ASSERT(pb->pb_flags & PBF_DELWRI);16931694- if (!pagebuf_ispin(pb) && !pagebuf_cond_lock(pb)) {1695- if (!xfsbufd_force_flush &&01696 time_before(jiffies,1697- pb->pb_queuetime + age)) {1698- pagebuf_unlock(pb);1699 break;1700 }17011702- pb->pb_flags &= ~(PBF_DELWRI|_PBF_DELWRI_Q);1703- pb->pb_flags |= PBF_WRITE;1704- list_move(&pb->pb_list, &tmp);1705 }1706 }1707- spin_unlock(&pbd_delwrite_lock);17081709 while (!list_empty(&tmp)) {1710- pb 
= list_entry(tmp.next, xfs_buf_t, pb_list);1711- target = pb->pb_target;17121713- list_del_init(&pb->pb_list);1714- pagebuf_iostrategy(pb);17151716- blk_run_address_space(target->pbr_mapping);1717 }17181719 if (as_list_len > 0)1720 purge_addresses();17211722- xfsbufd_force_flush = 0;1723 } while (!kthread_should_stop());17241725 return 0;1726}17271728/*1729- * Go through all incore buffers, and release buffers if they belong to1730- * the given device. This is used in filesystem error handling to1731- * preserve the consistency of its metadata.1732 */1733int1734xfs_flush_buftarg(···1739 int wait)1740{1741 struct list_head tmp;1742- xfs_buf_t *pb, *n;1743 int pincount = 0;0017441745- pagebuf_runall_queues(xfsdatad_workqueue);1746- pagebuf_runall_queues(xfslogd_workqueue);17471748 INIT_LIST_HEAD(&tmp);1749- spin_lock(&pbd_delwrite_lock);1750- list_for_each_entry_safe(pb, n, &pbd_delwrite_queue, pb_list) {1751-1752- if (pb->pb_target != target)1753- continue;1754-1755- ASSERT(pb->pb_flags & (PBF_DELWRI|_PBF_DELWRI_Q));1756- PB_TRACE(pb, "walkq2", (long)pagebuf_ispin(pb));1757- if (pagebuf_ispin(pb)) {1758 pincount++;1759 continue;1760 }17611762- list_move(&pb->pb_list, &tmp);1763 }1764- spin_unlock(&pbd_delwrite_lock);17651766 /*1767 * Dropped the delayed write list lock, now walk the temporary list1768 */1769- list_for_each_entry_safe(pb, n, &tmp, pb_list) {1770- pagebuf_lock(pb);1771- pb->pb_flags &= ~(PBF_DELWRI|_PBF_DELWRI_Q);1772- pb->pb_flags |= PBF_WRITE;1773 if (wait)1774- pb->pb_flags &= ~PBF_ASYNC;1775 else1776- list_del_init(&pb->pb_list);17771778- pagebuf_iostrategy(pb);1779 }17801781 /*1782 * Remaining list items must be flushed before returning1783 */1784 while (!list_empty(&tmp)) {1785- pb = list_entry(tmp.next, xfs_buf_t, pb_list);17861787- list_del_init(&pb->pb_list);1788- xfs_iowait(pb);1789- xfs_buf_relse(pb);1790 }17911792 if (wait)1793- blk_run_address_space(target->pbr_mapping);17941795 return pincount;1796}17971798int __init1799-pagebuf_init(void)1800{1801 int error = -ENOMEM;18021803-#ifdef PAGEBUF_TRACE1804- pagebuf_trace_buf = ktrace_alloc(PAGEBUF_TRACE_SIZE, KM_SLEEP);1805#endif18061807- pagebuf_zone = kmem_zone_init(sizeof(xfs_buf_t), "xfs_buf");1808- if (!pagebuf_zone)1809 goto out_free_trace_buf;18101811 xfslogd_workqueue = create_workqueue("xfslogd");···1815 if (!xfsdatad_workqueue)1816 goto out_destroy_xfslogd_workqueue;18171818- xfsbufd_task = kthread_run(xfsbufd, NULL, "xfsbufd");1819- if (IS_ERR(xfsbufd_task)) {1820- error = PTR_ERR(xfsbufd_task);1821 goto out_destroy_xfsdatad_workqueue;1822- }1823-1824- pagebuf_shake = kmem_shake_register(xfsbufd_wakeup);1825- if (!pagebuf_shake)1826- goto out_stop_xfsbufd;18271828 return 0;18291830- out_stop_xfsbufd:1831- kthread_stop(xfsbufd_task);1832 out_destroy_xfsdatad_workqueue:1833 destroy_workqueue(xfsdatad_workqueue);1834 out_destroy_xfslogd_workqueue:1835 destroy_workqueue(xfslogd_workqueue);1836 out_free_buf_zone:1837- kmem_zone_destroy(pagebuf_zone);1838 out_free_trace_buf:1839-#ifdef PAGEBUF_TRACE1840- ktrace_free(pagebuf_trace_buf);1841#endif1842 return error;1843}18441845void1846-pagebuf_terminate(void)1847{1848- kmem_shake_deregister(pagebuf_shake);1849- kthread_stop(xfsbufd_task);1850 destroy_workqueue(xfsdatad_workqueue);1851 destroy_workqueue(xfslogd_workqueue);1852- kmem_zone_destroy(pagebuf_zone);1853-#ifdef PAGEBUF_TRACE1854- ktrace_free(pagebuf_trace_buf);1855#endif1856}
···31#include <linux/kthread.h>32#include "xfs_linux.h"3334+STATIC kmem_zone_t *xfs_buf_zone;35+STATIC kmem_shaker_t xfs_buf_shake;36+STATIC int xfsbufd(void *);37STATIC int xfsbufd_wakeup(int, gfp_t);38+STATIC void xfs_buf_delwri_queue(xfs_buf_t *, int);3940STATIC struct workqueue_struct *xfslogd_workqueue;41struct workqueue_struct *xfsdatad_workqueue;4243+#ifdef XFS_BUF_TRACE44void45+xfs_buf_trace(46+ xfs_buf_t *bp,47 char *id,48 void *data,49 void *ra)50{51+ ktrace_enter(xfs_buf_trace_buf,52+ bp, id,53+ (void *)(unsigned long)bp->b_flags,54+ (void *)(unsigned long)bp->b_hold.counter,55+ (void *)(unsigned long)bp->b_sema.count.counter,56 (void *)current,57 data, ra,58+ (void *)(unsigned long)((bp->b_file_offset>>32) & 0xffffffff),59+ (void *)(unsigned long)(bp->b_file_offset & 0xffffffff),60+ (void *)(unsigned long)bp->b_buffer_length,61 NULL, NULL, NULL, NULL, NULL);62}63+ktrace_t *xfs_buf_trace_buf;64+#define XFS_BUF_TRACE_SIZE 409665+#define XB_TRACE(bp, id, data) \66+ xfs_buf_trace(bp, id, (void *)data, (void *)__builtin_return_address(0))67#else68+#define XB_TRACE(bp, id, data) do { } while (0)69#endif7071+#ifdef XFS_BUF_LOCK_TRACKING72+# define XB_SET_OWNER(bp) ((bp)->b_last_holder = current->pid)73+# define XB_CLEAR_OWNER(bp) ((bp)->b_last_holder = -1)74+# define XB_GET_OWNER(bp) ((bp)->b_last_holder)75#else76+# define XB_SET_OWNER(bp) do { } while (0)77+# define XB_CLEAR_OWNER(bp) do { } while (0)78+# define XB_GET_OWNER(bp) do { } while (0)79#endif8081+#define xb_to_gfp(flags) \82+ ((((flags) & XBF_READ_AHEAD) ? __GFP_NORETRY : \83+ ((flags) & XBF_DONT_BLOCK) ? GFP_NOFS : GFP_KERNEL) | __GFP_NOWARN)8485+#define xb_to_km(flags) \86+ (((flags) & XBF_DONT_BLOCK) ? KM_NOFS : KM_SLEEP)8788+#define xfs_buf_allocate(flags) \89+ kmem_zone_alloc(xfs_buf_zone, xb_to_km(flags))90+#define xfs_buf_deallocate(bp) \91+ kmem_zone_free(xfs_buf_zone, (bp));9293/*94+ * Page Region interfaces.95 *96+ * For pages in filesystems where the blocksize is smaller than the97+ * pagesize, we use the page->private field (long) to hold a bitmap98+ * of uptodate regions within the page.99 *100+ * Each such region is "bytes per page / bits per long" bytes long.101 *102+ * NBPPR == number-of-bytes-per-page-region103+ * BTOPR == bytes-to-page-region (rounded up)104+ * BTOPRT == bytes-to-page-region-truncated (rounded down)105 */106#if (BITS_PER_LONG == 32)107#define PRSHIFT (PAGE_CACHE_SHIFT - 5) /* (32 == 1<<5) */···159}160161/*162+ * Mapping of multi-page buffers into contiguous virtual space163 */164165typedef struct a_list {···172STATIC DEFINE_SPINLOCK(as_lock);173174/*175+ * Try to batch vunmaps because they are costly.176 */177STATIC void178free_address(···215}216217/*218+ * Internal xfs_buf_t object manipulation219 */220221STATIC void222+_xfs_buf_initialize(223+ xfs_buf_t *bp,224 xfs_buftarg_t *target,225+ xfs_off_t range_base,226 size_t range_length,227+ xfs_buf_flags_t flags)228{229 /*230+ * We don't want certain flags to appear in b_flags.231 */232+ flags &= ~(XBF_LOCK|XBF_MAPPED|XBF_DONT_BLOCK|XBF_READ_AHEAD);233234+ memset(bp, 0, sizeof(xfs_buf_t));235+ atomic_set(&bp->b_hold, 1);236+ init_MUTEX_LOCKED(&bp->b_iodonesema);237+ INIT_LIST_HEAD(&bp->b_list);238+ INIT_LIST_HEAD(&bp->b_hash_list);239+ init_MUTEX_LOCKED(&bp->b_sema); /* held, no waiters */240+ XB_SET_OWNER(bp);241+ bp->b_target = target;242+ bp->b_file_offset = range_base;243 /*244 * Set buffer_length and count_desired to the same value initially.245 * I/O routines should use count_desired, which will be the same in246 * most cases but may 
be reset (e.g. XFS recovery).247 */248+ bp->b_buffer_length = bp->b_count_desired = range_length;249+ bp->b_flags = flags;250+ bp->b_bn = XFS_BUF_DADDR_NULL;251+ atomic_set(&bp->b_pin_count, 0);252+ init_waitqueue_head(&bp->b_waiters);253254+ XFS_STATS_INC(xb_create);255+ XB_TRACE(bp, "initialize", target);256}257258/*259+ * Allocate a page array capable of holding a specified number260+ * of pages, and point the page buf at it.261 */262STATIC int263+_xfs_buf_get_pages(264+ xfs_buf_t *bp,265 int page_count,266+ xfs_buf_flags_t flags)267{268 /* Make sure that we have a page list */269+ if (bp->b_pages == NULL) {270+ bp->b_offset = xfs_buf_poff(bp->b_file_offset);271+ bp->b_page_count = page_count;272+ if (page_count <= XB_PAGES) {273+ bp->b_pages = bp->b_page_array;274 } else {275+ bp->b_pages = kmem_alloc(sizeof(struct page *) *276+ page_count, xb_to_km(flags));277+ if (bp->b_pages == NULL)278 return -ENOMEM;279 }280+ memset(bp->b_pages, 0, sizeof(struct page *) * page_count);281 }282 return 0;283}284285/*286+ * Frees b_pages if it was allocated.287 */288STATIC void289+_xfs_buf_free_pages(290 xfs_buf_t *bp)291{292+ if (bp->b_pages != bp->b_page_array) {293+ kmem_free(bp->b_pages,294+ bp->b_page_count * sizeof(struct page *));295 }296}297···299 * Releases the specified buffer.300 *301 * The modification state of any associated pages is left unchanged.302+ * The buffer most not be on any hash - use xfs_buf_rele instead for303 * hashed and refcounted buffers304 */305void306+xfs_buf_free(307 xfs_buf_t *bp)308{309+ XB_TRACE(bp, "free", 0);310311+ ASSERT(list_empty(&bp->b_hash_list));312313+ if (bp->b_flags & _XBF_PAGE_CACHE) {314 uint i;315316+ if ((bp->b_flags & XBF_MAPPED) && (bp->b_page_count > 1))317+ free_address(bp->b_addr - bp->b_offset);318319+ for (i = 0; i < bp->b_page_count; i++)320+ page_cache_release(bp->b_pages[i]);321+ _xfs_buf_free_pages(bp);322+ } else if (bp->b_flags & _XBF_KMEM_ALLOC) {323 /*324+ * XXX(hch): bp->b_count_desired might be incorrect (see325+ * xfs_buf_associate_memory for details), but fortunately326 * the Linux version of kmem_free ignores the len argument..327 */328+ kmem_free(bp->b_addr, bp->b_count_desired);329+ _xfs_buf_free_pages(bp);330 }331332+ xfs_buf_deallocate(bp);333}334335/*336 * Finds all pages for buffer in question and builds it's page list.337 */338STATIC int339+_xfs_buf_lookup_pages(340 xfs_buf_t *bp,341 uint flags)342{343+ struct address_space *mapping = bp->b_target->bt_mapping;344+ size_t blocksize = bp->b_target->bt_bsize;345+ size_t size = bp->b_count_desired;346 size_t nbytes, offset;347+ gfp_t gfp_mask = xb_to_gfp(flags);348 unsigned short page_count, i;349 pgoff_t first;350+ xfs_off_t end;351 int error;352353+ end = bp->b_file_offset + bp->b_buffer_length;354+ page_count = xfs_buf_btoc(end) - xfs_buf_btoct(bp->b_file_offset);355356+ error = _xfs_buf_get_pages(bp, page_count, flags);357 if (unlikely(error))358 return error;359+ bp->b_flags |= _XBF_PAGE_CACHE;360361+ offset = bp->b_offset;362+ first = bp->b_file_offset >> PAGE_CACHE_SHIFT;363364+ for (i = 0; i < bp->b_page_count; i++) {365 struct page *page;366 uint retries = 0;367368 retry:369 page = find_or_create_page(mapping, first + i, gfp_mask);370 if (unlikely(page == NULL)) {371+ if (flags & XBF_READ_AHEAD) {372+ bp->b_page_count = i;373+ for (i = 0; i < bp->b_page_count; i++)374+ unlock_page(bp->b_pages[i]);375 return -ENOMEM;376 }377···387 "deadlock in %s (mode:0x%x)\n",388 __FUNCTION__, gfp_mask);389390+ XFS_STATS_INC(xb_page_retries);391 xfsbufd_wakeup(0, gfp_mask);392 
blk_congestion_wait(WRITE, HZ/50);393 goto retry;394 }395396+ XFS_STATS_INC(xb_page_found);397398 nbytes = min_t(size_t, size, PAGE_CACHE_SIZE - offset);399 size -= nbytes;···401 if (!PageUptodate(page)) {402 page_count--;403 if (blocksize >= PAGE_CACHE_SIZE) {404+ if (flags & XBF_READ)405+ bp->b_locked = 1;406 } else if (!PagePrivate(page)) {407 if (test_page_region(page, offset, nbytes))408 page_count++;409 }410 }411412+ bp->b_pages[i] = page;413 offset = 0;414 }415416+ if (!bp->b_locked) {417+ for (i = 0; i < bp->b_page_count; i++)418+ unlock_page(bp->b_pages[i]);419 }420421+ if (page_count == bp->b_page_count)422+ bp->b_flags |= XBF_DONE;423424+ XB_TRACE(bp, "lookup_pages", (long)page_count);425 return error;426}427···429 * Map buffer into kernel address-space if nessecary.430 */431STATIC int432+_xfs_buf_map_pages(433 xfs_buf_t *bp,434 uint flags)435{436 /* A single page buffer is always mappable */437+ if (bp->b_page_count == 1) {438+ bp->b_addr = page_address(bp->b_pages[0]) + bp->b_offset;439+ bp->b_flags |= XBF_MAPPED;440+ } else if (flags & XBF_MAPPED) {441 if (as_list_len > 64)442 purge_addresses();443+ bp->b_addr = vmap(bp->b_pages, bp->b_page_count,444+ VM_MAP, PAGE_KERNEL);445+ if (unlikely(bp->b_addr == NULL))446 return -ENOMEM;447+ bp->b_addr += bp->b_offset;448+ bp->b_flags |= XBF_MAPPED;449 }450451 return 0;···456 */457458/*459+ * Look up, and creates if absent, a lockable buffer for00460 * a given range of an inode. The buffer is returned461 * locked. If other overlapping buffers exist, they are462 * released before the new buffer is created and locked,···466 * are unlocked. No I/O is implied by this call.467 */468xfs_buf_t *469+_xfs_buf_find(470 xfs_buftarg_t *btp, /* block device target */471+ xfs_off_t ioff, /* starting offset of range */472 size_t isize, /* length of range */473+ xfs_buf_flags_t flags,474+ xfs_buf_t *new_bp)475{476+ xfs_off_t range_base;477 size_t range_length;478 xfs_bufhash_t *hash;479+ xfs_buf_t *bp, *n;480481 range_base = (ioff << BBSHIFT);482 range_length = (isize << BBSHIFT);483484 /* Check for IOs smaller than the sector size / not sector aligned */485+ ASSERT(!(range_length < (1 << btp->bt_sshift)));486+ ASSERT(!(range_base & (xfs_off_t)btp->bt_smask));487488 hash = &btp->bt_hash[hash_long((unsigned long)ioff, btp->bt_hashshift)];489490 spin_lock(&hash->bh_lock);491492+ list_for_each_entry_safe(bp, n, &hash->bh_list, b_hash_list) {493+ ASSERT(btp == bp->b_target);494+ if (bp->b_file_offset == range_base &&495+ bp->b_buffer_length == range_length) {496 /*497+ * If we look at something, bring it to the498 * front of the list for next time.499 */500+ atomic_inc(&bp->b_hold);501+ list_move(&bp->b_hash_list, &hash->bh_list);502 goto found;503 }504 }505506 /* No match found */507+ if (new_bp) {508+ _xfs_buf_initialize(new_bp, btp, range_base,509 range_length, flags);510+ new_bp->b_hash = hash;511+ list_add(&new_bp->b_hash_list, &hash->bh_list);512 } else {513+ XFS_STATS_INC(xb_miss_locked);514 }515516 spin_unlock(&hash->bh_lock);517+ return new_bp;518519found:520 spin_unlock(&hash->bh_lock);···523 * if this does not work then we need to drop the524 * spinlock and do a hard attempt on the semaphore.525 */526+ if (down_trylock(&bp->b_sema)) {527+ if (!(flags & XBF_TRYLOCK)) {528 /* wait for buffer ownership */529+ XB_TRACE(bp, "get_lock", 0);530+ xfs_buf_lock(bp);531+ XFS_STATS_INC(xb_get_locked_waited);532 } else {533 /* We asked for a trylock and failed, no need534 * to look at file offset and length here, we535+ * know that this buffer at least 
overlaps our536+ * buffer and is locked, therefore our buffer537+ * either does not exist, or is this buffer.538 */539+ xfs_buf_rele(bp);540+ XFS_STATS_INC(xb_busy_locked);541+ return NULL;0542 }543 } else {544 /* trylock worked */545+ XB_SET_OWNER(bp);546 }547548+ if (bp->b_flags & XBF_STALE) {549+ ASSERT((bp->b_flags & _XBF_DELWRI_Q) == 0);550+ bp->b_flags &= XBF_MAPPED;551 }552+ XB_TRACE(bp, "got_lock", 0);553+ XFS_STATS_INC(xb_get_locked);554+ return bp;555}556557/*558+ * Assembles a buffer covering the specified range.0559 * Storage in memory for all portions of the buffer will be allocated,560 * although backing storage may not be.561 */562xfs_buf_t *563+xfs_buf_get_flags(564 xfs_buftarg_t *target,/* target for buffer */565+ xfs_off_t ioff, /* starting offset of range */566 size_t isize, /* length of range */567+ xfs_buf_flags_t flags)568{569+ xfs_buf_t *bp, *new_bp;570 int error = 0, i;571572+ new_bp = xfs_buf_allocate(flags);573+ if (unlikely(!new_bp))574 return NULL;575576+ bp = _xfs_buf_find(target, ioff, isize, flags, new_bp);577+ if (bp == new_bp) {578+ error = _xfs_buf_lookup_pages(bp, flags);579 if (error)580 goto no_buffer;581 } else {582+ xfs_buf_deallocate(new_bp);583+ if (unlikely(bp == NULL))584 return NULL;585 }586587+ for (i = 0; i < bp->b_page_count; i++)588+ mark_page_accessed(bp->b_pages[i]);589590+ if (!(bp->b_flags & XBF_MAPPED)) {591+ error = _xfs_buf_map_pages(bp, flags);592 if (unlikely(error)) {593 printk(KERN_WARNING "%s: failed to map pages\n",594 __FUNCTION__);···598 }599 }600601+ XFS_STATS_INC(xb_get);602603 /*604 * Always fill in the block number now, the mapped cases can do605 * their own overlay of this later.606 */607+ bp->b_bn = ioff;608+ bp->b_count_desired = bp->b_buffer_length;609610+ XB_TRACE(bp, "get", (unsigned long)flags);611+ return bp;612613 no_buffer:614+ if (flags & (XBF_LOCK | XBF_TRYLOCK))615+ xfs_buf_unlock(bp);616+ xfs_buf_rele(bp);617 return NULL;618}619620xfs_buf_t *621xfs_buf_read_flags(622 xfs_buftarg_t *target,623+ xfs_off_t ioff,624 size_t isize,625+ xfs_buf_flags_t flags)626{627+ xfs_buf_t *bp;628629+ flags |= XBF_READ;630631+ bp = xfs_buf_get_flags(target, ioff, isize, flags);632+ if (bp) {633+ if (!XFS_BUF_ISDONE(bp)) {634+ XB_TRACE(bp, "read", (unsigned long)flags);635+ XFS_STATS_INC(xb_get_read);636+ xfs_buf_iostart(bp, flags);637+ } else if (flags & XBF_ASYNC) {638+ XB_TRACE(bp, "read_async", (unsigned long)flags);639 /*640 * Read ahead call which is already satisfied,641 * drop the buffer642 */643 goto no_buffer;644 } else {645+ XB_TRACE(bp, "read_done", (unsigned long)flags);646 /* We do not want read in the flags */647+ bp->b_flags &= ~XBF_READ;648 }649 }650651+ return bp;652653 no_buffer:654+ if (flags & (XBF_LOCK | XBF_TRYLOCK))655+ xfs_buf_unlock(bp);656+ xfs_buf_rele(bp);657 return NULL;658}659660/*661+ * If we are not low on memory then do the readahead in a deadlock662+ * safe manner.663 */664void665+xfs_buf_readahead(666 xfs_buftarg_t *target,667+ xfs_off_t ioff,668 size_t isize,669+ xfs_buf_flags_t flags)670{671 struct backing_dev_info *bdi;672673+ bdi = target->bt_mapping->backing_dev_info;674 if (bdi_read_congested(bdi))675 return;676677+ flags |= (XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD);678 xfs_buf_read_flags(target, ioff, isize, flags);679}680681xfs_buf_t *682+xfs_buf_get_empty(683 size_t len,684 xfs_buftarg_t *target)685{686+ xfs_buf_t *bp;687688+ bp = xfs_buf_allocate(0);689+ if (bp)690+ _xfs_buf_initialize(bp, target, 0, len, 0);691+ return bp;692}693694static inline struct page 
*···704}705706int707+xfs_buf_associate_memory(708+ xfs_buf_t *bp,709 void *mem,710 size_t len)711{···722 page_count++;723724 /* Free any previous set of page pointers */725+ if (bp->b_pages)726+ _xfs_buf_free_pages(bp);727728+ bp->b_pages = NULL;729+ bp->b_addr = mem;730731+ rval = _xfs_buf_get_pages(bp, page_count, 0);732 if (rval)733 return rval;734735+ bp->b_offset = offset;736 ptr = (size_t) mem & PAGE_CACHE_MASK;737 end = PAGE_CACHE_ALIGN((size_t) mem + len);738 end_cur = end;739 /* set up first page */740+ bp->b_pages[0] = mem_to_page(mem);741742 ptr += PAGE_CACHE_SIZE;743+ bp->b_page_count = ++i;744 while (ptr < end) {745+ bp->b_pages[i] = mem_to_page((void *)ptr);746+ bp->b_page_count = ++i;747 ptr += PAGE_CACHE_SIZE;748 }749+ bp->b_locked = 0;750751+ bp->b_count_desired = bp->b_buffer_length = len;752+ bp->b_flags |= XBF_MAPPED;753754 return 0;755}756757xfs_buf_t *758+xfs_buf_get_noaddr(759 size_t len,760 xfs_buftarg_t *target)761{···764 void *data;765 int error;766767+ bp = xfs_buf_allocate(0);768 if (unlikely(bp == NULL))769 goto fail;770+ _xfs_buf_initialize(bp, target, 0, len, 0);771772 try_again:773 data = kmem_alloc(malloc_len, KM_SLEEP | KM_MAYFAIL);···776777 /* check whether alignment matches.. */778 if ((__psunsigned_t)data !=779+ ((__psunsigned_t)data & ~target->bt_smask)) {780 /* .. else double the size and try again */781 kmem_free(data, malloc_len);782 malloc_len <<= 1;783 goto try_again;784 }785786+ error = xfs_buf_associate_memory(bp, data, len);787 if (error)788 goto fail_free_mem;789+ bp->b_flags |= _XBF_KMEM_ALLOC;790791+ xfs_buf_unlock(bp);792793+ XB_TRACE(bp, "no_daddr", data);794 return bp;795 fail_free_mem:796 kmem_free(data, malloc_len);797 fail_free_buf:798+ xfs_buf_free(bp);799 fail:800 return NULL;801}802803/*00804 * Increment reference count on buffer, to hold the buffer concurrently805 * with another thread which may release (free) the buffer asynchronously.0806 * Must hold the buffer already to call this function.807 */808void809+xfs_buf_hold(810+ xfs_buf_t *bp)811{812+ atomic_inc(&bp->b_hold);813+ XB_TRACE(bp, "hold", 0);814}815816/*817+ * Releases a hold on the specified buffer. If the818+ * the hold count is 1, calls xfs_buf_free.00819 */820void821+xfs_buf_rele(822+ xfs_buf_t *bp)823{824+ xfs_bufhash_t *hash = bp->b_hash;825826+ XB_TRACE(bp, "rele", bp->b_relse);827828+ if (atomic_dec_and_lock(&bp->b_hold, &hash->bh_lock)) {829+ if (bp->b_relse) {830+ atomic_inc(&bp->b_hold);831 spin_unlock(&hash->bh_lock);832+ (*(bp->b_relse)) (bp);833+ } else if (bp->b_flags & XBF_FS_MANAGED) {834 spin_unlock(&hash->bh_lock);835 } else {836+ ASSERT(!(bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q)));837+ list_del_init(&bp->b_hash_list);838 spin_unlock(&hash->bh_lock);839+ xfs_buf_free(bp);840 }841 } else {842 /*843 * Catch reference count leaks844 */845+ ASSERT(atomic_read(&bp->b_hold) >= 0);846 }847}848···863 */864865/*866+ * Locks a buffer object, if it is not already locked.867+ * Note that this in no way locks the underlying pages, so it is only868+ * useful for synchronizing concurrent use of buffer objects, not for869+ * synchronizing independent access to the underlying pages.000870 */871int872+xfs_buf_cond_lock(873+ xfs_buf_t *bp)0874{875 int locked;876877+ locked = down_trylock(&bp->b_sema) == 0;878 if (locked) {879+ XB_SET_OWNER(bp);880 }881+ XB_TRACE(bp, "cond_lock", (long)locked);882+ return locked ? 
0 : -EBUSY;883}884885#if defined(DEBUG) || defined(XFS_BLI_TRACE)00000886int887+xfs_buf_lock_value(888+ xfs_buf_t *bp)889{890+ return atomic_read(&bp->b_sema.count);891}892#endif893894/*895+ * Locks a buffer object.896+ * Note that this in no way locks the underlying pages, so it is only897+ * useful for synchronizing concurrent use of buffer objects, not for898+ * synchronizing independent access to the underlying pages.00899 */900+void901+xfs_buf_lock(902+ xfs_buf_t *bp)903{904+ XB_TRACE(bp, "lock", 0);905+ if (atomic_read(&bp->b_io_remaining))906+ blk_run_address_space(bp->b_target->bt_mapping);907+ down(&bp->b_sema);908+ XB_SET_OWNER(bp);909+ XB_TRACE(bp, "locked", 0);0910}911912/*913+ * Releases the lock on the buffer object.00000914 * If the buffer is marked delwri but is not queued, do so before we915+ * unlock the buffer as we need to set flags correctly. We also need to916 * take a reference for the delwri queue because the unlocker is going to917 * drop their's and they don't know we just queued it.918 */919void920+xfs_buf_unlock(921+ xfs_buf_t *bp)922{923+ if ((bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q)) == XBF_DELWRI) {924+ atomic_inc(&bp->b_hold);925+ bp->b_flags |= XBF_ASYNC;926+ xfs_buf_delwri_queue(bp, 0);927 }928929+ XB_CLEAR_OWNER(bp);930+ up(&bp->b_sema);931+ XB_TRACE(bp, "unlock", 0);932}933934935/*936 * Pinning Buffer Storage in Memory937+ * Ensure that no attempt to force a buffer to disk will succeed.00000000000000938 */939void940+xfs_buf_pin(941+ xfs_buf_t *bp)942{943+ atomic_inc(&bp->b_pin_count);944+ XB_TRACE(bp, "pin", (long)bp->b_pin_count.counter);945}9460000000947void948+xfs_buf_unpin(949+ xfs_buf_t *bp)950{951+ if (atomic_dec_and_test(&bp->b_pin_count))952+ wake_up_all(&bp->b_waiters);953+ XB_TRACE(bp, "unpin", (long)bp->b_pin_count.counter);0954}955956int957+xfs_buf_ispin(958+ xfs_buf_t *bp)959{960+ return atomic_read(&bp->b_pin_count);961}962963+STATIC void964+xfs_buf_wait_unpin(965+ xfs_buf_t *bp)0000000966{967 DECLARE_WAITQUEUE (wait, current);968969+ if (atomic_read(&bp->b_pin_count) == 0)970 return;971972+ add_wait_queue(&bp->b_waiters, &wait);973 for (;;) {974 set_current_state(TASK_UNINTERRUPTIBLE);975+ if (atomic_read(&bp->b_pin_count) == 0)976 break;977+ if (atomic_read(&bp->b_io_remaining))978+ blk_run_address_space(bp->b_target->bt_mapping);979 schedule();980 }981+ remove_wait_queue(&bp->b_waiters, &wait);982 set_current_state(TASK_RUNNING);983}984···1032 * Buffer Utility Routines1033 */103400000001035STATIC void1036+xfs_buf_iodone_work(1037 void *v)1038{1039 xfs_buf_t *bp = (xfs_buf_t *)v;10401041+ if (bp->b_iodone)1042+ (*(bp->b_iodone))(bp);1043+ else if (bp->b_flags & XBF_ASYNC)1044 xfs_buf_relse(bp);1045}10461047void1048+xfs_buf_ioend(1049+ xfs_buf_t *bp,1050 int schedule)1051{1052+ bp->b_flags &= ~(XBF_READ | XBF_WRITE);1053+ if (bp->b_error == 0)1054+ bp->b_flags |= XBF_DONE;10551056+ XB_TRACE(bp, "iodone", bp->b_iodone);10571058+ if ((bp->b_iodone) || (bp->b_flags & XBF_ASYNC)) {1059 if (schedule) {1060+ INIT_WORK(&bp->b_iodone_work, xfs_buf_iodone_work, bp);1061+ queue_work(xfslogd_workqueue, &bp->b_iodone_work);1062 } else {1063+ xfs_buf_iodone_work(bp);1064 }1065 } else {1066+ up(&bp->b_iodonesema);1067 }1068}1069000001070void1071+xfs_buf_ioerror(1072+ xfs_buf_t *bp,1073+ int error)1074{1075 ASSERT(error >= 0 && error <= 0xffff);1076+ bp->b_error = (unsigned short)error;1077+ XB_TRACE(bp, "ioerror", (unsigned long)error);1078}10791080/*1081+ * Initiate I/O on a buffer, based on the flags supplied.1082+ * The b_iodone routine in the buffer 
supplied will only be called00001083 * when all of the subsidiary I/O requests, if any, have been completed.0001084 */1085int1086+xfs_buf_iostart(1087+ xfs_buf_t *bp,1088+ xfs_buf_flags_t flags)001089{1090 int status = 0;10911092+ XB_TRACE(bp, "iostart", (unsigned long)flags);10931094+ if (flags & XBF_DELWRI) {1095+ bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_ASYNC);1096+ bp->b_flags |= flags & (XBF_DELWRI | XBF_ASYNC);1097+ xfs_buf_delwri_queue(bp, 1);1098 return status;1099 }11001101+ bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_ASYNC | XBF_DELWRI | \1102+ XBF_READ_AHEAD | _XBF_RUN_QUEUES);1103+ bp->b_flags |= flags & (XBF_READ | XBF_WRITE | XBF_ASYNC | \1104+ XBF_READ_AHEAD | _XBF_RUN_QUEUES);11051106+ BUG_ON(bp->b_bn == XFS_BUF_DADDR_NULL);11071108 /* For writes allow an alternate strategy routine to precede1109 * the actual I/O request (which may not be issued at all in1110 * a shutdown situation, for example).1111 */1112+ status = (flags & XBF_WRITE) ?1113+ xfs_buf_iostrategy(bp) : xfs_buf_iorequest(bp);11141115 /* Wait for I/O if we are not an async request.1116 * Note: async I/O request completion will release the buffer,1117 * and that can already be done by this point. So using the1118 * buffer pointer from here on, after async I/O, is invalid.1119 */1120+ if (!status && !(flags & XBF_ASYNC))1121+ status = xfs_buf_iowait(bp);11221123 return status;1124}112500001126STATIC __inline__ int1127+_xfs_buf_iolocked(1128+ xfs_buf_t *bp)1129{1130+ ASSERT(bp->b_flags & (XBF_READ | XBF_WRITE));1131+ if (bp->b_flags & XBF_READ)1132+ return bp->b_locked;1133 return 0;1134}11351136STATIC __inline__ void1137+_xfs_buf_ioend(1138+ xfs_buf_t *bp,1139 int schedule)1140{1141+ if (atomic_dec_and_test(&bp->b_io_remaining) == 1) {1142+ bp->b_locked = 0;1143+ xfs_buf_ioend(bp, schedule);1144 }1145}11461147STATIC int1148+xfs_buf_bio_end_io(1149 struct bio *bio,1150 unsigned int bytes_done,1151 int error)1152{1153+ xfs_buf_t *bp = (xfs_buf_t *)bio->bi_private;1154+ unsigned int blocksize = bp->b_target->bt_bsize;1155 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;11561157 if (bio->bi_size)1158 return 1;11591160 if (!test_bit(BIO_UPTODATE, &bio->bi_flags))1161+ bp->b_error = EIO;11621163 do {1164 struct page *page = bvec->bv_page;11651166+ if (unlikely(bp->b_error)) {1167+ if (bp->b_flags & XBF_READ)1168 ClearPageUptodate(page);1169 SetPageError(page);1170+ } else if (blocksize >= PAGE_CACHE_SIZE) {1171 SetPageUptodate(page);1172 } else if (!PagePrivate(page) &&1173+ (bp->b_flags & _XBF_PAGE_CACHE)) {1174 set_page_region(page, bvec->bv_offset, bvec->bv_len);1175 }11761177 if (--bvec >= bio->bi_io_vec)1178 prefetchw(&bvec->bv_page->flags);11791180+ if (_xfs_buf_iolocked(bp)) {1181 unlock_page(page);1182 }1183 } while (bvec >= bio->bi_io_vec);11841185+ _xfs_buf_ioend(bp, 1);1186 bio_put(bio);1187 return 0;1188}11891190STATIC void1191+_xfs_buf_ioapply(1192+ xfs_buf_t *bp)1193{1194 int i, rw, map_i, total_nr_pages, nr_pages;1195 struct bio *bio;1196+ int offset = bp->b_offset;1197+ int size = bp->b_count_desired;1198+ sector_t sector = bp->b_bn;1199+ unsigned int blocksize = bp->b_target->bt_bsize;1200+ int locking = _xfs_buf_iolocked(bp);12011202+ total_nr_pages = bp->b_page_count;1203 map_i = 0;12041205+ if (bp->b_flags & _XBF_RUN_QUEUES) {1206+ bp->b_flags &= ~_XBF_RUN_QUEUES;1207+ rw = (bp->b_flags & XBF_READ) ? READ_SYNC : WRITE_SYNC;1208 } else {1209+ rw = (bp->b_flags & XBF_READ) ? 
READ : WRITE;1210 }12111212+ if (bp->b_flags & XBF_ORDERED) {1213+ ASSERT(!(bp->b_flags & XBF_READ));1214 rw = WRITE_BARRIER;1215 }12161217+ /* Special code path for reading a sub page size buffer in --1218 * we populate up the whole page, and hence the other metadata1219 * in the same page. This optimization is only valid when the1220+ * filesystem block size is not smaller than the page size.1221 */1222+ if ((bp->b_buffer_length < PAGE_CACHE_SIZE) &&1223+ (bp->b_flags & XBF_READ) && locking &&1224+ (blocksize >= PAGE_CACHE_SIZE)) {1225 bio = bio_alloc(GFP_NOIO, 1);12261227+ bio->bi_bdev = bp->b_target->bt_bdev;1228 bio->bi_sector = sector - (offset >> BBSHIFT);1229+ bio->bi_end_io = xfs_buf_bio_end_io;1230+ bio->bi_private = bp;12311232+ bio_add_page(bio, bp->b_pages[0], PAGE_CACHE_SIZE, 0);1233 size = 0;12341235+ atomic_inc(&bp->b_io_remaining);12361237 goto submit_io;1238 }12391240 /* Lock down the pages which we need to for the request */1241+ if (locking && (bp->b_flags & XBF_WRITE) && (bp->b_locked == 0)) {1242 for (i = 0; size; i++) {1243 int nbytes = PAGE_CACHE_SIZE - offset;1244+ struct page *page = bp->b_pages[i];12451246 if (nbytes > size)1247 nbytes = size;···1276 size -= nbytes;1277 offset = 0;1278 }1279+ offset = bp->b_offset;1280+ size = bp->b_count_desired;1281 }12821283next_chunk:1284+ atomic_inc(&bp->b_io_remaining);1285 nr_pages = BIO_MAX_SECTORS >> (PAGE_SHIFT - BBSHIFT);1286 if (nr_pages > total_nr_pages)1287 nr_pages = total_nr_pages;12881289 bio = bio_alloc(GFP_NOIO, nr_pages);1290+ bio->bi_bdev = bp->b_target->bt_bdev;1291 bio->bi_sector = sector;1292+ bio->bi_end_io = xfs_buf_bio_end_io;1293+ bio->bi_private = bp;12941295 for (; size && nr_pages; nr_pages--, map_i++) {1296+ int rbytes, nbytes = PAGE_CACHE_SIZE - offset;12971298 if (nbytes > size)1299 nbytes = size;13001301+ rbytes = bio_add_page(bio, bp->b_pages[map_i], nbytes, offset);1302+ if (rbytes < nbytes)1303 break;13041305 offset = 0;···1315 goto next_chunk;1316 } else {1317 bio_put(bio);1318+ xfs_buf_ioerror(bp, EIO);1319 }1320}13210001322int1323+xfs_buf_iorequest(1324+ xfs_buf_t *bp)1325{1326+ XB_TRACE(bp, "iorequest", 0);13271328+ if (bp->b_flags & XBF_DELWRI) {1329+ xfs_buf_delwri_queue(bp, 1);1330 return 0;1331 }13321333+ if (bp->b_flags & XBF_WRITE) {1334+ xfs_buf_wait_unpin(bp);1335 }13361337+ xfs_buf_hold(bp);13381339 /* Set the count to 1 initially, this will stop an I/O1340 * completion callout which happens before we have started1341+ * all the I/O from calling xfs_buf_ioend too early.1342 */1343+ atomic_set(&bp->b_io_remaining, 1);1344+ _xfs_buf_ioapply(bp);1345+ _xfs_buf_ioend(bp, 0);13461347+ xfs_buf_rele(bp);1348 return 0;1349}13501351/*1352+ * Waits for I/O to complete on the buffer supplied.1353+ * It returns immediately if no I/O is pending.1354+ * It returns the I/O error code, if any, or 0 if there was no error.001355 */1356int1357+xfs_buf_iowait(1358+ xfs_buf_t *bp)1359{1360+ XB_TRACE(bp, "iowait", 0);1361+ if (atomic_read(&bp->b_io_remaining))1362+ blk_run_address_space(bp->b_target->bt_mapping);1363+ down(&bp->b_iodonesema);1364+ XB_TRACE(bp, "iowaited", (long)bp->b_error);1365+ return bp->b_error;1366}13671368+xfs_caddr_t1369+xfs_buf_offset(1370+ xfs_buf_t *bp,1371 size_t offset)1372{1373 struct page *page;13741375+ if (bp->b_flags & XBF_MAPPED)1376+ return XFS_BUF_PTR(bp) + offset;13771378+ offset += bp->b_offset;1379+ page = bp->b_pages[offset >> PAGE_CACHE_SHIFT];1380+ return (xfs_caddr_t)page_address(page) + (offset & (PAGE_CACHE_SIZE-1));1381}13821383/*001384 * Move data into 
or out of a buffer.1385 */1386void1387+xfs_buf_iomove(1388+ xfs_buf_t *bp, /* buffer to process */1389 size_t boff, /* starting buffer offset */1390 size_t bsize, /* length to copy */1391 caddr_t data, /* data address */1392+ xfs_buf_rw_t mode) /* read/write/zero flag */1393{1394 size_t bend, cpoff, csize;1395 struct page *page;13961397 bend = boff + bsize;1398 while (boff < bend) {1399+ page = bp->b_pages[xfs_buf_btoct(boff + bp->b_offset)];1400+ cpoff = xfs_buf_poff(boff + bp->b_offset);1401 csize = min_t(size_t,1402+ PAGE_CACHE_SIZE-cpoff, bp->b_count_desired-boff);14031404 ASSERT(((csize + cpoff) <= PAGE_CACHE_SIZE));14051406 switch (mode) {1407+ case XBRW_ZERO:1408 memset(page_address(page) + cpoff, 0, csize);1409 break;1410+ case XBRW_READ:1411 memcpy(data, page_address(page) + cpoff, csize);1412 break;1413+ case XBRW_WRITE:1414 memcpy(page_address(page) + cpoff, data, csize);1415 }1416···1425}14261427/*1428+ * Handling of buffer targets (buftargs).1429 */14301431/*1432+ * Wait for any bufs with callbacks that have been submitted but1433+ * have not yet returned... walk the hash list for the target.1434 */1435void1436xfs_wait_buftarg(···1444 hash = &btp->bt_hash[i];1445again:1446 spin_lock(&hash->bh_lock);1447+ list_for_each_entry_safe(bp, n, &hash->bh_list, b_hash_list) {1448+ ASSERT(btp == bp->b_target);1449+ if (!(bp->b_flags & XBF_FS_MANAGED)) {1450 spin_unlock(&hash->bh_lock);1451 /*1452 * Catch superblock reference count leaks1453 * immediately1454 */1455+ BUG_ON(bp->b_bn == 0);1456 delay(100);1457 goto again;1458 }···1462}14631464/*1465+ * Allocate buffer hash table for a given target.1466+ * For devices containing metadata (i.e. not the log/realtime devices)1467+ * we need to allocate a much larger hash table.1468 */1469STATIC void1470xfs_alloc_bufhash(···1487xfs_free_bufhash(1488 xfs_buftarg_t *btp)1489{1490+ kmem_free(btp->bt_hash, (1<<btp->bt_hashshift) * sizeof(xfs_bufhash_t));01491 btp->bt_hash = NULL;1492+}1493+1494+/*1495+ * buftarg list for delwrite queue processing1496+ */1497+STATIC LIST_HEAD(xfs_buftarg_list);1498+STATIC DEFINE_SPINLOCK(xfs_buftarg_lock);1499+1500+STATIC void1501+xfs_register_buftarg(1502+ xfs_buftarg_t *btp)1503+{1504+ spin_lock(&xfs_buftarg_lock);1505+ list_add(&btp->bt_list, &xfs_buftarg_list);1506+ spin_unlock(&xfs_buftarg_lock);1507+}1508+1509+STATIC void1510+xfs_unregister_buftarg(1511+ xfs_buftarg_t *btp)1512+{1513+ spin_lock(&xfs_buftarg_lock);1514+ list_del(&btp->bt_list);1515+ spin_unlock(&xfs_buftarg_lock);1516}15171518void···1499{1500 xfs_flush_buftarg(btp, 1);1501 if (external)1502+ xfs_blkdev_put(btp->bt_bdev);1503 xfs_free_bufhash(btp);1504+ iput(btp->bt_mapping->host);1505+1506+ /* Unregister the buftarg first so that we don't get a1507+ * wakeup finding a non-existent task1508+ */1509+ xfs_unregister_buftarg(btp);1510+ kthread_stop(btp->bt_task);1511+1512 kmem_free(btp, sizeof(*btp));1513}1514···1512 unsigned int sectorsize,1513 int verbose)1514{1515+ btp->bt_bsize = blocksize;1516+ btp->bt_sshift = ffs(sectorsize) - 1;1517+ btp->bt_smask = sectorsize - 1;15181519+ if (set_blocksize(btp->bt_bdev, sectorsize)) {1520 printk(KERN_WARNING1521 "XFS: Cannot set_blocksize to %u on device %s\n",1522 sectorsize, XFS_BUFTARG_NAME(btp));···1536}15371538/*1539+ * When allocating the initial buffer target we have not yet1540+ * read in the superblock, so don't know what sized sectors1541+ * are being used is at this early stage. 
Play safe.1542+ */1543STATIC int1544xfs_setsize_buftarg_early(1545 xfs_buftarg_t *btp,···1587 mapping->a_ops = &mapping_aops;1588 mapping->backing_dev_info = bdi;1589 mapping_set_gfp_mask(mapping, GFP_NOFS);1590+ btp->bt_mapping = mapping;1591 return 0;1592+}1593+1594+STATIC int1595+xfs_alloc_delwrite_queue(1596+ xfs_buftarg_t *btp)1597+{1598+ int error = 0;1599+1600+ INIT_LIST_HEAD(&btp->bt_list);1601+ INIT_LIST_HEAD(&btp->bt_delwrite_queue);1602+ spinlock_init(&btp->bt_delwrite_lock, "delwri_lock");1603+ btp->bt_flags = 0;1604+ btp->bt_task = kthread_run(xfsbufd, btp, "xfsbufd");1605+ if (IS_ERR(btp->bt_task)) {1606+ error = PTR_ERR(btp->bt_task);1607+ goto out_error;1608+ }1609+ xfs_register_buftarg(btp);1610+out_error:1611+ return error;1612}16131614xfs_buftarg_t *···16001601 btp = kmem_zalloc(sizeof(*btp), KM_SLEEP);16021603+ btp->bt_dev = bdev->bd_dev;1604+ btp->bt_bdev = bdev;1605 if (xfs_setsize_buftarg_early(btp, bdev))1606 goto error;1607 if (xfs_mapping_buftarg(btp, bdev))1608+ goto error;1609+ if (xfs_alloc_delwrite_queue(btp))1610 goto error;1611 xfs_alloc_bufhash(btp, external);1612 return btp;···161616171618/*1619+ * Delayed write buffer handling1620 */00001621STATIC void1622+xfs_buf_delwri_queue(1623+ xfs_buf_t *bp,1624 int unlock)1625{1626+ struct list_head *dwq = &bp->b_target->bt_delwrite_queue;1627+ spinlock_t *dwlk = &bp->b_target->bt_delwrite_lock;016281629+ XB_TRACE(bp, "delwri_q", (long)unlock);1630+ ASSERT((bp->b_flags&(XBF_DELWRI|XBF_ASYNC)) == (XBF_DELWRI|XBF_ASYNC));1631+1632+ spin_lock(dwlk);1633 /* If already in the queue, dequeue and place at tail */1634+ if (!list_empty(&bp->b_list)) {1635+ ASSERT(bp->b_flags & _XBF_DELWRI_Q);1636+ if (unlock)1637+ atomic_dec(&bp->b_hold);1638+ list_del(&bp->b_list);01639 }16401641+ bp->b_flags |= _XBF_DELWRI_Q;1642+ list_add_tail(&bp->b_list, dwq);1643+ bp->b_queuetime = jiffies;1644+ spin_unlock(dwlk);16451646 if (unlock)1647+ xfs_buf_unlock(bp);1648}16491650void1651+xfs_buf_delwri_dequeue(1652+ xfs_buf_t *bp)1653{1654+ spinlock_t *dwlk = &bp->b_target->bt_delwrite_lock;1655 int dequeued = 0;16561657+ spin_lock(dwlk);1658+ if ((bp->b_flags & XBF_DELWRI) && !list_empty(&bp->b_list)) {1659+ ASSERT(bp->b_flags & _XBF_DELWRI_Q);1660+ list_del_init(&bp->b_list);1661 dequeued = 1;1662 }1663+ bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q);1664+ spin_unlock(dwlk);16651666 if (dequeued)1667+ xfs_buf_rele(bp);16681669+ XB_TRACE(bp, "delwri_dq", (long)dequeued);1670}16711672STATIC void1673+xfs_buf_runall_queues(1674 struct workqueue_struct *queue)1675{1676 flush_workqueue(queue);1677}0000016781679STATIC int1680xfsbufd_wakeup(1681 int priority,1682 gfp_t mask)1683{1684+ xfs_buftarg_t *btp;1685+1686+ spin_lock(&xfs_buftarg_lock);1687+ list_for_each_entry(btp, &xfs_buftarg_list, bt_list) {1688+ if (test_bit(XBT_FORCE_SLEEP, &btp->bt_flags))1689+ continue;1690+ set_bit(XBT_FORCE_FLUSH, &btp->bt_flags);1691+ wake_up_process(btp->bt_task);1692+ }1693+ spin_unlock(&xfs_buftarg_lock);1694 return 0;1695}1696···1702{1703 struct list_head tmp;1704 unsigned long age;1705+ xfs_buftarg_t *target = (xfs_buftarg_t *)data;1706+ xfs_buf_t *bp, *n;1707+ struct list_head *dwq = &target->bt_delwrite_queue;1708+ spinlock_t *dwlk = &target->bt_delwrite_lock;17091710 current->flags |= PF_MEMALLOC;17111712 INIT_LIST_HEAD(&tmp);1713 do {1714 if (unlikely(freezing(current))) {1715+ set_bit(XBT_FORCE_SLEEP, &target->bt_flags);1716 refrigerator();1717 } else {1718+ clear_bit(XBT_FORCE_SLEEP, &target->bt_flags);1719 }17201721 schedule_timeout_interruptible(1722 
xfs_buf_timer_centisecs * msecs_to_jiffies(10));17231724 age = xfs_buf_age_centisecs * msecs_to_jiffies(10);1725+ spin_lock(dwlk);1726+ list_for_each_entry_safe(bp, n, dwq, b_list) {1727+ XB_TRACE(bp, "walkq1", (long)xfs_buf_ispin(bp));1728+ ASSERT(bp->b_flags & XBF_DELWRI);17291730+ if (!xfs_buf_ispin(bp) && !xfs_buf_cond_lock(bp)) {1731+ if (!test_bit(XBT_FORCE_FLUSH,1732+ &target->bt_flags) &&1733 time_before(jiffies,1734+ bp->b_queuetime + age)) {1735+ xfs_buf_unlock(bp);1736 break;1737 }17381739+ bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q);1740+ bp->b_flags |= XBF_WRITE;1741+ list_move(&bp->b_list, &tmp);1742 }1743 }1744+ spin_unlock(dwlk);17451746 while (!list_empty(&tmp)) {1747+ bp = list_entry(tmp.next, xfs_buf_t, b_list);1748+ ASSERT(target == bp->b_target);17491750+ list_del_init(&bp->b_list);1751+ xfs_buf_iostrategy(bp);17521753+ blk_run_address_space(target->bt_mapping);1754 }17551756 if (as_list_len > 0)1757 purge_addresses();17581759+ clear_bit(XBT_FORCE_FLUSH, &target->bt_flags);1760 } while (!kthread_should_stop());17611762 return 0;1763}17641765/*1766+ * Go through all incore buffers, and release buffers if they belong to1767+ * the given device. This is used in filesystem error handling to1768+ * preserve the consistency of its metadata.1769 */1770int1771xfs_flush_buftarg(···1770 int wait)1771{1772 struct list_head tmp;1773+ xfs_buf_t *bp, *n;1774 int pincount = 0;1775+ struct list_head *dwq = &target->bt_delwrite_queue;1776+ spinlock_t *dwlk = &target->bt_delwrite_lock;17771778+ xfs_buf_runall_queues(xfsdatad_workqueue);1779+ xfs_buf_runall_queues(xfslogd_workqueue);17801781 INIT_LIST_HEAD(&tmp);1782+ spin_lock(dwlk);1783+ list_for_each_entry_safe(bp, n, dwq, b_list) {1784+ ASSERT(bp->b_target == target);1785+ ASSERT(bp->b_flags & (XBF_DELWRI | _XBF_DELWRI_Q));1786+ XB_TRACE(bp, "walkq2", (long)xfs_buf_ispin(bp));1787+ if (xfs_buf_ispin(bp)) {0001788 pincount++;1789 continue;1790 }17911792+ list_move(&bp->b_list, &tmp);1793 }1794+ spin_unlock(dwlk);17951796 /*1797 * Dropped the delayed write list lock, now walk the temporary list1798 */1799+ list_for_each_entry_safe(bp, n, &tmp, b_list) {1800+ xfs_buf_lock(bp);1801+ bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q);1802+ bp->b_flags |= XBF_WRITE;1803 if (wait)1804+ bp->b_flags &= ~XBF_ASYNC;1805 else1806+ list_del_init(&bp->b_list);18071808+ xfs_buf_iostrategy(bp);1809 }18101811 /*1812 * Remaining list items must be flushed before returning1813 */1814 while (!list_empty(&tmp)) {1815+ bp = list_entry(tmp.next, xfs_buf_t, b_list);18161817+ list_del_init(&bp->b_list);1818+ xfs_iowait(bp);1819+ xfs_buf_relse(bp);1820 }18211822 if (wait)1823+ blk_run_address_space(target->bt_mapping);18241825 return pincount;1826}18271828int __init1829+xfs_buf_init(void)1830{1831 int error = -ENOMEM;18321833+#ifdef XFS_BUF_TRACE1834+ xfs_buf_trace_buf = ktrace_alloc(XFS_BUF_TRACE_SIZE, KM_SLEEP);1835#endif18361837+ xfs_buf_zone = kmem_zone_init(sizeof(xfs_buf_t), "xfs_buf");1838+ if (!xfs_buf_zone)1839 goto out_free_trace_buf;18401841 xfslogd_workqueue = create_workqueue("xfslogd");···1847 if (!xfsdatad_workqueue)1848 goto out_destroy_xfslogd_workqueue;18491850+ xfs_buf_shake = kmem_shake_register(xfsbufd_wakeup);1851+ if (!xfs_buf_shake)01852 goto out_destroy_xfsdatad_workqueue;0000018531854 return 0;1855001856 out_destroy_xfsdatad_workqueue:1857 destroy_workqueue(xfsdatad_workqueue);1858 out_destroy_xfslogd_workqueue:1859 destroy_workqueue(xfslogd_workqueue);1860 out_free_buf_zone:1861+ kmem_zone_destroy(xfs_buf_zone);1862 
out_free_trace_buf:1863+#ifdef XFS_BUF_TRACE1864+ ktrace_free(xfs_buf_trace_buf);1865#endif1866 return error;1867}18681869void1870+xfs_buf_terminate(void)1871{1872+ kmem_shake_deregister(xfs_buf_shake);01873 destroy_workqueue(xfsdatad_workqueue);1874 destroy_workqueue(xfslogd_workqueue);1875+ kmem_zone_destroy(xfs_buf_zone);1876+#ifdef XFS_BUF_TRACE1877+ ktrace_free(xfs_buf_trace_buf);1878#endif1879}
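Note: the xfs_buf.c changes above move the delayed-write queue onto the buffer target and have the per-target xfsbufd thread push only buffers that have sat on the queue longer than xfs_buf_age_centisecs, unless a force flush is pending. The following is a minimal user-space sketch of that ageing test, not the kernel code; the names (queue_time, age_ticks, force_flush) are invented, and the wrap-safe comparison stands in for the kernel's time_before().

/*
 * Sketch of the ageing check xfsbufd applies to each delwri buffer:
 * skip it unless a force flush was requested or it has been queued
 * longer than the configured age.  Illustrative names only.
 */
#include <stdbool.h>
#include <stdio.h>

/* wrap-safe "a is before b", in the spirit of the kernel's time_before() */
static bool before(unsigned long a, unsigned long b)
{
	return (long)(a - b) < 0;
}

static bool delwri_should_push(unsigned long now, unsigned long queue_time,
			       unsigned long age_ticks, bool force_flush)
{
	if (!force_flush && before(now, queue_time + age_ticks))
		return false;		/* too young, leave it on the queue */
	return true;			/* old enough (or forced): write it */
}

int main(void)
{
	printf("%d\n", delwri_should_push(1000, 900, 50, false));	/* 1: aged out */
	printf("%d\n", delwri_should_push(1000, 980, 50, false));	/* 0: still young */
	printf("%d\n", delwri_should_push(1000, 980, 50, true));	/* 1: force flush */
	return 0;
}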
fs/xfs/linux-2.6/xfs_buf.h (+262, -386)
···32 * Base types33 */3435-#define XFS_BUF_DADDR_NULL ((xfs_daddr_t) (-1LL))3637-#define page_buf_ctob(pp) ((pp) * PAGE_CACHE_SIZE)38-#define page_buf_btoc(dd) (((dd) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT)39-#define page_buf_btoct(dd) ((dd) >> PAGE_CACHE_SHIFT)40-#define page_buf_poff(aa) ((aa) & ~PAGE_CACHE_MASK)4142-typedef enum page_buf_rw_e {43- PBRW_READ = 1, /* transfer into target memory */44- PBRW_WRITE = 2, /* transfer from target memory */45- PBRW_ZERO = 3 /* Zero target memory */46-} page_buf_rw_t;4748-49-typedef enum page_buf_flags_e { /* pb_flags values */50- PBF_READ = (1 << 0), /* buffer intended for reading from device */51- PBF_WRITE = (1 << 1), /* buffer intended for writing to device */52- PBF_MAPPED = (1 << 2), /* buffer mapped (pb_addr valid) */53- PBF_ASYNC = (1 << 4), /* initiator will not wait for completion */54- PBF_DONE = (1 << 5), /* all pages in the buffer uptodate */55- PBF_DELWRI = (1 << 6), /* buffer has dirty pages */56- PBF_STALE = (1 << 7), /* buffer has been staled, do not find it */57- PBF_FS_MANAGED = (1 << 8), /* filesystem controls freeing memory */58- PBF_ORDERED = (1 << 11), /* use ordered writes */59- PBF_READ_AHEAD = (1 << 12), /* asynchronous read-ahead */6061 /* flags used only as arguments to access routines */62- PBF_LOCK = (1 << 14), /* lock requested */63- PBF_TRYLOCK = (1 << 15), /* lock requested, but do not wait */64- PBF_DONT_BLOCK = (1 << 16), /* do not block in current thread */6566 /* flags used only internally */67- _PBF_PAGE_CACHE = (1 << 17),/* backed by pagecache */68- _PBF_KMEM_ALLOC = (1 << 18),/* backed by kmem_alloc() */69- _PBF_RUN_QUEUES = (1 << 19),/* run block device task queue */70- _PBF_DELWRI_Q = (1 << 21), /* buffer on delwri queue */71-} page_buf_flags_t;7200007374typedef struct xfs_bufhash {75 struct list_head bh_list;···80} xfs_bufhash_t;8182typedef struct xfs_buftarg {83- dev_t pbr_dev;84- struct block_device *pbr_bdev;85- struct address_space *pbr_mapping;86- unsigned int pbr_bsize;87- unsigned int pbr_sshift;88- size_t pbr_smask;8990- /* per-device buffer hash table */91 uint bt_hashmask;92 uint bt_hashshift;93 xfs_bufhash_t *bt_hash;000000094} xfs_buftarg_t;9596/*97- * xfs_buf_t: Buffer structure for page cache-based buffers98 *99- * This buffer structure is used by the page cache buffer management routines100- * to refer to an assembly of pages forming a logical buffer. The actual I/O101- * is performed with buffer_head structures, as required by drivers.102- * 103- * The buffer structure is used on temporary basis only, and discarded when104- * released. The real data storage is recorded in the page cache. 
Metadata is105 * hashed to the block device on which the file system resides.106 */107108struct xfs_buf;000109110-/* call-back function on I/O completion */111-typedef void (*page_buf_iodone_t)(struct xfs_buf *);112-/* call-back function on I/O completion */113-typedef void (*page_buf_relse_t)(struct xfs_buf *);114-/* pre-write function */115-typedef int (*page_buf_bdstrat_t)(struct xfs_buf *);116-117-#define PB_PAGES 2118119typedef struct xfs_buf {120- struct semaphore pb_sema; /* semaphore for lockables */121- unsigned long pb_queuetime; /* time buffer was queued */122- atomic_t pb_pin_count; /* pin count */123- wait_queue_head_t pb_waiters; /* unpin waiters */124- struct list_head pb_list;125- page_buf_flags_t pb_flags; /* status flags */126- struct list_head pb_hash_list; /* hash table list */127- xfs_bufhash_t *pb_hash; /* hash table list start */128- xfs_buftarg_t *pb_target; /* buffer target (device) */129- atomic_t pb_hold; /* reference count */130- xfs_daddr_t pb_bn; /* block number for I/O */131- loff_t pb_file_offset; /* offset in file */132- size_t pb_buffer_length; /* size of buffer in bytes */133- size_t pb_count_desired; /* desired transfer size */134- void *pb_addr; /* virtual address of buffer */135- struct work_struct pb_iodone_work;136- atomic_t pb_io_remaining;/* #outstanding I/O requests */137- page_buf_iodone_t pb_iodone; /* I/O completion function */138- page_buf_relse_t pb_relse; /* releasing function */139- page_buf_bdstrat_t pb_strat; /* pre-write function */140- struct semaphore pb_iodonesema; /* Semaphore for I/O waiters */141- void *pb_fspriv;142- void *pb_fspriv2;143- void *pb_fspriv3;144- unsigned short pb_error; /* error code on I/O */145- unsigned short pb_locked; /* page array is locked */146- unsigned int pb_page_count; /* size of page array */147- unsigned int pb_offset; /* page offset in first page */148- struct page **pb_pages; /* array of page pointers */149- struct page *pb_page_array[PB_PAGES]; /* inline pages */150-#ifdef PAGEBUF_LOCK_TRACKING151- int pb_last_holder;152#endif153} xfs_buf_t;154155156/* Finding and Reading Buffers */157-158-extern xfs_buf_t *_pagebuf_find( /* find buffer for block if */159- /* the block is in memory */160- xfs_buftarg_t *, /* inode for block */161- loff_t, /* starting offset of range */162- size_t, /* length of range */163- page_buf_flags_t, /* PBF_LOCK */164- xfs_buf_t *); /* newly allocated buffer */165-166#define xfs_incore(buftarg,blkno,len,lockit) \167- _pagebuf_find(buftarg, blkno ,len, lockit, NULL)168169-extern xfs_buf_t *xfs_buf_get_flags( /* allocate a buffer */170- xfs_buftarg_t *, /* inode for buffer */171- loff_t, /* starting offset of range */172- size_t, /* length of range */173- page_buf_flags_t); /* PBF_LOCK, PBF_READ, */174- /* PBF_ASYNC */175-176#define xfs_buf_get(target, blkno, len, flags) \177- xfs_buf_get_flags((target), (blkno), (len), PBF_LOCK | PBF_MAPPED)178179-extern xfs_buf_t *xfs_buf_read_flags( /* allocate and read a buffer */180- xfs_buftarg_t *, /* inode for buffer */181- loff_t, /* starting offset of range */182- size_t, /* length of range */183- page_buf_flags_t); /* PBF_LOCK, PBF_ASYNC */184-185#define xfs_buf_read(target, blkno, len, flags) \186- xfs_buf_read_flags((target), (blkno), (len), PBF_LOCK | PBF_MAPPED)187188-extern xfs_buf_t *pagebuf_get_empty( /* allocate pagebuf struct with */189- /* no memory or disk address */190- size_t len,191- xfs_buftarg_t *); /* mount point "fake" inode */192-193-extern xfs_buf_t *pagebuf_get_no_daddr(/* allocate pagebuf struct */194- /* 
without disk address */195- size_t len,196- xfs_buftarg_t *); /* mount point "fake" inode */197-198-extern int pagebuf_associate_memory(199- xfs_buf_t *,200- void *,201- size_t);202-203-extern void pagebuf_hold( /* increment reference count */204- xfs_buf_t *); /* buffer to hold */205-206-extern void pagebuf_readahead( /* read ahead into cache */207- xfs_buftarg_t *, /* target for buffer (or NULL) */208- loff_t, /* starting offset of range */209- size_t, /* length of range */210- page_buf_flags_t); /* additional read flags */211212/* Releasing Buffers */213-214-extern void pagebuf_free( /* deallocate a buffer */215- xfs_buf_t *); /* buffer to deallocate */216-217-extern void pagebuf_rele( /* release hold on a buffer */218- xfs_buf_t *); /* buffer to release */219220/* Locking and Unlocking Buffers */221-222-extern int pagebuf_cond_lock( /* lock buffer, if not locked */223- /* (returns -EBUSY if locked) */224- xfs_buf_t *); /* buffer to lock */225-226-extern int pagebuf_lock_value( /* return count on lock */227- xfs_buf_t *); /* buffer to check */228-229-extern int pagebuf_lock( /* lock buffer */230- xfs_buf_t *); /* buffer to lock */231-232-extern void pagebuf_unlock( /* unlock buffer */233- xfs_buf_t *); /* buffer to unlock */234235/* Buffer Read and Write Routines */0000000236237-extern void pagebuf_iodone( /* mark buffer I/O complete */238- xfs_buf_t *, /* buffer to mark */239- int); /* run completion locally, or in240- * a helper thread. */241-242-extern void pagebuf_ioerror( /* mark buffer in error (or not) */243- xfs_buf_t *, /* buffer to mark */244- int); /* error to store (0 if none) */245-246-extern int pagebuf_iostart( /* start I/O on a buffer */247- xfs_buf_t *, /* buffer to start */248- page_buf_flags_t); /* PBF_LOCK, PBF_ASYNC, */249- /* PBF_READ, PBF_WRITE, */250- /* PBF_DELWRI */251-252-extern int pagebuf_iorequest( /* start real I/O */253- xfs_buf_t *); /* buffer to convey to device */254-255-extern int pagebuf_iowait( /* wait for buffer I/O done */256- xfs_buf_t *); /* buffer to wait on */257-258-extern void pagebuf_iomove( /* move data in/out of pagebuf */259- xfs_buf_t *, /* buffer to manipulate */260- size_t, /* starting buffer offset */261- size_t, /* length in buffer */262- caddr_t, /* data pointer */263- page_buf_rw_t); /* direction */264-265-static inline int pagebuf_iostrategy(xfs_buf_t *pb)266{267- return pb->pb_strat ? pb->pb_strat(pb) : pagebuf_iorequest(pb);268}269270-static inline int pagebuf_geterror(xfs_buf_t *pb)271{272- return pb ? 
pb->pb_error : ENOMEM;273}274275/* Buffer Utility Routines */276-277-extern caddr_t pagebuf_offset( /* pointer at offset in buffer */278- xfs_buf_t *, /* buffer to offset into */279- size_t); /* offset */280281/* Pinning Buffer Storage in Memory */282-283-extern void pagebuf_pin( /* pin buffer in memory */284- xfs_buf_t *); /* buffer to pin */285-286-extern void pagebuf_unpin( /* unpin buffered data */287- xfs_buf_t *); /* buffer to unpin */288-289-extern int pagebuf_ispin( /* check if buffer is pinned */290- xfs_buf_t *); /* buffer to check */291292/* Delayed Write Buffer Routines */293-294-extern void pagebuf_delwri_dequeue(xfs_buf_t *);295296/* Buffer Daemon Setup Routines */00297298-extern int pagebuf_init(void);299-extern void pagebuf_terminate(void);300-301-302-#ifdef PAGEBUF_TRACE303-extern ktrace_t *pagebuf_trace_buf;304-extern void pagebuf_trace(305- xfs_buf_t *, /* buffer being traced */306- char *, /* description of operation */307- void *, /* arbitrary diagnostic value */308- void *); /* return address */309#else310-# define pagebuf_trace(pb, id, ptr, ra) do { } while (0)311#endif312313-#define pagebuf_target_name(target) \314- ({ char __b[BDEVNAME_SIZE]; bdevname((target)->pbr_bdev, __b); __b; })31531600000317318-/* These are just for xfs_syncsub... it sets an internal variable319- * then passes it to VOP_FLUSH_PAGES or adds the flags to a newly gotten buf_t320- */321-#define XFS_B_ASYNC PBF_ASYNC322-#define XFS_B_DELWRI PBF_DELWRI323-#define XFS_B_READ PBF_READ324-#define XFS_B_WRITE PBF_WRITE325-#define XFS_B_STALE PBF_STALE326327-#define XFS_BUF_TRYLOCK PBF_TRYLOCK328-#define XFS_INCORE_TRYLOCK PBF_TRYLOCK329-#define XFS_BUF_LOCK PBF_LOCK330-#define XFS_BUF_MAPPED PBF_MAPPED331332-#define BUF_BUSY PBF_DONT_BLOCK00333334-#define XFS_BUF_BFLAGS(x) ((x)->pb_flags)335-#define XFS_BUF_ZEROFLAGS(x) \336- ((x)->pb_flags &= ~(PBF_READ|PBF_WRITE|PBF_ASYNC|PBF_DELWRI))337-338-#define XFS_BUF_STALE(x) ((x)->pb_flags |= XFS_B_STALE)339-#define XFS_BUF_UNSTALE(x) ((x)->pb_flags &= ~XFS_B_STALE)340-#define XFS_BUF_ISSTALE(x) ((x)->pb_flags & XFS_B_STALE)341-#define XFS_BUF_SUPER_STALE(x) do { \342- XFS_BUF_STALE(x); \343- pagebuf_delwri_dequeue(x); \344- XFS_BUF_DONE(x); \345 } while (0)346347-#define XFS_BUF_MANAGE PBF_FS_MANAGED348-#define XFS_BUF_UNMANAGE(x) ((x)->pb_flags &= ~PBF_FS_MANAGED)349350-#define XFS_BUF_DELAYWRITE(x) ((x)->pb_flags |= PBF_DELWRI)351-#define XFS_BUF_UNDELAYWRITE(x) pagebuf_delwri_dequeue(x)352-#define XFS_BUF_ISDELAYWRITE(x) ((x)->pb_flags & PBF_DELWRI)353354-#define XFS_BUF_ERROR(x,no) pagebuf_ioerror(x,no)355-#define XFS_BUF_GETERROR(x) pagebuf_geterror(x)356-#define XFS_BUF_ISERROR(x) (pagebuf_geterror(x)?1:0)357358-#define XFS_BUF_DONE(x) ((x)->pb_flags |= PBF_DONE)359-#define XFS_BUF_UNDONE(x) ((x)->pb_flags &= ~PBF_DONE)360-#define XFS_BUF_ISDONE(x) ((x)->pb_flags & PBF_DONE)361362-#define XFS_BUF_BUSY(x) do { } while (0)363-#define XFS_BUF_UNBUSY(x) do { } while (0)364-#define XFS_BUF_ISBUSY(x) (1)365366-#define XFS_BUF_ASYNC(x) ((x)->pb_flags |= PBF_ASYNC)367-#define XFS_BUF_UNASYNC(x) ((x)->pb_flags &= ~PBF_ASYNC)368-#define XFS_BUF_ISASYNC(x) ((x)->pb_flags & PBF_ASYNC)369370-#define XFS_BUF_ORDERED(x) ((x)->pb_flags |= PBF_ORDERED)371-#define XFS_BUF_UNORDERED(x) ((x)->pb_flags &= ~PBF_ORDERED)372-#define XFS_BUF_ISORDERED(x) ((x)->pb_flags & PBF_ORDERED)373374-#define XFS_BUF_SHUT(x) printk("XFS_BUF_SHUT not implemented yet\n")375-#define XFS_BUF_UNSHUT(x) printk("XFS_BUF_UNSHUT not implemented yet\n")376-#define XFS_BUF_ISSHUT(x) 
(0)377378-#define XFS_BUF_HOLD(x) pagebuf_hold(x)379-#define XFS_BUF_READ(x) ((x)->pb_flags |= PBF_READ)380-#define XFS_BUF_UNREAD(x) ((x)->pb_flags &= ~PBF_READ)381-#define XFS_BUF_ISREAD(x) ((x)->pb_flags & PBF_READ)382383-#define XFS_BUF_WRITE(x) ((x)->pb_flags |= PBF_WRITE)384-#define XFS_BUF_UNWRITE(x) ((x)->pb_flags &= ~PBF_WRITE)385-#define XFS_BUF_ISWRITE(x) ((x)->pb_flags & PBF_WRITE)386387-#define XFS_BUF_ISUNINITIAL(x) (0)388-#define XFS_BUF_UNUNINITIAL(x) (0)389390-#define XFS_BUF_BP_ISMAPPED(bp) 1391392-#define XFS_BUF_IODONE_FUNC(buf) (buf)->pb_iodone393-#define XFS_BUF_SET_IODONE_FUNC(buf, func) \394- (buf)->pb_iodone = (func)395-#define XFS_BUF_CLR_IODONE_FUNC(buf) \396- (buf)->pb_iodone = NULL397-#define XFS_BUF_SET_BDSTRAT_FUNC(buf, func) \398- (buf)->pb_strat = (func)399-#define XFS_BUF_CLR_BDSTRAT_FUNC(buf) \400- (buf)->pb_strat = NULL401402-#define XFS_BUF_FSPRIVATE(buf, type) \403- ((type)(buf)->pb_fspriv)404-#define XFS_BUF_SET_FSPRIVATE(buf, value) \405- (buf)->pb_fspriv = (void *)(value)406-#define XFS_BUF_FSPRIVATE2(buf, type) \407- ((type)(buf)->pb_fspriv2)408-#define XFS_BUF_SET_FSPRIVATE2(buf, value) \409- (buf)->pb_fspriv2 = (void *)(value)410-#define XFS_BUF_FSPRIVATE3(buf, type) \411- ((type)(buf)->pb_fspriv3)412-#define XFS_BUF_SET_FSPRIVATE3(buf, value) \413- (buf)->pb_fspriv3 = (void *)(value)414-#define XFS_BUF_SET_START(buf)415416-#define XFS_BUF_SET_BRELSE_FUNC(buf, value) \417- (buf)->pb_relse = (value)00000000418419-#define XFS_BUF_PTR(bp) (xfs_caddr_t)((bp)->pb_addr)00420421-static inline xfs_caddr_t xfs_buf_offset(xfs_buf_t *bp, size_t offset)000000000000422{423- if (bp->pb_flags & PBF_MAPPED)424- return XFS_BUF_PTR(bp) + offset;425- return (xfs_caddr_t) pagebuf_offset(bp, offset);0426}427428-#define XFS_BUF_SET_PTR(bp, val, count) \429- pagebuf_associate_memory(bp, val, count)430-#define XFS_BUF_ADDR(bp) ((bp)->pb_bn)431-#define XFS_BUF_SET_ADDR(bp, blk) \432- ((bp)->pb_bn = (xfs_daddr_t)(blk))433-#define XFS_BUF_OFFSET(bp) ((bp)->pb_file_offset)434-#define XFS_BUF_SET_OFFSET(bp, off) \435- ((bp)->pb_file_offset = (off))436-#define XFS_BUF_COUNT(bp) ((bp)->pb_count_desired)437-#define XFS_BUF_SET_COUNT(bp, cnt) \438- ((bp)->pb_count_desired = (cnt))439-#define XFS_BUF_SIZE(bp) ((bp)->pb_buffer_length)440-#define XFS_BUF_SET_SIZE(bp, cnt) \441- ((bp)->pb_buffer_length = (cnt))442-#define XFS_BUF_SET_VTYPE_REF(bp, type, ref)443-#define XFS_BUF_SET_VTYPE(bp, type)444-#define XFS_BUF_SET_REF(bp, ref)445-446-#define XFS_BUF_ISPINNED(bp) pagebuf_ispin(bp)447-448-#define XFS_BUF_VALUSEMA(bp) pagebuf_lock_value(bp)449-#define XFS_BUF_CPSEMA(bp) (pagebuf_cond_lock(bp) == 0)450-#define XFS_BUF_VSEMA(bp) pagebuf_unlock(bp)451-#define XFS_BUF_PSEMA(bp,x) pagebuf_lock(bp)452-#define XFS_BUF_V_IODONESEMA(bp) up(&bp->pb_iodonesema);453-454-/* setup the buffer target from a buftarg structure */455-#define XFS_BUF_SET_TARGET(bp, target) \456- (bp)->pb_target = (target)457-#define XFS_BUF_TARGET(bp) ((bp)->pb_target)458-#define XFS_BUFTARG_NAME(target) \459- pagebuf_target_name(target)460-461-#define XFS_BUF_SET_VTYPE_REF(bp, type, ref)462-#define XFS_BUF_SET_VTYPE(bp, type)463-#define XFS_BUF_SET_REF(bp, ref)464-465-static inline int xfs_bawrite(void *mp, xfs_buf_t *bp)466{467- bp->pb_fspriv3 = mp;468- bp->pb_strat = xfs_bdstrat_cb;469- pagebuf_delwri_dequeue(bp);470- return pagebuf_iostart(bp, PBF_WRITE | PBF_ASYNC | _PBF_RUN_QUEUES);471}472473-static inline void xfs_buf_relse(xfs_buf_t *bp)474-{475- if (!bp->pb_relse)476- pagebuf_unlock(bp);477- 
pagebuf_rele(bp);478-}479-480-#define xfs_bpin(bp) pagebuf_pin(bp)481-#define xfs_bunpin(bp) pagebuf_unpin(bp)482483#define xfs_buftrace(id, bp) \484- pagebuf_trace(bp, id, NULL, (void *)__builtin_return_address(0))485486-#define xfs_biodone(pb) \487- pagebuf_iodone(pb, 0)488489-#define xfs_biomove(pb, off, len, data, rw) \490- pagebuf_iomove((pb), (off), (len), (data), \491- ((rw) == XFS_B_WRITE) ? PBRW_WRITE : PBRW_READ)492493-#define xfs_biozero(pb, off, len) \494- pagebuf_iomove((pb), (off), (len), NULL, PBRW_ZERO)495496497-static inline int XFS_bwrite(xfs_buf_t *pb)498{499- int iowait = (pb->pb_flags & PBF_ASYNC) == 0;500 int error = 0;501502 if (!iowait)503- pb->pb_flags |= _PBF_RUN_QUEUES;504505- pagebuf_delwri_dequeue(pb);506- pagebuf_iostrategy(pb);507 if (iowait) {508- error = pagebuf_iowait(pb);509- xfs_buf_relse(pb);510 }511 return error;512}513514-#define XFS_bdwrite(pb) \515- pagebuf_iostart(pb, PBF_DELWRI | PBF_ASYNC)516517static inline int xfs_bdwrite(void *mp, xfs_buf_t *bp)518{519- bp->pb_strat = xfs_bdstrat_cb;520- bp->pb_fspriv3 = mp;521-522- return pagebuf_iostart(bp, PBF_DELWRI | PBF_ASYNC);523}524525-#define XFS_bdstrat(bp) pagebuf_iorequest(bp)526527-#define xfs_iowait(pb) pagebuf_iowait(pb)528529#define xfs_baread(target, rablkno, ralen) \530- pagebuf_readahead((target), (rablkno), (ralen), PBF_DONT_BLOCK)531-532-#define xfs_buf_get_empty(len, target) pagebuf_get_empty((len), (target))533-#define xfs_buf_get_noaddr(len, target) pagebuf_get_no_daddr((len), (target))534-#define xfs_buf_free(bp) pagebuf_free(bp)535536537/*538 * Handling of buftargs.539 */540-541extern xfs_buftarg_t *xfs_alloc_buftarg(struct block_device *, int);542extern void xfs_free_buftarg(xfs_buftarg_t *, int);543extern void xfs_wait_buftarg(xfs_buftarg_t *);544extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int);545extern int xfs_flush_buftarg(xfs_buftarg_t *, int);546547-#define xfs_getsize_buftarg(buftarg) \548- block_size((buftarg)->pbr_bdev)549-#define xfs_readonly_buftarg(buftarg) \550- bdev_read_only((buftarg)->pbr_bdev)551-#define xfs_binval(buftarg) \552- xfs_flush_buftarg(buftarg, 1)553-#define XFS_bflush(buftarg) \554- xfs_flush_buftarg(buftarg, 1)555556#endif /* __XFS_BUF_H__ */
···32 * Base types33 */3435+#define XFS_BUF_DADDR_NULL ((xfs_daddr_t) (-1LL))3637+#define xfs_buf_ctob(pp) ((pp) * PAGE_CACHE_SIZE)38+#define xfs_buf_btoc(dd) (((dd) + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT)39+#define xfs_buf_btoct(dd) ((dd) >> PAGE_CACHE_SHIFT)40+#define xfs_buf_poff(aa) ((aa) & ~PAGE_CACHE_MASK)4142+typedef enum {43+ XBRW_READ = 1, /* transfer into target memory */44+ XBRW_WRITE = 2, /* transfer from target memory */45+ XBRW_ZERO = 3, /* Zero target memory */46+} xfs_buf_rw_t;4748+typedef enum {49+ XBF_READ = (1 << 0), /* buffer intended for reading from device */50+ XBF_WRITE = (1 << 1), /* buffer intended for writing to device */51+ XBF_MAPPED = (1 << 2), /* buffer mapped (b_addr valid) */52+ XBF_ASYNC = (1 << 4), /* initiator will not wait for completion */53+ XBF_DONE = (1 << 5), /* all pages in the buffer uptodate */54+ XBF_DELWRI = (1 << 6), /* buffer has dirty pages */55+ XBF_STALE = (1 << 7), /* buffer has been staled, do not find it */56+ XBF_FS_MANAGED = (1 << 8), /* filesystem controls freeing memory */57+ XBF_ORDERED = (1 << 11), /* use ordered writes */58+ XBF_READ_AHEAD = (1 << 12), /* asynchronous read-ahead */05960 /* flags used only as arguments to access routines */61+ XBF_LOCK = (1 << 14), /* lock requested */62+ XBF_TRYLOCK = (1 << 15), /* lock requested, but do not wait */63+ XBF_DONT_BLOCK = (1 << 16), /* do not block in current thread */6465 /* flags used only internally */66+ _XBF_PAGE_CACHE = (1 << 17),/* backed by pagecache */67+ _XBF_KMEM_ALLOC = (1 << 18),/* backed by kmem_alloc() */68+ _XBF_RUN_QUEUES = (1 << 19),/* run block device task queue */69+ _XBF_DELWRI_Q = (1 << 21), /* buffer on delwri queue */70+} xfs_buf_flags_t;7172+typedef enum {73+ XBT_FORCE_SLEEP = (0 << 1),74+ XBT_FORCE_FLUSH = (1 << 1),75+} xfs_buftarg_flags_t;7677typedef struct xfs_bufhash {78 struct list_head bh_list;···77} xfs_bufhash_t;7879typedef struct xfs_buftarg {80+ dev_t bt_dev;81+ struct block_device *bt_bdev;82+ struct address_space *bt_mapping;83+ unsigned int bt_bsize;84+ unsigned int bt_sshift;85+ size_t bt_smask;8687+ /* per device buffer hash table */88 uint bt_hashmask;89 uint bt_hashshift;90 xfs_bufhash_t *bt_hash;91+92+ /* per device delwri queue */93+ struct task_struct *bt_task;94+ struct list_head bt_list;95+ struct list_head bt_delwrite_queue;96+ spinlock_t bt_delwrite_lock;97+ unsigned long bt_flags;98} xfs_buftarg_t;99100/*101+ * xfs_buf_t: Buffer structure for pagecache-based buffers102 *103+ * This buffer structure is used by the pagecache buffer management routines104+ * to refer to an assembly of pages forming a logical buffer.105+ *106+ * The buffer structure is used on a temporary basis only, and discarded when107+ * released. The real data storage is recorded in the pagecache. 
Buffers are0108 * hashed to the block device on which the file system resides.109 */110111struct xfs_buf;112+typedef void (*xfs_buf_iodone_t)(struct xfs_buf *);113+typedef void (*xfs_buf_relse_t)(struct xfs_buf *);114+typedef int (*xfs_buf_bdstrat_t)(struct xfs_buf *);115116+#define XB_PAGES 20000000117118typedef struct xfs_buf {119+ struct semaphore b_sema; /* semaphore for lockables */120+ unsigned long b_queuetime; /* time buffer was queued */121+ atomic_t b_pin_count; /* pin count */122+ wait_queue_head_t b_waiters; /* unpin waiters */123+ struct list_head b_list;124+ xfs_buf_flags_t b_flags; /* status flags */125+ struct list_head b_hash_list; /* hash table list */126+ xfs_bufhash_t *b_hash; /* hash table list start */127+ xfs_buftarg_t *b_target; /* buffer target (device) */128+ atomic_t b_hold; /* reference count */129+ xfs_daddr_t b_bn; /* block number for I/O */130+ xfs_off_t b_file_offset; /* offset in file */131+ size_t b_buffer_length;/* size of buffer in bytes */132+ size_t b_count_desired;/* desired transfer size */133+ void *b_addr; /* virtual address of buffer */134+ struct work_struct b_iodone_work;135+ atomic_t b_io_remaining; /* #outstanding I/O requests */136+ xfs_buf_iodone_t b_iodone; /* I/O completion function */137+ xfs_buf_relse_t b_relse; /* releasing function */138+ xfs_buf_bdstrat_t b_strat; /* pre-write function */139+ struct semaphore b_iodonesema; /* Semaphore for I/O waiters */140+ void *b_fspriv;141+ void *b_fspriv2;142+ void *b_fspriv3;143+ unsigned short b_error; /* error code on I/O */144+ unsigned short b_locked; /* page array is locked */145+ unsigned int b_page_count; /* size of page array */146+ unsigned int b_offset; /* page offset in first page */147+ struct page **b_pages; /* array of page pointers */148+ struct page *b_page_array[XB_PAGES]; /* inline pages */149+#ifdef XFS_BUF_LOCK_TRACKING150+ int b_last_holder;151#endif152} xfs_buf_t;153154155/* Finding and Reading Buffers */156+extern xfs_buf_t *_xfs_buf_find(xfs_buftarg_t *, xfs_off_t, size_t,157+ xfs_buf_flags_t, xfs_buf_t *);0000000158#define xfs_incore(buftarg,blkno,len,lockit) \159+ _xfs_buf_find(buftarg, blkno ,len, lockit, NULL)160161+extern xfs_buf_t *xfs_buf_get_flags(xfs_buftarg_t *, xfs_off_t, size_t,162+ xfs_buf_flags_t);00000163#define xfs_buf_get(target, blkno, len, flags) \164+ xfs_buf_get_flags((target), (blkno), (len), XBF_LOCK | XBF_MAPPED)165166+extern xfs_buf_t *xfs_buf_read_flags(xfs_buftarg_t *, xfs_off_t, size_t,167+ xfs_buf_flags_t);0000168#define xfs_buf_read(target, blkno, len, flags) \169+ xfs_buf_read_flags((target), (blkno), (len), XBF_LOCK | XBF_MAPPED)170171+extern xfs_buf_t *xfs_buf_get_empty(size_t, xfs_buftarg_t *);172+extern xfs_buf_t *xfs_buf_get_noaddr(size_t, xfs_buftarg_t *);173+extern int xfs_buf_associate_memory(xfs_buf_t *, void *, size_t);174+extern void xfs_buf_hold(xfs_buf_t *);175+extern void xfs_buf_readahead(xfs_buftarg_t *, xfs_off_t, size_t,176+ xfs_buf_flags_t);00000000000000000177178/* Releasing Buffers */179+extern void xfs_buf_free(xfs_buf_t *);180+extern void xfs_buf_rele(xfs_buf_t *);0000181182/* Locking and Unlocking Buffers */183+extern int xfs_buf_cond_lock(xfs_buf_t *);184+extern int xfs_buf_lock_value(xfs_buf_t *);185+extern void xfs_buf_lock(xfs_buf_t *);186+extern void xfs_buf_unlock(xfs_buf_t *);000000000187188/* Buffer Read and Write Routines */189+extern void xfs_buf_ioend(xfs_buf_t *, int);190+extern void xfs_buf_ioerror(xfs_buf_t *, int);191+extern int xfs_buf_iostart(xfs_buf_t *, xfs_buf_flags_t);192+extern int 
xfs_buf_iorequest(xfs_buf_t *);193+extern int xfs_buf_iowait(xfs_buf_t *);194+extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, xfs_caddr_t,195+ xfs_buf_rw_t);196197+static inline int xfs_buf_iostrategy(xfs_buf_t *bp)0000000000000000000000000000198{199+ return bp->b_strat ? bp->b_strat(bp) : xfs_buf_iorequest(bp);200}201202+static inline int xfs_buf_geterror(xfs_buf_t *bp)203{204+ return bp ? bp->b_error : ENOMEM;205}206207/* Buffer Utility Routines */208+extern xfs_caddr_t xfs_buf_offset(xfs_buf_t *, size_t);000209210/* Pinning Buffer Storage in Memory */211+extern void xfs_buf_pin(xfs_buf_t *);212+extern void xfs_buf_unpin(xfs_buf_t *);213+extern int xfs_buf_ispin(xfs_buf_t *);000000214215/* Delayed Write Buffer Routines */216+extern void xfs_buf_delwri_dequeue(xfs_buf_t *);0217218/* Buffer Daemon Setup Routines */219+extern int xfs_buf_init(void);220+extern void xfs_buf_terminate(void);221222+#ifdef XFS_BUF_TRACE223+extern ktrace_t *xfs_buf_trace_buf;224+extern void xfs_buf_trace(xfs_buf_t *, char *, void *, void *);00000000225#else226+#define xfs_buf_trace(bp,id,ptr,ra) do { } while (0)227#endif228229+#define xfs_buf_target_name(target) \230+ ({ char __b[BDEVNAME_SIZE]; bdevname((target)->bt_bdev, __b); __b; })231232233+#define XFS_B_ASYNC XBF_ASYNC234+#define XFS_B_DELWRI XBF_DELWRI235+#define XFS_B_READ XBF_READ236+#define XFS_B_WRITE XBF_WRITE237+#define XFS_B_STALE XBF_STALE238239+#define XFS_BUF_TRYLOCK XBF_TRYLOCK240+#define XFS_INCORE_TRYLOCK XBF_TRYLOCK241+#define XFS_BUF_LOCK XBF_LOCK242+#define XFS_BUF_MAPPED XBF_MAPPED0000243244+#define BUF_BUSY XBF_DONT_BLOCK000245246+#define XFS_BUF_BFLAGS(bp) ((bp)->b_flags)247+#define XFS_BUF_ZEROFLAGS(bp) \248+ ((bp)->b_flags &= ~(XBF_READ|XBF_WRITE|XBF_ASYNC|XBF_DELWRI))249250+#define XFS_BUF_STALE(bp) ((bp)->b_flags |= XFS_B_STALE)251+#define XFS_BUF_UNSTALE(bp) ((bp)->b_flags &= ~XFS_B_STALE)252+#define XFS_BUF_ISSTALE(bp) ((bp)->b_flags & XFS_B_STALE)253+#define XFS_BUF_SUPER_STALE(bp) do { \254+ XFS_BUF_STALE(bp); \255+ xfs_buf_delwri_dequeue(bp); \256+ XFS_BUF_DONE(bp); \0000257 } while (0)258259+#define XFS_BUF_MANAGE XBF_FS_MANAGED260+#define XFS_BUF_UNMANAGE(bp) ((bp)->b_flags &= ~XBF_FS_MANAGED)261262+#define XFS_BUF_DELAYWRITE(bp) ((bp)->b_flags |= XBF_DELWRI)263+#define XFS_BUF_UNDELAYWRITE(bp) xfs_buf_delwri_dequeue(bp)264+#define XFS_BUF_ISDELAYWRITE(bp) ((bp)->b_flags & XBF_DELWRI)265266+#define XFS_BUF_ERROR(bp,no) xfs_buf_ioerror(bp,no)267+#define XFS_BUF_GETERROR(bp) xfs_buf_geterror(bp)268+#define XFS_BUF_ISERROR(bp) (xfs_buf_geterror(bp) ? 
1 : 0)269270+#define XFS_BUF_DONE(bp) ((bp)->b_flags |= XBF_DONE)271+#define XFS_BUF_UNDONE(bp) ((bp)->b_flags &= ~XBF_DONE)272+#define XFS_BUF_ISDONE(bp) ((bp)->b_flags & XBF_DONE)273274+#define XFS_BUF_BUSY(bp) do { } while (0)275+#define XFS_BUF_UNBUSY(bp) do { } while (0)276+#define XFS_BUF_ISBUSY(bp) (1)277278+#define XFS_BUF_ASYNC(bp) ((bp)->b_flags |= XBF_ASYNC)279+#define XFS_BUF_UNASYNC(bp) ((bp)->b_flags &= ~XBF_ASYNC)280+#define XFS_BUF_ISASYNC(bp) ((bp)->b_flags & XBF_ASYNC)281282+#define XFS_BUF_ORDERED(bp) ((bp)->b_flags |= XBF_ORDERED)283+#define XFS_BUF_UNORDERED(bp) ((bp)->b_flags &= ~XBF_ORDERED)284+#define XFS_BUF_ISORDERED(bp) ((bp)->b_flags & XBF_ORDERED)285286+#define XFS_BUF_SHUT(bp) do { } while (0)287+#define XFS_BUF_UNSHUT(bp) do { } while (0)288+#define XFS_BUF_ISSHUT(bp) (0)289290+#define XFS_BUF_HOLD(bp) xfs_buf_hold(bp)291+#define XFS_BUF_READ(bp) ((bp)->b_flags |= XBF_READ)292+#define XFS_BUF_UNREAD(bp) ((bp)->b_flags &= ~XBF_READ)293+#define XFS_BUF_ISREAD(bp) ((bp)->b_flags & XBF_READ)294295+#define XFS_BUF_WRITE(bp) ((bp)->b_flags |= XBF_WRITE)296+#define XFS_BUF_UNWRITE(bp) ((bp)->b_flags &= ~XBF_WRITE)297+#define XFS_BUF_ISWRITE(bp) ((bp)->b_flags & XBF_WRITE)298299+#define XFS_BUF_ISUNINITIAL(bp) (0)300+#define XFS_BUF_UNUNINITIAL(bp) (0)301302+#define XFS_BUF_BP_ISMAPPED(bp) (1)303304+#define XFS_BUF_IODONE_FUNC(bp) ((bp)->b_iodone)305+#define XFS_BUF_SET_IODONE_FUNC(bp, func) ((bp)->b_iodone = (func))306+#define XFS_BUF_CLR_IODONE_FUNC(bp) ((bp)->b_iodone = NULL)307+#define XFS_BUF_SET_BDSTRAT_FUNC(bp, func) ((bp)->b_strat = (func))308+#define XFS_BUF_CLR_BDSTRAT_FUNC(bp) ((bp)->b_strat = NULL)0000309310+#define XFS_BUF_FSPRIVATE(bp, type) ((type)(bp)->b_fspriv)311+#define XFS_BUF_SET_FSPRIVATE(bp, val) ((bp)->b_fspriv = (void*)(val))312+#define XFS_BUF_FSPRIVATE2(bp, type) ((type)(bp)->b_fspriv2)313+#define XFS_BUF_SET_FSPRIVATE2(bp, val) ((bp)->b_fspriv2 = (void*)(val))314+#define XFS_BUF_FSPRIVATE3(bp, type) ((type)(bp)->b_fspriv3)315+#define XFS_BUF_SET_FSPRIVATE3(bp, val) ((bp)->b_fspriv3 = (void*)(val))316+#define XFS_BUF_SET_START(bp) do { } while (0)317+#define XFS_BUF_SET_BRELSE_FUNC(bp, func) ((bp)->b_relse = (func))00000318319+#define XFS_BUF_PTR(bp) (xfs_caddr_t)((bp)->b_addr)320+#define XFS_BUF_SET_PTR(bp, val, cnt) xfs_buf_associate_memory(bp, val, cnt)321+#define XFS_BUF_ADDR(bp) ((bp)->b_bn)322+#define XFS_BUF_SET_ADDR(bp, bno) ((bp)->b_bn = (xfs_daddr_t)(bno))323+#define XFS_BUF_OFFSET(bp) ((bp)->b_file_offset)324+#define XFS_BUF_SET_OFFSET(bp, off) ((bp)->b_file_offset = (off))325+#define XFS_BUF_COUNT(bp) ((bp)->b_count_desired)326+#define XFS_BUF_SET_COUNT(bp, cnt) ((bp)->b_count_desired = (cnt))327+#define XFS_BUF_SIZE(bp) ((bp)->b_buffer_length)328+#define XFS_BUF_SET_SIZE(bp, cnt) ((bp)->b_buffer_length = (cnt))329330+#define XFS_BUF_SET_VTYPE_REF(bp, type, ref) do { } while (0)331+#define XFS_BUF_SET_VTYPE(bp, type) do { } while (0)332+#define XFS_BUF_SET_REF(bp, ref) do { } while (0)333334+#define XFS_BUF_ISPINNED(bp) xfs_buf_ispin(bp)335+336+#define XFS_BUF_VALUSEMA(bp) xfs_buf_lock_value(bp)337+#define XFS_BUF_CPSEMA(bp) (xfs_buf_cond_lock(bp) == 0)338+#define XFS_BUF_VSEMA(bp) xfs_buf_unlock(bp)339+#define XFS_BUF_PSEMA(bp,x) xfs_buf_lock(bp)340+#define XFS_BUF_V_IODONESEMA(bp) up(&bp->b_iodonesema);341+342+#define XFS_BUF_SET_TARGET(bp, target) ((bp)->b_target = (target))343+#define XFS_BUF_TARGET(bp) ((bp)->b_target)344+#define XFS_BUFTARG_NAME(target) xfs_buf_target_name(target)345+346+static inline int xfs_bawrite(void 
*mp, xfs_buf_t *bp)347{348+ bp->b_fspriv3 = mp;349+ bp->b_strat = xfs_bdstrat_cb;350+ xfs_buf_delwri_dequeue(bp);351+ return xfs_buf_iostart(bp, XBF_WRITE | XBF_ASYNC | _XBF_RUN_QUEUES);352}353354+static inline void xfs_buf_relse(xfs_buf_t *bp)0000000000000000000000000000000000000355{356+ if (!bp->b_relse)357+ xfs_buf_unlock(bp);358+ xfs_buf_rele(bp);0359}360361+#define xfs_bpin(bp) xfs_buf_pin(bp)362+#define xfs_bunpin(bp) xfs_buf_unpin(bp)0000000363364#define xfs_buftrace(id, bp) \365+ xfs_buf_trace(bp, id, NULL, (void *)__builtin_return_address(0))366367+#define xfs_biodone(bp) xfs_buf_ioend(bp, 0)0368369+#define xfs_biomove(bp, off, len, data, rw) \370+ xfs_buf_iomove((bp), (off), (len), (data), \371+ ((rw) == XFS_B_WRITE) ? XBRW_WRITE : XBRW_READ)372373+#define xfs_biozero(bp, off, len) \374+ xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO)375376377+static inline int XFS_bwrite(xfs_buf_t *bp)378{379+ int iowait = (bp->b_flags & XBF_ASYNC) == 0;380 int error = 0;381382 if (!iowait)383+ bp->b_flags |= _XBF_RUN_QUEUES;384385+ xfs_buf_delwri_dequeue(bp);386+ xfs_buf_iostrategy(bp);387 if (iowait) {388+ error = xfs_buf_iowait(bp);389+ xfs_buf_relse(bp);390 }391 return error;392}393394+#define XFS_bdwrite(bp) xfs_buf_iostart(bp, XBF_DELWRI | XBF_ASYNC)0395396static inline int xfs_bdwrite(void *mp, xfs_buf_t *bp)397{398+ bp->b_strat = xfs_bdstrat_cb;399+ bp->b_fspriv3 = mp;400+ return xfs_buf_iostart(bp, XBF_DELWRI | XBF_ASYNC);0401}402403+#define XFS_bdstrat(bp) xfs_buf_iorequest(bp)404405+#define xfs_iowait(bp) xfs_buf_iowait(bp)406407#define xfs_baread(target, rablkno, ralen) \408+ xfs_buf_readahead((target), (rablkno), (ralen), XBF_DONT_BLOCK)0000409410411/*412 * Handling of buftargs.413 */0414extern xfs_buftarg_t *xfs_alloc_buftarg(struct block_device *, int);415extern void xfs_free_buftarg(xfs_buftarg_t *, int);416extern void xfs_wait_buftarg(xfs_buftarg_t *);417extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int);418extern int xfs_flush_buftarg(xfs_buftarg_t *, int);419420+#define xfs_getsize_buftarg(buftarg) block_size((buftarg)->bt_bdev)421+#define xfs_readonly_buftarg(buftarg) bdev_read_only((buftarg)->bt_bdev)422+423+#define xfs_binval(buftarg) xfs_flush_buftarg(buftarg, 1)424+#define XFS_bflush(buftarg) xfs_flush_buftarg(buftarg, 1)000425426#endif /* __XFS_BUF_H__ */
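Note: the header above renames the page_buf_* interfaces to xfs_buf_* and replaces the PBF_* values with the XBF_* flag enum, keeping the old XFS_B_*/XFS_BUF_* macro names as thin aliases so callers compile unchanged. A small stand-alone sketch of the or-able flag-enum idiom follows; the DEMO_* names and bit positions are invented for the example, only the pattern mirrors the header.

/*
 * Illustrative or-able flag enum in the style of xfs_buf_flags_t,
 * plus a ZEROFLAGS-style mask clear.  Demo names only.
 */
#include <stdio.h>

typedef enum {
	DEMO_READ   = (1 << 0),
	DEMO_WRITE  = (1 << 1),
	DEMO_ASYNC  = (1 << 4),
	DEMO_DELWRI = (1 << 6),
} demo_buf_flags_t;

#define DEMO_ZEROFLAGS(f) \
	((f) &= ~(DEMO_READ | DEMO_WRITE | DEMO_ASYNC | DEMO_DELWRI))

int main(void)
{
	unsigned int flags = DEMO_DELWRI | DEMO_ASYNC;

	printf("delayed write? %s\n", (flags & DEMO_DELWRI) ? "yes" : "no");
	DEMO_ZEROFLAGS(flags);
	printf("after clear: %#x\n", flags);
	return 0;
}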
···509 vnode_t *vp = LINVFS_GET_VP(inode);510 xfs_mount_t *mp = XFS_VFSTOM(vp->v_vfsp);511 int error = 0;0512 xfs_inode_t *ip;513514 if (vp->v_vfsp->vfs_flag & VFS_DMI) {515+ ip = xfs_vtoi(vp);516+ if (!ip) {517 error = -EINVAL;518 goto open_exec_out;519 }0520 if (DM_EVENT_ENABLED(vp->v_vfsp, ip, DM_EVENT_READ)) {521 error = -XFS_SEND_DATA(mp, DM_EVENT_READ, vp,522 0, 0, 0, NULL);
fs/xfs/linux-2.6/xfs_ioctl.c (+3, -7)
···146147 if (cmd != XFS_IOC_PATH_TO_FSHANDLE) {148 xfs_inode_t *ip;149- bhv_desc_t *bhv;150 int lock_mode;151152 /* need to get access to the xfs_inode to read the generation */153- bhv = vn_bhv_lookup_unlocked(VN_BHV_HEAD(vp), &xfs_vnodeops);154- ASSERT(bhv);155- ip = XFS_BHVTOI(bhv);156 ASSERT(ip);157 lock_mode = xfs_ilock_map_shared(ip);158···748 (ip->i_d.di_flags & XFS_DIFLAG_REALTIME) ?749 mp->m_rtdev_targp : mp->m_ddev_targp;750751- da.d_mem = da.d_miniosz = 1 << target->pbr_sshift;752- /* The size dio will do in one go */753- da.d_maxiosz = 64 * PAGE_CACHE_SIZE;754755 if (copy_to_user(arg, &da, sizeof(da)))756 return -XFS_ERROR(EFAULT);
···146147 if (cmd != XFS_IOC_PATH_TO_FSHANDLE) {148 xfs_inode_t *ip;0149 int lock_mode;150151 /* need to get access to the xfs_inode to read the generation */152+ ip = xfs_vtoi(vp);00153 ASSERT(ip);154 lock_mode = xfs_ilock_map_shared(ip);155···751 (ip->i_d.di_flags & XFS_DIFLAG_REALTIME) ?752 mp->m_rtdev_targp : mp->m_ddev_targp;753754+ da.d_mem = da.d_miniosz = 1 << target->bt_sshift;755+ da.d_maxiosz = INT_MAX & ~(da.d_miniosz - 1);0756757 if (copy_to_user(arg, &da, sizeof(da)))758 return -XFS_ERROR(EFAULT);
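Note: the XFS_IOC_DIOINFO hunk above reports d_maxiosz as INT_MAX rounded down to a multiple of d_miniosz instead of a fixed 64 pages. The rounding only works because d_miniosz is a power of two (1 << bt_sshift). A tiny sketch of that arithmetic; the sector shift used here is an assumption for the demo.

/*
 * Sketch of the new d_maxiosz calculation: clamp the maximum direct
 * I/O size to INT_MAX rounded down to a multiple of the minimum
 * direct I/O size, which is a power of two.
 */
#include <limits.h>
#include <stdio.h>

int main(void)
{
	unsigned int sshift = 9;		/* assume 512-byte sectors */
	int miniosz = 1 << sshift;
	int maxiosz = INT_MAX & ~(miniosz - 1);

	printf("miniosz = %d, maxiosz = %d\n", miniosz, maxiosz);
	printf("maxiosz %% miniosz = %d\n", maxiosz % miniosz);
	return 0;
}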
fs/xfs/linux-2.6/xfs_iops.c (+81, -40)
···54#include <linux/capability.h>55#include <linux/xattr.h>56#include <linux/namei.h>05758#define IS_NOATIME(inode) ((inode->i_sb->s_flags & MS_NOATIME) || \59 (S_ISDIR(inode->i_mode) && inode->i_sb->s_flags & MS_NODIRATIME))00000000000000000000000000000000006061/*62 * Change the requested timestamp in the given inode.···111{112 struct inode *inode = LINVFS_GET_IP(XFS_ITOV(ip));113 timespec_t tv;114-115- /*116- * We're not supposed to change timestamps in readonly-mounted117- * filesystems. Throw it away if anyone asks us.118- */119- if (unlikely(IS_RDONLY(inode)))120- return;121-122- /*123- * Don't update access timestamps on reads if mounted "noatime".124- * Throw it away if anyone asks us.125- */126- if (unlikely(127- (ip->i_mount->m_flags & XFS_MOUNT_NOATIME || IS_NOATIME(inode)) &&128- (flags & (XFS_ICHGTIME_ACC|XFS_ICHGTIME_MOD|XFS_ICHGTIME_CHG)) ==129- XFS_ICHGTIME_ACC))130- return;131132 nanotime(&tv);133 if (flags & XFS_ICHGTIME_MOD) {···148 * Variant on the above which avoids querying the system clock149 * in situations where we know the Linux inode timestamps have150 * just been updated (and so we can update our inode cheaply).151- * We also skip the readonly and noatime checks here, they are152- * also catered for already.153 */154void155xfs_ichgtime_fast(···158 timespec_t *tvp;159160 /*000000161 * We're not supposed to change timestamps in readonly-mounted162 * filesystems. Throw it away if anyone asks us.163 */164 if (unlikely(IS_RDONLY(inode)))165 return;166167- /*168- * Don't update access timestamps on reads if mounted "noatime".169- * Throw it away if anyone asks us.170- */171- if (unlikely(172- (ip->i_mount->m_flags & XFS_MOUNT_NOATIME || IS_NOATIME(inode)) &&173- ((flags & (XFS_ICHGTIME_ACC|XFS_ICHGTIME_MOD|XFS_ICHGTIME_CHG)) ==174- XFS_ICHGTIME_ACC)))175- return;176-177 if (flags & XFS_ICHGTIME_MOD) {178 tvp = &inode->i_mtime;179 ip->i_d.di_mtime.t_sec = (__int32_t)tvp->tv_sec;180 ip->i_d.di_mtime.t_nsec = (__int32_t)tvp->tv_nsec;181- }182- if (flags & XFS_ICHGTIME_ACC) {183- tvp = &inode->i_atime;184- ip->i_d.di_atime.t_sec = (__int32_t)tvp->tv_sec;185- ip->i_d.di_atime.t_nsec = (__int32_t)tvp->tv_nsec;186 }187 if (flags & XFS_ICHGTIME_CHG) {188 tvp = &inode->i_ctime;···218 if (i_size_read(ip) != va.va_size)219 i_size_write(ip, va.va_size);220 }000000000000000000000000000000000221}222223/*···318 break;319 }320000321 if (default_acl) {322 if (!error) {323 error = _ACL_INHERIT(vp, &va, default_acl);···337 teardown.d_inode = ip = LINVFS_GET_IP(vp);338 teardown.d_name = dentry->d_name;339340- vn_mark_bad(vp);341-342 if (S_ISDIR(mode))343 VOP_RMDIR(dvp, &teardown, NULL, err2);344 else···547 ASSERT(dentry);548 ASSERT(nd);549550- link = (char *)kmalloc(MAXNAMELEN+1, GFP_KERNEL);551 if (!link) {552 nd_set_link(nd, ERR_PTR(-ENOMEM));553 return NULL;···563 vp = LINVFS_GET_VP(dentry->d_inode);564565 iov.iov_base = link;566- iov.iov_len = MAXNAMELEN;567568 uio->uio_iov = &iov;569 uio->uio_offset = 0;570 uio->uio_segflg = UIO_SYSSPACE;571- uio->uio_resid = MAXNAMELEN;572 uio->uio_iovcnt = 1;573574 VOP_READLINK(vp, uio, 0, NULL, error);···576 kfree(link);577 link = ERR_PTR(-error);578 } else {579- link[MAXNAMELEN - uio->uio_resid] = '\0';580 }581 kfree(uio);582
···54#include <linux/capability.h>55#include <linux/xattr.h>56#include <linux/namei.h>57+#include <linux/security.h>5859#define IS_NOATIME(inode) ((inode->i_sb->s_flags & MS_NOATIME) || \60 (S_ISDIR(inode->i_mode) && inode->i_sb->s_flags & MS_NODIRATIME))61+62+/*63+ * Get a XFS inode from a given vnode.64+ */65+xfs_inode_t *66+xfs_vtoi(67+ struct vnode *vp)68+{69+ bhv_desc_t *bdp;70+71+ bdp = bhv_lookup_range(VN_BHV_HEAD(vp),72+ VNODE_POSITION_XFS, VNODE_POSITION_XFS);73+ if (unlikely(bdp == NULL))74+ return NULL;75+ return XFS_BHVTOI(bdp);76+}77+78+/*79+ * Bring the atime in the XFS inode uptodate.80+ * Used before logging the inode to disk or when the Linux inode goes away.81+ */82+void83+xfs_synchronize_atime(84+ xfs_inode_t *ip)85+{86+ vnode_t *vp;87+88+ vp = XFS_ITOV_NULL(ip);89+ if (vp) {90+ struct inode *inode = &vp->v_inode;91+ ip->i_d.di_atime.t_sec = (__int32_t)inode->i_atime.tv_sec;92+ ip->i_d.di_atime.t_nsec = (__int32_t)inode->i_atime.tv_nsec;93+ }94+}9596/*97 * Change the requested timestamp in the given inode.···76{77 struct inode *inode = LINVFS_GET_IP(XFS_ITOV(ip));78 timespec_t tv;000000000000000007980 nanotime(&tv);81 if (flags & XFS_ICHGTIME_MOD) {···130 * Variant on the above which avoids querying the system clock131 * in situations where we know the Linux inode timestamps have132 * just been updated (and so we can update our inode cheaply).00133 */134void135xfs_ichgtime_fast(···142 timespec_t *tvp;143144 /*145+ * Atime updates for read() & friends are handled lazily now, and146+ * explicit updates must go through xfs_ichgtime()147+ */148+ ASSERT((flags & XFS_ICHGTIME_ACC) == 0);149+150+ /*151 * We're not supposed to change timestamps in readonly-mounted152 * filesystems. Throw it away if anyone asks us.153 */154 if (unlikely(IS_RDONLY(inode)))155 return;1560000000000157 if (flags & XFS_ICHGTIME_MOD) {158 tvp = &inode->i_mtime;159 ip->i_d.di_mtime.t_sec = (__int32_t)tvp->tv_sec;160 ip->i_d.di_mtime.t_nsec = (__int32_t)tvp->tv_nsec;00000161 }162 if (flags & XFS_ICHGTIME_CHG) {163 tvp = &inode->i_ctime;···211 if (i_size_read(ip) != va.va_size)212 i_size_write(ip, va.va_size);213 }214+}215+216+/*217+ * Hook in SELinux. 
This is not quite correct yet, what we really need218+ * here (as we do for default ACLs) is a mechanism by which creation of219+ * these attrs can be journalled at inode creation time (along with the220+ * inode, of course, such that log replay can't cause these to be lost).221+ */222+STATIC int223+linvfs_init_security(224+ struct vnode *vp,225+ struct inode *dir)226+{227+ struct inode *ip = LINVFS_GET_IP(vp);228+ size_t length;229+ void *value;230+ char *name;231+ int error;232+233+ error = security_inode_init_security(ip, dir, &name, &value, &length);234+ if (error) {235+ if (error == -EOPNOTSUPP)236+ return 0;237+ return -error;238+ }239+240+ VOP_ATTR_SET(vp, name, value, length, ATTR_SECURE, NULL, error);241+ if (!error)242+ VMODIFY(vp);243+244+ kfree(name);245+ kfree(value);246+ return error;247}248249/*···278 break;279 }280281+ if (!error)282+ error = linvfs_init_security(vp, dir);283+284 if (default_acl) {285 if (!error) {286 error = _ACL_INHERIT(vp, &va, default_acl);···294 teardown.d_inode = ip = LINVFS_GET_IP(vp);295 teardown.d_name = dentry->d_name;29600297 if (S_ISDIR(mode))298 VOP_RMDIR(dvp, &teardown, NULL, err2);299 else···506 ASSERT(dentry);507 ASSERT(nd);508509+ link = (char *)kmalloc(MAXPATHLEN+1, GFP_KERNEL);510 if (!link) {511 nd_set_link(nd, ERR_PTR(-ENOMEM));512 return NULL;···522 vp = LINVFS_GET_VP(dentry->d_inode);523524 iov.iov_base = link;525+ iov.iov_len = MAXPATHLEN;526527 uio->uio_iov = &iov;528 uio->uio_offset = 0;529 uio->uio_segflg = UIO_SYSSPACE;530+ uio->uio_resid = MAXPATHLEN;531 uio->uio_iovcnt = 1;532533 VOP_READLINK(vp, uio, 0, NULL, error);···535 kfree(link);536 link = ERR_PTR(-error);537 } else {538+ link[MAXPATHLEN - uio->uio_resid] = '\0';539 }540 kfree(uio);541
···27 mutex_init(&uuid_monitor);28}29000000000030/*31 * uuid_getnodeuniq - obtain the node unique fields of a UUID.32 *···46void47uuid_getnodeuniq(uuid_t *uuid, int fsid [2])48{49- char *uu = (char *)uuid;5051- /* on IRIX, this function assumes big-endian fields within52- * the uuid, so we use INT_GET to get the same result on53- * little-endian systems54- */55-56- fsid[0] = (INT_GET(*(u_int16_t*)(uu+8), ARCH_CONVERT) << 16) +57- INT_GET(*(u_int16_t*)(uu+4), ARCH_CONVERT);58- fsid[1] = INT_GET(*(u_int32_t*)(uu ), ARCH_CONVERT);59}6061void
···27 mutex_init(&uuid_monitor);28}2930+31+/* IRIX interpretation of an uuid_t */32+typedef struct {33+ __be32 uu_timelow;34+ __be16 uu_timemid;35+ __be16 uu_timehi;36+ __be16 uu_clockseq;37+ __be16 uu_node[3];38+} xfs_uu_t;39+40/*41 * uuid_getnodeuniq - obtain the node unique fields of a UUID.42 *···36void37uuid_getnodeuniq(uuid_t *uuid, int fsid [2])38{39+ xfs_uu_t *uup = (xfs_uu_t *)uuid;4041+ fsid[0] = (be16_to_cpu(uup->uu_clockseq) << 16) |42+ be16_to_cpu(uup->uu_timemid);43+ fsid[1] = be16_to_cpu(uup->uu_timelow);0000044}4546void
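Note: uuid_getnodeuniq() above now decodes the UUID through an explicit xfs_uu_t layout of big-endian fields instead of casting raw byte offsets. A portable user-space sketch of that decoding follows; the be16/be32 helpers and the sample bytes are invented for the demo, and the 32-bit read of time_low mirrors what the original INT_GET of a u_int32_t did.

/*
 * Decode the IRIX-layout UUID fields as explicit big-endian values,
 * independent of host endianness.  Demo helpers and data only.
 */
#include <stdint.h>
#include <stdio.h>

static uint16_t be16(const unsigned char *p)
{
	return (uint16_t)((p[0] << 8) | p[1]);
}

static uint32_t be32(const unsigned char *p)
{
	return ((uint32_t)p[0] << 24) | ((uint32_t)p[1] << 16) |
	       ((uint32_t)p[2] << 8)  |  (uint32_t)p[3];
}

int main(void)
{
	/* layout: time_low(4) time_mid(2) time_hi(2) clock_seq(2) node(6) */
	unsigned char uu[16] = {
		0x12, 0x34, 0x56, 0x78,  0x9a, 0xbc,  0xde, 0xf0,
		0x11, 0x22,  0x33, 0x44, 0x55, 0x66, 0x77, 0x88,
	};
	int fsid[2];

	fsid[0] = (be16(&uu[8]) << 16) | be16(&uu[4]);	/* clock_seq:time_mid */
	fsid[1] = (int)be32(&uu[0]);			/* time_low */

	printf("fsid = { %#x, %#x }\n", fsid[0], fsid[1]);
	return 0;
}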
···128 return (offset >= minforkoff) ? minforkoff : 0;129 }130131- if (unlikely(mp->m_flags & XFS_MOUNT_COMPAT_ATTR)) {132 if (bytes <= XFS_IFORK_ASIZE(dp))133 return mp->m_attroffset >> 3;134 return 0;···157{158 unsigned long s;159160- if (!(mp->m_flags & XFS_MOUNT_COMPAT_ATTR) &&161 !(XFS_SB_VERSION_HASATTR2(&mp->m_sb))) {162 s = XFS_SB_LOCK(mp);163 if (!XFS_SB_VERSION_HASATTR2(&mp->m_sb)) {···311 */312 totsize -= size;313 if (totsize == sizeof(xfs_attr_sf_hdr_t) && !args->addname &&314- !(mp->m_flags & XFS_MOUNT_COMPAT_ATTR)) {315 /*316 * Last attribute now removed, revert to original317 * inode format making all literal area available···330 dp->i_d.di_forkoff = xfs_attr_shortform_bytesfit(dp, totsize);331 ASSERT(dp->i_d.di_forkoff);332 ASSERT(totsize > sizeof(xfs_attr_sf_hdr_t) || args->addname ||333- (mp->m_flags & XFS_MOUNT_COMPAT_ATTR));334 dp->i_afp->if_ext_max =335 XFS_IFORK_ASIZE(dp) / (uint)sizeof(xfs_bmbt_rec_t);336 dp->i_df.if_ext_max =···739 + name_loc->namelen740 + INT_GET(name_loc->valuelen, ARCH_CONVERT);741 }742- if (!(dp->i_mount->m_flags & XFS_MOUNT_COMPAT_ATTR) &&743 (bytes == sizeof(struct xfs_attr_sf_hdr)))744 return(-1);745 return(xfs_attr_shortform_bytesfit(dp, bytes));···778 goto out;779780 if (forkoff == -1) {781- ASSERT(!(dp->i_mount->m_flags & XFS_MOUNT_COMPAT_ATTR));782783 /*784 * Last attribute was removed, revert to original
···128 return (offset >= minforkoff) ? minforkoff : 0;129 }130131+ if (!(mp->m_flags & XFS_MOUNT_ATTR2)) {132 if (bytes <= XFS_IFORK_ASIZE(dp))133 return mp->m_attroffset >> 3;134 return 0;···157{158 unsigned long s;159160+ if ((mp->m_flags & XFS_MOUNT_ATTR2) &&161 !(XFS_SB_VERSION_HASATTR2(&mp->m_sb))) {162 s = XFS_SB_LOCK(mp);163 if (!XFS_SB_VERSION_HASATTR2(&mp->m_sb)) {···311 */312 totsize -= size;313 if (totsize == sizeof(xfs_attr_sf_hdr_t) && !args->addname &&314+ (mp->m_flags & XFS_MOUNT_ATTR2)) {315 /*316 * Last attribute now removed, revert to original317 * inode format making all literal area available···330 dp->i_d.di_forkoff = xfs_attr_shortform_bytesfit(dp, totsize);331 ASSERT(dp->i_d.di_forkoff);332 ASSERT(totsize > sizeof(xfs_attr_sf_hdr_t) || args->addname ||333+ !(mp->m_flags & XFS_MOUNT_ATTR2));334 dp->i_afp->if_ext_max =335 XFS_IFORK_ASIZE(dp) / (uint)sizeof(xfs_bmbt_rec_t);336 dp->i_df.if_ext_max =···739 + name_loc->namelen740 + INT_GET(name_loc->valuelen, ARCH_CONVERT);741 }742+ if ((dp->i_mount->m_flags & XFS_MOUNT_ATTR2) &&743 (bytes == sizeof(struct xfs_attr_sf_hdr)))744 return(-1);745 return(xfs_attr_shortform_bytesfit(dp, bytes));···778 goto out;779780 if (forkoff == -1) {781+ ASSERT(dp->i_mount->m_flags & XFS_MOUNT_ATTR2);782783 /*784 * Last attribute was removed, revert to original
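Note: the xfs_attr.c hunks above flip the sense of the mount flag: every test of !(m_flags & XFS_MOUNT_COMPAT_ATTR) becomes (m_flags & XFS_MOUNT_ATTR2), and vice versa. A trivial sketch of that polarity flip; the bit values here are invented for the demo.

/*
 * Flag inversion sketch: "compat attr not set" in the old code means
 * the same thing as "ATTR2 set" in the new code.
 */
#include <stdio.h>

#define DEMO_MOUNT_COMPAT_ATTR	(1 << 8)	/* old: force attr1 format */
#define DEMO_MOUNT_ATTR2	(1 << 8)	/* new: enable attr2 format */

static int old_wants_attr2(unsigned int m_flags)
{
	return !(m_flags & DEMO_MOUNT_COMPAT_ATTR);
}

static int new_wants_attr2(unsigned int m_flags)
{
	return (m_flags & DEMO_MOUNT_ATTR2) != 0;
}

int main(void)
{
	/* the same question, asked with opposite flag polarity */
	printf("old, flag clear: %d\n", old_wants_attr2(0));
	printf("new, flag set:   %d\n", new_wants_attr2(DEMO_MOUNT_ATTR2));
	return 0;
}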
fs/xfs/xfs_attr_leaf.h (+43, -36)
···63 * the leaf_entry. The namespaces are independent only because we also look64 * at the namespace bit when we are looking for a matching attribute name.65 *66- * We also store a "incomplete" bit in the leaf_entry. It shows that an67 * attribute is in the middle of being created and should not be shown to68 * the user if we crash during the time that the bit is set. We clear the69 * bit when we have finished setting up the attribute. We do this because···72 */73#define XFS_ATTR_LEAF_MAPSIZE 3 /* how many freespace slots */7400000000000000000000000000000000000075typedef struct xfs_attr_leafblock {76- struct xfs_attr_leaf_hdr { /* constant-structure header block */77- xfs_da_blkinfo_t info; /* block type, links, etc. */78- __uint16_t count; /* count of active leaf_entry's */79- __uint16_t usedbytes; /* num bytes of names/values stored */80- __uint16_t firstused; /* first used byte in name area */81- __uint8_t holes; /* != 0 if blk needs compaction */82- __uint8_t pad1;83- struct xfs_attr_leaf_map { /* RLE map of free bytes */84- __uint16_t base; /* base of free region */85- __uint16_t size; /* length of free region */86- } freemap[XFS_ATTR_LEAF_MAPSIZE]; /* N largest free regions */87- } hdr;88- struct xfs_attr_leaf_entry { /* sorted on key, not name */89- xfs_dahash_t hashval; /* hash value of name */90- __uint16_t nameidx; /* index into buffer of name/value */91- __uint8_t flags; /* LOCAL/ROOT/SECURE/INCOMPLETE flag */92- __uint8_t pad2; /* unused pad byte */93- } entries[1]; /* variable sized array */94- struct xfs_attr_leaf_name_local {95- __uint16_t valuelen; /* number of bytes in value */96- __uint8_t namelen; /* length of name bytes */97- __uint8_t nameval[1]; /* name/value bytes */98- } namelist; /* grows from bottom of buf */99- struct xfs_attr_leaf_name_remote {100- xfs_dablk_t valueblk; /* block number of value bytes */101- __uint32_t valuelen; /* number of bytes in value */102- __uint8_t namelen; /* length of name bytes */103- __uint8_t name[1]; /* name bytes */104- } valuelist; /* grows from bottom of buf */105} xfs_attr_leafblock_t;106-typedef struct xfs_attr_leaf_hdr xfs_attr_leaf_hdr_t;107-typedef struct xfs_attr_leaf_map xfs_attr_leaf_map_t;108-typedef struct xfs_attr_leaf_entry xfs_attr_leaf_entry_t;109-typedef struct xfs_attr_leaf_name_local xfs_attr_leaf_name_local_t;110-typedef struct xfs_attr_leaf_name_remote xfs_attr_leaf_name_remote_t;111112/*113 * Flags used in the leaf_entry[i].flags field.···156 (leafp))[INT_GET((leafp)->entries[idx].nameidx, ARCH_CONVERT)];157}158159-#define XFS_ATTR_LEAF_NAME(leafp,idx) xfs_attr_leaf_name(leafp,idx)0160static inline char *xfs_attr_leaf_name(xfs_attr_leafblock_t *leafp, int idx)161{162 return (&((char *)
···63 * the leaf_entry. The namespaces are independent only because we also look64 * at the namespace bit when we are looking for a matching attribute name.65 *66+ * We also store an "incomplete" bit in the leaf_entry. It shows that an67 * attribute is in the middle of being created and should not be shown to68 * the user if we crash during the time that the bit is set. We clear the69 * bit when we have finished setting up the attribute. We do this because···72 */73#define XFS_ATTR_LEAF_MAPSIZE 3 /* how many freespace slots */7475+typedef struct xfs_attr_leaf_map { /* RLE map of free bytes */76+ __uint16_t base; /* base of free region */77+ __uint16_t size; /* length of free region */78+} xfs_attr_leaf_map_t;79+80+typedef struct xfs_attr_leaf_hdr { /* constant-structure header block */81+ xfs_da_blkinfo_t info; /* block type, links, etc. */82+ __uint16_t count; /* count of active leaf_entry's */83+ __uint16_t usedbytes; /* num bytes of names/values stored */84+ __uint16_t firstused; /* first used byte in name area */85+ __uint8_t holes; /* != 0 if blk needs compaction */86+ __uint8_t pad1;87+ xfs_attr_leaf_map_t freemap[XFS_ATTR_LEAF_MAPSIZE];88+ /* N largest free regions */89+} xfs_attr_leaf_hdr_t;90+91+typedef struct xfs_attr_leaf_entry { /* sorted on key, not name */92+ xfs_dahash_t hashval; /* hash value of name */93+ __uint16_t nameidx; /* index into buffer of name/value */94+ __uint8_t flags; /* LOCAL/ROOT/SECURE/INCOMPLETE flag */95+ __uint8_t pad2; /* unused pad byte */96+} xfs_attr_leaf_entry_t;97+98+typedef struct xfs_attr_leaf_name_local {99+ __uint16_t valuelen; /* number of bytes in value */100+ __uint8_t namelen; /* length of name bytes */101+ __uint8_t nameval[1]; /* name/value bytes */102+} xfs_attr_leaf_name_local_t;103+104+typedef struct xfs_attr_leaf_name_remote {105+ xfs_dablk_t valueblk; /* block number of value bytes */106+ __uint32_t valuelen; /* number of bytes in value */107+ __uint8_t namelen; /* length of name bytes */108+ __uint8_t name[1]; /* name bytes */109+} xfs_attr_leaf_name_remote_t;110+111typedef struct xfs_attr_leafblock {112+ xfs_attr_leaf_hdr_t hdr; /* constant-structure header block */113+ xfs_attr_leaf_entry_t entries[1]; /* sorted on key, not name */114+ xfs_attr_leaf_name_local_t namelist; /* grows from bottom of buf */115+ xfs_attr_leaf_name_remote_t valuelist; /* grows from bottom of buf */0000000000000000000000000116} xfs_attr_leafblock_t;00000117118/*119 * Flags used in the leaf_entry[i].flags field.···150 (leafp))[INT_GET((leafp)->entries[idx].nameidx, ARCH_CONVERT)];151}152153+#define XFS_ATTR_LEAF_NAME(leafp,idx) \154+ xfs_attr_leaf_name(leafp,idx)155static inline char *xfs_attr_leaf_name(xfs_attr_leafblock_t *leafp, int idx)156{157 return (&((char *)
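Note: the xfs_attr_leaf.h change above flattens the nested leaf-block definitions into standalone typedefs with the same fields. A small stand-alone sketch of an entry-like struct follows, just to show the kind of sizeof/offsetof reasoning code does against such a layout; the demo type and values are invented and do not claim to match the on-disk format.

/*
 * Demo of reasoning about a packed-style entry layout by name.
 */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

typedef struct demo_attr_leaf_entry {
	uint32_t hashval;	/* hash value of name */
	uint16_t nameidx;	/* index into buffer of name/value */
	uint8_t  flags;		/* local/root/secure/incomplete style flags */
	uint8_t  pad2;		/* unused pad byte */
} demo_attr_leaf_entry_t;

int main(void)
{
	printf("entry size = %zu, nameidx offset = %zu\n",
	       sizeof(demo_attr_leaf_entry_t),
	       offsetof(demo_attr_leaf_entry_t, nameidx));
	return 0;
}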
fs/xfs/xfs_bmap.c  +250 -160
···2146 return 0; /* keep gcc quite */2147}214800000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000002149#define XFS_ALLOC_GAP_UNITS 421502151/*2152 * xfs_bmap_alloc is called by xfs_bmapi to allocate an extent for a file.2153 * It figures out where to ask the underlying allocator to put the new extent.2154 */2155-STATIC int /* error */2156xfs_bmap_alloc(2157 xfs_bmalloca_t *ap) /* bmap alloc argument struct */2158{···2326 xfs_mount_t *mp; /* mount point structure */2327 int nullfb; /* true if ap->firstblock isn't set */2328 int rt; /* true if inode is realtime */2329-#ifdef __KERNEL__2330- xfs_extlen_t prod=0; /* product factor for allocators */2331- xfs_extlen_t ralen=0; /* realtime allocation length */2332-#endif23332334#define ISVALID(x,y) \2335 (rt ? \···2345 nullfb = ap->firstblock == NULLFSBLOCK;2346 rt = XFS_IS_REALTIME_INODE(ap->ip) && ap->userdata;2347 fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, ap->firstblock);2348-#ifdef __KERNEL__2349 if (rt) {2350- xfs_extlen_t extsz; /* file extent size for rt */2351- xfs_fileoff_t nexto; /* next file offset */2352- xfs_extlen_t orig_alen; /* original ap->alen */2353- xfs_fileoff_t orig_end; /* original off+len */2354- xfs_fileoff_t orig_off; /* original ap->off */2355- xfs_extlen_t mod_off; /* modulus calculations */2356- xfs_fileoff_t prevo; /* previous file offset */2357- xfs_rtblock_t rtx; /* realtime extent number */2358- xfs_extlen_t temp; /* temp for rt calculations */23592360- /*2361- * Set prod to match the realtime extent size.2362- */2363- if (!(extsz = ap->ip->i_d.di_extsize))2364- extsz = mp->m_sb.sb_rextsize;2365- prod = extsz / mp->m_sb.sb_rextsize;2366- orig_off = ap->off;2367- orig_alen = ap->alen;2368- orig_end = orig_off + orig_alen;2369- /*2370- * If the file offset is unaligned vs. the extent size2371- * we need to align it. This will be possible unless2372- * the file was previously written with a kernel that didn't2373- * perform this alignment.2374- */2375- mod_off = do_mod(orig_off, extsz);2376- if (mod_off) {2377- ap->alen += mod_off;2378- ap->off -= mod_off;2379- }2380- /*2381- * Same adjustment for the end of the requested area.2382- */2383- if ((temp = (ap->alen % extsz)))2384- ap->alen += extsz - temp;2385- /*2386- * If the previous block overlaps with this proposed allocation2387- * then move the start forward without adjusting the length.2388- */2389- prevo =2390- ap->prevp->br_startoff == NULLFILEOFF ?2391- 0 :2392- (ap->prevp->br_startoff +2393- ap->prevp->br_blockcount);2394- if (ap->off != orig_off && ap->off < prevo)2395- ap->off = prevo;2396- /*2397- * If the next block overlaps with this proposed allocation2398- * then move the start back without adjusting the length,2399- * but not before offset 0.2400- * This may of course make the start overlap previous block,2401- * and if we hit the offset 0 limit then the next block2402- * can still overlap too.2403- */2404- nexto = (ap->eof || ap->gotp->br_startoff == NULLFILEOFF) ?2405- NULLFILEOFF : ap->gotp->br_startoff;2406- if (!ap->eof &&2407- ap->off + ap->alen != orig_end &&2408- ap->off + ap->alen > nexto)2409- ap->off = nexto > ap->alen ? nexto - ap->alen : 0;2410- /*2411- * If we're now overlapping the next or previous extent that2412- * means we can't fit an extsz piece in this hole. 
Just move2413- * the start forward to the first valid spot and set2414- * the length so we hit the end.2415- */2416- if ((ap->off != orig_off && ap->off < prevo) ||2417- (ap->off + ap->alen != orig_end &&2418- ap->off + ap->alen > nexto)) {2419- ap->off = prevo;2420- ap->alen = nexto - prevo;2421- }2422- /*2423- * If the result isn't a multiple of rtextents we need to2424- * remove blocks until it is.2425- */2426- if ((temp = (ap->alen % mp->m_sb.sb_rextsize))) {2427- /*2428- * We're not covering the original request, or2429- * we won't be able to once we fix the length.2430- */2431- if (orig_off < ap->off ||2432- orig_end > ap->off + ap->alen ||2433- ap->alen - temp < orig_alen)2434- return XFS_ERROR(EINVAL);2435- /*2436- * Try to fix it by moving the start up.2437- */2438- if (ap->off + temp <= orig_off) {2439- ap->alen -= temp;2440- ap->off += temp;2441- }2442- /*2443- * Try to fix it by moving the end in.2444- */2445- else if (ap->off + ap->alen - temp >= orig_end)2446- ap->alen -= temp;2447- /*2448- * Set the start to the minimum then trim the length.2449- */2450- else {2451- ap->alen -= orig_off - ap->off;2452- ap->off = orig_off;2453- ap->alen -= ap->alen % mp->m_sb.sb_rextsize;2454- }2455- /*2456- * Result doesn't cover the request, fail it.2457- */2458- if (orig_off < ap->off || orig_end > ap->off + ap->alen)2459- return XFS_ERROR(EINVAL);2460- }2461 ASSERT(ap->alen % mp->m_sb.sb_rextsize == 0);02462 /*2463 * If the offset & length are not perfectly aligned2464 * then kill prod, it will just get us in trouble.2465 */2466- if (do_mod(ap->off, extsz) || ap->alen % extsz)2467 prod = 1;2468 /*2469 * Set ralen to be the actual requested length in rtextents.···2389 ap->rval = rtx * mp->m_sb.sb_rextsize;2390 } else2391 ap->rval = 0;00000000000000002392 }2393-#else2394- if (rt)2395- ap->rval = 0;2396-#endif /* __KERNEL__ */2397- else if (nullfb)2398- ap->rval = XFS_INO_TO_FSB(mp, ap->ip->i_ino);2399- else2400- ap->rval = ap->firstblock;2401 /*2402 * If allocating at eof, and there's a previous real block,2403 * try to use it's last block as our starting point.···2670 args.total = ap->total;2671 args.minlen = ap->minlen;2672 }2673- if (ap->ip->i_d.di_extsize) {02674 args.prod = ap->ip->i_d.di_extsize;2675 if ((args.mod = (xfs_extlen_t)do_mod(ap->off, args.prod)))2676 args.mod = (xfs_extlen_t)(args.prod - args.mod);2677- } else if (mp->m_sb.sb_blocksize >= NBPP) {2678 args.prod = 1;2679 args.mod = 0;2680 } else {···36533654 ep = xfs_bmap_do_search_extents(base, lastx, nextents, bno, eofp,3655 lastxp, gotp, prevp);3656- rt = ip->i_d.di_flags & XFS_DIFLAG_REALTIME;3657- if(!rt && !gotp->br_startblock && (*lastxp != NULLEXTNUM)) {3658 cmn_err(CE_PANIC,"Access to block zero: fs: <%s> inode: %lld "3659 "start_block : %llx start_off : %llx blkcnt : %llx "3660 "extent-state : %x \n",3661- (ip->i_mount)->m_fsname,(long long)ip->i_ino,3662- gotp->br_startblock, gotp->br_startoff,3663- gotp->br_blockcount,gotp->br_state);003664 }3665 return ep;3666}···3950 ip->i_d.di_forkoff = xfs_attr_shortform_bytesfit(ip, size);3951 if (!ip->i_d.di_forkoff)3952 ip->i_d.di_forkoff = mp->m_attroffset >> 3;3953- else if (!(mp->m_flags & XFS_MOUNT_COMPAT_ATTR))3954 version = 2;3955 break;3956 default:···4098 */4099 if (whichfork == XFS_DATA_FORK) {4100 maxleafents = MAXEXTNUM;4101- sz = (mp->m_flags & XFS_MOUNT_COMPAT_ATTR) ?4102- mp->m_attroffset : XFS_BMDR_SPACE_CALC(MINDBTPTRS);4103 } else {4104 maxleafents = MAXAEXTNUM;4105- sz = (mp->m_flags & XFS_MOUNT_COMPAT_ATTR) ?4106- mp->m_sb.sb_inodesize - mp->m_attroffset 
:4107- XFS_BMDR_SPACE_CALC(MINABTPTRS);4108 }4109 maxrootrecs = (int)XFS_BTREE_BLOCK_MAXRECS(sz, xfs_bmdr, 0);4110 minleafrecs = mp->m_bmap_dmnr[0];···4493 num_recs = be16_to_cpu(block->bb_numrecs);4494 if (unlikely(i + num_recs > room)) {4495 ASSERT(i + num_recs <= room);4496- xfs_fs_cmn_err(CE_WARN, ip->i_mount,4497- "corrupt dinode %Lu, (btree extents). Unmount and run xfs_repair.",4498 (unsigned long long) ip->i_ino);4499 XFS_ERROR_REPORT("xfs_bmap_read_extents(1)",4500 XFS_ERRLEVEL_LOW,···4665 char contig; /* allocation must be one extent */4666 char delay; /* this request is for delayed alloc */4667 char exact; /* don't do all of wasdelayed extent */04668 xfs_bmbt_rec_t *ep; /* extent list entry pointer */4669 int error; /* error return */4670 xfs_bmbt_irec_t got; /* current extent list record */···4719 }4720 if (XFS_FORCED_SHUTDOWN(mp))4721 return XFS_ERROR(EIO);4722- rt = XFS_IS_REALTIME_INODE(ip);4723 ifp = XFS_IFORK_PTR(ip, whichfork);4724 ASSERT(ifp->if_ext_max ==4725 XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t));···4730 delay = (flags & XFS_BMAPI_DELAY) != 0;4731 trim = (flags & XFS_BMAPI_ENTIRE) == 0;4732 userdata = (flags & XFS_BMAPI_METADATA) == 0;04733 exact = (flags & XFS_BMAPI_EXACT) != 0;4734 rsvd = (flags & XFS_BMAPI_RSVBLOCKS) != 0;4735 contig = (flags & XFS_BMAPI_CONTIG) != 0;···4825 }4826 minlen = contig ? alen : 1;4827 if (delay) {4828- xfs_extlen_t extsz = 0;48294830 /* Figure out the extent size, adjust alen */4831 if (rt) {4832 if (!(extsz = ip->i_d.di_extsize))4833 extsz = mp->m_sb.sb_rextsize;4834- alen = roundup(alen, extsz);4835- extsz = alen / mp->m_sb.sb_rextsize;4836 }000000000048374838 /*4839 * Make a transaction-less quota reservation for···4872 xfs_bmap_worst_indlen(ip, alen);4873 ASSERT(indlen > 0);48744875- if (rt)4876 error = xfs_mod_incore_sb(mp,4877 XFS_SBS_FREXTENTS,4878 -(extsz), rsvd);4879- else4880 error = xfs_mod_incore_sb(mp,4881 XFS_SBS_FDBLOCKS,4882 -(alen), rsvd);04883 if (!error) {4884 error = xfs_mod_incore_sb(mp,4885 XFS_SBS_FDBLOCKS,4886 -(indlen), rsvd);4887- if (error && rt) {4888- xfs_mod_incore_sb(ip->i_mount,4889 XFS_SBS_FREXTENTS,4890 extsz, rsvd);4891- } else if (error) {4892- xfs_mod_incore_sb(ip->i_mount,4893 XFS_SBS_FDBLOCKS,4894 alen, rsvd);4895- }4896 }48974898 if (error) {4899- if (XFS_IS_QUOTA_ON(ip->i_mount))4900 /* unreserve the blocks now */04901 XFS_TRANS_UNRESERVE_QUOTA_NBLKS(4902 mp, NULL, ip,4903 (long)alen, 0, rt ?···4937 bma.firstblock = *firstblock;4938 bma.alen = alen;4939 bma.off = aoff;04940 bma.wasdel = wasdelay;4941 bma.minlen = minlen;4942 bma.low = flist->xbf_low;···5359 return 0;5360 }5361 XFS_STATS_INC(xs_blk_unmap);5362- isrt = (whichfork == XFS_DATA_FORK) &&5363- (ip->i_d.di_flags & XFS_DIFLAG_REALTIME);5364 start = bno;5365 bno = start + len - 1;5366 ep = xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got,···5531 }5532 if (wasdel) {5533 ASSERT(STARTBLOCKVAL(del.br_startblock) > 0);5534- /* Update realtim/data freespace, unreserve quota */5535 if (isrt) {5536 xfs_filblks_t rtexts;5537···5539 do_div(rtexts, mp->m_sb.sb_rextsize);5540 xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS,5541 (int)rtexts, rsvd);5542- XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, NULL, ip,5543- -((long)del.br_blockcount), 0,5544 XFS_QMOPT_RES_RTBLKS);5545 } else {5546 xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS,5547 (int)del.br_blockcount, rsvd);5548- XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, NULL, ip,5549- -((long)del.br_blockcount), 0,5550 XFS_QMOPT_RES_REGBLKS);5551 }5552 ip->i_delayed_blks -= del.br_blockcount;···5740 
ip->i_d.di_format != XFS_DINODE_FMT_LOCAL)5741 return XFS_ERROR(EINVAL);5742 if (whichfork == XFS_DATA_FORK) {5743- if (ip->i_d.di_flags & XFS_DIFLAG_PREALLOC) {005744 prealloced = 1;5745 fixlen = XFS_MAXIOFFSET(mp);5746 } else {
···2146 return 0; /* keep gcc quite */2147}21482149+/*2150+ * Adjust the size of the new extent based on di_extsize and rt extsize.2151+ */2152+STATIC int2153+xfs_bmap_extsize_align(2154+ xfs_mount_t *mp,2155+ xfs_bmbt_irec_t *gotp, /* next extent pointer */2156+ xfs_bmbt_irec_t *prevp, /* previous extent pointer */2157+ xfs_extlen_t extsz, /* align to this extent size */2158+ int rt, /* is this a realtime inode? */2159+ int eof, /* is extent at end-of-file? */2160+ int delay, /* creating delalloc extent? */2161+ int convert, /* overwriting unwritten extent? */2162+ xfs_fileoff_t *offp, /* in/out: aligned offset */2163+ xfs_extlen_t *lenp) /* in/out: aligned length */2164+{2165+ xfs_fileoff_t orig_off; /* original offset */2166+ xfs_extlen_t orig_alen; /* original length */2167+ xfs_fileoff_t orig_end; /* original off+len */2168+ xfs_fileoff_t nexto; /* next file offset */2169+ xfs_fileoff_t prevo; /* previous file offset */2170+ xfs_fileoff_t align_off; /* temp for offset */2171+ xfs_extlen_t align_alen; /* temp for length */2172+ xfs_extlen_t temp; /* temp for calculations */2173+2174+ if (convert)2175+ return 0;2176+2177+ orig_off = align_off = *offp;2178+ orig_alen = align_alen = *lenp;2179+ orig_end = orig_off + orig_alen;2180+2181+ /*2182+ * If this request overlaps an existing extent, then don't2183+ * attempt to perform any additional alignment.2184+ */2185+ if (!delay && !eof &&2186+ (orig_off >= gotp->br_startoff) &&2187+ (orig_end <= gotp->br_startoff + gotp->br_blockcount)) {2188+ return 0;2189+ }2190+2191+ /*2192+ * If the file offset is unaligned vs. the extent size2193+ * we need to align it. This will be possible unless2194+ * the file was previously written with a kernel that didn't2195+ * perform this alignment, or if a truncate shot us in the2196+ * foot.2197+ */2198+ temp = do_mod(orig_off, extsz);2199+ if (temp) {2200+ align_alen += temp;2201+ align_off -= temp;2202+ }2203+ /*2204+ * Same adjustment for the end of the requested area.2205+ */2206+ if ((temp = (align_alen % extsz))) {2207+ align_alen += extsz - temp;2208+ }2209+ /*2210+ * If the previous block overlaps with this proposed allocation2211+ * then move the start forward without adjusting the length.2212+ */2213+ if (prevp->br_startoff != NULLFILEOFF) {2214+ if (prevp->br_startblock == HOLESTARTBLOCK)2215+ prevo = prevp->br_startoff;2216+ else2217+ prevo = prevp->br_startoff + prevp->br_blockcount;2218+ } else2219+ prevo = 0;2220+ if (align_off != orig_off && align_off < prevo)2221+ align_off = prevo;2222+ /*2223+ * If the next block overlaps with this proposed allocation2224+ * then move the start back without adjusting the length,2225+ * but not before offset 0.2226+ * This may of course make the start overlap previous block,2227+ * and if we hit the offset 0 limit then the next block2228+ * can still overlap too.2229+ */2230+ if (!eof && gotp->br_startoff != NULLFILEOFF) {2231+ if ((delay && gotp->br_startblock == HOLESTARTBLOCK) ||2232+ (!delay && gotp->br_startblock == DELAYSTARTBLOCK))2233+ nexto = gotp->br_startoff + gotp->br_blockcount;2234+ else2235+ nexto = gotp->br_startoff;2236+ } else2237+ nexto = NULLFILEOFF;2238+ if (!eof &&2239+ align_off + align_alen != orig_end &&2240+ align_off + align_alen > nexto)2241+ align_off = nexto > align_alen ? nexto - align_alen : 0;2242+ /*2243+ * If we're now overlapping the next or previous extent that2244+ * means we can't fit an extsz piece in this hole. 
Just move2245+ * the start forward to the first valid spot and set2246+ * the length so we hit the end.2247+ */2248+ if (align_off != orig_off && align_off < prevo)2249+ align_off = prevo;2250+ if (align_off + align_alen != orig_end &&2251+ align_off + align_alen > nexto &&2252+ nexto != NULLFILEOFF) {2253+ ASSERT(nexto > prevo);2254+ align_alen = nexto - align_off;2255+ }2256+2257+ /*2258+ * If realtime, and the result isn't a multiple of the realtime2259+ * extent size we need to remove blocks until it is.2260+ */2261+ if (rt && (temp = (align_alen % mp->m_sb.sb_rextsize))) {2262+ /*2263+ * We're not covering the original request, or2264+ * we won't be able to once we fix the length.2265+ */2266+ if (orig_off < align_off ||2267+ orig_end > align_off + align_alen ||2268+ align_alen - temp < orig_alen)2269+ return XFS_ERROR(EINVAL);2270+ /*2271+ * Try to fix it by moving the start up.2272+ */2273+ if (align_off + temp <= orig_off) {2274+ align_alen -= temp;2275+ align_off += temp;2276+ }2277+ /*2278+ * Try to fix it by moving the end in.2279+ */2280+ else if (align_off + align_alen - temp >= orig_end)2281+ align_alen -= temp;2282+ /*2283+ * Set the start to the minimum then trim the length.2284+ */2285+ else {2286+ align_alen -= orig_off - align_off;2287+ align_off = orig_off;2288+ align_alen -= align_alen % mp->m_sb.sb_rextsize;2289+ }2290+ /*2291+ * Result doesn't cover the request, fail it.2292+ */2293+ if (orig_off < align_off || orig_end > align_off + align_alen)2294+ return XFS_ERROR(EINVAL);2295+ } else {2296+ ASSERT(orig_off >= align_off);2297+ ASSERT(orig_end <= align_off + align_alen);2298+ }2299+2300+#ifdef DEBUG2301+ if (!eof && gotp->br_startoff != NULLFILEOFF)2302+ ASSERT(align_off + align_alen <= gotp->br_startoff);2303+ if (prevp->br_startoff != NULLFILEOFF)2304+ ASSERT(align_off >= prevp->br_startoff + prevp->br_blockcount);2305+#endif2306+2307+ *lenp = align_alen;2308+ *offp = align_off;2309+ return 0;2310+}2311+2312#define XFS_ALLOC_GAP_UNITS 423132314/*2315 * xfs_bmap_alloc is called by xfs_bmapi to allocate an extent for a file.2316 * It figures out where to ask the underlying allocator to put the new extent.2317 */2318+STATIC int2319xfs_bmap_alloc(2320 xfs_bmalloca_t *ap) /* bmap alloc argument struct */2321{···2163 xfs_mount_t *mp; /* mount point structure */2164 int nullfb; /* true if ap->firstblock isn't set */2165 int rt; /* true if inode is realtime */2166+ xfs_extlen_t prod = 0; /* product factor for allocators */2167+ xfs_extlen_t ralen = 0; /* realtime allocation length */2168+ xfs_extlen_t align; /* minimum allocation alignment */2169+ xfs_rtblock_t rtx;21702171#define ISVALID(x,y) \2172 (rt ? \···2182 nullfb = ap->firstblock == NULLFSBLOCK;2183 rt = XFS_IS_REALTIME_INODE(ap->ip) && ap->userdata;2184 fb_agno = nullfb ? 
NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, ap->firstblock);02185 if (rt) {2186+ align = ap->ip->i_d.di_extsize ?2187+ ap->ip->i_d.di_extsize : mp->m_sb.sb_rextsize;2188+ /* Set prod to match the extent size */2189+ prod = align / mp->m_sb.sb_rextsize;0000021902191+ error = xfs_bmap_extsize_align(mp, ap->gotp, ap->prevp,2192+ align, rt, ap->eof, 0,2193+ ap->conv, &ap->off, &ap->alen);2194+ if (error)2195+ return error;2196+ ASSERT(ap->alen);000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000002197 ASSERT(ap->alen % mp->m_sb.sb_rextsize == 0);2198+2199 /*2200 * If the offset & length are not perfectly aligned2201 * then kill prod, it will just get us in trouble.2202 */2203+ if (do_mod(ap->off, align) || ap->alen % align)2204 prod = 1;2205 /*2206 * Set ralen to be the actual requested length in rtextents.···2326 ap->rval = rtx * mp->m_sb.sb_rextsize;2327 } else2328 ap->rval = 0;2329+ } else {2330+ align = (ap->userdata && ap->ip->i_d.di_extsize &&2331+ (ap->ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE)) ?2332+ ap->ip->i_d.di_extsize : 0;2333+ if (unlikely(align)) {2334+ error = xfs_bmap_extsize_align(mp, ap->gotp, ap->prevp,2335+ align, rt,2336+ ap->eof, 0, ap->conv,2337+ &ap->off, &ap->alen);2338+ ASSERT(!error);2339+ ASSERT(ap->alen);2340+ }2341+ if (nullfb)2342+ ap->rval = XFS_INO_TO_FSB(mp, ap->ip->i_ino);2343+ else2344+ ap->rval = ap->firstblock;2345 }2346+00000002347 /*2348 * If allocating at eof, and there's a previous real block,2349 * try to use it's last block as our starting point.···2598 args.total = ap->total;2599 args.minlen = ap->minlen;2600 }2601+ if (unlikely(ap->userdata && ap->ip->i_d.di_extsize &&2602+ (ap->ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE))) {2603 args.prod = ap->ip->i_d.di_extsize;2604 if ((args.mod = (xfs_extlen_t)do_mod(ap->off, args.prod)))2605 args.mod = (xfs_extlen_t)(args.prod - args.mod);2606+ } else if (unlikely(mp->m_sb.sb_blocksize >= NBPP)) {2607 args.prod = 1;2608 args.mod = 0;2609 } else {···35803581 ep = xfs_bmap_do_search_extents(base, lastx, nextents, bno, eofp,3582 lastxp, gotp, prevp);3583+ rt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);3584+ if (unlikely(!rt && !gotp->br_startblock && (*lastxp != NULLEXTNUM))) {3585 cmn_err(CE_PANIC,"Access to block zero: fs: <%s> inode: %lld "3586 "start_block : %llx start_off : %llx blkcnt : %llx "3587 "extent-state : %x \n",3588+ (ip->i_mount)->m_fsname, (long long)ip->i_ino,3589+ (unsigned long long)gotp->br_startblock,3590+ (unsigned long long)gotp->br_startoff,3591+ (unsigned long long)gotp->br_blockcount,3592+ gotp->br_state);3593 }3594 return ep;3595}···3875 ip->i_d.di_forkoff = xfs_attr_shortform_bytesfit(ip, size);3876 if (!ip->i_d.di_forkoff)3877 ip->i_d.di_forkoff = mp->m_attroffset >> 3;3878+ else if (mp->m_flags & XFS_MOUNT_ATTR2)3879 version = 2;3880 break;3881 default:···4023 */4024 if (whichfork == XFS_DATA_FORK) {4025 maxleafents = MAXEXTNUM;4026+ sz = (mp->m_flags & XFS_MOUNT_ATTR2) ?4027+ XFS_BMDR_SPACE_CALC(MINDBTPTRS) : mp->m_attroffset;4028 } else {4029 maxleafents = MAXAEXTNUM;4030+ sz = (mp->m_flags & XFS_MOUNT_ATTR2) ?4031+ XFS_BMDR_SPACE_CALC(MINABTPTRS) :4032+ mp->m_sb.sb_inodesize - mp->m_attroffset;4033 }4034 maxrootrecs = (int)XFS_BTREE_BLOCK_MAXRECS(sz, xfs_bmdr, 0);4035 minleafrecs = mp->m_bmap_dmnr[0];···4418 num_recs = be16_to_cpu(block->bb_numrecs);4419 if (unlikely(i + num_recs > room)) {4420 ASSERT(i + num_recs <= room);4421+ xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,4422+ "corrupt dinode %Lu, (btree extents).",4423 (unsigned long 
long) ip->i_ino);4424 XFS_ERROR_REPORT("xfs_bmap_read_extents(1)",4425 XFS_ERRLEVEL_LOW,···4590 char contig; /* allocation must be one extent */4591 char delay; /* this request is for delayed alloc */4592 char exact; /* don't do all of wasdelayed extent */4593+ char convert; /* unwritten extent I/O completion */4594 xfs_bmbt_rec_t *ep; /* extent list entry pointer */4595 int error; /* error return */4596 xfs_bmbt_irec_t got; /* current extent list record */···4643 }4644 if (XFS_FORCED_SHUTDOWN(mp))4645 return XFS_ERROR(EIO);4646+ rt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);4647 ifp = XFS_IFORK_PTR(ip, whichfork);4648 ASSERT(ifp->if_ext_max ==4649 XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t));···4654 delay = (flags & XFS_BMAPI_DELAY) != 0;4655 trim = (flags & XFS_BMAPI_ENTIRE) == 0;4656 userdata = (flags & XFS_BMAPI_METADATA) == 0;4657+ convert = (flags & XFS_BMAPI_CONVERT) != 0;4658 exact = (flags & XFS_BMAPI_EXACT) != 0;4659 rsvd = (flags & XFS_BMAPI_RSVBLOCKS) != 0;4660 contig = (flags & XFS_BMAPI_CONTIG) != 0;···4748 }4749 minlen = contig ? alen : 1;4750 if (delay) {4751+ xfs_extlen_t extsz;47524753 /* Figure out the extent size, adjust alen */4754 if (rt) {4755 if (!(extsz = ip->i_d.di_extsize))4756 extsz = mp->m_sb.sb_rextsize;4757+ } else {4758+ extsz = ip->i_d.di_extsize;4759 }4760+ if (extsz) {4761+ error = xfs_bmap_extsize_align(mp,4762+ &got, &prev, extsz,4763+ rt, eof, delay, convert,4764+ &aoff, &alen);4765+ ASSERT(!error);4766+ }4767+4768+ if (rt)4769+ extsz = alen / mp->m_sb.sb_rextsize;47704771 /*4772 * Make a transaction-less quota reservation for···4785 xfs_bmap_worst_indlen(ip, alen);4786 ASSERT(indlen > 0);47874788+ if (rt) {4789 error = xfs_mod_incore_sb(mp,4790 XFS_SBS_FREXTENTS,4791 -(extsz), rsvd);4792+ } else {4793 error = xfs_mod_incore_sb(mp,4794 XFS_SBS_FDBLOCKS,4795 -(alen), rsvd);4796+ }4797 if (!error) {4798 error = xfs_mod_incore_sb(mp,4799 XFS_SBS_FDBLOCKS,4800 -(indlen), rsvd);4801+ if (error && rt)4802+ xfs_mod_incore_sb(mp,4803 XFS_SBS_FREXTENTS,4804 extsz, rsvd);4805+ else if (error)4806+ xfs_mod_incore_sb(mp,4807 XFS_SBS_FDBLOCKS,4808 alen, rsvd);04809 }48104811 if (error) {4812+ if (XFS_IS_QUOTA_ON(mp))4813 /* unreserve the blocks now */4814+ (void)4815 XFS_TRANS_UNRESERVE_QUOTA_NBLKS(4816 mp, NULL, ip,4817 (long)alen, 0, rt ?···4849 bma.firstblock = *firstblock;4850 bma.alen = alen;4851 bma.off = aoff;4852+ bma.conv = convert;4853 bma.wasdel = wasdelay;4854 bma.minlen = minlen;4855 bma.low = flist->xbf_low;···5270 return 0;5271 }5272 XFS_STATS_INC(xs_blk_unmap);5273+ isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);05274 start = bno;5275 bno = start + len - 1;5276 ep = xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got,···5443 }5444 if (wasdel) {5445 ASSERT(STARTBLOCKVAL(del.br_startblock) > 0);5446+ /* Update realtime/data freespace, unreserve quota */5447 if (isrt) {5448 xfs_filblks_t rtexts;5449···5451 do_div(rtexts, mp->m_sb.sb_rextsize);5452 xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS,5453 (int)rtexts, rsvd);5454+ (void)XFS_TRANS_RESERVE_QUOTA_NBLKS(mp,5455+ NULL, ip, -((long)del.br_blockcount), 0,5456 XFS_QMOPT_RES_RTBLKS);5457 } else {5458 xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS,5459 (int)del.br_blockcount, rsvd);5460+ (void)XFS_TRANS_RESERVE_QUOTA_NBLKS(mp,5461+ NULL, ip, -((long)del.br_blockcount), 0,5462 XFS_QMOPT_RES_REGBLKS);5463 }5464 ip->i_delayed_blks -= del.br_blockcount;···5652 ip->i_d.di_format != XFS_DINODE_FMT_LOCAL)5653 return XFS_ERROR(EINVAL);5654 if (whichfork == 
XFS_DATA_FORK) {5655+ if ((ip->i_d.di_extsize && (ip->i_d.di_flags &5656+ (XFS_DIFLAG_REALTIME|XFS_DIFLAG_EXTSIZE))) ||5657+ ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC|XFS_DIFLAG_APPEND)){5658 prealloced = 1;5659 fixlen = XFS_MAXIOFFSET(mp);5660 } else {
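Note: xfs_bmap_extsize_align() above replaces the realtime-only alignment that used to be open-coded in xfs_bmap_alloc(), and is now also used for the new per-inode extent size hint and by the delalloc path in xfs_bmapi(). Its first step is plain rounding: pull the start offset back to an extsz boundary and pad the length up to whole extsz units; the rest of the function then clamps the result against the neighbouring extents and the realtime extent size. A standalone sketch of just that rounding step (simplified; it ignores the overlap and realtime trimming the real function performs):

#include <stdint.h>
#include <stdio.h>

static void
align_to_extsz(uint64_t *off, uint64_t *len, uint64_t extsz)
{
	uint64_t head = *off % extsz;	/* do_mod(orig_off, extsz) */
	uint64_t tail;

	*off -= head;			/* move start back to a boundary */
	*len += head;			/* keep covering the original range */
	tail = *len % extsz;
	if (tail)
		*len += extsz - tail;	/* pad length to whole units */
}

int
main(void)
{
	uint64_t off = 13, len = 5;

	align_to_extsz(&off, &len, 8);
	printf("off=%llu len=%llu\n",
	       (unsigned long long)off, (unsigned long long)len);
	/* prints off=8 len=16: the request now spans two whole 8-block units */
	return 0;
}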
fs/xfs/xfs_bmap.h  +6 -1
···
 62 #define XFS_BMAPI_IGSTATE	0x200	/* Ignore state - */
 63 					/* combine contig. space */
 64 #define XFS_BMAPI_CONTIG	0x400	/* must allocate only one extent */
 65 
 66 #define XFS_BMAPI_AFLAG(w)	xfs_bmapi_aflag(w)
 67 static inline int xfs_bmapi_aflag(int w)
···
105 	char	wasdel;		/* replacing a delayed allocation */
106 	char	userdata;	/* set if is user data */
107 	char	low;		/* low on space, using seq'l ags */
108-	char	aeof;		/* allocated space at eof */
109 } xfs_bmalloca_t;
110 
111 #ifdef __KERNEL__
···
 62 #define XFS_BMAPI_IGSTATE	0x200	/* Ignore state - */
 63 					/* combine contig. space */
 64 #define XFS_BMAPI_CONTIG	0x400	/* must allocate only one extent */
 65+/*	XFS_BMAPI_DIRECT_IO	0x800 */
 66+#define XFS_BMAPI_CONVERT	0x1000	/* unwritten extent conversion - */
 67+					/* need write cache flushing and no */
 68+					/* additional allocation alignments */
 69 
 70 #define XFS_BMAPI_AFLAG(w)	xfs_bmapi_aflag(w)
 71 static inline int xfs_bmapi_aflag(int w)
···
101 	char	wasdel;		/* replacing a delayed allocation */
102 	char	userdata;	/* set if is user data */
103 	char	low;		/* low on space, using seq'l ags */
104+	char	aeof;		/* allocated space at eof */
105+	char	conv;		/* overwriting unwritten extents */
106 } xfs_bmalloca_t;
107 
108 #ifdef __KERNEL__
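Note: XFS_BMAPI_CONVERT is consumed in two places in this patch, collected here for reference. xfs_iomap_write_unwritten() now passes it alongside XFS_BMAPI_WRITE, and xfs_bmapi() decodes it into the new conv field of xfs_bmalloca_t so xfs_bmap_extsize_align() can skip alignment when overwriting an already-allocated unwritten extent:

	/* caller, from the xfs_iomap.c hunk below */
	error = xfs_bmapi(tp, ip, offset_fsb, count_fsb,
			XFS_BMAPI_WRITE|XFS_BMAPI_CONVERT, &firstfsb,
			1, &imap, &nimaps, &free_list);

	/* inside xfs_bmapi(), from the xfs_bmap.c hunk above */
	convert = (flags & XFS_BMAPI_CONVERT) != 0;
	bma.conv = convert;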
fs/xfs/xfs_clnt.h  +1 -1
···
 57 /*
 58  * XFS mount option flags -- args->flags1
 59  */
 60-#define XFSMNT_COMPAT_ATTR	0x00000001	/* do not use ATTR2 format */
 61 #define XFSMNT_WSYNC		0x00000002	/* safe mode nfs mount
 62 						 * compatible */
 63 #define XFSMNT_INO64		0x00000004	/* move inode numbers up
···
 57 /*
 58  * XFS mount option flags -- args->flags1
 59  */
 60+#define XFSMNT_ATTR2		0x00000001	/* allow ATTR2 EA format */
 61 #define XFSMNT_WSYNC		0x00000002	/* safe mode nfs mount
 62 						 * compatible */
 63 #define XFSMNT_INO64		0x00000004	/* move inode numbers up
···
 72 	struct uio	*uio;		/* uio control structure */
 73 } xfs_dir2_put_args_t;
 74 
 75-#define	XFS_DIR_IS_V2(mp)	((mp)->m_dirversion == 2)
 76-extern xfs_dirops_t	xfsv2_dirops;
 77-
 78 /*
 79  * Other interfaces used by the rest of the dir v2 code.
 80  */
···
 72 	struct uio	*uio;		/* uio control structure */
 73 } xfs_dir2_put_args_t;
 74 
 75 /*
 76  * Other interfaces used by the rest of the dir v2 code.
 77  */
fs/xfs/xfs_dir_leaf.h  +34 -30
···67 */68#define XFS_DIR_LEAF_MAPSIZE 3 /* how many freespace slots */6900000000000000000000000000070typedef struct xfs_dir_leafblock {71- struct xfs_dir_leaf_hdr { /* constant-structure header block */72- xfs_da_blkinfo_t info; /* block type, links, etc. */73- __uint16_t count; /* count of active leaf_entry's */74- __uint16_t namebytes; /* num bytes of name strings stored */75- __uint16_t firstused; /* first used byte in name area */76- __uint8_t holes; /* != 0 if blk needs compaction */77- __uint8_t pad1;78- struct xfs_dir_leaf_map {/* RLE map of free bytes */79- __uint16_t base; /* base of free region */80- __uint16_t size; /* run length of free region */81- } freemap[XFS_DIR_LEAF_MAPSIZE]; /* N largest free regions */82- } hdr;83- struct xfs_dir_leaf_entry { /* sorted on key, not name */84- xfs_dahash_t hashval; /* hash value of name */85- __uint16_t nameidx; /* index into buffer of name */86- __uint8_t namelen; /* length of name string */87- __uint8_t pad2;88- } entries[1]; /* var sized array */89- struct xfs_dir_leaf_name {90- xfs_dir_ino_t inumber; /* inode number for this key */91- __uint8_t name[1]; /* name string itself */92- } namelist[1]; /* grows from bottom of buf */93} xfs_dir_leafblock_t;94-typedef struct xfs_dir_leaf_hdr xfs_dir_leaf_hdr_t;95-typedef struct xfs_dir_leaf_map xfs_dir_leaf_map_t;96-typedef struct xfs_dir_leaf_entry xfs_dir_leaf_entry_t;97-typedef struct xfs_dir_leaf_name xfs_dir_leaf_name_t;9899/*100 * Length of name for which a 512-byte block filesystem···130#define XFS_PUT_COOKIE(c,mp,bno,entry,hash) \131 ((c).s.be = XFS_DA_MAKE_BNOENTRY(mp, bno, entry), (c).s.h = (hash))132133-typedef struct xfs_dir_put_args134-{135 xfs_dircook_t cook; /* cookie of (next) entry */136 xfs_intino_t ino; /* inode number */137- struct xfs_dirent *dbp; /* buffer pointer */138 char *name; /* directory entry name */139 int namelen; /* length of name */140 int done; /* output: set if value was stored */···141 struct uio *uio; /* uio control structure */142} xfs_dir_put_args_t;143144-#define XFS_DIR_LEAF_ENTSIZE_BYNAME(len) xfs_dir_leaf_entsize_byname(len)0145static inline int xfs_dir_leaf_entsize_byname(int len)146{147 return (uint)sizeof(xfs_dir_leaf_name_t)-1 + len;
···67 */68#define XFS_DIR_LEAF_MAPSIZE 3 /* how many freespace slots */6970+typedef struct xfs_dir_leaf_map { /* RLE map of free bytes */71+ __uint16_t base; /* base of free region */72+ __uint16_t size; /* run length of free region */73+} xfs_dir_leaf_map_t;74+75+typedef struct xfs_dir_leaf_hdr { /* constant-structure header block */76+ xfs_da_blkinfo_t info; /* block type, links, etc. */77+ __uint16_t count; /* count of active leaf_entry's */78+ __uint16_t namebytes; /* num bytes of name strings stored */79+ __uint16_t firstused; /* first used byte in name area */80+ __uint8_t holes; /* != 0 if blk needs compaction */81+ __uint8_t pad1;82+ xfs_dir_leaf_map_t freemap[XFS_DIR_LEAF_MAPSIZE];83+} xfs_dir_leaf_hdr_t;84+85+typedef struct xfs_dir_leaf_entry { /* sorted on key, not name */86+ xfs_dahash_t hashval; /* hash value of name */87+ __uint16_t nameidx; /* index into buffer of name */88+ __uint8_t namelen; /* length of name string */89+ __uint8_t pad2;90+} xfs_dir_leaf_entry_t;91+92+typedef struct xfs_dir_leaf_name {93+ xfs_dir_ino_t inumber; /* inode number for this key */94+ __uint8_t name[1]; /* name string itself */95+} xfs_dir_leaf_name_t;96+97typedef struct xfs_dir_leafblock {98+ xfs_dir_leaf_hdr_t hdr; /* constant-structure header block */99+ xfs_dir_leaf_entry_t entries[1]; /* var sized array */100+ xfs_dir_leaf_name_t namelist[1]; /* grows from bottom of buf */0000000000000000000101} xfs_dir_leafblock_t;0000102103/*104 * Length of name for which a 512-byte block filesystem···126#define XFS_PUT_COOKIE(c,mp,bno,entry,hash) \127 ((c).s.be = XFS_DA_MAKE_BNOENTRY(mp, bno, entry), (c).s.h = (hash))128129+typedef struct xfs_dir_put_args {0130 xfs_dircook_t cook; /* cookie of (next) entry */131 xfs_intino_t ino; /* inode number */132+ struct xfs_dirent *dbp; /* buffer pointer */133 char *name; /* directory entry name */134 int namelen; /* length of name */135 int done; /* output: set if value was stored */···138 struct uio *uio; /* uio control structure */139} xfs_dir_put_args_t;140141+#define XFS_DIR_LEAF_ENTSIZE_BYNAME(len) \142+ xfs_dir_leaf_entsize_byname(len)143static inline int xfs_dir_leaf_entsize_byname(int len)144{145 return (uint)sizeof(xfs_dir_leaf_name_t)-1 + len;
fs/xfs/xfs_error.c  -1
···
 54 		if (e != xfs_etrap[i])
 55 			continue;
 56 		cmn_err(CE_NOTE, "xfs_error_trap: error %d", e);
 57-		debug_stop_all_cpus((void *)-1LL);
 58 		BUG();
 59 		break;
 60 	}
···
 54 		if (e != xfs_etrap[i])
 55 			continue;
 56 		cmn_err(CE_NOTE, "xfs_error_trap: error %d", e);
 57 		BUG();
 58 		break;
 59 	}
fs/xfs/xfs_error.h  +4 -4
···
 18 #ifndef __XFS_ERROR_H__
 19 #define __XFS_ERROR_H__
 20 
 21-#define prdev(fmt,targ,args...) \
 22-	printk("XFS: device %s - " fmt "\n", XFS_BUFTARG_NAME(targ), ## args)
 23-
 24 #define XFS_ERECOVER	1	/* Failure to recover log */
 25 #define XFS_ELOGSTAT	2	/* Failure to stat log in user space */
 26 #define XFS_ENOLOGSPACE	3	/* Reservation too large */
···
179 struct xfs_mount;
180 /* PRINTFLIKE4 */
181 extern void xfs_cmn_err(int panic_tag, int level, struct xfs_mount *mp,
182-			char *fmt, ...);
183 /* PRINTFLIKE3 */
184 extern void xfs_fs_cmn_err(int level, struct xfs_mount *mp, char *fmt, ...);
185 
186 #endif	/* __XFS_ERROR_H__ */
···
 18 #ifndef __XFS_ERROR_H__
 19 #define __XFS_ERROR_H__
 20 
 21 #define XFS_ERECOVER	1	/* Failure to recover log */
 22 #define XFS_ELOGSTAT	2	/* Failure to stat log in user space */
 23 #define XFS_ENOLOGSPACE	3	/* Reservation too large */
···
182 struct xfs_mount;
183 /* PRINTFLIKE4 */
184 extern void xfs_cmn_err(int panic_tag, int level, struct xfs_mount *mp,
185+			char *fmt, ...);
186 /* PRINTFLIKE3 */
187 extern void xfs_fs_cmn_err(int level, struct xfs_mount *mp, char *fmt, ...);
188+
189+#define xfs_fs_repair_cmn_err(level, mp, fmt, args...) \
190+	xfs_fs_cmn_err(level, mp, fmt " Unmount and run xfs_repair.", ## args)
191 
192 #endif	/* __XFS_ERROR_H__ */
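Note: with the wrapper in place, callers only supply the corruption-specific text; for example, the xfs_bmap_read_extents() hunk earlier in this patch becomes:

	xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
		"corrupt dinode %Lu, (btree extents).",
		(unsigned long long) ip->i_ino);
	/* which expands to xfs_fs_cmn_err() with
	 * " Unmount and run xfs_repair." appended to the format string */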
fs/xfs/xfs_fs.h  +6 -4
···
  3  * All Rights Reserved.
  4  *
  5  * This program is free software; you can redistribute it and/or
  6- * modify it under the terms of the GNU General Public License as
  7- * published by the Free Software Foundation.
  8  *
  9  * This program is distributed in the hope that it would be useful,
 10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 12- * GNU General Public License for more details.
 13  *
 14- * You should have received a copy of the GNU General Public License
 15  * along with this program; if not, write the Free Software Foundation,
 16  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 17  */
···
 65 #define XFS_XFLAG_RTINHERIT	0x00000100	/* create with rt bit set */
 66 #define XFS_XFLAG_PROJINHERIT	0x00000200	/* create with parents projid */
 67 #define XFS_XFLAG_NOSYMLINKS	0x00000400	/* disallow symlink creation */
 68 #define XFS_XFLAG_HASATTR	0x80000000	/* no DIFLAG for this */
 69 
 70 /*
···
  3  * All Rights Reserved.
  4  *
  5  * This program is free software; you can redistribute it and/or
  6+ * modify it under the terms of the GNU Lesser General Public License
  7+ * as published by the Free Software Foundation.
  8  *
  9  * This program is distributed in the hope that it would be useful,
 10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 12+ * GNU Lesser General Public License for more details.
 13  *
 14+ * You should have received a copy of the GNU Lesser General Public License
 15  * along with this program; if not, write the Free Software Foundation,
 16  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 17  */
···
 65 #define XFS_XFLAG_RTINHERIT	0x00000100	/* create with rt bit set */
 66 #define XFS_XFLAG_PROJINHERIT	0x00000200	/* create with parents projid */
 67 #define XFS_XFLAG_NOSYMLINKS	0x00000400	/* disallow symlink creation */
 68+#define XFS_XFLAG_EXTSIZE	0x00000800	/* extent size allocator hint */
 69+#define XFS_XFLAG_EXTSZINHERIT	0x00001000	/* inherit inode extent size */
 70 #define XFS_XFLAG_HASATTR	0x80000000	/* no DIFLAG for this */
 71 
 72 /*
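Note: XFS_XFLAG_EXTSIZE and XFS_XFLAG_EXTSZINHERIT expose the per-inode extent size hint through the fsxattr interface. A hedged userspace sketch of setting the hint follows; it is not part of this patch, it assumes struct fsxattr and the XFS_IOC_FSGETXATTR/XFS_IOC_FSSETXATTR ioctls from this header are available via xfsprogs' <xfs/xfs_fs.h>, and error handling is kept minimal:

#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <xfs/xfs_fs.h>

int
set_extsize_hint(const char *path, unsigned int extsize_bytes)
{
	struct fsxattr	fsx;
	int		fd, err;

	fd = open(path, O_RDONLY);
	if (fd < 0)
		return -1;
	err = ioctl(fd, XFS_IOC_FSGETXATTR, &fsx);
	if (!err) {
		fsx.fsx_extsize = extsize_bytes;	/* hint, in bytes */
		fsx.fsx_xflags |= XFS_XFLAG_EXTSIZE;	/* flag added above */
		err = ioctl(fd, XFS_IOC_FSSETXATTR, &fsx);
	}
	close(fd);
	return err;
}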
···271 if (ip->i_update_size)272 ip->i_update_size = 0;27300000274 vecp->i_addr = (xfs_caddr_t)&ip->i_d;275 vecp->i_len = sizeof(xfs_dinode_core_t);276 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_ICORE);···608 if (iip->ili_pushbuf_flag == 0) {609 iip->ili_pushbuf_flag = 1;610#ifdef DEBUG611- iip->ili_push_owner = get_thread_id();612#endif613 /*614 * Inode is left locked in shared mode.···787 * trying to duplicate our effort.788 */789 ASSERT(iip->ili_pushbuf_flag != 0);790- ASSERT(iip->ili_push_owner == get_thread_id());791792 /*793 * If flushlock isn't locked anymore, chances are that the
···271 if (ip->i_update_size)272 ip->i_update_size = 0;273274+ /*275+ * Make sure to get the latest atime from the Linux inode.276+ */277+ xfs_synchronize_atime(ip);278+279 vecp->i_addr = (xfs_caddr_t)&ip->i_d;280 vecp->i_len = sizeof(xfs_dinode_core_t);281 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_ICORE);···603 if (iip->ili_pushbuf_flag == 0) {604 iip->ili_pushbuf_flag = 1;605#ifdef DEBUG606+ iip->ili_push_owner = current_pid();607#endif608 /*609 * Inode is left locked in shared mode.···782 * trying to duplicate our effort.783 */784 ASSERT(iip->ili_pushbuf_flag != 0);785+ ASSERT(iip->ili_push_owner == current_pid());786787 /*788 * If flushlock isn't locked anymore, chances are that the
fs/xfs/xfs_iomap.c  +236 -193
···262 case BMAPI_WRITE:263 /* If we found an extent, return it */264 if (nimaps &&265- (imap.br_startblock != HOLESTARTBLOCK) && 266 (imap.br_startblock != DELAYSTARTBLOCK)) {267 xfs_iomap_map_trace(XFS_IOMAP_WRITE_MAP, io,268 offset, count, iomapp, &imap, flags);···317}318319STATIC int0000000000000000000000000000000000000000000000000000320xfs_flush_space(321 xfs_inode_t *ip,322 int *fsynced,···414 xfs_iocore_t *io = &ip->i_iocore;415 xfs_fileoff_t offset_fsb;416 xfs_fileoff_t last_fsb;417- xfs_filblks_t count_fsb;418 xfs_fsblock_t firstfsb;00419 int nimaps;420- int error;421 int bmapi_flag;422 int quota_flag;423 int rt;424 xfs_trans_t *tp;425 xfs_bmbt_irec_t imap;426 xfs_bmap_free_t free_list;427- xfs_filblks_t qblocks, resblks;428 int committed;429- int resrtextents;430431 /*432 * Make sure that the dquots are there. This doesn't hold···437 if (error)438 return XFS_ERROR(error);439440- offset_fsb = XFS_B_TO_FSBT(mp, offset);441- last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));442- count_fsb = last_fsb - offset_fsb;443- if (found && (ret_imap->br_startblock == HOLESTARTBLOCK)) {444- xfs_fileoff_t map_last_fsb;445-446- map_last_fsb = ret_imap->br_blockcount + ret_imap->br_startoff;447- if (map_last_fsb < last_fsb) {448- last_fsb = map_last_fsb;449- count_fsb = last_fsb - offset_fsb;450- }451- ASSERT(count_fsb > 0);452- }453-454- /*455- * Determine if reserving space on the data or realtime partition.456- */457- if ((rt = XFS_IS_REALTIME_INODE(ip))) {458- xfs_extlen_t extsz;459-460 if (!(extsz = ip->i_d.di_extsize))461 extsz = mp->m_sb.sb_rextsize;462- resrtextents = qblocks = (count_fsb + extsz - 1);463- do_div(resrtextents, mp->m_sb.sb_rextsize);464- resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);465- quota_flag = XFS_QMOPT_RES_RTBLKS;466 } else {467- resrtextents = 0;468- resblks = qblocks = XFS_DIOSTRAT_SPACE_RES(mp, count_fsb);469- quota_flag = XFS_QMOPT_RES_REGBLKS;470 }000000000000000000000000000000000000000471472 /*473 * Allocate and setup the transaction···493 XFS_WRITE_LOG_RES(mp), resrtextents,494 XFS_TRANS_PERM_LOG_RES,495 XFS_WRITE_LOG_COUNT);496-497 /*498 * Check for running out of space, note: need lock to return499 */···502 if (error)503 goto error_out;504505- if (XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip, qblocks, 0, quota_flag)) {506- error = (EDQUOT);0507 goto error1;508- }509510- bmapi_flag = XFS_BMAPI_WRITE;511 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);512 xfs_trans_ihold(tp, ip);513514- if (!(flags & BMAPI_MMAP) && (offset < ip->i_d.di_size || rt))0515 bmapi_flag |= XFS_BMAPI_PREALLOC;516517 /*518- * Issue the bmapi() call to allocate the blocks519 */520 XFS_BMAP_INIT(&free_list, &firstfsb);521 nimaps = 1;···550 "extent-state : %x \n",551 (ip->i_mount)->m_fsname,552 (long long)ip->i_ino,553- ret_imap->br_startblock, ret_imap->br_startoff,554- ret_imap->br_blockcount,ret_imap->br_state);00555 }556 return 0;557···569 return XFS_ERROR(error);570}571000000000000000000000000000000000000000000000000000000000572int573xfs_iomap_write_delay(574 xfs_inode_t *ip,···639 xfs_iocore_t *io = &ip->i_iocore;640 xfs_fileoff_t offset_fsb;641 xfs_fileoff_t last_fsb;642- xfs_fsize_t isize;0643 xfs_fsblock_t firstblock;00644 int nimaps;645- int error;646 xfs_bmbt_irec_t imap[XFS_WRITE_IMAPS];647- int aeof;648- int fsynced = 0;649650 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE) != 0);651···655 * Make sure that the dquots are there. 
This doesn't hold656 * the ilock across a disk read.657 */658-659 error = XFS_QM_DQATTACH(mp, ip, XFS_QMOPT_ILOCKED);660 if (error)661 return XFS_ERROR(error);662000000000663retry:664 isize = ip->i_d.di_size;665- if (io->io_new_size > isize) {666 isize = io->io_new_size;667- }668669- aeof = 0;670- offset_fsb = XFS_B_TO_FSBT(mp, offset);671- last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));672- /*673- * If the caller is doing a write at the end of the file,674- * then extend the allocation (and the buffer used for the write)675- * out to the file system's write iosize. We clean up any extra676- * space left over when the file is closed in xfs_inactive().677- *678- * For sync writes, we are flushing delayed allocate space to679- * try to make additional space available for allocation near680- * the filesystem full boundary - preallocation hurts in that681- * situation, of course.682- */683- if (!(ioflag & BMAPI_SYNC) && ((offset + count) > ip->i_d.di_size)) {684- xfs_off_t aligned_offset;685- xfs_filblks_t count_fsb;686- unsigned int iosize;687- xfs_fileoff_t ioalign;688- int n;689- xfs_fileoff_t start_fsb;690691- /*692- * If there are any real blocks past eof, then don't693- * do any speculative allocation.694- */695- start_fsb = XFS_B_TO_FSBT(mp,696- ((xfs_ufsize_t)(offset + count - 1)));697- count_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp));698- while (count_fsb > 0) {699- nimaps = XFS_WRITE_IMAPS;700- error = XFS_BMAPI(mp, NULL, io, start_fsb, count_fsb,701- 0, &firstblock, 0, imap, &nimaps, NULL);702- if (error) {703- return error;704- }705- for (n = 0; n < nimaps; n++) {706- if ( !(io->io_flags & XFS_IOCORE_RT) && 707- !imap[n].br_startblock) {708- cmn_err(CE_PANIC,"Access to block "709- "zero: fs <%s> inode: %lld "710- "start_block : %llx start_off "711- ": %llx blkcnt : %llx "712- "extent-state : %x \n",713- (ip->i_mount)->m_fsname,714- (long long)ip->i_ino,715- imap[n].br_startblock,716- imap[n].br_startoff,717- imap[n].br_blockcount,718- imap[n].br_state);719- }720- if ((imap[n].br_startblock != HOLESTARTBLOCK) &&721- (imap[n].br_startblock != DELAYSTARTBLOCK)) {722- goto write_map;723- }724- start_fsb += imap[n].br_blockcount;725- count_fsb -= imap[n].br_blockcount;726- }727- }728- iosize = mp->m_writeio_blocks;729 aligned_offset = XFS_WRITEIO_ALIGN(mp, (offset + count - 1));730 ioalign = XFS_B_TO_FSBT(mp, aligned_offset);731- last_fsb = ioalign + iosize;732- aeof = 1;0733 }734-write_map:0000000735 nimaps = XFS_WRITE_IMAPS;736 firstblock = NULLFSBLOCK;737-738- /*739- * If mounted with the "-o swalloc" option, roundup the allocation740- * request to a stripe width boundary if the file size is >=741- * stripe width and we are allocating past the allocation eof.742- */743- if (!(io->io_flags & XFS_IOCORE_RT) && mp->m_swidth 744- && (mp->m_flags & XFS_MOUNT_SWALLOC)745- && (isize >= XFS_FSB_TO_B(mp, mp->m_swidth)) && aeof) {746- int eof;747- xfs_fileoff_t new_last_fsb;748-749- new_last_fsb = roundup_64(last_fsb, mp->m_swidth);750- error = xfs_bmap_eof(ip, new_last_fsb, XFS_DATA_FORK, &eof);751- if (error) {752- return error;753- }754- if (eof) {755- last_fsb = new_last_fsb;756- }757- /*758- * Roundup the allocation request to a stripe unit (m_dalign) boundary759- * if the file size is >= stripe unit size, and we are allocating past760- * the allocation eof.761- */762- } else if (!(io->io_flags & XFS_IOCORE_RT) && mp->m_dalign &&763- (isize >= XFS_FSB_TO_B(mp, mp->m_dalign)) && aeof) {764- int eof;765- xfs_fileoff_t new_last_fsb;766- new_last_fsb = 
roundup_64(last_fsb, mp->m_dalign);767- error = xfs_bmap_eof(ip, new_last_fsb, XFS_DATA_FORK, &eof);768- if (error) {769- return error;770- }771- if (eof) {772- last_fsb = new_last_fsb;773- }774- /*775- * Round up the allocation request to a real-time extent boundary776- * if the file is on the real-time subvolume.777- */778- } else if (io->io_flags & XFS_IOCORE_RT && aeof) {779- int eof;780- xfs_fileoff_t new_last_fsb;781-782- new_last_fsb = roundup_64(last_fsb, mp->m_sb.sb_rextsize);783- error = XFS_BMAP_EOF(mp, io, new_last_fsb, XFS_DATA_FORK, &eof);784- if (error) {785- return error;786- }787- if (eof)788- last_fsb = new_last_fsb;789- }790 error = xfs_bmapi(NULL, ip, offset_fsb,791 (xfs_filblks_t)(last_fsb - offset_fsb),792 XFS_BMAPI_DELAY | XFS_BMAPI_WRITE |793 XFS_BMAPI_ENTIRE, &firstblock, 1, imap,794 &nimaps, NULL);795- /*796- * This can be EDQUOT, if nimaps == 0797- */798- if (error && (error != ENOSPC)) {799 return XFS_ERROR(error);800- }801 /*802 * If bmapi returned us nothing, and if we didn't get back EDQUOT,803- * then we must have run out of space.804 */805 if (nimaps == 0) {806 xfs_iomap_enter_trace(XFS_IOMAP_WRITE_NOSPACE,···717 goto retry;718 }719720- *ret_imap = imap[0];721- *nmaps = 1;722- if ( !(io->io_flags & XFS_IOCORE_RT) && !ret_imap->br_startblock) {723 cmn_err(CE_PANIC,"Access to block zero: fs <%s> inode: %lld "724 "start_block : %llx start_off : %llx blkcnt : %llx "725 "extent-state : %x \n",726 (ip->i_mount)->m_fsname,727 (long long)ip->i_ino,728- ret_imap->br_startblock, ret_imap->br_startoff,729- ret_imap->br_blockcount,ret_imap->br_state);00730 }0000731 return 0;732}733···857 */858859 for (i = 0; i < nimaps; i++) {860- if ( !(io->io_flags & XFS_IOCORE_RT) && 861- !imap[i].br_startblock) {862 cmn_err(CE_PANIC,"Access to block zero: "863 "fs <%s> inode: %lld "864- "start_block : %llx start_off : %llx " 865 "blkcnt : %llx extent-state : %x \n",866 (ip->i_mount)->m_fsname,867 (long long)ip->i_ino,868- imap[i].br_startblock,869- imap[i].br_startoff,870- imap[i].br_blockcount,imap[i].br_state);0000871 }872 if ((offset_fsb >= imap[i].br_startoff) &&873 (offset_fsb < (imap[i].br_startoff +···908{909 xfs_mount_t *mp = ip->i_mount;910 xfs_iocore_t *io = &ip->i_iocore;911- xfs_trans_t *tp;912 xfs_fileoff_t offset_fsb;913 xfs_filblks_t count_fsb;914 xfs_filblks_t numblks_fsb;915- xfs_bmbt_irec_t imap;00000916 int committed;917 int error;918- int nres;919- int nimaps;920- xfs_fsblock_t firstfsb;921- xfs_bmap_free_t free_list;922923 xfs_iomap_enter_trace(XFS_IOMAP_UNWRITTEN,924 &ip->i_iocore, offset, count);···927 count_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);928 count_fsb = (xfs_filblks_t)(count_fsb - offset_fsb);929930- do {931- nres = XFS_DIOSTRAT_SPACE_RES(mp, 0);9320933 /*934 * set up a transaction to convert the range of extents935 * from unwritten to real. 
Do allocations in a loop until···937 */938939 tp = xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE);940- error = xfs_trans_reserve(tp, nres,941 XFS_WRITE_LOG_RES(mp), 0,942 XFS_TRANS_PERM_LOG_RES,943 XFS_WRITE_LOG_COUNT);···956 XFS_BMAP_INIT(&free_list, &firstfsb);957 nimaps = 1;958 error = xfs_bmapi(tp, ip, offset_fsb, count_fsb,959- XFS_BMAPI_WRITE, &firstfsb,960 1, &imap, &nimaps, &free_list);961 if (error)962 goto error_on_bmapi_transaction;···970 xfs_iunlock(ip, XFS_ILOCK_EXCL);971 if (error)972 goto error0;973-974 if ( !(io->io_flags & XFS_IOCORE_RT) && !imap.br_startblock) {975 cmn_err(CE_PANIC,"Access to block zero: fs <%s> "976 "inode: %lld start_block : %llx start_off : "977 "%llx blkcnt : %llx extent-state : %x \n",978 (ip->i_mount)->m_fsname,979 (long long)ip->i_ino,980- imap.br_startblock,imap.br_startoff,981- imap.br_blockcount,imap.br_state);00982 }983984 if ((numblks_fsb = imap.br_blockcount) == 0) {
···262 case BMAPI_WRITE:263 /* If we found an extent, return it */264 if (nimaps &&265+ (imap.br_startblock != HOLESTARTBLOCK) &&266 (imap.br_startblock != DELAYSTARTBLOCK)) {267 xfs_iomap_map_trace(XFS_IOMAP_WRITE_MAP, io,268 offset, count, iomapp, &imap, flags);···317}318319STATIC int320+xfs_iomap_eof_align_last_fsb(321+ xfs_mount_t *mp,322+ xfs_iocore_t *io,323+ xfs_fsize_t isize,324+ xfs_extlen_t extsize,325+ xfs_fileoff_t *last_fsb)326+{327+ xfs_fileoff_t new_last_fsb = 0;328+ xfs_extlen_t align;329+ int eof, error;330+331+ if (io->io_flags & XFS_IOCORE_RT)332+ ;333+ /*334+ * If mounted with the "-o swalloc" option, roundup the allocation335+ * request to a stripe width boundary if the file size is >=336+ * stripe width and we are allocating past the allocation eof.337+ */338+ else if (mp->m_swidth && (mp->m_flags & XFS_MOUNT_SWALLOC) &&339+ (isize >= XFS_FSB_TO_B(mp, mp->m_swidth)))340+ new_last_fsb = roundup_64(*last_fsb, mp->m_swidth);341+ /*342+ * Roundup the allocation request to a stripe unit (m_dalign) boundary343+ * if the file size is >= stripe unit size, and we are allocating past344+ * the allocation eof.345+ */346+ else if (mp->m_dalign && (isize >= XFS_FSB_TO_B(mp, mp->m_dalign)))347+ new_last_fsb = roundup_64(*last_fsb, mp->m_dalign);348+349+ /*350+ * Always round up the allocation request to an extent boundary351+ * (when file on a real-time subvolume or has di_extsize hint).352+ */353+ if (extsize) {354+ if (new_last_fsb)355+ align = roundup_64(new_last_fsb, extsize);356+ else357+ align = extsize;358+ new_last_fsb = roundup_64(*last_fsb, align);359+ }360+361+ if (new_last_fsb) {362+ error = XFS_BMAP_EOF(mp, io, new_last_fsb, XFS_DATA_FORK, &eof);363+ if (error)364+ return error;365+ if (eof)366+ *last_fsb = new_last_fsb;367+ }368+ return 0;369+}370+371+STATIC int372xfs_flush_space(373 xfs_inode_t *ip,374 int *fsynced,···362 xfs_iocore_t *io = &ip->i_iocore;363 xfs_fileoff_t offset_fsb;364 xfs_fileoff_t last_fsb;365+ xfs_filblks_t count_fsb, resaligned;366 xfs_fsblock_t firstfsb;367+ xfs_extlen_t extsz, temp;368+ xfs_fsize_t isize;369 int nimaps;0370 int bmapi_flag;371 int quota_flag;372 int rt;373 xfs_trans_t *tp;374 xfs_bmbt_irec_t imap;375 xfs_bmap_free_t free_list;376+ uint qblocks, resblks, resrtextents;377 int committed;378+ int error;379380 /*381 * Make sure that the dquots are there. 
This doesn't hold···384 if (error)385 return XFS_ERROR(error);386387+ rt = XFS_IS_REALTIME_INODE(ip);388+ if (unlikely(rt)) {000000000000000000389 if (!(extsz = ip->i_d.di_extsize))390 extsz = mp->m_sb.sb_rextsize;0000391 } else {392+ extsz = ip->i_d.di_extsize;00393 }394+395+ isize = ip->i_d.di_size;396+ if (io->io_new_size > isize)397+ isize = io->io_new_size;398+399+ offset_fsb = XFS_B_TO_FSBT(mp, offset);400+ last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));401+ if ((offset + count) > isize) {402+ error = xfs_iomap_eof_align_last_fsb(mp, io, isize, extsz,403+ &last_fsb);404+ if (error)405+ goto error_out;406+ } else {407+ if (found && (ret_imap->br_startblock == HOLESTARTBLOCK))408+ last_fsb = MIN(last_fsb, (xfs_fileoff_t)409+ ret_imap->br_blockcount +410+ ret_imap->br_startoff);411+ }412+ count_fsb = last_fsb - offset_fsb;413+ ASSERT(count_fsb > 0);414+415+ resaligned = count_fsb;416+ if (unlikely(extsz)) {417+ if ((temp = do_mod(offset_fsb, extsz)))418+ resaligned += temp;419+ if ((temp = do_mod(resaligned, extsz)))420+ resaligned += extsz - temp;421+ }422+423+ if (unlikely(rt)) {424+ resrtextents = qblocks = resaligned;425+ resrtextents /= mp->m_sb.sb_rextsize;426+ resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);427+ quota_flag = XFS_QMOPT_RES_RTBLKS;428+ } else {429+ resrtextents = 0;430+ resblks = qblocks = XFS_DIOSTRAT_SPACE_RES(mp, resaligned);431+ quota_flag = XFS_QMOPT_RES_REGBLKS;432+ }433434 /*435 * Allocate and setup the transaction···425 XFS_WRITE_LOG_RES(mp), resrtextents,426 XFS_TRANS_PERM_LOG_RES,427 XFS_WRITE_LOG_COUNT);0428 /*429 * Check for running out of space, note: need lock to return430 */···435 if (error)436 goto error_out;437438+ error = XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip,439+ qblocks, 0, quota_flag);440+ if (error)441 goto error1;04420443 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);444 xfs_trans_ihold(tp, ip);445446+ bmapi_flag = XFS_BMAPI_WRITE;447+ if ((flags & BMAPI_DIRECT) && (offset < ip->i_d.di_size || extsz))448 bmapi_flag |= XFS_BMAPI_PREALLOC;449450 /*451+ * Issue the xfs_bmapi() call to allocate the blocks452 */453 XFS_BMAP_INIT(&free_list, &firstfsb);454 nimaps = 1;···483 "extent-state : %x \n",484 (ip->i_mount)->m_fsname,485 (long long)ip->i_ino,486+ (unsigned long long)ret_imap->br_startblock,487+ (unsigned long long)ret_imap->br_startoff,488+ (unsigned long long)ret_imap->br_blockcount,489+ ret_imap->br_state);490 }491 return 0;492···500 return XFS_ERROR(error);501}502503+/*504+ * If the caller is doing a write at the end of the file,505+ * then extend the allocation out to the file system's write506+ * iosize. 
We clean up any extra space left over when the
+ * file is closed in xfs_inactive().
+ *
+ * For sync writes, we are flushing delayed allocate space to
+ * try to make additional space available for allocation near
+ * the filesystem full boundary - preallocation hurts in that
+ * situation, of course.
+ */
+STATIC int
+xfs_iomap_eof_want_preallocate(
+	xfs_mount_t	*mp,
+	xfs_iocore_t	*io,
+	xfs_fsize_t	isize,
+	xfs_off_t	offset,
+	size_t		count,
+	int		ioflag,
+	xfs_bmbt_irec_t	*imap,
+	int		nimaps,
+	int		*prealloc)
+{
+	xfs_fileoff_t	start_fsb;
+	xfs_filblks_t	count_fsb;
+	xfs_fsblock_t	firstblock;
+	int		n, error, imaps;
+
+	*prealloc = 0;
+	if ((ioflag & BMAPI_SYNC) || (offset + count) <= isize)
+		return 0;
+
+	/*
+	 * If there are any real blocks past eof, then don't
+	 * do any speculative allocation.
+	 */
+	start_fsb = XFS_B_TO_FSBT(mp, ((xfs_ufsize_t)(offset + count - 1)));
+	count_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp));
+	while (count_fsb > 0) {
+		imaps = nimaps;
+		firstblock = NULLFSBLOCK;
+		error = XFS_BMAPI(mp, NULL, io, start_fsb, count_fsb,
+				  0, &firstblock, 0, imap, &imaps, NULL);
+		if (error)
+			return error;
+		for (n = 0; n < imaps; n++) {
+			if ((imap[n].br_startblock != HOLESTARTBLOCK) &&
+			    (imap[n].br_startblock != DELAYSTARTBLOCK))
+				return 0;
+			start_fsb += imap[n].br_blockcount;
+			count_fsb -= imap[n].br_blockcount;
+		}
+	}
+	*prealloc = 1;
+	return 0;
+}
+
 int
 xfs_iomap_write_delay(
 	xfs_inode_t	*ip,
···
 	xfs_iocore_t	*io = &ip->i_iocore;
 	xfs_fileoff_t	offset_fsb;
 	xfs_fileoff_t	last_fsb;
+	xfs_off_t	aligned_offset;
+	xfs_fileoff_t	ioalign;
 	xfs_fsblock_t	firstblock;
+	xfs_extlen_t	extsz;
+	xfs_fsize_t	isize;
 	int		nimaps;
 	xfs_bmbt_irec_t imap[XFS_WRITE_IMAPS];
+	int		prealloc, fsynced = 0;
+	int		error;

 	ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE) != 0);

···
 	 * Make sure that the dquots are there.  This doesn't hold
 	 * the ilock across a disk read.
 	 */
 	error = XFS_QM_DQATTACH(mp, ip, XFS_QMOPT_ILOCKED);
 	if (error)
 		return XFS_ERROR(error);

+	if (XFS_IS_REALTIME_INODE(ip)) {
+		if (!(extsz = ip->i_d.di_extsize))
+			extsz = mp->m_sb.sb_rextsize;
+	} else {
+		extsz = ip->i_d.di_extsize;
+	}
+
+	offset_fsb = XFS_B_TO_FSBT(mp, offset);
+
 retry:
 	isize = ip->i_d.di_size;
+	if (io->io_new_size > isize)
 		isize = io->io_new_size;

+	error = xfs_iomap_eof_want_preallocate(mp, io, isize, offset, count,
+				ioflag, imap, XFS_WRITE_IMAPS, &prealloc);
+	if (error)
+		return error;

+	if (prealloc) {
 		aligned_offset = XFS_WRITEIO_ALIGN(mp, (offset + count - 1));
 		ioalign = XFS_B_TO_FSBT(mp, aligned_offset);
+		last_fsb = ioalign + mp->m_writeio_blocks;
+	} else {
+		last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));
 	}
+
+	if (prealloc || extsz) {
+		error = xfs_iomap_eof_align_last_fsb(mp, io, isize, extsz,
+							&last_fsb);
+		if (error)
+			return error;
+	}
+
 	nimaps = XFS_WRITE_IMAPS;
 	firstblock = NULLFSBLOCK;
 	error = xfs_bmapi(NULL, ip, offset_fsb,
 			  (xfs_filblks_t)(last_fsb - offset_fsb),
 			  XFS_BMAPI_DELAY | XFS_BMAPI_WRITE |
 			  XFS_BMAPI_ENTIRE, &firstblock, 1, imap,
 			  &nimaps, NULL);
+	if (error && (error != ENOSPC))
 		return XFS_ERROR(error);
+
 	/*
 	 * If bmapi returned us nothing, and if we didn't get back EDQUOT,
+	 * then we must have run out of space - flush delalloc, and retry..
 	 */
 	if (nimaps == 0) {
 		xfs_iomap_enter_trace(XFS_IOMAP_WRITE_NOSPACE,
···
 		goto retry;
 	}

+	if (!(io->io_flags & XFS_IOCORE_RT) && !ret_imap->br_startblock) {
 		cmn_err(CE_PANIC,"Access to block zero: fs <%s> inode: %lld "
 			"start_block : %llx start_off : %llx blkcnt : %llx "
 			"extent-state : %x \n",
 			(ip->i_mount)->m_fsname,
 			(long long)ip->i_ino,
+			(unsigned long long)ret_imap->br_startblock,
+			(unsigned long long)ret_imap->br_startoff,
+			(unsigned long long)ret_imap->br_blockcount,
+			ret_imap->br_state);
 	}
+
+	*ret_imap = imap[0];
+	*nmaps = 1;
+
 	return 0;
 }

···
 	 */

 	for (i = 0; i < nimaps; i++) {
+		if (!(io->io_flags & XFS_IOCORE_RT)  &&
+		    !imap[i].br_startblock) {
 			cmn_err(CE_PANIC,"Access to block zero: "
 				"fs <%s> inode: %lld "
+				"start_block : %llx start_off : %llx "
 				"blkcnt : %llx extent-state : %x \n",
 				(ip->i_mount)->m_fsname,
 				(long long)ip->i_ino,
+				(unsigned long long)
+					imap[i].br_startblock,
+				(unsigned long long)
+					imap[i].br_startoff,
+				(unsigned long long)
+					imap[i].br_blockcount,
+				imap[i].br_state);
 		}
 		if ((offset_fsb >= imap[i].br_startoff) &&
 		    (offset_fsb < (imap[i].br_startoff +
···
 {
 	xfs_mount_t	*mp = ip->i_mount;
 	xfs_iocore_t	*io = &ip->i_iocore;
 	xfs_fileoff_t	offset_fsb;
 	xfs_filblks_t	count_fsb;
 	xfs_filblks_t	numblks_fsb;
+	xfs_fsblock_t	firstfsb;
+	int		nimaps;
+	xfs_trans_t	*tp;
+	xfs_bmbt_irec_t	imap;
+	xfs_bmap_free_t	free_list;
+	uint		resblks;
 	int		committed;
 	int		error;

 	xfs_iomap_enter_trace(XFS_IOMAP_UNWRITTEN,
 				&ip->i_iocore, offset, count);
···
 	count_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
 	count_fsb = (xfs_filblks_t)(count_fsb - offset_fsb);

+	resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0) << 1;

+	do {
 		/*
 		 * set up a transaction to convert the range of extents
 		 * from unwritten to real.  Do allocations in a loop until
···
 		 */

 		tp = xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE);
+		error = xfs_trans_reserve(tp, resblks,
 				XFS_WRITE_LOG_RES(mp), 0,
 				XFS_TRANS_PERM_LOG_RES,
 				XFS_WRITE_LOG_COUNT);
···
 		XFS_BMAP_INIT(&free_list, &firstfsb);
 		nimaps = 1;
 		error = xfs_bmapi(tp, ip, offset_fsb, count_fsb,
+				  XFS_BMAPI_WRITE|XFS_BMAPI_CONVERT, &firstfsb,
 				  1, &imap, &nimaps, &free_list);
 		if (error)
 			goto error_on_bmapi_transaction;
···
 		xfs_iunlock(ip, XFS_ILOCK_EXCL);
 		if (error)
 			goto error0;
+
 		if ( !(io->io_flags & XFS_IOCORE_RT)  && !imap.br_startblock) {
 			cmn_err(CE_PANIC,"Access to block zero:  fs <%s> "
 				"inode: %lld start_block : %llx start_off : "
 				"%llx blkcnt : %llx extent-state : %x \n",
 				(ip->i_mount)->m_fsname,
 				(long long)ip->i_ino,
+				(unsigned long long)imap.br_startblock,
+				(unsigned long long)imap.br_startoff,
+				(unsigned long long)imap.br_blockcount,
+				imap.br_state);
 		}

 		if ((numblks_fsb = imap.br_blockcount) == 0) {
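The xfs_iomap_eof_want_preallocate() hunk above boils down to a simple policy: never preallocate for sync writes or for writes that end inside the current EOF, and otherwise preallocate only while every mapping from the end of the write out to the maximum file offset is still a hole or delayed allocation. The following is a minimal user-space sketch of that decision; the names (struct mapping, HOLE, DELALLOC, want_eof_preallocate) are illustrative stand-ins, not XFS types or functions.

/*
 * Sketch only: models the speculative-preallocation decision above
 * on a pre-fetched array of mappings instead of calling the bmap code.
 */
#include <stdbool.h>
#include <stdio.h>

#define HOLE      (-1LL)	/* stand-in for HOLESTARTBLOCK */
#define DELALLOC  (-2LL)	/* stand-in for DELAYSTARTBLOCK */

struct mapping {
	long long startblock;	/* HOLE, DELALLOC, or a real block number */
	long long blockcount;
};

/*
 * Mirror of the logic above: skip sync writes and writes inside EOF,
 * and refuse to preallocate if any real blocks already exist past EOF.
 */
static bool want_eof_preallocate(const struct mapping *map, int nmaps,
				 long long write_end, long long isize,
				 bool sync_write)
{
	int n;

	if (sync_write || write_end <= isize)
		return false;

	for (n = 0; n < nmaps; n++)
		if (map[n].startblock != HOLE &&
		    map[n].startblock != DELALLOC)
			return false;	/* real blocks past EOF already */

	return true;
}

int main(void)
{
	struct mapping past_eof[] = {
		{ DELALLOC, 16 },
		{ HOLE,     64 },
	};

	printf("preallocate? %d\n",
	       want_eof_preallocate(past_eof, 2, 8192, 4096, false)); /* 1 */
	printf("preallocate? %d\n",
	       want_eof_preallocate(past_eof, 2, 2048, 4096, false)); /* 0 */
	return 0;
}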
···
 #define xlog_trace_iclog(iclog,state)
 #endif /* XFS_LOG_TRACE */

+
+static void
+xlog_ins_ticketq(struct xlog_ticket **qp, struct xlog_ticket *tic)
+{
+	if (*qp) {
+		tic->t_next = (*qp);
+		tic->t_prev = (*qp)->t_prev;
+		(*qp)->t_prev->t_next = tic;
+		(*qp)->t_prev = tic;
+	} else {
+		tic->t_prev = tic->t_next = tic;
+		*qp = tic;
+	}
+
+	tic->t_flags |= XLOG_TIC_IN_Q;
+}
+
+static void
+xlog_del_ticketq(struct xlog_ticket **qp, struct xlog_ticket *tic)
+{
+	if (tic == tic->t_next) {
+		*qp = NULL;
+	} else {
+		*qp = tic->t_next;
+		tic->t_next->t_prev = tic->t_prev;
+		tic->t_prev->t_next = tic->t_next;
+	}
+
+	tic->t_next = tic->t_prev = NULL;
+	tic->t_flags &= ~XLOG_TIC_IN_Q;
+}
+
+static void
+xlog_grant_sub_space(struct log *log, int bytes)
+{
+	log->l_grant_write_bytes -= bytes;
+	if (log->l_grant_write_bytes < 0) {
+		log->l_grant_write_bytes += log->l_logsize;
+		log->l_grant_write_cycle--;
+	}
+
+	log->l_grant_reserve_bytes -= bytes;
+	if ((log)->l_grant_reserve_bytes < 0) {
+		log->l_grant_reserve_bytes += log->l_logsize;
+		log->l_grant_reserve_cycle--;
+	}
+
+}
+
+static void
+xlog_grant_add_space_write(struct log *log, int bytes)
+{
+	log->l_grant_write_bytes += bytes;
+	if (log->l_grant_write_bytes > log->l_logsize) {
+		log->l_grant_write_bytes -= log->l_logsize;
+		log->l_grant_write_cycle++;
+	}
+}
+
+static void
+xlog_grant_add_space_reserve(struct log *log, int bytes)
+{
+	log->l_grant_reserve_bytes += bytes;
+	if (log->l_grant_reserve_bytes > log->l_logsize) {
+		log->l_grant_reserve_bytes -= log->l_logsize;
+		log->l_grant_reserve_cycle++;
+	}
+}
+
+static inline void
+xlog_grant_add_space(struct log *log, int bytes)
+{
+	xlog_grant_add_space_write(log, bytes);
+	xlog_grant_add_space_reserve(log, bytes);
+}
+
+
 /*
  * NOTES:
  *
···
 	if (readonly)
 		vfsp->vfs_flag &= ~VFS_RDONLY;

-	error = xlog_recover(mp->m_log, readonly);
+	error = xlog_recover(mp->m_log);

 	if (readonly)
 		vfsp->vfs_flag |= VFS_RDONLY;
···

 	/* move grant heads by roundoff in sync */
 	s = GRANT_LOCK(log);
-	XLOG_GRANT_ADD_SPACE(log, roundoff, 'w');
-	XLOG_GRANT_ADD_SPACE(log, roundoff, 'r');
+	xlog_grant_add_space(log, roundoff);
 	GRANT_UNLOCK(log, s);

 	/* put cycle number in every block */
···
  * print out info relating to regions written which consume
  * the reservation
  */
-#if defined(XFS_LOG_RES_DEBUG)
 STATIC void
 xlog_print_tic_res(xfs_mount_t *mp, xlog_ticket_t *ticket)
 {
···
 		ticket->t_res_arr_sum, ticket->t_res_o_flow,
 		ticket->t_res_num_ophdrs, ophdr_spc,
 		ticket->t_res_arr_sum +
-			ticket->t_res_o_flow + ophdr_spc,
+			ticket->t_res_o_flow + ophdr_spc,
 		ticket->t_res_num);

 	for (i = 0; i < ticket->t_res_num; i++) {
-		uint r_type = ticket->t_res_arr[i].r_type;
+		uint r_type = ticket->t_res_arr[i].r_type;
 		cmn_err(CE_WARN,
 			    "region[%u]: %s - %u bytes\n",
 			    i,
···
 			    ticket->t_res_arr[i].r_len);
 	}
 }
-#else
-#define xlog_print_tic_res(mp, ticket)
-#endif

 /*
  * Write some region out to in-core log
···

 	/* something is already sleeping; insert new transaction at end */
 	if (log->l_reserve_headq) {
-		XLOG_INS_TICKETQ(log->l_reserve_headq, tic);
+		xlog_ins_ticketq(&log->l_reserve_headq, tic);
 		xlog_trace_loggrant(log, tic,
 				    "xlog_grant_log_space: sleep 1");
 		/*
···
 			     log->l_grant_reserve_bytes);
 	if (free_bytes < need_bytes) {
 		if ((tic->t_flags & XLOG_TIC_IN_Q) == 0)
-			XLOG_INS_TICKETQ(log->l_reserve_headq, tic);
+			xlog_ins_ticketq(&log->l_reserve_headq, tic);
 		xlog_trace_loggrant(log, tic,
 				    "xlog_grant_log_space: sleep 2");
 		XFS_STATS_INC(xs_sleep_logspace);
···
 		s = GRANT_LOCK(log);
 		goto redo;
 	} else if (tic->t_flags & XLOG_TIC_IN_Q)
-		XLOG_DEL_TICKETQ(log->l_reserve_headq, tic);
+		xlog_del_ticketq(&log->l_reserve_headq, tic);

 	/* we've got enough space */
-	XLOG_GRANT_ADD_SPACE(log, need_bytes, 'w');
-	XLOG_GRANT_ADD_SPACE(log, need_bytes, 'r');
+	xlog_grant_add_space(log, need_bytes);
 #ifdef DEBUG
 	tail_lsn = log->l_tail_lsn;
 	/*
···

  error_return:
 	if (tic->t_flags & XLOG_TIC_IN_Q)
-		XLOG_DEL_TICKETQ(log->l_reserve_headq, tic);
+		xlog_del_ticketq(&log->l_reserve_headq, tic);
 	xlog_trace_loggrant(log, tic, "xlog_grant_log_space: err_ret");
 	/*
 	 * If we are failing, make sure the ticket doesn't have any
···

 	if (ntic != log->l_write_headq) {
 		if ((tic->t_flags & XLOG_TIC_IN_Q) == 0)
-			XLOG_INS_TICKETQ(log->l_write_headq, tic);
+			xlog_ins_ticketq(&log->l_write_headq, tic);

 		xlog_trace_loggrant(log, tic,
 				    "xlog_regrant_write_log_space: sleep 1");
···
 			     log->l_grant_write_bytes);
 	if (free_bytes < need_bytes) {
 		if ((tic->t_flags & XLOG_TIC_IN_Q) == 0)
-			XLOG_INS_TICKETQ(log->l_write_headq, tic);
+			xlog_ins_ticketq(&log->l_write_headq, tic);
 		XFS_STATS_INC(xs_sleep_logspace);
 		sv_wait(&tic->t_sema, PINOD|PLTWAIT, &log->l_grant_lock, s);

···
 		s = GRANT_LOCK(log);
 		goto redo;
 	} else if (tic->t_flags & XLOG_TIC_IN_Q)
-		XLOG_DEL_TICKETQ(log->l_write_headq, tic);
+		xlog_del_ticketq(&log->l_write_headq, tic);

-	XLOG_GRANT_ADD_SPACE(log, need_bytes, 'w'); /* we've got enough space */
+	/* we've got enough space */
+	xlog_grant_add_space_write(log, need_bytes);
 #ifdef DEBUG
 	tail_lsn = log->l_tail_lsn;
 	if (CYCLE_LSN(tail_lsn) != log->l_grant_write_cycle) {
···

  error_return:
 	if (tic->t_flags & XLOG_TIC_IN_Q)
-		XLOG_DEL_TICKETQ(log->l_reserve_headq, tic);
+		xlog_del_ticketq(&log->l_reserve_headq, tic);
 	xlog_trace_loggrant(log, tic, "xlog_regrant_write_log_space: err_ret");
 	/*
 	 * If we are failing, make sure the ticket doesn't have any
···
 		ticket->t_cnt--;

 	s = GRANT_LOCK(log);
-	XLOG_GRANT_SUB_SPACE(log, ticket->t_curr_res, 'w');
-	XLOG_GRANT_SUB_SPACE(log, ticket->t_curr_res, 'r');
+	xlog_grant_sub_space(log, ticket->t_curr_res);
 	ticket->t_curr_res = ticket->t_unit_res;
 	XLOG_TIC_RESET_RES(ticket);
 	xlog_trace_loggrant(log, ticket,
···
 		return;
 	}

-	XLOG_GRANT_ADD_SPACE(log, ticket->t_unit_res, 'r');
+	xlog_grant_add_space_reserve(log, ticket->t_unit_res);
 	xlog_trace_loggrant(log, ticket,
 		"xlog_regrant_reserve_log_space: exit");
 	xlog_verify_grant_head(log, 0);
···
 	s = GRANT_LOCK(log);
 	xlog_trace_loggrant(log, ticket, "xlog_ungrant_log_space: enter");

-	XLOG_GRANT_SUB_SPACE(log, ticket->t_curr_res, 'w');
-	XLOG_GRANT_SUB_SPACE(log, ticket->t_curr_res, 'r');
+	xlog_grant_sub_space(log, ticket->t_curr_res);

 	xlog_trace_loggrant(log, ticket, "xlog_ungrant_log_space: sub current");

···
 	 */
 	if (ticket->t_cnt > 0) {
 		ASSERT(ticket->t_flags & XLOG_TIC_PERM_RESERV);
-		XLOG_GRANT_SUB_SPACE(log, ticket->t_unit_res*ticket->t_cnt,'w');
-		XLOG_GRANT_SUB_SPACE(log, ticket->t_unit_res*ticket->t_cnt,'r');
+		xlog_grant_sub_space(log, ticket->t_unit_res*ticket->t_cnt);
 	}

 	xlog_trace_loggrant(log, ticket, "xlog_ungrant_log_space: exit");
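The xlog_grant_*_space() helpers introduced above replace the old XLOG_GRANT_ADD_SPACE/XLOG_GRANT_SUB_SPACE character-flag macros. The grant heads are byte offsets into a circular log, so every add or subtract has to wrap the byte count around the log size and bump or drop the matching cycle counter. Below is a stand-alone sketch of just that arithmetic; struct fake_log and the helper names are assumptions for illustration, not the XFS symbols, but the wraparound logic mirrors the functions in the hunk.

/*
 * Sketch only: circular grant-head accounting with a cycle counter.
 */
#include <stdio.h>

struct fake_log {
	int logsize;		/* size of the circular log, in bytes */
	int grant_bytes;	/* byte offset of the grant head */
	int grant_cycle;	/* how many times the head has wrapped */
};

/* Advance the head, bumping the cycle when it wraps past the end. */
static void grant_add_space(struct fake_log *log, int bytes)
{
	log->grant_bytes += bytes;
	if (log->grant_bytes > log->logsize) {
		log->grant_bytes -= log->logsize;
		log->grant_cycle++;
	}
}

/* Move the head back, borrowing from the previous cycle if needed. */
static void grant_sub_space(struct fake_log *log, int bytes)
{
	log->grant_bytes -= bytes;
	if (log->grant_bytes < 0) {
		log->grant_bytes += log->logsize;
		log->grant_cycle--;
	}
}

int main(void)
{
	struct fake_log log = { .logsize = 1024, .grant_bytes = 900,
				.grant_cycle = 1 };

	grant_add_space(&log, 300);	/* 1200 wraps to 176, cycle 2 */
	printf("cycle %d, bytes %d\n", log.grant_cycle, log.grant_bytes);

	grant_sub_space(&log, 300);	/* unwinds back to cycle 1, byte 900 */
	printf("cycle %d, bytes %d\n", log.grant_cycle, log.grant_bytes);
	return 0;
}

Having the reserve and write heads move through separate add_space_write()/add_space_reserve() helpers, with a combined wrapper for the common case, keeps each call site from repeating the wraparound bookkeeping that the old 'w'/'r' macro arguments selected.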
fs/xfs/xfs_log.h (+1/-10)
···

 /* Region types for iovec's i_type */
-#if defined(XFS_LOG_RES_DEBUG)
 #define XLOG_REG_TYPE_BFORMAT		1
 #define XLOG_REG_TYPE_BCHUNK		2
 #define XLOG_REG_TYPE_EFI_FORMAT	3
···
 #define XLOG_REG_TYPE_COMMIT		18
 #define XLOG_REG_TYPE_TRANSHDR		19
 #define XLOG_REG_TYPE_MAX		19
-#endif

-#if defined(XFS_LOG_RES_DEBUG)
 #define XLOG_VEC_SET_TYPE(vecp, t) ((vecp)->i_type = (t))
-#else
-#define XLOG_VEC_SET_TYPE(vecp, t)
-#endif
-

 typedef struct xfs_log_iovec {
 	xfs_caddr_t	i_addr;		/* beginning address of region */
 	int		i_len;		/* length in bytes of region */
-#if defined(XFS_LOG_RES_DEBUG)
-	uint		i_type;		/* type of region */
-#endif
+	uint		i_type;		/* type of region */
 } xfs_log_iovec_t;

 typedef void* xfs_log_ticket_t;
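With the XFS_LOG_RES_DEBUG conditionals gone, every log iovec now always carries an i_type tag, so the ticket's reservation accounting can attribute consumed space to a region kind in all builds. A tiny sketch of that tagging, using simplified stand-ins for xfs_log_iovec_t and the XLOG_REG_TYPE_* constants rather than the real definitions:

/*
 * Sketch only: tag each log region with a type before it is written.
 */
#include <stdio.h>

enum region_type { REG_BFORMAT = 1, REG_BCHUNK = 2, REG_COMMIT = 18 };

struct log_iovec {
	void		*addr;	/* start of the region */
	int		 len;	/* length in bytes */
	unsigned int	 type;	/* what kind of region this is */
};

#define VEC_SET_TYPE(vecp, t)	((vecp)->type = (t))

int main(void)
{
	static char fmt[64], data[512];
	struct log_iovec vec[2] = {
		{ fmt,  sizeof(fmt),  0 },
		{ data, sizeof(data), 0 },
	};

	VEC_SET_TYPE(&vec[0], REG_BFORMAT);
	VEC_SET_TYPE(&vec[1], REG_BCHUNK);

	printf("region 0: type %u, %d bytes\n", vec[0].type, vec[0].len);
	printf("region 1: type %u, %d bytes\n", vec[1].type, vec[1].len);
	return 0;
}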
···

 /* Ticket reservation region accounting */
 #define XLOG_TIC_LEN_MAX	15
 #define XLOG_TIC_RESET_RES(t) ((t)->t_res_num = \
 				(t)->t_res_arr_sum = (t)->t_res_num_ophdrs = 0)
···
  * we don't care about.
  */
 typedef struct xlog_res {
+	uint	r_len;	/* region length		:4 */
+	uint	r_type;	/* region's transaction type	:4 */
 } xlog_res_t;

 typedef struct xlog_ticket {
 	sv_t		t_sema;		/* sleep on this semaphore	: 20 */
···
 	char		t_flags;	/* properties of reservation	: 1  */
 	uint		t_trans_type;	/* transaction type		: 4  */

 	/* reservation array fields */
 	uint		t_res_num;		/* num in array		: 4 */
 	uint		t_res_num_ophdrs;	/* num op hdrs		: 4 */
 	uint		t_res_arr_sum;		/* array sum		: 4 */
 	uint		t_res_o_flow;		/* sum overflow		: 4 */
+	xlog_res_t	t_res_arr[XLOG_TIC_LEN_MAX]; /* array of res : 8 * 15 */
 } xlog_ticket_t;

 #endif
···

 #define XLOG_FORCED_SHUTDOWN(log)	((log)->l_flags & XLOG_IO_ERROR)

 /* common routines */
 extern xfs_lsn_t xlog_assign_tail_lsn(struct xfs_mount *mp);
 extern int	 xlog_find_tail(xlog_t	*log,
 				xfs_daddr_t *head_blk,
+				xfs_daddr_t *tail_blk);
+extern int	 xlog_recover(xlog_t *log);
 extern int	 xlog_recover_finish(xlog_t *log, int mfsi_flags);
 extern void	 xlog_pack_data(xlog_t *log, xlog_in_core_t *iclog, int);
 extern void	 xlog_recover_process_iunlinks(xlog_t *log);
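The ticket now embeds a fixed array of up to XLOG_TIC_LEN_MAX region records plus running counters (array sum, op-header count, overflow), which is what xlog_print_tic_res() dumps when a reservation is overrun. The sketch below shows one plausible way such a bounded accounting array can be filled; the record_region() policy here (keep only the total once the array is full) is an assumption for illustration, not the exact XFS behaviour.

/*
 * Sketch only: bounded per-ticket region accounting.
 */
#include <stdio.h>

#define TIC_LEN_MAX 15

struct res {
	unsigned int r_len;	/* region length */
	unsigned int r_type;	/* region type tag */
};

struct ticket {
	unsigned int res_num;		/* regions actually recorded */
	unsigned int res_arr_sum;	/* sum of recorded lengths */
	unsigned int res_o_flow;	/* lengths that no longer fit */
	struct res   res_arr[TIC_LEN_MAX];
};

static void record_region(struct ticket *tic, unsigned int len,
			  unsigned int type)
{
	if (tic->res_num < TIC_LEN_MAX) {
		tic->res_arr[tic->res_num].r_len = len;
		tic->res_arr[tic->res_num].r_type = type;
		tic->res_num++;
		tic->res_arr_sum += len;
	} else {
		tic->res_o_flow += len;	/* array full: keep the total only */
	}
}

int main(void)
{
	struct ticket tic = { 0 };
	unsigned int i;

	for (i = 0; i < 20; i++)
		record_region(&tic, 128, i % 4);

	printf("recorded %u regions, sum %u, overflow %u\n",
	       tic.res_num, tic.res_arr_sum, tic.res_o_flow);
	return 0;
}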
fs/xfs/xfs_log_recover.c (+6/-6)
···
 xlog_find_tail(
 	xlog_t			*log,
 	xfs_daddr_t		*head_blk,
-	xfs_daddr_t		*tail_blk,
-	int			readonly)
+	xfs_daddr_t		*tail_blk)
 {
 	xlog_rec_header_t	*rhead;
 	xlog_op_header_t	*op_head;
···

 	/*
 	 * The logitem format's flag tells us if this was user quotaoff,
-	 * group quotaoff or both.
+	 * group/project quotaoff or both.
 	 */
 	if (qoff_f->qf_flags & XFS_UQUOTA_ACCT)
 		log->l_quotaoffs_flag |= XFS_DQ_USER;
+	if (qoff_f->qf_flags & XFS_PQUOTA_ACCT)
+		log->l_quotaoffs_flag |= XFS_DQ_PROJ;
 	if (qoff_f->qf_flags & XFS_GQUOTA_ACCT)
 		log->l_quotaoffs_flag |= XFS_DQ_GROUP;

···
  */
 int
 xlog_recover(
-	xlog_t		*log,
-	int		readonly)
+	xlog_t		*log)
 {
 	xfs_daddr_t	head_blk, tail_blk;
 	int		error;

 	/* find the tail of the log */
-	if ((error = xlog_find_tail(log, &head_blk, &tail_blk, readonly)))
+	if ((error = xlog_find_tail(log, &head_blk, &tail_blk)))
 		return error;

 	if (tail_blk != head_blk) {
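The quotaoff hunk above extends replay to project quotas: each accounting flag in the logged quotaoff item sets the matching dquot-type bit in l_quotaoffs_flag, which recovery later consults to skip updates to dquots whose quota type was switched off. A hedged sketch of that translation and check, using illustrative flag values rather than the real XFS_*QUOTA_ACCT and XFS_DQ_* constants:

/*
 * Sketch only: map quotaoff accounting flags to dquot-type bits and use
 * them to decide whether a replayed dquot change should be ignored.
 */
#include <stdbool.h>
#include <stdio.h>

#define ACCT_USER  0x1		/* stand-ins for XFS_[UPG]QUOTA_ACCT */
#define ACCT_PROJ  0x2
#define ACCT_GROUP 0x4

#define DQ_USER    0x1		/* stand-ins for XFS_DQ_USER/PROJ/GROUP */
#define DQ_PROJ    0x2
#define DQ_GROUP   0x4

/* One accounting bit sets one dquot-type bit, as in the hunk above. */
static unsigned int quotaoff_to_dqtype(unsigned int qf_flags)
{
	unsigned int type = 0;

	if (qf_flags & ACCT_USER)
		type |= DQ_USER;
	if (qf_flags & ACCT_PROJ)
		type |= DQ_PROJ;
	if (qf_flags & ACCT_GROUP)
		type |= DQ_GROUP;
	return type;
}

/* During replay, changes to a quota'd-off dquot type can be skipped. */
static bool skip_dquot(unsigned int quotaoffs_flag, unsigned int dq_type)
{
	return (quotaoffs_flag & dq_type) != 0;
}

int main(void)
{
	unsigned int off = quotaoff_to_dqtype(ACCT_USER | ACCT_PROJ);

	printf("skip user dquot?  %d\n", skip_dquot(off, DQ_USER));  /* 1 */
	printf("skip group dquot? %d\n", skip_dquot(off, DQ_GROUP)); /* 0 */
	return 0;
}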
fs/xfs/xfs_mount.c (+2/-3)
···
 STATIC void	xfs_uuid_unmount(xfs_mount_t *mp);
 STATIC void	xfs_unmountfs_wait(xfs_mount_t *);

-static struct {
+static const struct {
 	short offset;
 	short type;	/* 0 = integer
 			 * 1 = binary / string (no translation)
···

 	xfs_iflush_all(mp);

-	XFS_QM_DQPURGEALL(mp,
-		XFS_QMOPT_UQUOTA | XFS_QMOPT_GQUOTA | XFS_QMOPT_UMOUNTING);
+	XFS_QM_DQPURGEALL(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_UMOUNTING);

 	/*
 	 * Flush out the log synchronously so that we know for sure
fs/xfs/xfs_mount.h (+1/-2)
···
 	xfs_buftarg_t		*m_ddev_targp;	/* saves taking the address */
 	xfs_buftarg_t		*m_logdev_targp;/* ptr to log device */
 	xfs_buftarg_t		*m_rtdev_targp;	/* ptr to rt device */
-#define m_dev		m_ddev_targp->pbr_dev
 	__uint8_t		m_dircook_elog;	/* log d-cookie entry bits */
 	__uint8_t		m_blkbit_log;	/* blocklog + NBBY */
 	__uint8_t		m_blkbb_log;	/* blocklog - BBSHIFT */
···
 						   user */
 #define XFS_MOUNT_NOALIGN	(1ULL << 7)	/* turn off stripe alignment
 						   allocations */
-#define XFS_MOUNT_COMPAT_ATTR	(1ULL << 8)	/* do not use attr2 format */
+#define XFS_MOUNT_ATTR2		(1ULL << 8)	/* allow use of attr2 format */
 			     /*	(1ULL << 9)	-- currently unused */
 #define XFS_MOUNT_NORECOVERY	(1ULL << 10)	/* no recovery - dirty fs */
 #define XFS_MOUNT_SHARED	(1ULL << 11)	/* shared mount */
fs/xfs/xfs_rename.c (+2/-5)
···
 	xfs_inode_t	*inodes[4];
 	int		target_ip_dropped = 0;	/* dropped target_ip link? */
 	vnode_t		*src_dir_vp;
-	bhv_desc_t	*target_dir_bdp;
 	int		spaceres;
 	int		target_link_zero = 0;
 	int		num_inodes;
···
 	 * Find the XFS behavior descriptor for the target directory
 	 * vnode since it was not handed to us.
 	 */
-	target_dir_bdp = vn_bhv_lookup_unlocked(VN_BHV_HEAD(target_dir_vp),
-						&xfs_vnodeops);
-	if (target_dir_bdp == NULL) {
+	target_dp = xfs_vtoi(target_dir_vp);
+	if (target_dp == NULL) {
 		return XFS_ERROR(EXDEV);
 	}

 	src_dp = XFS_BHVTOI(src_dir_bdp);
-	target_dp = XFS_BHVTOI(target_dir_bdp);
 	mp = src_dp->i_mount;

 	if (DM_EVENT_ENABLED(src_dir_vp->v_vfsp, src_dp, DM_EVENT_RENAME) ||
fs/xfs/xfs_rw.c (+4/-5)
···
 	}
 	return (EIO);
 }
+
 /*
  * Prints out an ALERT message about I/O error.
  */
···
 		 "I/O error in filesystem (\"%s\") meta-data dev %s block 0x%llx"
 		 " (\"%s\") error %d buf count %zd",
 		 (!mp || !mp->m_fsname) ? "(fs name not set)" : mp->m_fsname,
-		 XFS_BUFTARG_NAME(bp->pb_target),
-		 (__uint64_t)blkno,
-		 func,
-		 XFS_BUF_GETERROR(bp),
-		 XFS_BUF_COUNT(bp));
+		 XFS_BUFTARG_NAME(XFS_BUF_TARGET(bp)),
+		 (__uint64_t)blkno, func,
+		 XFS_BUF_GETERROR(bp), XFS_BUF_COUNT(bp));
 }

 /*
fs/xfs/xfs_sb.h
···
 	(XFS_SB_VERSION_NUMBITS | \
 	 XFS_SB_VERSION_OKREALFBITS | \
 	 XFS_SB_VERSION_OKSASHFBITS)
···

 /*
  * There are two words to hold XFS "feature" bits: the original
···
 #define	XFS_SB_VERSION2_OKREALBITS	\
 	(XFS_SB_VERSION2_OKREALFBITS |	\
 	 XFS_SB_VERSION2_OKSASHFBITS )
···

 typedef struct xfs_sb
 {
fs/xfs/xfs_trans.c (+8/-6)
···
 	xfs_log_item_t		*lip;
 	int			i;
 #endif
+	xfs_mount_t		*mp = tp->t_mountp;

 	/*
 	 * See if the caller is being too lazy to figure out if
···
 	 * filesystem.  This happens in paths where we detect
 	 * corruption and decide to give up.
 	 */
-	if ((tp->t_flags & XFS_TRANS_DIRTY) &&
-	    !XFS_FORCED_SHUTDOWN(tp->t_mountp))
-		xfs_force_shutdown(tp->t_mountp, XFS_CORRUPT_INCORE);
+	if ((tp->t_flags & XFS_TRANS_DIRTY) && !XFS_FORCED_SHUTDOWN(mp)) {
+		XFS_ERROR_REPORT("xfs_trans_cancel", XFS_ERRLEVEL_LOW, mp);
+		xfs_force_shutdown(mp, XFS_CORRUPT_INCORE);
+	}
 #ifdef DEBUG
 	if (!(flags & XFS_TRANS_ABORT)) {
 		licp = &(tp->t_items);
···
 		}

 		lip = lidp->lid_item;
-		if (!XFS_FORCED_SHUTDOWN(tp->t_mountp))
+		if (!XFS_FORCED_SHUTDOWN(mp))
 			ASSERT(!(lip->li_type == XFS_LI_EFD));
 		}
 		licp = licp->lic_next;
···
 	}
 #endif
 	xfs_trans_unreserve_and_mod_sb(tp);
-	XFS_TRANS_UNRESERVE_AND_MOD_DQUOTS(tp->t_mountp, tp);
+	XFS_TRANS_UNRESERVE_AND_MOD_DQUOTS(mp, tp);

 	if (tp->t_ticket) {
 		if (flags & XFS_TRANS_RELEASE_LOG_RES) {
···
 		} else {
 			log_flags = 0;
 		}
-		xfs_log_done(tp->t_mountp, tp->t_ticket, NULL, log_flags);
+		xfs_log_done(mp, tp->t_ticket, NULL, log_flags);
 	}

 	/* mark this thread as no longer being in a transaction */
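The xfs_trans.c hunks cache the mount pointer in a local and, when a transaction that has already dirtied metadata is cancelled on a still-healthy filesystem, report the error before forcing a shutdown. The toy model below illustrates that cancel-path behaviour; the types and the report/shutdown helpers are simplified stand-ins, not the XFS implementations.

/*
 * Sketch only: cancelling a dirty transaction reports once and forces
 * a filesystem shutdown, mirroring the shape of the hunk above.
 */
#include <stdbool.h>
#include <stdio.h>

#define TRANS_DIRTY 0x1

struct mount {
	const char *name;
	bool forced_shutdown;
};

struct trans {
	struct mount *mountp;
	unsigned int flags;
};

static void error_report(const char *where, struct mount *mp)
{
	fprintf(stderr, "internal error %s on %s\n", where, mp->name);
}

static void force_shutdown(struct mount *mp)
{
	mp->forced_shutdown = true;
}

static void trans_cancel(struct trans *tp)
{
	struct mount *mp = tp->mountp;	/* cache the mount, as in the patch */

	if ((tp->flags & TRANS_DIRTY) && !mp->forced_shutdown) {
		error_report("trans_cancel", mp);
		force_shutdown(mp);
	}
	/* ... unreserve log and quota reservations, free the transaction ... */
}

int main(void)
{
	struct mount mp = { .name = "testfs", .forced_shutdown = false };
	struct trans tp = { .mountp = &mp, .flags = TRANS_DIRTY };

	trans_cancel(&tp);
	printf("shutdown forced: %d\n", mp.forced_shutdown);	/* 1 */
	return 0;
}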