Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
at v2.6.26-rc7, 1586 lines, 40 kB

/*
 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
 * All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */
#include "xfs.h"
#include "xfs_bit.h"
#include "xfs_log.h"
#include "xfs_inum.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_dir2.h"
#include "xfs_trans.h"
#include "xfs_dmapi.h"
#include "xfs_mount.h"
#include "xfs_bmap_btree.h"
#include "xfs_alloc_btree.h"
#include "xfs_ialloc_btree.h"
#include "xfs_dir2_sf.h"
#include "xfs_attr_sf.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_alloc.h"
#include "xfs_btree.h"
#include "xfs_error.h"
#include "xfs_rw.h"
#include "xfs_iomap.h"
#include "xfs_vnodeops.h"
#include <linux/mpage.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>

STATIC void
xfs_count_page_state(
	struct page *page,
	int *delalloc,
	int *unmapped,
	int *unwritten)
{
	struct buffer_head *bh, *head;

	*delalloc = *unmapped = *unwritten = 0;

	bh = head = page_buffers(page);
	do {
		if (buffer_uptodate(bh) && !buffer_mapped(bh))
			(*unmapped) = 1;
		else if (buffer_unwritten(bh))
			(*unwritten) = 1;
		else if (buffer_delay(bh))
			(*delalloc) = 1;
	} while ((bh = bh->b_this_page) != head);
}

#if defined(XFS_RW_TRACE)
void
xfs_page_trace(
	int tag,
	struct inode *inode,
	struct page *page,
	unsigned long pgoff)
{
	xfs_inode_t *ip;
	bhv_vnode_t *vp = vn_from_inode(inode);
	loff_t isize = i_size_read(inode);
	loff_t offset = page_offset(page);
	int delalloc = -1, unmapped = -1, unwritten = -1;

	if (page_has_buffers(page))
		xfs_count_page_state(page, &delalloc, &unmapped, &unwritten);

	ip = xfs_vtoi(vp);
	if (!ip->i_rwtrace)
		return;

	ktrace_enter(ip->i_rwtrace,
		(void *)((unsigned long)tag),
		(void *)ip,
		(void *)inode,
		(void *)page,
		(void *)pgoff,
		(void *)((unsigned long)((ip->i_d.di_size >> 32) & 0xffffffff)),
		(void *)((unsigned long)(ip->i_d.di_size & 0xffffffff)),
		(void *)((unsigned long)((isize >> 32) & 0xffffffff)),
		(void *)((unsigned long)(isize & 0xffffffff)),
		(void *)((unsigned long)((offset >> 32) & 0xffffffff)),
		(void *)((unsigned long)(offset & 0xffffffff)),
		(void *)((unsigned long)delalloc),
		(void *)((unsigned long)unmapped),
		(void *)((unsigned long)unwritten),
		(void *)((unsigned long)current_pid()),
		(void *)NULL);
}
#else
#define xfs_page_trace(tag, inode, page, pgoff)
#endif

STATIC struct block_device *
xfs_find_bdev_for_inode(
	struct xfs_inode *ip)
{
	struct xfs_mount *mp = ip->i_mount;

	if (XFS_IS_REALTIME_INODE(ip))
		return mp->m_rtdev_targp->bt_bdev;
	else
		return mp->m_ddev_targp->bt_bdev;
}

/*
 * Schedule IO completion handling on a xfsdatad if this was
 * the final hold on this ioend. If we are asked to wait,
 * flush the workqueue.
 */
STATIC void
xfs_finish_ioend(
	xfs_ioend_t *ioend,
	int wait)
{
	if (atomic_dec_and_test(&ioend->io_remaining)) {
		queue_work(xfsdatad_workqueue, &ioend->io_work);
		if (wait)
			flush_workqueue(xfsdatad_workqueue);
	}
}

/*
 * We're now finished for good with this ioend structure.
 * Update the page state via the associated buffer_heads,
 * release holds on the inode and bio, and finally free
 * up memory.  Do not use the ioend after this.
 */
STATIC void
xfs_destroy_ioend(
	xfs_ioend_t *ioend)
{
	struct buffer_head *bh, *next;

	for (bh = ioend->io_buffer_head; bh; bh = next) {
		next = bh->b_private;
		bh->b_end_io(bh, !ioend->io_error);
	}
	if (unlikely(ioend->io_error)) {
		vn_ioerror(XFS_I(ioend->io_inode), ioend->io_error,
				__FILE__,__LINE__);
	}
	vn_iowake(XFS_I(ioend->io_inode));
	mempool_free(ioend, xfs_ioend_pool);
}

/*
 * Update on-disk file size now that data has been written to disk.
 * The current in-memory file size is i_size.  If a write is beyond
 * eof i_new_size will be the intended file size until i_size is
 * updated.  If this write does not extend all the way to the valid
 * file size then restrict this update to the end of the write.
 */
STATIC void
xfs_setfilesize(
	xfs_ioend_t *ioend)
{
	xfs_inode_t *ip = XFS_I(ioend->io_inode);
	xfs_fsize_t isize;
	xfs_fsize_t bsize;

	ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFREG);
	ASSERT(ioend->io_type != IOMAP_READ);

	if (unlikely(ioend->io_error))
		return;

	bsize = ioend->io_offset + ioend->io_size;

	xfs_ilock(ip, XFS_ILOCK_EXCL);

	isize = MAX(ip->i_size, ip->i_new_size);
	isize = MIN(isize, bsize);

	if (ip->i_d.di_size < isize) {
		ip->i_d.di_size = isize;
		ip->i_update_core = 1;
		ip->i_update_size = 1;
		mark_inode_dirty_sync(ioend->io_inode);
	}

	xfs_iunlock(ip, XFS_ILOCK_EXCL);
}

/*
 * Buffered IO write completion for delayed allocate extents.
 */
STATIC void
xfs_end_bio_delalloc(
	struct work_struct *work)
{
	xfs_ioend_t *ioend =
		container_of(work, xfs_ioend_t, io_work);

	xfs_setfilesize(ioend);
	xfs_destroy_ioend(ioend);
}

/*
 * Buffered IO write completion for regular, written extents.
 */
STATIC void
xfs_end_bio_written(
	struct work_struct *work)
{
	xfs_ioend_t *ioend =
		container_of(work, xfs_ioend_t, io_work);

	xfs_setfilesize(ioend);
	xfs_destroy_ioend(ioend);
}

/*
 * IO write completion for unwritten extents.
 *
 * Issue transactions to convert a buffer range from unwritten
 * to written extents.
 */
STATIC void
xfs_end_bio_unwritten(
	struct work_struct *work)
{
	xfs_ioend_t *ioend =
		container_of(work, xfs_ioend_t, io_work);
	struct xfs_inode *ip = XFS_I(ioend->io_inode);
	xfs_off_t offset = ioend->io_offset;
	size_t size = ioend->io_size;

	if (likely(!ioend->io_error)) {
		if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
			int error;
			error = xfs_iomap_write_unwritten(ip, offset, size);
			if (error)
				ioend->io_error = error;
		}
		xfs_setfilesize(ioend);
	}
	xfs_destroy_ioend(ioend);
}
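
/*
 * Worked example for xfs_setfilesize() above (illustrative numbers):
 * suppose the in-core size ip->i_size is 100k, an extending write in
 * flight has set ip->i_new_size to 104k, and this ioend covers
 * [96k, 104k).  Then bsize = 104k and
 * isize = MIN(MAX(100k, 104k), 104k) = 104k, so di_size is raised to
 * 104k.  If instead the ioend only covered [96k, 100k), isize would be
 * clamped to 100k, keeping the on-disk size within the data actually
 * written.
 */
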
/*
 * IO read completion for regular, written extents.
 */
STATIC void
xfs_end_bio_read(
	struct work_struct *work)
{
	xfs_ioend_t *ioend =
		container_of(work, xfs_ioend_t, io_work);

	xfs_destroy_ioend(ioend);
}

/*
 * Allocate and initialise an IO completion structure.
 * We need to track unwritten extent write completion here initially.
 * We'll need to extend this for updating the ondisk inode size later
 * (vs. incore size).
 */
STATIC xfs_ioend_t *
xfs_alloc_ioend(
	struct inode *inode,
	unsigned int type)
{
	xfs_ioend_t *ioend;

	ioend = mempool_alloc(xfs_ioend_pool, GFP_NOFS);

	/*
	 * Set the count to 1 initially, which will prevent an I/O
	 * completion callback from happening before we have started
	 * all the I/O from calling the completion routine too early.
	 */
	atomic_set(&ioend->io_remaining, 1);
	ioend->io_error = 0;
	ioend->io_list = NULL;
	ioend->io_type = type;
	ioend->io_inode = inode;
	ioend->io_buffer_head = NULL;
	ioend->io_buffer_tail = NULL;
	atomic_inc(&XFS_I(ioend->io_inode)->i_iocount);
	ioend->io_offset = 0;
	ioend->io_size = 0;

	if (type == IOMAP_UNWRITTEN)
		INIT_WORK(&ioend->io_work, xfs_end_bio_unwritten);
	else if (type == IOMAP_DELAY)
		INIT_WORK(&ioend->io_work, xfs_end_bio_delalloc);
	else if (type == IOMAP_READ)
		INIT_WORK(&ioend->io_work, xfs_end_bio_read);
	else
		INIT_WORK(&ioend->io_work, xfs_end_bio_written);

	return ioend;
}
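
/*
 * Lifecycle of the io_remaining count, as used by the functions in this
 * file: xfs_alloc_ioend() starts it at 1, xfs_submit_ioend_bio() takes an
 * extra reference for every bio issued against the ioend, xfs_end_bio()
 * drops one reference as each bio completes, and xfs_submit_ioend() drops
 * the initial reference once all bios have been issued.  Whichever of
 * those drops brings the count to zero queues io_work on
 * xfsdatad_workqueue via xfs_finish_ioend().
 */
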
STATIC int
xfs_map_blocks(
	struct inode *inode,
	loff_t offset,
	ssize_t count,
	xfs_iomap_t *mapp,
	int flags)
{
	xfs_inode_t *ip = XFS_I(inode);
	int error, nmaps = 1;

	error = xfs_iomap(ip, offset, count,
				flags, mapp, &nmaps);
	if (!error && (flags & (BMAPI_WRITE|BMAPI_ALLOCATE)))
		xfs_iflags_set(ip, XFS_IMODIFIED);
	return -error;
}

STATIC_INLINE int
xfs_iomap_valid(
	xfs_iomap_t *iomapp,
	loff_t offset)
{
	return offset >= iomapp->iomap_offset &&
		offset < iomapp->iomap_offset + iomapp->iomap_bsize;
}

/*
 * BIO completion handler for buffered IO.
 */
STATIC void
xfs_end_bio(
	struct bio *bio,
	int error)
{
	xfs_ioend_t *ioend = bio->bi_private;

	ASSERT(atomic_read(&bio->bi_cnt) >= 1);
	ioend->io_error = test_bit(BIO_UPTODATE, &bio->bi_flags) ? 0 : error;

	/* Toss bio and pass work off to an xfsdatad thread */
	bio->bi_private = NULL;
	bio->bi_end_io = NULL;
	bio_put(bio);

	xfs_finish_ioend(ioend, 0);
}

STATIC void
xfs_submit_ioend_bio(
	xfs_ioend_t *ioend,
	struct bio *bio)
{
	atomic_inc(&ioend->io_remaining);

	bio->bi_private = ioend;
	bio->bi_end_io = xfs_end_bio;

	submit_bio(WRITE, bio);
	ASSERT(!bio_flagged(bio, BIO_EOPNOTSUPP));
	bio_put(bio);
}

STATIC struct bio *
xfs_alloc_ioend_bio(
	struct buffer_head *bh)
{
	struct bio *bio;
	int nvecs = bio_get_nr_vecs(bh->b_bdev);

	do {
		bio = bio_alloc(GFP_NOIO, nvecs);
		nvecs >>= 1;
	} while (!bio);

	ASSERT(bio->bi_private == NULL);
	bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9);
	bio->bi_bdev = bh->b_bdev;
	bio_get(bio);
	return bio;
}

STATIC void
xfs_start_buffer_writeback(
	struct buffer_head *bh)
{
	ASSERT(buffer_mapped(bh));
	ASSERT(buffer_locked(bh));
	ASSERT(!buffer_delay(bh));
	ASSERT(!buffer_unwritten(bh));

	mark_buffer_async_write(bh);
	set_buffer_uptodate(bh);
	clear_buffer_dirty(bh);
}

STATIC void
xfs_start_page_writeback(
	struct page *page,
	struct writeback_control *wbc,
	int clear_dirty,
	int buffers)
{
	ASSERT(PageLocked(page));
	ASSERT(!PageWriteback(page));
	if (clear_dirty)
		clear_page_dirty_for_io(page);
	set_page_writeback(page);
	unlock_page(page);
	/* If no buffers on the page are to be written, finish it here */
	if (!buffers)
		end_page_writeback(page);
}

static inline int bio_add_buffer(struct bio *bio, struct buffer_head *bh)
{
	return bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
}

/*
 * Submit all of the bios for all of the ioends we have saved up, covering the
 * initial writepage page and also any probed pages.
 *
 * Because we may have multiple ioends spanning a page, we need to start
 * writeback on all the buffers before we submit them for I/O. If we mark the
 * buffers as we go, then we can end up with a page that only has some buffers
 * marked async write, and I/O completion on those can occur before we mark the
 * other buffers async write.
 *
 * The end result of this is that we trip a bug in end_page_writeback() because
 * we call it twice for the one page as the code in end_buffer_async_write()
 * assumes that all buffers on the page are started at the same time.
 *
 * The fix is two passes across the ioend list - one to start writeback on the
 * buffer_heads, and then submit them for I/O on the second pass.
 */
STATIC void
xfs_submit_ioend(
	xfs_ioend_t *ioend)
{
	xfs_ioend_t *head = ioend;
	xfs_ioend_t *next;
	struct buffer_head *bh;
	struct bio *bio;
	sector_t lastblock = 0;

	/* Pass 1 - start writeback */
	do {
		next = ioend->io_list;
		for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) {
			xfs_start_buffer_writeback(bh);
		}
	} while ((ioend = next) != NULL);

	/* Pass 2 - submit I/O */
	ioend = head;
	do {
		next = ioend->io_list;
		bio = NULL;

		for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) {

			if (!bio) {
 retry:
				bio = xfs_alloc_ioend_bio(bh);
			} else if (bh->b_blocknr != lastblock + 1) {
				xfs_submit_ioend_bio(ioend, bio);
				goto retry;
			}

			if (bio_add_buffer(bio, bh) != bh->b_size) {
				xfs_submit_ioend_bio(ioend, bio);
				goto retry;
			}

			lastblock = bh->b_blocknr;
		}
		if (bio)
			xfs_submit_ioend_bio(ioend, bio);
		xfs_finish_ioend(ioend, 0);
	} while ((ioend = next) != NULL);
}
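
/*
 * Illustration of the bio chaining above (hypothetical block numbers):
 * if an ioend's buffers map to disk blocks 100, 101, 102 and 200, the
 * first three are merged into one bio because each satisfies
 * bh->b_blocknr == lastblock + 1; the jump to block 200 fails that test,
 * so the current bio is submitted and a new one started.  A bio is also
 * submitted early when bio_add_buffer() cannot take the whole buffer,
 * i.e. when the bio's vector is already full.
 */
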
/*
 * Cancel submission of all buffer_heads so far in this endio.
 * Toss the endio too. Only ever called for the initial page
 * in a writepage request, so only ever one page.
 */
STATIC void
xfs_cancel_ioend(
	xfs_ioend_t *ioend)
{
	xfs_ioend_t *next;
	struct buffer_head *bh, *next_bh;

	do {
		next = ioend->io_list;
		bh = ioend->io_buffer_head;
		do {
			next_bh = bh->b_private;
			clear_buffer_async_write(bh);
			unlock_buffer(bh);
		} while ((bh = next_bh) != NULL);

		vn_iowake(XFS_I(ioend->io_inode));
		mempool_free(ioend, xfs_ioend_pool);
	} while ((ioend = next) != NULL);
}

/*
 * Test to see if we've been building up a completion structure for
 * earlier buffers -- if so, we try to append to this ioend if we
 * can, otherwise we finish off any current ioend and start another.
 * Return true if we've finished the given ioend.
 */
STATIC void
xfs_add_to_ioend(
	struct inode *inode,
	struct buffer_head *bh,
	xfs_off_t offset,
	unsigned int type,
	xfs_ioend_t **result,
	int need_ioend)
{
	xfs_ioend_t *ioend = *result;

	if (!ioend || need_ioend || type != ioend->io_type) {
		xfs_ioend_t *previous = *result;

		ioend = xfs_alloc_ioend(inode, type);
		ioend->io_offset = offset;
		ioend->io_buffer_head = bh;
		ioend->io_buffer_tail = bh;
		if (previous)
			previous->io_list = ioend;
		*result = ioend;
	} else {
		ioend->io_buffer_tail->b_private = bh;
		ioend->io_buffer_tail = bh;
	}

	bh->b_private = NULL;
	ioend->io_size += bh->b_size;
}

STATIC void
xfs_map_buffer(
	struct buffer_head *bh,
	xfs_iomap_t *mp,
	xfs_off_t offset,
	uint block_bits)
{
	sector_t bn;

	ASSERT(mp->iomap_bn != IOMAP_DADDR_NULL);

	bn = (mp->iomap_bn >> (block_bits - BBSHIFT)) +
	      ((offset - mp->iomap_offset) >> block_bits);

	ASSERT(bn || (mp->iomap_flags & IOMAP_REALTIME));

	bh->b_blocknr = bn;
	set_buffer_mapped(bh);
}

STATIC void
xfs_map_at_offset(
	struct buffer_head *bh,
	loff_t offset,
	int block_bits,
	xfs_iomap_t *iomapp)
{
	ASSERT(!(iomapp->iomap_flags & IOMAP_HOLE));
	ASSERT(!(iomapp->iomap_flags & IOMAP_DELAY));

	lock_buffer(bh);
	xfs_map_buffer(bh, iomapp, offset, block_bits);
	bh->b_bdev = iomapp->iomap_target->bt_bdev;
	set_buffer_mapped(bh);
	clear_buffer_delay(bh);
	clear_buffer_unwritten(bh);
}

/*
 * Look for a page at index that is suitable for clustering.
 */
STATIC unsigned int
xfs_probe_page(
	struct page *page,
	unsigned int pg_offset,
	int mapped)
{
	int ret = 0;

	if (PageWriteback(page))
		return 0;

	if (page->mapping && PageDirty(page)) {
		if (page_has_buffers(page)) {
			struct buffer_head *bh, *head;

			bh = head = page_buffers(page);
			do {
				if (!buffer_uptodate(bh))
					break;
				if (mapped != buffer_mapped(bh))
					break;
				ret += bh->b_size;
				if (ret >= pg_offset)
					break;
			} while ((bh = bh->b_this_page) != head);
		} else
			ret = mapped ?
				0 : PAGE_CACHE_SIZE;
	}

	return ret;
}

STATIC size_t
xfs_probe_cluster(
	struct inode *inode,
	struct page *startpage,
	struct buffer_head *bh,
	struct buffer_head *head,
	int mapped)
{
	struct pagevec pvec;
	pgoff_t tindex, tlast, tloff;
	size_t total = 0;
	int done = 0, i;

	/* First sum forwards in this page */
	do {
		if (!buffer_uptodate(bh) || (mapped != buffer_mapped(bh)))
			return total;
		total += bh->b_size;
	} while ((bh = bh->b_this_page) != head);

	/* if we reached the end of the page, sum forwards in following pages */
	tlast = i_size_read(inode) >> PAGE_CACHE_SHIFT;
	tindex = startpage->index + 1;

	/* Prune this back to avoid pathological behavior */
	tloff = min(tlast, startpage->index + 64);

	pagevec_init(&pvec, 0);
	while (!done && tindex <= tloff) {
		unsigned len = min_t(pgoff_t, PAGEVEC_SIZE, tlast - tindex + 1);

		if (!pagevec_lookup(&pvec, inode->i_mapping, tindex, len))
			break;

		for (i = 0; i < pagevec_count(&pvec); i++) {
			struct page *page = pvec.pages[i];
			size_t pg_offset, pg_len = 0;

			if (tindex == tlast) {
				pg_offset =
				    i_size_read(inode) & (PAGE_CACHE_SIZE - 1);
				if (!pg_offset) {
					done = 1;
					break;
				}
			} else
				pg_offset = PAGE_CACHE_SIZE;

			if (page->index == tindex && !TestSetPageLocked(page)) {
				pg_len = xfs_probe_page(page, pg_offset, mapped);
				unlock_page(page);
			}

			if (!pg_len) {
				done = 1;
				break;
			}

			total += pg_len;
			tindex++;
		}

		pagevec_release(&pvec);
		cond_resched();
	}

	return total;
}

/*
 * Test if a given page is suitable for writing as part of an unwritten
 * or delayed allocate extent.
 */
STATIC int
xfs_is_delayed_page(
	struct page *page,
	unsigned int type)
{
	if (PageWriteback(page))
		return 0;

	if (page->mapping && page_has_buffers(page)) {
		struct buffer_head *bh, *head;
		int acceptable = 0;

		bh = head = page_buffers(page);
		do {
			if (buffer_unwritten(bh))
				acceptable = (type == IOMAP_UNWRITTEN);
			else if (buffer_delay(bh))
				acceptable = (type == IOMAP_DELAY);
			else if (buffer_dirty(bh) && buffer_mapped(bh))
				acceptable = (type == IOMAP_NEW);
			else
				break;
		} while ((bh = bh->b_this_page) != head);

		if (acceptable)
			return 1;
	}

	return 0;
}

/*
 * Allocate & map buffers for page given the extent map. Write it out.
 * Except for the original page of a writepage, this is called on
 * delalloc/unwritten pages only, for the original page it is possible
 * that the page has no mapping at all.
 */
STATIC int
xfs_convert_page(
	struct inode *inode,
	struct page *page,
	loff_t tindex,
	xfs_iomap_t *mp,
	xfs_ioend_t **ioendp,
	struct writeback_control *wbc,
	int startio,
	int all_bh)
{
	struct buffer_head *bh, *head;
	xfs_off_t end_offset;
	unsigned long p_offset;
	unsigned int type;
	int bbits = inode->i_blkbits;
	int len, page_dirty;
	int count = 0, done = 0, uptodate = 1;
	xfs_off_t offset = page_offset(page);

	if (page->index != tindex)
		goto fail;
	if (TestSetPageLocked(page))
		goto fail;
	if (PageWriteback(page))
		goto fail_unlock_page;
	if (page->mapping != inode->i_mapping)
		goto fail_unlock_page;
	if (!xfs_is_delayed_page(page, (*ioendp)->io_type))
		goto fail_unlock_page;

	/*
	 * page_dirty is initially a count of buffers on the page before
	 * EOF and is decremented as we move each into a cleanable state.
	 *
	 * Derivation:
	 *
	 * End offset is the highest offset that this page should represent.
	 * If we are on the last page, (end_offset & (PAGE_CACHE_SIZE - 1))
	 * will evaluate non-zero and be less than PAGE_CACHE_SIZE and
	 * hence give us the correct page_dirty count. On any other page,
	 * it will be zero and in that case we need page_dirty to be the
	 * count of buffers on the page.
	 */
	end_offset = min_t(unsigned long long,
			(xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT,
			i_size_read(inode));

	len = 1 << inode->i_blkbits;
	p_offset = min_t(unsigned long, end_offset & (PAGE_CACHE_SIZE - 1),
					PAGE_CACHE_SIZE);
	p_offset = p_offset ? roundup(p_offset, len) : PAGE_CACHE_SIZE;
	page_dirty = p_offset / len;

	bh = head = page_buffers(page);
	do {
		if (offset >= end_offset)
			break;
		if (!buffer_uptodate(bh))
			uptodate = 0;
		if (!(PageUptodate(page) || buffer_uptodate(bh))) {
			done = 1;
			continue;
		}

		if (buffer_unwritten(bh) || buffer_delay(bh)) {
			if (buffer_unwritten(bh))
				type = IOMAP_UNWRITTEN;
			else
				type = IOMAP_DELAY;

			if (!xfs_iomap_valid(mp, offset)) {
				done = 1;
				continue;
			}

			ASSERT(!(mp->iomap_flags & IOMAP_HOLE));
			ASSERT(!(mp->iomap_flags & IOMAP_DELAY));

			xfs_map_at_offset(bh, offset, bbits, mp);
			if (startio) {
				xfs_add_to_ioend(inode, bh, offset,
						type, ioendp, done);
			} else {
				set_buffer_dirty(bh);
				unlock_buffer(bh);
				mark_buffer_dirty(bh);
			}
			page_dirty--;
			count++;
		} else {
			type = IOMAP_NEW;
			if (buffer_mapped(bh) && all_bh && startio) {
				lock_buffer(bh);
				xfs_add_to_ioend(inode, bh, offset,
						type, ioendp, done);
				count++;
				page_dirty--;
			} else {
				done = 1;
			}
		}
	} while (offset += len, (bh = bh->b_this_page) != head);

	if (uptodate && bh == head)
		SetPageUptodate(page);

	if (startio) {
		if (count) {
			struct backing_dev_info *bdi;

			bdi = inode->i_mapping->backing_dev_info;
			wbc->nr_to_write--;
			if (bdi_write_congested(bdi)) {
				wbc->encountered_congestion = 1;
				done = 1;
			} else if (wbc->nr_to_write <= 0) {
				done = 1;
			}
		}
		xfs_start_page_writeback(page, wbc, !page_dirty, count);
	}

	return done;
 fail_unlock_page:
	unlock_page(page);
 fail:
	return 1;
}
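
/*
 * Worked example of the page_dirty derivation above, assuming 4k pages
 * and 1k blocks (hypothetical sizes): on the last page of a file whose
 * size ends 2600 bytes into the page, end_offset & (PAGE_CACHE_SIZE - 1)
 * is 2600, so p_offset = roundup(2600, 1024) = 3072 and page_dirty = 3,
 * the three buffers that lie before EOF.  On any earlier page the mask
 * yields 0, p_offset becomes PAGE_CACHE_SIZE and page_dirty = 4, i.e.
 * every buffer on the page.
 */
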
/*
 * Convert & write out a cluster of pages in the same extent as defined
 * by mp and following the start page.
 */
STATIC void
xfs_cluster_write(
	struct inode *inode,
	pgoff_t tindex,
	xfs_iomap_t *iomapp,
	xfs_ioend_t **ioendp,
	struct writeback_control *wbc,
	int startio,
	int all_bh,
	pgoff_t tlast)
{
	struct pagevec pvec;
	int done = 0, i;

	pagevec_init(&pvec, 0);
	while (!done && tindex <= tlast) {
		unsigned len = min_t(pgoff_t, PAGEVEC_SIZE, tlast - tindex + 1);

		if (!pagevec_lookup(&pvec, inode->i_mapping, tindex, len))
			break;

		for (i = 0; i < pagevec_count(&pvec); i++) {
			done = xfs_convert_page(inode, pvec.pages[i], tindex++,
					iomapp, ioendp, wbc, startio, all_bh);
			if (done)
				break;
		}

		pagevec_release(&pvec);
		cond_resched();
	}
}

/*
 * Calling this without startio set means we are being asked to make a dirty
 * page ready for freeing its buffers.  When called with startio set then
 * we are coming from writepage.
 *
 * When called with startio set it is important that we write the WHOLE
 * page if possible.
 * The bh->b_state's cannot know if any of the blocks or which block for
 * that matter are dirty due to mmap writes, and therefore bh uptodate is
 * only valid if the page itself isn't completely uptodate.  Some layers
 * may clear the page dirty flag prior to calling writepage, under the
 * assumption the entire page will be written out; by not writing out the
 * whole page the page can be reused before all valid dirty data is
 * written out.  Note: in the case of a page that has been dirtied by
 * mmap write but only partially set up by block_prepare_write the
 * bh->b_state's will not agree and only the ones set up by BPW/BCW will
 * have valid state, thus the whole page must be written out.
 */

STATIC int
xfs_page_state_convert(
	struct inode *inode,
	struct page *page,
	struct writeback_control *wbc,
	int startio,
	int unmapped) /* also implies page uptodate */
{
	struct buffer_head *bh, *head;
	xfs_iomap_t iomap;
	xfs_ioend_t *ioend = NULL, *iohead = NULL;
	loff_t offset;
	unsigned long p_offset = 0;
	unsigned int type;
	__uint64_t end_offset;
	pgoff_t end_index, last_index, tlast;
	ssize_t size, len;
	int flags, err, iomap_valid = 0, uptodate = 1;
	int page_dirty, count = 0;
	int trylock = 0;
	int all_bh = unmapped;

	if (startio) {
		if (wbc->sync_mode == WB_SYNC_NONE && wbc->nonblocking)
			trylock |= BMAPI_TRYLOCK;
	}

	/* Is this page beyond the end of the file? */
	offset = i_size_read(inode);
	end_index = offset >> PAGE_CACHE_SHIFT;
	last_index = (offset - 1) >> PAGE_CACHE_SHIFT;
	if (page->index >= end_index) {
		if ((page->index >= end_index + 1) ||
		    !(i_size_read(inode) & (PAGE_CACHE_SIZE - 1))) {
			if (startio)
				unlock_page(page);
			return 0;
		}
	}

	/*
	 * page_dirty is initially a count of buffers on the page before
	 * EOF and is decremented as we move each into a cleanable state.
	 *
	 * Derivation:
	 *
	 * End offset is the highest offset that this page should represent.
	 * If we are on the last page, (end_offset & (PAGE_CACHE_SIZE - 1))
	 * will evaluate non-zero and be less than PAGE_CACHE_SIZE and
	 * hence give us the correct page_dirty count. On any other page,
	 * it will be zero and in that case we need page_dirty to be the
	 * count of buffers on the page.
	 */
	end_offset = min_t(unsigned long long,
			(xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT, offset);
	len = 1 << inode->i_blkbits;
	p_offset = min_t(unsigned long, end_offset & (PAGE_CACHE_SIZE - 1),
					PAGE_CACHE_SIZE);
	p_offset = p_offset ? roundup(p_offset, len) : PAGE_CACHE_SIZE;
	page_dirty = p_offset / len;

	bh = head = page_buffers(page);
	offset = page_offset(page);
	flags = BMAPI_READ;
	type = IOMAP_NEW;

	/* TODO: cleanup count and page_dirty */

	do {
		if (offset >= end_offset)
			break;
		if (!buffer_uptodate(bh))
			uptodate = 0;
		if (!(PageUptodate(page) || buffer_uptodate(bh)) && !startio) {
			/*
			 * the iomap is actually still valid, but the ioend
			 * isn't.  shouldn't happen too often.
			 */
			iomap_valid = 0;
			continue;
		}

		if (iomap_valid)
			iomap_valid = xfs_iomap_valid(&iomap, offset);

		/*
		 * First case, map an unwritten extent and prepare for
		 * extent state conversion transaction on completion.
		 *
		 * Second case, allocate space for a delalloc buffer.
		 * We can return EAGAIN here in the release page case.
		 *
		 * Third case, an unmapped buffer was found, and we are
		 * in a path where we need to write the whole page out.
		 */
		if (buffer_unwritten(bh) || buffer_delay(bh) ||
		    ((buffer_uptodate(bh) || PageUptodate(page)) &&
		     !buffer_mapped(bh) && (unmapped || startio))) {
			int new_ioend = 0;

			/*
			 * Make sure we don't use a read-only iomap
			 */
			if (flags == BMAPI_READ)
				iomap_valid = 0;

			if (buffer_unwritten(bh)) {
				type = IOMAP_UNWRITTEN;
				flags = BMAPI_WRITE | BMAPI_IGNSTATE;
			} else if (buffer_delay(bh)) {
				type = IOMAP_DELAY;
				flags = BMAPI_ALLOCATE | trylock;
			} else {
				type = IOMAP_NEW;
				flags = BMAPI_WRITE | BMAPI_MMAP;
			}

			if (!iomap_valid) {
				/*
				 * if we didn't have a valid mapping then we
				 * need to ensure that we put the new mapping
				 * in a new ioend structure. This needs to be
				 * done to ensure that the ioends correctly
				 * reflect the block mappings at io completion
				 * for unwritten extent conversion.
				 */
				new_ioend = 1;
				if (type == IOMAP_NEW) {
					size = xfs_probe_cluster(inode,
							page, bh, head, 0);
				} else {
					size = len;
				}

				err = xfs_map_blocks(inode, offset, size,
						&iomap, flags);
				if (err)
					goto error;
				iomap_valid = xfs_iomap_valid(&iomap, offset);
			}
			if (iomap_valid) {
				xfs_map_at_offset(bh, offset,
						inode->i_blkbits, &iomap);
				if (startio) {
					xfs_add_to_ioend(inode, bh, offset,
							type, &ioend,
							new_ioend);
				} else {
					set_buffer_dirty(bh);
					unlock_buffer(bh);
					mark_buffer_dirty(bh);
				}
				page_dirty--;
				count++;
			}
		} else if (buffer_uptodate(bh) && startio) {
			/*
			 * we got here because the buffer is already mapped.
			 * That means it must already have extents allocated
			 * underneath it. Map the extent by reading it.
			 */
			if (!iomap_valid || flags != BMAPI_READ) {
				flags = BMAPI_READ;
				size = xfs_probe_cluster(inode, page, bh,
								head, 1);
				err = xfs_map_blocks(inode, offset, size,
						&iomap, flags);
				if (err)
					goto error;
				iomap_valid = xfs_iomap_valid(&iomap, offset);
			}

			/*
			 * We set the type to IOMAP_NEW in case we are doing a
			 * small write at EOF that is extending the file but
			 * without needing an allocation. We need to update the
			 * file size on I/O completion in this case so it is
			 * the same case as having just allocated a new extent
			 * that we are writing into for the first time.
			 */
			type = IOMAP_NEW;
			if (!test_and_set_bit(BH_Lock, &bh->b_state)) {
				ASSERT(buffer_mapped(bh));
				if (iomap_valid)
					all_bh = 1;
				xfs_add_to_ioend(inode, bh, offset, type,
						&ioend, !iomap_valid);
				page_dirty--;
				count++;
			} else {
				iomap_valid = 0;
			}
		} else if ((buffer_uptodate(bh) || PageUptodate(page)) &&
			   (unmapped || startio)) {
			iomap_valid = 0;
		}

		if (!iohead)
			iohead = ioend;

	} while (offset += len, ((bh = bh->b_this_page) != head));

	if (uptodate && bh == head)
		SetPageUptodate(page);

	if (startio)
		xfs_start_page_writeback(page, wbc, 1, count);

	if (ioend && iomap_valid) {
		offset = (iomap.iomap_offset + iomap.iomap_bsize - 1) >>
					PAGE_CACHE_SHIFT;
		tlast = min_t(pgoff_t, offset, last_index);
		xfs_cluster_write(inode, page->index + 1, &iomap, &ioend,
					wbc, startio, all_bh, tlast);
	}

	if (iohead)
		xfs_submit_ioend(iohead);

	return page_dirty;

error:
	if (iohead)
		xfs_cancel_ioend(iohead);

	/*
	 * If it's delalloc and we have nowhere to put it,
	 * throw it away, unless the lower layers told
	 * us to try again.
	 */
	if (err != -EAGAIN) {
		if (!unmapped)
			block_invalidatepage(page, 0);
		ClearPageUptodate(page);
	}
	return err;
}

/*
 * writepage: Called from one of two places:
 *
 * 1. we are flushing a delalloc buffer head.
 *
 * 2. we are writing out a dirty page. Typically the page dirty
 *    state is cleared before we get here. In this case it is
 *    conceivable we have no buffer heads.
 *
 * For delalloc space on the page we need to allocate space and
 * flush it. For unmapped buffer heads on the page we should
 * allocate space if the page is uptodate. For any other dirty
 * buffer heads on the page we should flush them.
 *
 * If we detect that a transaction would be required to flush
 * the page, we have to check the process flags first, if we
 * are already in a transaction or disk I/O during allocations
 * is off, we need to fail the writepage and redirty the page.
 */

STATIC int
xfs_vm_writepage(
	struct page *page,
	struct writeback_control *wbc)
{
	int error;
	int need_trans;
	int delalloc, unmapped, unwritten;
	struct inode *inode = page->mapping->host;

	xfs_page_trace(XFS_WRITEPAGE_ENTER, inode, page, 0);

	/*
	 * We need a transaction if:
	 * 1. There are delalloc buffers on the page
	 * 2. The page is uptodate and we have unmapped buffers
	 * 3. The page is uptodate and we have no buffers
	 * 4. There are unwritten buffers on the page
	 */

	if (!page_has_buffers(page)) {
		unmapped = 1;
		need_trans = 1;
	} else {
		xfs_count_page_state(page, &delalloc, &unmapped, &unwritten);
		if (!PageUptodate(page))
			unmapped = 0;
		need_trans = delalloc + unmapped + unwritten;
	}

	/*
	 * If we need a transaction and the process flags say
	 * we are already in a transaction, or no IO is allowed
	 * then mark the page dirty again and leave the page
	 * as is.
	 */
	if (current_test_flags(PF_FSTRANS) && need_trans)
		goto out_fail;

	/*
	 * Delay hooking up buffer heads until we have
	 * made our go/no-go decision.
	 */
	if (!page_has_buffers(page))
		create_empty_buffers(page, 1 << inode->i_blkbits, 0);

	/*
	 * Convert delayed allocate, unwritten or unmapped space
	 * to real space and flush out to disk.
	 */
	error = xfs_page_state_convert(inode, page, wbc, 1, unmapped);
	if (error == -EAGAIN)
		goto out_fail;
	if (unlikely(error < 0))
		goto out_unlock;

	return 0;

out_fail:
	redirty_page_for_writepage(wbc, page);
	unlock_page(page);
	return 0;
out_unlock:
	unlock_page(page);
	return error;
}

STATIC int
xfs_vm_writepages(
	struct address_space *mapping,
	struct writeback_control *wbc)
{
	xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED);
	return generic_writepages(mapping, wbc);
}

/*
 * Called to move a page into cleanable state - and from there
 * to be released. Possibly the page is already clean. We always
 * have buffer heads in this call.
 *
 * Returns 0 if the page is ok to release, 1 otherwise.
 *
 * Possible scenarios are:
 *
 * 1. We are being called to release a page which has been written
 *    to via regular I/O. buffer heads will be dirty and possibly
 *    delalloc. If no delalloc buffer heads in this case then we
 *    can just return zero.
 *
 * 2. We are called to release a page which has been written via
 *    mmap, all we need to do is ensure there is no delalloc
 *    state in the buffer heads, if not we can let the caller
 *    free them and we should come back later via writepage.
 */
STATIC int
xfs_vm_releasepage(
	struct page *page,
	gfp_t gfp_mask)
{
	struct inode *inode = page->mapping->host;
	int dirty, delalloc, unmapped, unwritten;
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_ALL,
		.nr_to_write = 1,
	};

	xfs_page_trace(XFS_RELEASEPAGE_ENTER, inode, page, 0);

	if (!page_has_buffers(page))
		return 0;

	xfs_count_page_state(page, &delalloc, &unmapped, &unwritten);
	if (!delalloc && !unwritten)
		goto free_buffers;

	if (!(gfp_mask & __GFP_FS))
		return 0;

	/* If we are already inside a transaction or the thread cannot
	 * do I/O, we cannot release this page.
	 */
	if (current_test_flags(PF_FSTRANS))
		return 0;

	/*
	 * Convert delalloc space to real space, do not flush the
	 * data out to disk, that will be done by the caller.
	 * Never need to allocate space here - we will always
	 * come back to writepage in that case.
	 */
	dirty = xfs_page_state_convert(inode, page, &wbc, 0, 0);
	if (dirty == 0 && !unwritten)
		goto free_buffers;
	return 0;

free_buffers:
	return try_to_free_buffers(page);
}

STATIC int
__xfs_get_blocks(
	struct inode *inode,
	sector_t iblock,
	struct buffer_head *bh_result,
	int create,
	int direct,
	bmapi_flags_t flags)
{
	xfs_iomap_t iomap;
	xfs_off_t offset;
	ssize_t size;
	int niomap = 1;
	int error;

	offset = (xfs_off_t)iblock << inode->i_blkbits;
	ASSERT(bh_result->b_size >= (1 << inode->i_blkbits));
	size = bh_result->b_size;
	error = xfs_iomap(XFS_I(inode), offset, size,
			  create ? flags : BMAPI_READ, &iomap, &niomap);
	if (error)
		return -error;
	if (niomap == 0)
		return 0;

	if (iomap.iomap_bn != IOMAP_DADDR_NULL) {
		/*
		 * For unwritten extents do not report a disk address on
		 * the read case (treat as if we're reading into a hole).
		 */
		if (create || !(iomap.iomap_flags & IOMAP_UNWRITTEN)) {
			xfs_map_buffer(bh_result, &iomap, offset,
				       inode->i_blkbits);
		}
		if (create && (iomap.iomap_flags & IOMAP_UNWRITTEN)) {
			if (direct)
				bh_result->b_private = inode;
			set_buffer_unwritten(bh_result);
		}
	}

	/*
	 * If this is a realtime file, data may be on a different device
	 * to that pointed to from the buffer_head b_bdev currently.
	 */
	bh_result->b_bdev = iomap.iomap_target->bt_bdev;

	/*
	 * If we previously allocated a block out beyond eof and we are now
	 * coming back to use it then we will need to flag it as new even if it
	 * has a disk address.
	 *
	 * With sub-block writes into unwritten extents we also need to mark
	 * the buffer as new so that the unwritten parts of the buffer get
	 * correctly zeroed.
	 */
	if (create &&
	    ((!buffer_mapped(bh_result) && !buffer_uptodate(bh_result)) ||
	     (offset >= i_size_read(inode)) ||
	     (iomap.iomap_flags & (IOMAP_NEW|IOMAP_UNWRITTEN))))
		set_buffer_new(bh_result);

	if (iomap.iomap_flags & IOMAP_DELAY) {
		BUG_ON(direct);
		if (create) {
			set_buffer_uptodate(bh_result);
			set_buffer_mapped(bh_result);
			set_buffer_delay(bh_result);
		}
	}

	if (direct || size > (1 << inode->i_blkbits)) {
		ASSERT(iomap.iomap_bsize - iomap.iomap_delta > 0);
		offset = min_t(xfs_off_t,
				iomap.iomap_bsize - iomap.iomap_delta, size);
		bh_result->b_size = (ssize_t)min_t(xfs_off_t, LONG_MAX, offset);
	}

	return 0;
}

int
xfs_get_blocks(
	struct inode *inode,
	sector_t iblock,
	struct buffer_head *bh_result,
	int create)
{
	return __xfs_get_blocks(inode, iblock,
				bh_result, create, 0, BMAPI_WRITE);
}

STATIC int
xfs_get_blocks_direct(
	struct inode *inode,
	sector_t iblock,
	struct buffer_head *bh_result,
	int create)
{
	return __xfs_get_blocks(inode, iblock,
				bh_result, create, 1, BMAPI_WRITE|BMAPI_DIRECT);
}
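
/*
 * Note on the b_size handling above: in the get_blocks convention used by
 * the generic direct I/O and mpage paths, bh_result->b_size carries the
 * size of the mapping the caller would like on entry (possibly many
 * blocks), and the min_t() clamp at the end of __xfs_get_blocks() trims it
 * to the length of the contiguous mapping actually found, so the caller
 * knows how much of its request was satisfied by a single extent.
 */
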
STATIC void
xfs_end_io_direct(
	struct kiocb *iocb,
	loff_t offset,
	ssize_t size,
	void *private)
{
	xfs_ioend_t *ioend = iocb->private;

	/*
	 * Non-NULL private data means we need to issue a transaction to
	 * convert a range from unwritten to written extents.  This needs
	 * to happen from process context but aio+dio I/O completion
	 * happens from irq context so we need to defer it to a workqueue.
	 * This is not necessary for synchronous direct I/O, but we do
	 * it anyway to keep the code uniform and simpler.
	 *
	 * Well, if only it were that simple. Because synchronous direct I/O
	 * requires extent conversion to occur *before* we return to userspace,
	 * we have to wait for extent conversion to complete. Look at the
	 * iocb that has been passed to us to determine if this is AIO or
	 * not. If it is synchronous, tell xfs_finish_ioend() to kick the
	 * workqueue and wait for it to complete.
	 *
	 * The core direct I/O code might be changed to always call the
	 * completion handler in the future, in which case all this can
	 * go away.
	 */
	ioend->io_offset = offset;
	ioend->io_size = size;
	if (ioend->io_type == IOMAP_READ) {
		xfs_finish_ioend(ioend, 0);
	} else if (private && size > 0) {
		xfs_finish_ioend(ioend, is_sync_kiocb(iocb));
	} else {
		/*
		 * A direct I/O write ioend starts its life in unwritten
		 * state in case it maps an unwritten extent.  This write
		 * didn't map an unwritten extent so switch its completion
		 * handler.
		 */
		INIT_WORK(&ioend->io_work, xfs_end_bio_written);
		xfs_finish_ioend(ioend, 0);
	}

	/*
	 * blockdev_direct_IO can return an error even after the I/O
	 * completion handler was called.  Thus we need to protect
	 * against double-freeing.
	 */
	iocb->private = NULL;
}

STATIC ssize_t
xfs_vm_direct_IO(
	int rw,
	struct kiocb *iocb,
	const struct iovec *iov,
	loff_t offset,
	unsigned long nr_segs)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file->f_mapping->host;
	struct block_device *bdev;
	ssize_t ret;

	bdev = xfs_find_bdev_for_inode(XFS_I(inode));

	if (rw == WRITE) {
		iocb->private = xfs_alloc_ioend(inode, IOMAP_UNWRITTEN);
		ret = blockdev_direct_IO_own_locking(rw, iocb, inode,
			bdev, iov, offset, nr_segs,
			xfs_get_blocks_direct,
			xfs_end_io_direct);
	} else {
		iocb->private = xfs_alloc_ioend(inode, IOMAP_READ);
		ret = blockdev_direct_IO_no_locking(rw, iocb, inode,
			bdev, iov, offset, nr_segs,
			xfs_get_blocks_direct,
			xfs_end_io_direct);
	}

	if (unlikely(ret != -EIOCBQUEUED && iocb->private))
		xfs_destroy_ioend(iocb->private);
	return ret;
}

STATIC int
xfs_vm_write_begin(
	struct file *file,
	struct address_space *mapping,
	loff_t pos,
	unsigned len,
	unsigned flags,
	struct page **pagep,
	void **fsdata)
{
	*pagep = NULL;
	return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
								xfs_get_blocks);
}

STATIC sector_t
xfs_vm_bmap(
	struct address_space *mapping,
	sector_t block)
{
	struct inode *inode = (struct inode *)mapping->host;
	struct xfs_inode *ip = XFS_I(inode);

	xfs_itrace_entry(XFS_I(inode));
	xfs_ilock(ip, XFS_IOLOCK_SHARED);
	xfs_flush_pages(ip, (xfs_off_t)0, -1, 0, FI_REMAPF);
	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
	return generic_block_bmap(mapping, block, xfs_get_blocks);
}

STATIC int
xfs_vm_readpage(
	struct file *unused,
	struct page *page)
{
	return mpage_readpage(page, xfs_get_blocks);
}

STATIC int
xfs_vm_readpages(
	struct file *unused,
	struct address_space *mapping,
	struct list_head *pages,
	unsigned nr_pages)
{
	return mpage_readpages(mapping, pages, nr_pages, xfs_get_blocks);
}

STATIC void
xfs_vm_invalidatepage(
	struct page *page,
	unsigned long offset)
{
	xfs_page_trace(XFS_INVALIDPAGE_ENTER,
			page->mapping->host, page, offset);
	block_invalidatepage(page, offset);
}

const struct address_space_operations xfs_address_space_operations = {
	.readpage		= xfs_vm_readpage,
	.readpages		= xfs_vm_readpages,
	.writepage		= xfs_vm_writepage,
	.writepages		= xfs_vm_writepages,
	.sync_page		= block_sync_page,
	.releasepage		= xfs_vm_releasepage,
	.invalidatepage		= xfs_vm_invalidatepage,
	.write_begin		= xfs_vm_write_begin,
	.write_end		= generic_write_end,
	.bmap			= xfs_vm_bmap,
	.direct_IO		= xfs_vm_direct_IO,
	.migratepage		= buffer_migrate_page,
};
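
/*
 * Usage sketch (the assignment below illustrates the general pattern and
 * is not a quote from XFS's inode setup code, which lives outside this
 * file): an address_space operations table only takes effect once it is
 * hooked up to a file's page cache during inode initialisation, roughly
 *
 *	inode->i_mapping->a_ops = &xfs_address_space_operations;
 *
 * after which the VM and VFS call back into the functions defined in this
 * file for readpage/writepage, direct I/O, bmap and page release.
 */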