at v3.15 1527 lines 38 kB view raw
1/* 2 * Copyright (C) 2005, 2006 3 * Avishay Traeger (avishay@gmail.com) 4 * Copyright (C) 2008, 2009 5 * Boaz Harrosh <bharrosh@panasas.com> 6 * 7 * Copyrights for code taken from ext2: 8 * Copyright (C) 1992, 1993, 1994, 1995 9 * Remy Card (card@masi.ibp.fr) 10 * Laboratoire MASI - Institut Blaise Pascal 11 * Universite Pierre et Marie Curie (Paris VI) 12 * from 13 * linux/fs/minix/inode.c 14 * Copyright (C) 1991, 1992 Linus Torvalds 15 * 16 * This file is part of exofs. 17 * 18 * exofs is free software; you can redistribute it and/or modify 19 * it under the terms of the GNU General Public License as published by 20 * the Free Software Foundation. Since it is based on ext2, and the only 21 * valid version of GPL for the Linux kernel is version 2, the only valid 22 * version of GPL for exofs is version 2. 23 * 24 * exofs is distributed in the hope that it will be useful, 25 * but WITHOUT ANY WARRANTY; without even the implied warranty of 26 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 27 * GNU General Public License for more details. 28 * 29 * You should have received a copy of the GNU General Public License 30 * along with exofs; if not, write to the Free Software 31 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 32 */ 33 34#include <linux/slab.h> 35 36#include "exofs.h" 37 38#define EXOFS_DBGMSG2(M...) do {} while (0) 39 40unsigned exofs_max_io_pages(struct ore_layout *layout, 41 unsigned expected_pages) 42{ 43 unsigned pages = min_t(unsigned, expected_pages, 44 layout->max_io_length / PAGE_SIZE); 45 46 return pages; 47} 48 49struct page_collect { 50 struct exofs_sb_info *sbi; 51 struct inode *inode; 52 unsigned expected_pages; 53 struct ore_io_state *ios; 54 55 struct page **pages; 56 unsigned alloc_pages; 57 unsigned nr_pages; 58 unsigned long length; 59 loff_t pg_first; /* keep 64bit also in 32-arches */ 60 bool read_4_write; /* This means two things: that the read is sync 61 * And the pages should not be unlocked. 62 */ 63 struct page *that_locked_page; 64}; 65 66static void _pcol_init(struct page_collect *pcol, unsigned expected_pages, 67 struct inode *inode) 68{ 69 struct exofs_sb_info *sbi = inode->i_sb->s_fs_info; 70 71 pcol->sbi = sbi; 72 pcol->inode = inode; 73 pcol->expected_pages = expected_pages; 74 75 pcol->ios = NULL; 76 pcol->pages = NULL; 77 pcol->alloc_pages = 0; 78 pcol->nr_pages = 0; 79 pcol->length = 0; 80 pcol->pg_first = -1; 81 pcol->read_4_write = false; 82 pcol->that_locked_page = NULL; 83} 84 85static void _pcol_reset(struct page_collect *pcol) 86{ 87 pcol->expected_pages -= min(pcol->nr_pages, pcol->expected_pages); 88 89 pcol->pages = NULL; 90 pcol->alloc_pages = 0; 91 pcol->nr_pages = 0; 92 pcol->length = 0; 93 pcol->pg_first = -1; 94 pcol->ios = NULL; 95 pcol->that_locked_page = NULL; 96 97 /* this is probably the end of the loop but in writes 98 * it might not end here. don't be left with nothing 99 */ 100 if (!pcol->expected_pages) 101 pcol->expected_pages = 102 exofs_max_io_pages(&pcol->sbi->layout, ~0); 103} 104 105static int pcol_try_alloc(struct page_collect *pcol) 106{ 107 unsigned pages; 108 109 /* TODO: easily support bio chaining */ 110 pages = exofs_max_io_pages(&pcol->sbi->layout, pcol->expected_pages); 111 112 for (; pages; pages >>= 1) { 113 pcol->pages = kmalloc(pages * sizeof(struct page *), 114 GFP_KERNEL); 115 if (likely(pcol->pages)) { 116 pcol->alloc_pages = pages; 117 return 0; 118 } 119 } 120 121 EXOFS_ERR("Failed to kmalloc expected_pages=%u\n", 122 pcol->expected_pages); 123 return -ENOMEM; 124} 125 126static void pcol_free(struct page_collect *pcol) 127{ 128 kfree(pcol->pages); 129 pcol->pages = NULL; 130 131 if (pcol->ios) { 132 ore_put_io_state(pcol->ios); 133 pcol->ios = NULL; 134 } 135} 136 137static int pcol_add_page(struct page_collect *pcol, struct page *page, 138 unsigned len) 139{ 140 if (unlikely(pcol->nr_pages >= pcol->alloc_pages)) 141 return -ENOMEM; 142 143 pcol->pages[pcol->nr_pages++] = page; 144 pcol->length += len; 145 return 0; 146} 147 148enum {PAGE_WAS_NOT_IN_IO = 17}; 149static int update_read_page(struct page *page, int ret) 150{ 151 switch (ret) { 152 case 0: 153 /* Everything is OK */ 154 SetPageUptodate(page); 155 if (PageError(page)) 156 ClearPageError(page); 157 break; 158 case -EFAULT: 159 /* In this case we were trying to read something that wasn't on 160 * disk yet - return a page full of zeroes. This should be OK, 161 * because the object should be empty (if there was a write 162 * before this read, the read would be waiting with the page 163 * locked */ 164 clear_highpage(page); 165 166 SetPageUptodate(page); 167 if (PageError(page)) 168 ClearPageError(page); 169 EXOFS_DBGMSG("recovered read error\n"); 170 /* fall through */ 171 case PAGE_WAS_NOT_IN_IO: 172 ret = 0; /* recovered error */ 173 break; 174 default: 175 SetPageError(page); 176 } 177 return ret; 178} 179 180static void update_write_page(struct page *page, int ret) 181{ 182 if (unlikely(ret == PAGE_WAS_NOT_IN_IO)) 183 return; /* don't pass start don't collect $200 */ 184 185 if (ret) { 186 mapping_set_error(page->mapping, ret); 187 SetPageError(page); 188 } 189 end_page_writeback(page); 190} 191 192/* Called at the end of reads, to optionally unlock pages and update their 193 * status. 194 */ 195static int __readpages_done(struct page_collect *pcol) 196{ 197 int i; 198 u64 good_bytes; 199 u64 length = 0; 200 int ret = ore_check_io(pcol->ios, NULL); 201 202 if (likely(!ret)) { 203 good_bytes = pcol->length; 204 ret = PAGE_WAS_NOT_IN_IO; 205 } else { 206 good_bytes = 0; 207 } 208 209 EXOFS_DBGMSG2("readpages_done(0x%lx) good_bytes=0x%llx" 210 " length=0x%lx nr_pages=%u\n", 211 pcol->inode->i_ino, _LLU(good_bytes), pcol->length, 212 pcol->nr_pages); 213 214 for (i = 0; i < pcol->nr_pages; i++) { 215 struct page *page = pcol->pages[i]; 216 struct inode *inode = page->mapping->host; 217 int page_stat; 218 219 if (inode != pcol->inode) 220 continue; /* osd might add more pages at end */ 221 222 if (likely(length < good_bytes)) 223 page_stat = 0; 224 else 225 page_stat = ret; 226 227 EXOFS_DBGMSG2(" readpages_done(0x%lx, 0x%lx) %s\n", 228 inode->i_ino, page->index, 229 page_stat ? "bad_bytes" : "good_bytes"); 230 231 ret = update_read_page(page, page_stat); 232 if (!pcol->read_4_write) 233 unlock_page(page); 234 length += PAGE_SIZE; 235 } 236 237 pcol_free(pcol); 238 EXOFS_DBGMSG2("readpages_done END\n"); 239 return ret; 240} 241 242/* callback of async reads */ 243static void readpages_done(struct ore_io_state *ios, void *p) 244{ 245 struct page_collect *pcol = p; 246 247 __readpages_done(pcol); 248 atomic_dec(&pcol->sbi->s_curr_pending); 249 kfree(pcol); 250} 251 252static void _unlock_pcol_pages(struct page_collect *pcol, int ret, int rw) 253{ 254 int i; 255 256 for (i = 0; i < pcol->nr_pages; i++) { 257 struct page *page = pcol->pages[i]; 258 259 if (rw == READ) 260 update_read_page(page, ret); 261 else 262 update_write_page(page, ret); 263 264 unlock_page(page); 265 } 266} 267 268static int _maybe_not_all_in_one_io(struct ore_io_state *ios, 269 struct page_collect *pcol_src, struct page_collect *pcol) 270{ 271 /* length was wrong or offset was not page aligned */ 272 BUG_ON(pcol_src->nr_pages < ios->nr_pages); 273 274 if (pcol_src->nr_pages > ios->nr_pages) { 275 struct page **src_page; 276 unsigned pages_less = pcol_src->nr_pages - ios->nr_pages; 277 unsigned long len_less = pcol_src->length - ios->length; 278 unsigned i; 279 int ret; 280 281 /* This IO was trimmed */ 282 pcol_src->nr_pages = ios->nr_pages; 283 pcol_src->length = ios->length; 284 285 /* Left over pages are passed to the next io */ 286 pcol->expected_pages += pages_less; 287 pcol->nr_pages = pages_less; 288 pcol->length = len_less; 289 src_page = pcol_src->pages + pcol_src->nr_pages; 290 pcol->pg_first = (*src_page)->index; 291 292 ret = pcol_try_alloc(pcol); 293 if (unlikely(ret)) 294 return ret; 295 296 for (i = 0; i < pages_less; ++i) 297 pcol->pages[i] = *src_page++; 298 299 EXOFS_DBGMSG("Length was adjusted nr_pages=0x%x " 300 "pages_less=0x%x expected_pages=0x%x " 301 "next_offset=0x%llx next_len=0x%lx\n", 302 pcol_src->nr_pages, pages_less, pcol->expected_pages, 303 pcol->pg_first * PAGE_SIZE, pcol->length); 304 } 305 return 0; 306} 307 308static int read_exec(struct page_collect *pcol) 309{ 310 struct exofs_i_info *oi = exofs_i(pcol->inode); 311 struct ore_io_state *ios; 312 struct page_collect *pcol_copy = NULL; 313 int ret; 314 315 if (!pcol->pages) 316 return 0; 317 318 if (!pcol->ios) { 319 int ret = ore_get_rw_state(&pcol->sbi->layout, &oi->oc, true, 320 pcol->pg_first << PAGE_CACHE_SHIFT, 321 pcol->length, &pcol->ios); 322 323 if (ret) 324 return ret; 325 } 326 327 ios = pcol->ios; 328 ios->pages = pcol->pages; 329 330 if (pcol->read_4_write) { 331 ore_read(pcol->ios); 332 return __readpages_done(pcol); 333 } 334 335 pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL); 336 if (!pcol_copy) { 337 ret = -ENOMEM; 338 goto err; 339 } 340 341 *pcol_copy = *pcol; 342 ios->done = readpages_done; 343 ios->private = pcol_copy; 344 345 /* pages ownership was passed to pcol_copy */ 346 _pcol_reset(pcol); 347 348 ret = _maybe_not_all_in_one_io(ios, pcol_copy, pcol); 349 if (unlikely(ret)) 350 goto err; 351 352 EXOFS_DBGMSG2("read_exec(0x%lx) offset=0x%llx length=0x%llx\n", 353 pcol->inode->i_ino, _LLU(ios->offset), _LLU(ios->length)); 354 355 ret = ore_read(ios); 356 if (unlikely(ret)) 357 goto err; 358 359 atomic_inc(&pcol->sbi->s_curr_pending); 360 361 return 0; 362 363err: 364 if (!pcol_copy) /* Failed before ownership transfer */ 365 pcol_copy = pcol; 366 _unlock_pcol_pages(pcol_copy, ret, READ); 367 pcol_free(pcol_copy); 368 kfree(pcol_copy); 369 370 return ret; 371} 372 373/* readpage_strip is called either directly from readpage() or by the VFS from 374 * within read_cache_pages(), to add one more page to be read. It will try to 375 * collect as many contiguous pages as posible. If a discontinuity is 376 * encountered, or it runs out of resources, it will submit the previous segment 377 * and will start a new collection. Eventually caller must submit the last 378 * segment if present. 379 */ 380static int readpage_strip(void *data, struct page *page) 381{ 382 struct page_collect *pcol = data; 383 struct inode *inode = pcol->inode; 384 struct exofs_i_info *oi = exofs_i(inode); 385 loff_t i_size = i_size_read(inode); 386 pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; 387 size_t len; 388 int ret; 389 390 BUG_ON(!PageLocked(page)); 391 392 /* FIXME: Just for debugging, will be removed */ 393 if (PageUptodate(page)) 394 EXOFS_ERR("PageUptodate(0x%lx, 0x%lx)\n", pcol->inode->i_ino, 395 page->index); 396 397 pcol->that_locked_page = page; 398 399 if (page->index < end_index) 400 len = PAGE_CACHE_SIZE; 401 else if (page->index == end_index) 402 len = i_size & ~PAGE_CACHE_MASK; 403 else 404 len = 0; 405 406 if (!len || !obj_created(oi)) { 407 /* this will be out of bounds, or doesn't exist yet. 408 * Current page is cleared and the request is split 409 */ 410 clear_highpage(page); 411 412 SetPageUptodate(page); 413 if (PageError(page)) 414 ClearPageError(page); 415 416 if (!pcol->read_4_write) 417 unlock_page(page); 418 EXOFS_DBGMSG("readpage_strip(0x%lx) empty page len=%zx " 419 "read_4_write=%d index=0x%lx end_index=0x%lx " 420 "splitting\n", inode->i_ino, len, 421 pcol->read_4_write, page->index, end_index); 422 423 return read_exec(pcol); 424 } 425 426try_again: 427 428 if (unlikely(pcol->pg_first == -1)) { 429 pcol->pg_first = page->index; 430 } else if (unlikely((pcol->pg_first + pcol->nr_pages) != 431 page->index)) { 432 /* Discontinuity detected, split the request */ 433 ret = read_exec(pcol); 434 if (unlikely(ret)) 435 goto fail; 436 goto try_again; 437 } 438 439 if (!pcol->pages) { 440 ret = pcol_try_alloc(pcol); 441 if (unlikely(ret)) 442 goto fail; 443 } 444 445 if (len != PAGE_CACHE_SIZE) 446 zero_user(page, len, PAGE_CACHE_SIZE - len); 447 448 EXOFS_DBGMSG2(" readpage_strip(0x%lx, 0x%lx) len=0x%zx\n", 449 inode->i_ino, page->index, len); 450 451 ret = pcol_add_page(pcol, page, len); 452 if (ret) { 453 EXOFS_DBGMSG2("Failed pcol_add_page pages[i]=%p " 454 "this_len=0x%zx nr_pages=%u length=0x%lx\n", 455 page, len, pcol->nr_pages, pcol->length); 456 457 /* split the request, and start again with current page */ 458 ret = read_exec(pcol); 459 if (unlikely(ret)) 460 goto fail; 461 462 goto try_again; 463 } 464 465 return 0; 466 467fail: 468 /* SetPageError(page); ??? */ 469 unlock_page(page); 470 return ret; 471} 472 473static int exofs_readpages(struct file *file, struct address_space *mapping, 474 struct list_head *pages, unsigned nr_pages) 475{ 476 struct page_collect pcol; 477 int ret; 478 479 _pcol_init(&pcol, nr_pages, mapping->host); 480 481 ret = read_cache_pages(mapping, pages, readpage_strip, &pcol); 482 if (ret) { 483 EXOFS_ERR("read_cache_pages => %d\n", ret); 484 return ret; 485 } 486 487 ret = read_exec(&pcol); 488 if (unlikely(ret)) 489 return ret; 490 491 return read_exec(&pcol); 492} 493 494static int _readpage(struct page *page, bool read_4_write) 495{ 496 struct page_collect pcol; 497 int ret; 498 499 _pcol_init(&pcol, 1, page->mapping->host); 500 501 pcol.read_4_write = read_4_write; 502 ret = readpage_strip(&pcol, page); 503 if (ret) { 504 EXOFS_ERR("_readpage => %d\n", ret); 505 return ret; 506 } 507 508 return read_exec(&pcol); 509} 510 511/* 512 * We don't need the file 513 */ 514static int exofs_readpage(struct file *file, struct page *page) 515{ 516 return _readpage(page, false); 517} 518 519/* Callback for osd_write. All writes are asynchronous */ 520static void writepages_done(struct ore_io_state *ios, void *p) 521{ 522 struct page_collect *pcol = p; 523 int i; 524 u64 good_bytes; 525 u64 length = 0; 526 int ret = ore_check_io(ios, NULL); 527 528 atomic_dec(&pcol->sbi->s_curr_pending); 529 530 if (likely(!ret)) { 531 good_bytes = pcol->length; 532 ret = PAGE_WAS_NOT_IN_IO; 533 } else { 534 good_bytes = 0; 535 } 536 537 EXOFS_DBGMSG2("writepages_done(0x%lx) good_bytes=0x%llx" 538 " length=0x%lx nr_pages=%u\n", 539 pcol->inode->i_ino, _LLU(good_bytes), pcol->length, 540 pcol->nr_pages); 541 542 for (i = 0; i < pcol->nr_pages; i++) { 543 struct page *page = pcol->pages[i]; 544 struct inode *inode = page->mapping->host; 545 int page_stat; 546 547 if (inode != pcol->inode) 548 continue; /* osd might add more pages to a bio */ 549 550 if (likely(length < good_bytes)) 551 page_stat = 0; 552 else 553 page_stat = ret; 554 555 update_write_page(page, page_stat); 556 unlock_page(page); 557 EXOFS_DBGMSG2(" writepages_done(0x%lx, 0x%lx) status=%d\n", 558 inode->i_ino, page->index, page_stat); 559 560 length += PAGE_SIZE; 561 } 562 563 pcol_free(pcol); 564 kfree(pcol); 565 EXOFS_DBGMSG2("writepages_done END\n"); 566} 567 568static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate) 569{ 570 struct page_collect *pcol = priv; 571 pgoff_t index = offset / PAGE_SIZE; 572 573 if (!pcol->that_locked_page || 574 (pcol->that_locked_page->index != index)) { 575 struct page *page; 576 loff_t i_size = i_size_read(pcol->inode); 577 578 if (offset >= i_size) { 579 *uptodate = true; 580 EXOFS_DBGMSG2("offset >= i_size index=0x%lx\n", index); 581 return ZERO_PAGE(0); 582 } 583 584 page = find_get_page(pcol->inode->i_mapping, index); 585 if (!page) { 586 page = find_or_create_page(pcol->inode->i_mapping, 587 index, GFP_NOFS); 588 if (unlikely(!page)) { 589 EXOFS_DBGMSG("grab_cache_page Failed " 590 "index=0x%llx\n", _LLU(index)); 591 return NULL; 592 } 593 unlock_page(page); 594 } 595 if (PageDirty(page) || PageWriteback(page)) 596 *uptodate = true; 597 else 598 *uptodate = PageUptodate(page); 599 EXOFS_DBGMSG2("index=0x%lx uptodate=%d\n", index, *uptodate); 600 return page; 601 } else { 602 EXOFS_DBGMSG2("YES that_locked_page index=0x%lx\n", 603 pcol->that_locked_page->index); 604 *uptodate = true; 605 return pcol->that_locked_page; 606 } 607} 608 609static void __r4w_put_page(void *priv, struct page *page) 610{ 611 struct page_collect *pcol = priv; 612 613 if ((pcol->that_locked_page != page) && (ZERO_PAGE(0) != page)) { 614 EXOFS_DBGMSG2("index=0x%lx\n", page->index); 615 page_cache_release(page); 616 return; 617 } 618 EXOFS_DBGMSG2("that_locked_page index=0x%lx\n", 619 ZERO_PAGE(0) == page ? -1 : page->index); 620} 621 622static const struct _ore_r4w_op _r4w_op = { 623 .get_page = &__r4w_get_page, 624 .put_page = &__r4w_put_page, 625}; 626 627static int write_exec(struct page_collect *pcol) 628{ 629 struct exofs_i_info *oi = exofs_i(pcol->inode); 630 struct ore_io_state *ios; 631 struct page_collect *pcol_copy = NULL; 632 int ret; 633 634 if (!pcol->pages) 635 return 0; 636 637 BUG_ON(pcol->ios); 638 ret = ore_get_rw_state(&pcol->sbi->layout, &oi->oc, false, 639 pcol->pg_first << PAGE_CACHE_SHIFT, 640 pcol->length, &pcol->ios); 641 if (unlikely(ret)) 642 goto err; 643 644 pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL); 645 if (!pcol_copy) { 646 EXOFS_ERR("write_exec: Failed to kmalloc(pcol)\n"); 647 ret = -ENOMEM; 648 goto err; 649 } 650 651 *pcol_copy = *pcol; 652 653 ios = pcol->ios; 654 ios->pages = pcol_copy->pages; 655 ios->done = writepages_done; 656 ios->r4w = &_r4w_op; 657 ios->private = pcol_copy; 658 659 /* pages ownership was passed to pcol_copy */ 660 _pcol_reset(pcol); 661 662 ret = _maybe_not_all_in_one_io(ios, pcol_copy, pcol); 663 if (unlikely(ret)) 664 goto err; 665 666 EXOFS_DBGMSG2("write_exec(0x%lx) offset=0x%llx length=0x%llx\n", 667 pcol->inode->i_ino, _LLU(ios->offset), _LLU(ios->length)); 668 669 ret = ore_write(ios); 670 if (unlikely(ret)) { 671 EXOFS_ERR("write_exec: ore_write() Failed\n"); 672 goto err; 673 } 674 675 atomic_inc(&pcol->sbi->s_curr_pending); 676 return 0; 677 678err: 679 if (!pcol_copy) /* Failed before ownership transfer */ 680 pcol_copy = pcol; 681 _unlock_pcol_pages(pcol_copy, ret, WRITE); 682 pcol_free(pcol_copy); 683 kfree(pcol_copy); 684 685 return ret; 686} 687 688/* writepage_strip is called either directly from writepage() or by the VFS from 689 * within write_cache_pages(), to add one more page to be written to storage. 690 * It will try to collect as many contiguous pages as possible. If a 691 * discontinuity is encountered or it runs out of resources it will submit the 692 * previous segment and will start a new collection. 693 * Eventually caller must submit the last segment if present. 694 */ 695static int writepage_strip(struct page *page, 696 struct writeback_control *wbc_unused, void *data) 697{ 698 struct page_collect *pcol = data; 699 struct inode *inode = pcol->inode; 700 struct exofs_i_info *oi = exofs_i(inode); 701 loff_t i_size = i_size_read(inode); 702 pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; 703 size_t len; 704 int ret; 705 706 BUG_ON(!PageLocked(page)); 707 708 ret = wait_obj_created(oi); 709 if (unlikely(ret)) 710 goto fail; 711 712 if (page->index < end_index) 713 /* in this case, the page is within the limits of the file */ 714 len = PAGE_CACHE_SIZE; 715 else { 716 len = i_size & ~PAGE_CACHE_MASK; 717 718 if (page->index > end_index || !len) { 719 /* in this case, the page is outside the limits 720 * (truncate in progress) 721 */ 722 ret = write_exec(pcol); 723 if (unlikely(ret)) 724 goto fail; 725 if (PageError(page)) 726 ClearPageError(page); 727 unlock_page(page); 728 EXOFS_DBGMSG("writepage_strip(0x%lx, 0x%lx) " 729 "outside the limits\n", 730 inode->i_ino, page->index); 731 return 0; 732 } 733 } 734 735try_again: 736 737 if (unlikely(pcol->pg_first == -1)) { 738 pcol->pg_first = page->index; 739 } else if (unlikely((pcol->pg_first + pcol->nr_pages) != 740 page->index)) { 741 /* Discontinuity detected, split the request */ 742 ret = write_exec(pcol); 743 if (unlikely(ret)) 744 goto fail; 745 746 EXOFS_DBGMSG("writepage_strip(0x%lx, 0x%lx) Discontinuity\n", 747 inode->i_ino, page->index); 748 goto try_again; 749 } 750 751 if (!pcol->pages) { 752 ret = pcol_try_alloc(pcol); 753 if (unlikely(ret)) 754 goto fail; 755 } 756 757 EXOFS_DBGMSG2(" writepage_strip(0x%lx, 0x%lx) len=0x%zx\n", 758 inode->i_ino, page->index, len); 759 760 ret = pcol_add_page(pcol, page, len); 761 if (unlikely(ret)) { 762 EXOFS_DBGMSG2("Failed pcol_add_page " 763 "nr_pages=%u total_length=0x%lx\n", 764 pcol->nr_pages, pcol->length); 765 766 /* split the request, next loop will start again */ 767 ret = write_exec(pcol); 768 if (unlikely(ret)) { 769 EXOFS_DBGMSG("write_exec failed => %d", ret); 770 goto fail; 771 } 772 773 goto try_again; 774 } 775 776 BUG_ON(PageWriteback(page)); 777 set_page_writeback(page); 778 779 return 0; 780 781fail: 782 EXOFS_DBGMSG("Error: writepage_strip(0x%lx, 0x%lx)=>%d\n", 783 inode->i_ino, page->index, ret); 784 set_bit(AS_EIO, &page->mapping->flags); 785 unlock_page(page); 786 return ret; 787} 788 789static int exofs_writepages(struct address_space *mapping, 790 struct writeback_control *wbc) 791{ 792 struct page_collect pcol; 793 long start, end, expected_pages; 794 int ret; 795 796 start = wbc->range_start >> PAGE_CACHE_SHIFT; 797 end = (wbc->range_end == LLONG_MAX) ? 798 start + mapping->nrpages : 799 wbc->range_end >> PAGE_CACHE_SHIFT; 800 801 if (start || end) 802 expected_pages = end - start + 1; 803 else 804 expected_pages = mapping->nrpages; 805 806 if (expected_pages < 32L) 807 expected_pages = 32L; 808 809 EXOFS_DBGMSG2("inode(0x%lx) wbc->start=0x%llx wbc->end=0x%llx " 810 "nrpages=%lu start=0x%lx end=0x%lx expected_pages=%ld\n", 811 mapping->host->i_ino, wbc->range_start, wbc->range_end, 812 mapping->nrpages, start, end, expected_pages); 813 814 _pcol_init(&pcol, expected_pages, mapping->host); 815 816 ret = write_cache_pages(mapping, wbc, writepage_strip, &pcol); 817 if (unlikely(ret)) { 818 EXOFS_ERR("write_cache_pages => %d\n", ret); 819 return ret; 820 } 821 822 ret = write_exec(&pcol); 823 if (unlikely(ret)) 824 return ret; 825 826 if (wbc->sync_mode == WB_SYNC_ALL) { 827 return write_exec(&pcol); /* pump the last reminder */ 828 } else if (pcol.nr_pages) { 829 /* not SYNC let the reminder join the next writeout */ 830 unsigned i; 831 832 for (i = 0; i < pcol.nr_pages; i++) { 833 struct page *page = pcol.pages[i]; 834 835 end_page_writeback(page); 836 set_page_dirty(page); 837 unlock_page(page); 838 } 839 } 840 return 0; 841} 842 843/* 844static int exofs_writepage(struct page *page, struct writeback_control *wbc) 845{ 846 struct page_collect pcol; 847 int ret; 848 849 _pcol_init(&pcol, 1, page->mapping->host); 850 851 ret = writepage_strip(page, NULL, &pcol); 852 if (ret) { 853 EXOFS_ERR("exofs_writepage => %d\n", ret); 854 return ret; 855 } 856 857 return write_exec(&pcol); 858} 859*/ 860/* i_mutex held using inode->i_size directly */ 861static void _write_failed(struct inode *inode, loff_t to) 862{ 863 if (to > inode->i_size) 864 truncate_pagecache(inode, inode->i_size); 865} 866 867int exofs_write_begin(struct file *file, struct address_space *mapping, 868 loff_t pos, unsigned len, unsigned flags, 869 struct page **pagep, void **fsdata) 870{ 871 int ret = 0; 872 struct page *page; 873 874 page = *pagep; 875 if (page == NULL) { 876 ret = simple_write_begin(file, mapping, pos, len, flags, pagep, 877 fsdata); 878 if (ret) { 879 EXOFS_DBGMSG("simple_write_begin failed\n"); 880 goto out; 881 } 882 883 page = *pagep; 884 } 885 886 /* read modify write */ 887 if (!PageUptodate(page) && (len != PAGE_CACHE_SIZE)) { 888 loff_t i_size = i_size_read(mapping->host); 889 pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; 890 size_t rlen; 891 892 if (page->index < end_index) 893 rlen = PAGE_CACHE_SIZE; 894 else if (page->index == end_index) 895 rlen = i_size & ~PAGE_CACHE_MASK; 896 else 897 rlen = 0; 898 899 if (!rlen) { 900 clear_highpage(page); 901 SetPageUptodate(page); 902 goto out; 903 } 904 905 ret = _readpage(page, true); 906 if (ret) { 907 /*SetPageError was done by _readpage. Is it ok?*/ 908 unlock_page(page); 909 EXOFS_DBGMSG("__readpage failed\n"); 910 } 911 } 912out: 913 if (unlikely(ret)) 914 _write_failed(mapping->host, pos + len); 915 916 return ret; 917} 918 919static int exofs_write_begin_export(struct file *file, 920 struct address_space *mapping, 921 loff_t pos, unsigned len, unsigned flags, 922 struct page **pagep, void **fsdata) 923{ 924 *pagep = NULL; 925 926 return exofs_write_begin(file, mapping, pos, len, flags, pagep, 927 fsdata); 928} 929 930static int exofs_write_end(struct file *file, struct address_space *mapping, 931 loff_t pos, unsigned len, unsigned copied, 932 struct page *page, void *fsdata) 933{ 934 struct inode *inode = mapping->host; 935 /* According to comment in simple_write_end i_mutex is held */ 936 loff_t i_size = inode->i_size; 937 int ret; 938 939 ret = simple_write_end(file, mapping,pos, len, copied, page, fsdata); 940 if (unlikely(ret)) 941 _write_failed(inode, pos + len); 942 943 /* TODO: once simple_write_end marks inode dirty remove */ 944 if (i_size != inode->i_size) 945 mark_inode_dirty(inode); 946 return ret; 947} 948 949static int exofs_releasepage(struct page *page, gfp_t gfp) 950{ 951 EXOFS_DBGMSG("page 0x%lx\n", page->index); 952 WARN_ON(1); 953 return 0; 954} 955 956static void exofs_invalidatepage(struct page *page, unsigned int offset, 957 unsigned int length) 958{ 959 EXOFS_DBGMSG("page 0x%lx offset 0x%x length 0x%x\n", 960 page->index, offset, length); 961 WARN_ON(1); 962} 963 964 965 /* TODO: Should be easy enough to do proprly */ 966static ssize_t exofs_direct_IO(int rw, struct kiocb *iocb, 967 const struct iovec *iov, loff_t offset, unsigned long nr_segs) 968{ 969 return 0; 970} 971 972const struct address_space_operations exofs_aops = { 973 .readpage = exofs_readpage, 974 .readpages = exofs_readpages, 975 .writepage = NULL, 976 .writepages = exofs_writepages, 977 .write_begin = exofs_write_begin_export, 978 .write_end = exofs_write_end, 979 .releasepage = exofs_releasepage, 980 .set_page_dirty = __set_page_dirty_nobuffers, 981 .invalidatepage = exofs_invalidatepage, 982 983 /* Not implemented Yet */ 984 .bmap = NULL, /* TODO: use osd's OSD_ACT_READ_MAP */ 985 .direct_IO = exofs_direct_IO, 986 987 /* With these NULL has special meaning or default is not exported */ 988 .get_xip_mem = NULL, 989 .migratepage = NULL, 990 .launder_page = NULL, 991 .is_partially_uptodate = NULL, 992 .error_remove_page = NULL, 993}; 994 995/****************************************************************************** 996 * INODE OPERATIONS 997 *****************************************************************************/ 998 999/* 1000 * Test whether an inode is a fast symlink. 1001 */ 1002static inline int exofs_inode_is_fast_symlink(struct inode *inode) 1003{ 1004 struct exofs_i_info *oi = exofs_i(inode); 1005 1006 return S_ISLNK(inode->i_mode) && (oi->i_data[0] != 0); 1007} 1008 1009static int _do_truncate(struct inode *inode, loff_t newsize) 1010{ 1011 struct exofs_i_info *oi = exofs_i(inode); 1012 struct exofs_sb_info *sbi = inode->i_sb->s_fs_info; 1013 int ret; 1014 1015 inode->i_mtime = inode->i_ctime = CURRENT_TIME; 1016 1017 ret = ore_truncate(&sbi->layout, &oi->oc, (u64)newsize); 1018 if (likely(!ret)) 1019 truncate_setsize(inode, newsize); 1020 1021 EXOFS_DBGMSG2("(0x%lx) size=0x%llx ret=>%d\n", 1022 inode->i_ino, newsize, ret); 1023 return ret; 1024} 1025 1026/* 1027 * Set inode attributes - update size attribute on OSD if needed, 1028 * otherwise just call generic functions. 1029 */ 1030int exofs_setattr(struct dentry *dentry, struct iattr *iattr) 1031{ 1032 struct inode *inode = dentry->d_inode; 1033 int error; 1034 1035 /* if we are about to modify an object, and it hasn't been 1036 * created yet, wait 1037 */ 1038 error = wait_obj_created(exofs_i(inode)); 1039 if (unlikely(error)) 1040 return error; 1041 1042 error = inode_change_ok(inode, iattr); 1043 if (unlikely(error)) 1044 return error; 1045 1046 if ((iattr->ia_valid & ATTR_SIZE) && 1047 iattr->ia_size != i_size_read(inode)) { 1048 error = _do_truncate(inode, iattr->ia_size); 1049 if (unlikely(error)) 1050 return error; 1051 } 1052 1053 setattr_copy(inode, iattr); 1054 mark_inode_dirty(inode); 1055 return 0; 1056} 1057 1058static const struct osd_attr g_attr_inode_file_layout = ATTR_DEF( 1059 EXOFS_APAGE_FS_DATA, 1060 EXOFS_ATTR_INODE_FILE_LAYOUT, 1061 0); 1062static const struct osd_attr g_attr_inode_dir_layout = ATTR_DEF( 1063 EXOFS_APAGE_FS_DATA, 1064 EXOFS_ATTR_INODE_DIR_LAYOUT, 1065 0); 1066 1067/* 1068 * Read the Linux inode info from the OSD, and return it as is. In exofs the 1069 * inode info is in an application specific page/attribute of the osd-object. 1070 */ 1071static int exofs_get_inode(struct super_block *sb, struct exofs_i_info *oi, 1072 struct exofs_fcb *inode) 1073{ 1074 struct exofs_sb_info *sbi = sb->s_fs_info; 1075 struct osd_attr attrs[] = { 1076 [0] = g_attr_inode_data, 1077 [1] = g_attr_inode_file_layout, 1078 [2] = g_attr_inode_dir_layout, 1079 }; 1080 struct ore_io_state *ios; 1081 struct exofs_on_disk_inode_layout *layout; 1082 int ret; 1083 1084 ret = ore_get_io_state(&sbi->layout, &oi->oc, &ios); 1085 if (unlikely(ret)) { 1086 EXOFS_ERR("%s: ore_get_io_state failed.\n", __func__); 1087 return ret; 1088 } 1089 1090 attrs[1].len = exofs_on_disk_inode_layout_size(sbi->oc.numdevs); 1091 attrs[2].len = exofs_on_disk_inode_layout_size(sbi->oc.numdevs); 1092 1093 ios->in_attr = attrs; 1094 ios->in_attr_len = ARRAY_SIZE(attrs); 1095 1096 ret = ore_read(ios); 1097 if (unlikely(ret)) { 1098 EXOFS_ERR("object(0x%llx) corrupted, return empty file=>%d\n", 1099 _LLU(oi->one_comp.obj.id), ret); 1100 memset(inode, 0, sizeof(*inode)); 1101 inode->i_mode = 0040000 | (0777 & ~022); 1102 /* If object is lost on target we might as well enable it's 1103 * delete. 1104 */ 1105 ret = 0; 1106 goto out; 1107 } 1108 1109 ret = extract_attr_from_ios(ios, &attrs[0]); 1110 if (ret) { 1111 EXOFS_ERR("%s: extract_attr 0 of inode failed\n", __func__); 1112 goto out; 1113 } 1114 WARN_ON(attrs[0].len != EXOFS_INO_ATTR_SIZE); 1115 memcpy(inode, attrs[0].val_ptr, EXOFS_INO_ATTR_SIZE); 1116 1117 ret = extract_attr_from_ios(ios, &attrs[1]); 1118 if (ret) { 1119 EXOFS_ERR("%s: extract_attr 1 of inode failed\n", __func__); 1120 goto out; 1121 } 1122 if (attrs[1].len) { 1123 layout = attrs[1].val_ptr; 1124 if (layout->gen_func != cpu_to_le16(LAYOUT_MOVING_WINDOW)) { 1125 EXOFS_ERR("%s: unsupported files layout %d\n", 1126 __func__, layout->gen_func); 1127 ret = -ENOTSUPP; 1128 goto out; 1129 } 1130 } 1131 1132 ret = extract_attr_from_ios(ios, &attrs[2]); 1133 if (ret) { 1134 EXOFS_ERR("%s: extract_attr 2 of inode failed\n", __func__); 1135 goto out; 1136 } 1137 if (attrs[2].len) { 1138 layout = attrs[2].val_ptr; 1139 if (layout->gen_func != cpu_to_le16(LAYOUT_MOVING_WINDOW)) { 1140 EXOFS_ERR("%s: unsupported meta-data layout %d\n", 1141 __func__, layout->gen_func); 1142 ret = -ENOTSUPP; 1143 goto out; 1144 } 1145 } 1146 1147out: 1148 ore_put_io_state(ios); 1149 return ret; 1150} 1151 1152static void __oi_init(struct exofs_i_info *oi) 1153{ 1154 init_waitqueue_head(&oi->i_wq); 1155 oi->i_flags = 0; 1156} 1157/* 1158 * Fill in an inode read from the OSD and set it up for use 1159 */ 1160struct inode *exofs_iget(struct super_block *sb, unsigned long ino) 1161{ 1162 struct exofs_i_info *oi; 1163 struct exofs_fcb fcb; 1164 struct inode *inode; 1165 int ret; 1166 1167 inode = iget_locked(sb, ino); 1168 if (!inode) 1169 return ERR_PTR(-ENOMEM); 1170 if (!(inode->i_state & I_NEW)) 1171 return inode; 1172 oi = exofs_i(inode); 1173 __oi_init(oi); 1174 exofs_init_comps(&oi->oc, &oi->one_comp, sb->s_fs_info, 1175 exofs_oi_objno(oi)); 1176 1177 /* read the inode from the osd */ 1178 ret = exofs_get_inode(sb, oi, &fcb); 1179 if (ret) 1180 goto bad_inode; 1181 1182 set_obj_created(oi); 1183 1184 /* copy stuff from on-disk struct to in-memory struct */ 1185 inode->i_mode = le16_to_cpu(fcb.i_mode); 1186 i_uid_write(inode, le32_to_cpu(fcb.i_uid)); 1187 i_gid_write(inode, le32_to_cpu(fcb.i_gid)); 1188 set_nlink(inode, le16_to_cpu(fcb.i_links_count)); 1189 inode->i_ctime.tv_sec = (signed)le32_to_cpu(fcb.i_ctime); 1190 inode->i_atime.tv_sec = (signed)le32_to_cpu(fcb.i_atime); 1191 inode->i_mtime.tv_sec = (signed)le32_to_cpu(fcb.i_mtime); 1192 inode->i_ctime.tv_nsec = 1193 inode->i_atime.tv_nsec = inode->i_mtime.tv_nsec = 0; 1194 oi->i_commit_size = le64_to_cpu(fcb.i_size); 1195 i_size_write(inode, oi->i_commit_size); 1196 inode->i_blkbits = EXOFS_BLKSHIFT; 1197 inode->i_generation = le32_to_cpu(fcb.i_generation); 1198 1199 oi->i_dir_start_lookup = 0; 1200 1201 if ((inode->i_nlink == 0) && (inode->i_mode == 0)) { 1202 ret = -ESTALE; 1203 goto bad_inode; 1204 } 1205 1206 if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) { 1207 if (fcb.i_data[0]) 1208 inode->i_rdev = 1209 old_decode_dev(le32_to_cpu(fcb.i_data[0])); 1210 else 1211 inode->i_rdev = 1212 new_decode_dev(le32_to_cpu(fcb.i_data[1])); 1213 } else { 1214 memcpy(oi->i_data, fcb.i_data, sizeof(fcb.i_data)); 1215 } 1216 1217 inode->i_mapping->backing_dev_info = sb->s_bdi; 1218 if (S_ISREG(inode->i_mode)) { 1219 inode->i_op = &exofs_file_inode_operations; 1220 inode->i_fop = &exofs_file_operations; 1221 inode->i_mapping->a_ops = &exofs_aops; 1222 } else if (S_ISDIR(inode->i_mode)) { 1223 inode->i_op = &exofs_dir_inode_operations; 1224 inode->i_fop = &exofs_dir_operations; 1225 inode->i_mapping->a_ops = &exofs_aops; 1226 } else if (S_ISLNK(inode->i_mode)) { 1227 if (exofs_inode_is_fast_symlink(inode)) 1228 inode->i_op = &exofs_fast_symlink_inode_operations; 1229 else { 1230 inode->i_op = &exofs_symlink_inode_operations; 1231 inode->i_mapping->a_ops = &exofs_aops; 1232 } 1233 } else { 1234 inode->i_op = &exofs_special_inode_operations; 1235 if (fcb.i_data[0]) 1236 init_special_inode(inode, inode->i_mode, 1237 old_decode_dev(le32_to_cpu(fcb.i_data[0]))); 1238 else 1239 init_special_inode(inode, inode->i_mode, 1240 new_decode_dev(le32_to_cpu(fcb.i_data[1]))); 1241 } 1242 1243 unlock_new_inode(inode); 1244 return inode; 1245 1246bad_inode: 1247 iget_failed(inode); 1248 return ERR_PTR(ret); 1249} 1250 1251int __exofs_wait_obj_created(struct exofs_i_info *oi) 1252{ 1253 if (!obj_created(oi)) { 1254 EXOFS_DBGMSG("!obj_created\n"); 1255 BUG_ON(!obj_2bcreated(oi)); 1256 wait_event(oi->i_wq, obj_created(oi)); 1257 EXOFS_DBGMSG("wait_event done\n"); 1258 } 1259 return unlikely(is_bad_inode(&oi->vfs_inode)) ? -EIO : 0; 1260} 1261 1262/* 1263 * Callback function from exofs_new_inode(). The important thing is that we 1264 * set the obj_created flag so that other methods know that the object exists on 1265 * the OSD. 1266 */ 1267static void create_done(struct ore_io_state *ios, void *p) 1268{ 1269 struct inode *inode = p; 1270 struct exofs_i_info *oi = exofs_i(inode); 1271 struct exofs_sb_info *sbi = inode->i_sb->s_fs_info; 1272 int ret; 1273 1274 ret = ore_check_io(ios, NULL); 1275 ore_put_io_state(ios); 1276 1277 atomic_dec(&sbi->s_curr_pending); 1278 1279 if (unlikely(ret)) { 1280 EXOFS_ERR("object=0x%llx creation failed in pid=0x%llx", 1281 _LLU(exofs_oi_objno(oi)), 1282 _LLU(oi->one_comp.obj.partition)); 1283 /*TODO: When FS is corrupted creation can fail, object already 1284 * exist. Get rid of this asynchronous creation, if exist 1285 * increment the obj counter and try the next object. Until we 1286 * succeed. All these dangling objects will be made into lost 1287 * files by chkfs.exofs 1288 */ 1289 } 1290 1291 set_obj_created(oi); 1292 1293 wake_up(&oi->i_wq); 1294} 1295 1296/* 1297 * Set up a new inode and create an object for it on the OSD 1298 */ 1299struct inode *exofs_new_inode(struct inode *dir, umode_t mode) 1300{ 1301 struct super_block *sb = dir->i_sb; 1302 struct exofs_sb_info *sbi = sb->s_fs_info; 1303 struct inode *inode; 1304 struct exofs_i_info *oi; 1305 struct ore_io_state *ios; 1306 int ret; 1307 1308 inode = new_inode(sb); 1309 if (!inode) 1310 return ERR_PTR(-ENOMEM); 1311 1312 oi = exofs_i(inode); 1313 __oi_init(oi); 1314 1315 set_obj_2bcreated(oi); 1316 1317 inode->i_mapping->backing_dev_info = sb->s_bdi; 1318 inode_init_owner(inode, dir, mode); 1319 inode->i_ino = sbi->s_nextid++; 1320 inode->i_blkbits = EXOFS_BLKSHIFT; 1321 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 1322 oi->i_commit_size = inode->i_size = 0; 1323 spin_lock(&sbi->s_next_gen_lock); 1324 inode->i_generation = sbi->s_next_generation++; 1325 spin_unlock(&sbi->s_next_gen_lock); 1326 insert_inode_hash(inode); 1327 1328 exofs_init_comps(&oi->oc, &oi->one_comp, sb->s_fs_info, 1329 exofs_oi_objno(oi)); 1330 exofs_sbi_write_stats(sbi); /* Make sure new sbi->s_nextid is on disk */ 1331 1332 mark_inode_dirty(inode); 1333 1334 ret = ore_get_io_state(&sbi->layout, &oi->oc, &ios); 1335 if (unlikely(ret)) { 1336 EXOFS_ERR("exofs_new_inode: ore_get_io_state failed\n"); 1337 return ERR_PTR(ret); 1338 } 1339 1340 ios->done = create_done; 1341 ios->private = inode; 1342 1343 ret = ore_create(ios); 1344 if (ret) { 1345 ore_put_io_state(ios); 1346 return ERR_PTR(ret); 1347 } 1348 atomic_inc(&sbi->s_curr_pending); 1349 1350 return inode; 1351} 1352 1353/* 1354 * struct to pass two arguments to update_inode's callback 1355 */ 1356struct updatei_args { 1357 struct exofs_sb_info *sbi; 1358 struct exofs_fcb fcb; 1359}; 1360 1361/* 1362 * Callback function from exofs_update_inode(). 1363 */ 1364static void updatei_done(struct ore_io_state *ios, void *p) 1365{ 1366 struct updatei_args *args = p; 1367 1368 ore_put_io_state(ios); 1369 1370 atomic_dec(&args->sbi->s_curr_pending); 1371 1372 kfree(args); 1373} 1374 1375/* 1376 * Write the inode to the OSD. Just fill up the struct, and set the attribute 1377 * synchronously or asynchronously depending on the do_sync flag. 1378 */ 1379static int exofs_update_inode(struct inode *inode, int do_sync) 1380{ 1381 struct exofs_i_info *oi = exofs_i(inode); 1382 struct super_block *sb = inode->i_sb; 1383 struct exofs_sb_info *sbi = sb->s_fs_info; 1384 struct ore_io_state *ios; 1385 struct osd_attr attr; 1386 struct exofs_fcb *fcb; 1387 struct updatei_args *args; 1388 int ret; 1389 1390 args = kzalloc(sizeof(*args), GFP_KERNEL); 1391 if (!args) { 1392 EXOFS_DBGMSG("Failed kzalloc of args\n"); 1393 return -ENOMEM; 1394 } 1395 1396 fcb = &args->fcb; 1397 1398 fcb->i_mode = cpu_to_le16(inode->i_mode); 1399 fcb->i_uid = cpu_to_le32(i_uid_read(inode)); 1400 fcb->i_gid = cpu_to_le32(i_gid_read(inode)); 1401 fcb->i_links_count = cpu_to_le16(inode->i_nlink); 1402 fcb->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec); 1403 fcb->i_atime = cpu_to_le32(inode->i_atime.tv_sec); 1404 fcb->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec); 1405 oi->i_commit_size = i_size_read(inode); 1406 fcb->i_size = cpu_to_le64(oi->i_commit_size); 1407 fcb->i_generation = cpu_to_le32(inode->i_generation); 1408 1409 if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) { 1410 if (old_valid_dev(inode->i_rdev)) { 1411 fcb->i_data[0] = 1412 cpu_to_le32(old_encode_dev(inode->i_rdev)); 1413 fcb->i_data[1] = 0; 1414 } else { 1415 fcb->i_data[0] = 0; 1416 fcb->i_data[1] = 1417 cpu_to_le32(new_encode_dev(inode->i_rdev)); 1418 fcb->i_data[2] = 0; 1419 } 1420 } else 1421 memcpy(fcb->i_data, oi->i_data, sizeof(fcb->i_data)); 1422 1423 ret = ore_get_io_state(&sbi->layout, &oi->oc, &ios); 1424 if (unlikely(ret)) { 1425 EXOFS_ERR("%s: ore_get_io_state failed.\n", __func__); 1426 goto free_args; 1427 } 1428 1429 attr = g_attr_inode_data; 1430 attr.val_ptr = fcb; 1431 ios->out_attr_len = 1; 1432 ios->out_attr = &attr; 1433 1434 wait_obj_created(oi); 1435 1436 if (!do_sync) { 1437 args->sbi = sbi; 1438 ios->done = updatei_done; 1439 ios->private = args; 1440 } 1441 1442 ret = ore_write(ios); 1443 if (!do_sync && !ret) { 1444 atomic_inc(&sbi->s_curr_pending); 1445 goto out; /* deallocation in updatei_done */ 1446 } 1447 1448 ore_put_io_state(ios); 1449free_args: 1450 kfree(args); 1451out: 1452 EXOFS_DBGMSG("(0x%lx) do_sync=%d ret=>%d\n", 1453 inode->i_ino, do_sync, ret); 1454 return ret; 1455} 1456 1457int exofs_write_inode(struct inode *inode, struct writeback_control *wbc) 1458{ 1459 /* FIXME: fix fsync and use wbc->sync_mode == WB_SYNC_ALL */ 1460 return exofs_update_inode(inode, 1); 1461} 1462 1463/* 1464 * Callback function from exofs_delete_inode() - don't have much cleaning up to 1465 * do. 1466 */ 1467static void delete_done(struct ore_io_state *ios, void *p) 1468{ 1469 struct exofs_sb_info *sbi = p; 1470 1471 ore_put_io_state(ios); 1472 1473 atomic_dec(&sbi->s_curr_pending); 1474} 1475 1476/* 1477 * Called when the refcount of an inode reaches zero. We remove the object 1478 * from the OSD here. We make sure the object was created before we try and 1479 * delete it. 1480 */ 1481void exofs_evict_inode(struct inode *inode) 1482{ 1483 struct exofs_i_info *oi = exofs_i(inode); 1484 struct super_block *sb = inode->i_sb; 1485 struct exofs_sb_info *sbi = sb->s_fs_info; 1486 struct ore_io_state *ios; 1487 int ret; 1488 1489 truncate_inode_pages_final(&inode->i_data); 1490 1491 /* TODO: should do better here */ 1492 if (inode->i_nlink || is_bad_inode(inode)) 1493 goto no_delete; 1494 1495 inode->i_size = 0; 1496 clear_inode(inode); 1497 1498 /* if we are deleting an obj that hasn't been created yet, wait. 1499 * This also makes sure that create_done cannot be called with an 1500 * already evicted inode. 1501 */ 1502 wait_obj_created(oi); 1503 /* ignore the error, attempt a remove anyway */ 1504 1505 /* Now Remove the OSD objects */ 1506 ret = ore_get_io_state(&sbi->layout, &oi->oc, &ios); 1507 if (unlikely(ret)) { 1508 EXOFS_ERR("%s: ore_get_io_state failed\n", __func__); 1509 return; 1510 } 1511 1512 ios->done = delete_done; 1513 ios->private = sbi; 1514 1515 ret = ore_remove(ios); 1516 if (ret) { 1517 EXOFS_ERR("%s: ore_remove failed\n", __func__); 1518 ore_put_io_state(ios); 1519 return; 1520 } 1521 atomic_inc(&sbi->s_curr_pending); 1522 1523 return; 1524 1525no_delete: 1526 clear_inode(inode); 1527}