at v4.12 1514 lines 37 kB view raw
1/* 2 * Copyright (C) 2005, 2006 3 * Avishay Traeger (avishay@gmail.com) 4 * Copyright (C) 2008, 2009 5 * Boaz Harrosh <ooo@electrozaur.com> 6 * 7 * Copyrights for code taken from ext2: 8 * Copyright (C) 1992, 1993, 1994, 1995 9 * Remy Card (card@masi.ibp.fr) 10 * Laboratoire MASI - Institut Blaise Pascal 11 * Universite Pierre et Marie Curie (Paris VI) 12 * from 13 * linux/fs/minix/inode.c 14 * Copyright (C) 1991, 1992 Linus Torvalds 15 * 16 * This file is part of exofs. 17 * 18 * exofs is free software; you can redistribute it and/or modify 19 * it under the terms of the GNU General Public License as published by 20 * the Free Software Foundation. Since it is based on ext2, and the only 21 * valid version of GPL for the Linux kernel is version 2, the only valid 22 * version of GPL for exofs is version 2. 23 * 24 * exofs is distributed in the hope that it will be useful, 25 * but WITHOUT ANY WARRANTY; without even the implied warranty of 26 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 27 * GNU General Public License for more details. 28 * 29 * You should have received a copy of the GNU General Public License 30 * along with exofs; if not, write to the Free Software 31 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 32 */ 33 34#include <linux/slab.h> 35 36#include "exofs.h" 37 38#define EXOFS_DBGMSG2(M...) do {} while (0) 39 40unsigned exofs_max_io_pages(struct ore_layout *layout, 41 unsigned expected_pages) 42{ 43 unsigned pages = min_t(unsigned, expected_pages, 44 layout->max_io_length / PAGE_SIZE); 45 46 return pages; 47} 48 49struct page_collect { 50 struct exofs_sb_info *sbi; 51 struct inode *inode; 52 unsigned expected_pages; 53 struct ore_io_state *ios; 54 55 struct page **pages; 56 unsigned alloc_pages; 57 unsigned nr_pages; 58 unsigned long length; 59 loff_t pg_first; /* keep 64bit also in 32-arches */ 60 bool read_4_write; /* This means two things: that the read is sync 61 * And the pages should not be unlocked. 62 */ 63 struct page *that_locked_page; 64}; 65 66static void _pcol_init(struct page_collect *pcol, unsigned expected_pages, 67 struct inode *inode) 68{ 69 struct exofs_sb_info *sbi = inode->i_sb->s_fs_info; 70 71 pcol->sbi = sbi; 72 pcol->inode = inode; 73 pcol->expected_pages = expected_pages; 74 75 pcol->ios = NULL; 76 pcol->pages = NULL; 77 pcol->alloc_pages = 0; 78 pcol->nr_pages = 0; 79 pcol->length = 0; 80 pcol->pg_first = -1; 81 pcol->read_4_write = false; 82 pcol->that_locked_page = NULL; 83} 84 85static void _pcol_reset(struct page_collect *pcol) 86{ 87 pcol->expected_pages -= min(pcol->nr_pages, pcol->expected_pages); 88 89 pcol->pages = NULL; 90 pcol->alloc_pages = 0; 91 pcol->nr_pages = 0; 92 pcol->length = 0; 93 pcol->pg_first = -1; 94 pcol->ios = NULL; 95 pcol->that_locked_page = NULL; 96 97 /* this is probably the end of the loop but in writes 98 * it might not end here. don't be left with nothing 99 */ 100 if (!pcol->expected_pages) 101 pcol->expected_pages = 102 exofs_max_io_pages(&pcol->sbi->layout, ~0); 103} 104 105static int pcol_try_alloc(struct page_collect *pcol) 106{ 107 unsigned pages; 108 109 /* TODO: easily support bio chaining */ 110 pages = exofs_max_io_pages(&pcol->sbi->layout, pcol->expected_pages); 111 112 for (; pages; pages >>= 1) { 113 pcol->pages = kmalloc(pages * sizeof(struct page *), 114 GFP_KERNEL); 115 if (likely(pcol->pages)) { 116 pcol->alloc_pages = pages; 117 return 0; 118 } 119 } 120 121 EXOFS_ERR("Failed to kmalloc expected_pages=%u\n", 122 pcol->expected_pages); 123 return -ENOMEM; 124} 125 126static void pcol_free(struct page_collect *pcol) 127{ 128 kfree(pcol->pages); 129 pcol->pages = NULL; 130 131 if (pcol->ios) { 132 ore_put_io_state(pcol->ios); 133 pcol->ios = NULL; 134 } 135} 136 137static int pcol_add_page(struct page_collect *pcol, struct page *page, 138 unsigned len) 139{ 140 if (unlikely(pcol->nr_pages >= pcol->alloc_pages)) 141 return -ENOMEM; 142 143 pcol->pages[pcol->nr_pages++] = page; 144 pcol->length += len; 145 return 0; 146} 147 148enum {PAGE_WAS_NOT_IN_IO = 17}; 149static int update_read_page(struct page *page, int ret) 150{ 151 switch (ret) { 152 case 0: 153 /* Everything is OK */ 154 SetPageUptodate(page); 155 if (PageError(page)) 156 ClearPageError(page); 157 break; 158 case -EFAULT: 159 /* In this case we were trying to read something that wasn't on 160 * disk yet - return a page full of zeroes. This should be OK, 161 * because the object should be empty (if there was a write 162 * before this read, the read would be waiting with the page 163 * locked */ 164 clear_highpage(page); 165 166 SetPageUptodate(page); 167 if (PageError(page)) 168 ClearPageError(page); 169 EXOFS_DBGMSG("recovered read error\n"); 170 /* fall through */ 171 case PAGE_WAS_NOT_IN_IO: 172 ret = 0; /* recovered error */ 173 break; 174 default: 175 SetPageError(page); 176 } 177 return ret; 178} 179 180static void update_write_page(struct page *page, int ret) 181{ 182 if (unlikely(ret == PAGE_WAS_NOT_IN_IO)) 183 return; /* don't pass start don't collect $200 */ 184 185 if (ret) { 186 mapping_set_error(page->mapping, ret); 187 SetPageError(page); 188 } 189 end_page_writeback(page); 190} 191 192/* Called at the end of reads, to optionally unlock pages and update their 193 * status. 194 */ 195static int __readpages_done(struct page_collect *pcol) 196{ 197 int i; 198 u64 good_bytes; 199 u64 length = 0; 200 int ret = ore_check_io(pcol->ios, NULL); 201 202 if (likely(!ret)) { 203 good_bytes = pcol->length; 204 ret = PAGE_WAS_NOT_IN_IO; 205 } else { 206 good_bytes = 0; 207 } 208 209 EXOFS_DBGMSG2("readpages_done(0x%lx) good_bytes=0x%llx" 210 " length=0x%lx nr_pages=%u\n", 211 pcol->inode->i_ino, _LLU(good_bytes), pcol->length, 212 pcol->nr_pages); 213 214 for (i = 0; i < pcol->nr_pages; i++) { 215 struct page *page = pcol->pages[i]; 216 struct inode *inode = page->mapping->host; 217 int page_stat; 218 219 if (inode != pcol->inode) 220 continue; /* osd might add more pages at end */ 221 222 if (likely(length < good_bytes)) 223 page_stat = 0; 224 else 225 page_stat = ret; 226 227 EXOFS_DBGMSG2(" readpages_done(0x%lx, 0x%lx) %s\n", 228 inode->i_ino, page->index, 229 page_stat ? "bad_bytes" : "good_bytes"); 230 231 ret = update_read_page(page, page_stat); 232 if (!pcol->read_4_write) 233 unlock_page(page); 234 length += PAGE_SIZE; 235 } 236 237 pcol_free(pcol); 238 EXOFS_DBGMSG2("readpages_done END\n"); 239 return ret; 240} 241 242/* callback of async reads */ 243static void readpages_done(struct ore_io_state *ios, void *p) 244{ 245 struct page_collect *pcol = p; 246 247 __readpages_done(pcol); 248 atomic_dec(&pcol->sbi->s_curr_pending); 249 kfree(pcol); 250} 251 252static void _unlock_pcol_pages(struct page_collect *pcol, int ret, int rw) 253{ 254 int i; 255 256 for (i = 0; i < pcol->nr_pages; i++) { 257 struct page *page = pcol->pages[i]; 258 259 if (rw == READ) 260 update_read_page(page, ret); 261 else 262 update_write_page(page, ret); 263 264 unlock_page(page); 265 } 266} 267 268static int _maybe_not_all_in_one_io(struct ore_io_state *ios, 269 struct page_collect *pcol_src, struct page_collect *pcol) 270{ 271 /* length was wrong or offset was not page aligned */ 272 BUG_ON(pcol_src->nr_pages < ios->nr_pages); 273 274 if (pcol_src->nr_pages > ios->nr_pages) { 275 struct page **src_page; 276 unsigned pages_less = pcol_src->nr_pages - ios->nr_pages; 277 unsigned long len_less = pcol_src->length - ios->length; 278 unsigned i; 279 int ret; 280 281 /* This IO was trimmed */ 282 pcol_src->nr_pages = ios->nr_pages; 283 pcol_src->length = ios->length; 284 285 /* Left over pages are passed to the next io */ 286 pcol->expected_pages += pages_less; 287 pcol->nr_pages = pages_less; 288 pcol->length = len_less; 289 src_page = pcol_src->pages + pcol_src->nr_pages; 290 pcol->pg_first = (*src_page)->index; 291 292 ret = pcol_try_alloc(pcol); 293 if (unlikely(ret)) 294 return ret; 295 296 for (i = 0; i < pages_less; ++i) 297 pcol->pages[i] = *src_page++; 298 299 EXOFS_DBGMSG("Length was adjusted nr_pages=0x%x " 300 "pages_less=0x%x expected_pages=0x%x " 301 "next_offset=0x%llx next_len=0x%lx\n", 302 pcol_src->nr_pages, pages_less, pcol->expected_pages, 303 pcol->pg_first * PAGE_SIZE, pcol->length); 304 } 305 return 0; 306} 307 308static int read_exec(struct page_collect *pcol) 309{ 310 struct exofs_i_info *oi = exofs_i(pcol->inode); 311 struct ore_io_state *ios; 312 struct page_collect *pcol_copy = NULL; 313 int ret; 314 315 if (!pcol->pages) 316 return 0; 317 318 if (!pcol->ios) { 319 int ret = ore_get_rw_state(&pcol->sbi->layout, &oi->oc, true, 320 pcol->pg_first << PAGE_SHIFT, 321 pcol->length, &pcol->ios); 322 323 if (ret) 324 return ret; 325 } 326 327 ios = pcol->ios; 328 ios->pages = pcol->pages; 329 330 if (pcol->read_4_write) { 331 ore_read(pcol->ios); 332 return __readpages_done(pcol); 333 } 334 335 pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL); 336 if (!pcol_copy) { 337 ret = -ENOMEM; 338 goto err; 339 } 340 341 *pcol_copy = *pcol; 342 ios->done = readpages_done; 343 ios->private = pcol_copy; 344 345 /* pages ownership was passed to pcol_copy */ 346 _pcol_reset(pcol); 347 348 ret = _maybe_not_all_in_one_io(ios, pcol_copy, pcol); 349 if (unlikely(ret)) 350 goto err; 351 352 EXOFS_DBGMSG2("read_exec(0x%lx) offset=0x%llx length=0x%llx\n", 353 pcol->inode->i_ino, _LLU(ios->offset), _LLU(ios->length)); 354 355 ret = ore_read(ios); 356 if (unlikely(ret)) 357 goto err; 358 359 atomic_inc(&pcol->sbi->s_curr_pending); 360 361 return 0; 362 363err: 364 if (!pcol_copy) /* Failed before ownership transfer */ 365 pcol_copy = pcol; 366 _unlock_pcol_pages(pcol_copy, ret, READ); 367 pcol_free(pcol_copy); 368 kfree(pcol_copy); 369 370 return ret; 371} 372 373/* readpage_strip is called either directly from readpage() or by the VFS from 374 * within read_cache_pages(), to add one more page to be read. It will try to 375 * collect as many contiguous pages as posible. If a discontinuity is 376 * encountered, or it runs out of resources, it will submit the previous segment 377 * and will start a new collection. Eventually caller must submit the last 378 * segment if present. 379 */ 380static int readpage_strip(void *data, struct page *page) 381{ 382 struct page_collect *pcol = data; 383 struct inode *inode = pcol->inode; 384 struct exofs_i_info *oi = exofs_i(inode); 385 loff_t i_size = i_size_read(inode); 386 pgoff_t end_index = i_size >> PAGE_SHIFT; 387 size_t len; 388 int ret; 389 390 BUG_ON(!PageLocked(page)); 391 392 /* FIXME: Just for debugging, will be removed */ 393 if (PageUptodate(page)) 394 EXOFS_ERR("PageUptodate(0x%lx, 0x%lx)\n", pcol->inode->i_ino, 395 page->index); 396 397 pcol->that_locked_page = page; 398 399 if (page->index < end_index) 400 len = PAGE_SIZE; 401 else if (page->index == end_index) 402 len = i_size & ~PAGE_MASK; 403 else 404 len = 0; 405 406 if (!len || !obj_created(oi)) { 407 /* this will be out of bounds, or doesn't exist yet. 408 * Current page is cleared and the request is split 409 */ 410 clear_highpage(page); 411 412 SetPageUptodate(page); 413 if (PageError(page)) 414 ClearPageError(page); 415 416 if (!pcol->read_4_write) 417 unlock_page(page); 418 EXOFS_DBGMSG("readpage_strip(0x%lx) empty page len=%zx " 419 "read_4_write=%d index=0x%lx end_index=0x%lx " 420 "splitting\n", inode->i_ino, len, 421 pcol->read_4_write, page->index, end_index); 422 423 return read_exec(pcol); 424 } 425 426try_again: 427 428 if (unlikely(pcol->pg_first == -1)) { 429 pcol->pg_first = page->index; 430 } else if (unlikely((pcol->pg_first + pcol->nr_pages) != 431 page->index)) { 432 /* Discontinuity detected, split the request */ 433 ret = read_exec(pcol); 434 if (unlikely(ret)) 435 goto fail; 436 goto try_again; 437 } 438 439 if (!pcol->pages) { 440 ret = pcol_try_alloc(pcol); 441 if (unlikely(ret)) 442 goto fail; 443 } 444 445 if (len != PAGE_SIZE) 446 zero_user(page, len, PAGE_SIZE - len); 447 448 EXOFS_DBGMSG2(" readpage_strip(0x%lx, 0x%lx) len=0x%zx\n", 449 inode->i_ino, page->index, len); 450 451 ret = pcol_add_page(pcol, page, len); 452 if (ret) { 453 EXOFS_DBGMSG2("Failed pcol_add_page pages[i]=%p " 454 "this_len=0x%zx nr_pages=%u length=0x%lx\n", 455 page, len, pcol->nr_pages, pcol->length); 456 457 /* split the request, and start again with current page */ 458 ret = read_exec(pcol); 459 if (unlikely(ret)) 460 goto fail; 461 462 goto try_again; 463 } 464 465 return 0; 466 467fail: 468 /* SetPageError(page); ??? */ 469 unlock_page(page); 470 return ret; 471} 472 473static int exofs_readpages(struct file *file, struct address_space *mapping, 474 struct list_head *pages, unsigned nr_pages) 475{ 476 struct page_collect pcol; 477 int ret; 478 479 _pcol_init(&pcol, nr_pages, mapping->host); 480 481 ret = read_cache_pages(mapping, pages, readpage_strip, &pcol); 482 if (ret) { 483 EXOFS_ERR("read_cache_pages => %d\n", ret); 484 return ret; 485 } 486 487 ret = read_exec(&pcol); 488 if (unlikely(ret)) 489 return ret; 490 491 return read_exec(&pcol); 492} 493 494static int _readpage(struct page *page, bool read_4_write) 495{ 496 struct page_collect pcol; 497 int ret; 498 499 _pcol_init(&pcol, 1, page->mapping->host); 500 501 pcol.read_4_write = read_4_write; 502 ret = readpage_strip(&pcol, page); 503 if (ret) { 504 EXOFS_ERR("_readpage => %d\n", ret); 505 return ret; 506 } 507 508 return read_exec(&pcol); 509} 510 511/* 512 * We don't need the file 513 */ 514static int exofs_readpage(struct file *file, struct page *page) 515{ 516 return _readpage(page, false); 517} 518 519/* Callback for osd_write. All writes are asynchronous */ 520static void writepages_done(struct ore_io_state *ios, void *p) 521{ 522 struct page_collect *pcol = p; 523 int i; 524 u64 good_bytes; 525 u64 length = 0; 526 int ret = ore_check_io(ios, NULL); 527 528 atomic_dec(&pcol->sbi->s_curr_pending); 529 530 if (likely(!ret)) { 531 good_bytes = pcol->length; 532 ret = PAGE_WAS_NOT_IN_IO; 533 } else { 534 good_bytes = 0; 535 } 536 537 EXOFS_DBGMSG2("writepages_done(0x%lx) good_bytes=0x%llx" 538 " length=0x%lx nr_pages=%u\n", 539 pcol->inode->i_ino, _LLU(good_bytes), pcol->length, 540 pcol->nr_pages); 541 542 for (i = 0; i < pcol->nr_pages; i++) { 543 struct page *page = pcol->pages[i]; 544 struct inode *inode = page->mapping->host; 545 int page_stat; 546 547 if (inode != pcol->inode) 548 continue; /* osd might add more pages to a bio */ 549 550 if (likely(length < good_bytes)) 551 page_stat = 0; 552 else 553 page_stat = ret; 554 555 update_write_page(page, page_stat); 556 unlock_page(page); 557 EXOFS_DBGMSG2(" writepages_done(0x%lx, 0x%lx) status=%d\n", 558 inode->i_ino, page->index, page_stat); 559 560 length += PAGE_SIZE; 561 } 562 563 pcol_free(pcol); 564 kfree(pcol); 565 EXOFS_DBGMSG2("writepages_done END\n"); 566} 567 568static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate) 569{ 570 struct page_collect *pcol = priv; 571 pgoff_t index = offset / PAGE_SIZE; 572 573 if (!pcol->that_locked_page || 574 (pcol->that_locked_page->index != index)) { 575 struct page *page; 576 loff_t i_size = i_size_read(pcol->inode); 577 578 if (offset >= i_size) { 579 *uptodate = true; 580 EXOFS_DBGMSG2("offset >= i_size index=0x%lx\n", index); 581 return ZERO_PAGE(0); 582 } 583 584 page = find_get_page(pcol->inode->i_mapping, index); 585 if (!page) { 586 page = find_or_create_page(pcol->inode->i_mapping, 587 index, GFP_NOFS); 588 if (unlikely(!page)) { 589 EXOFS_DBGMSG("grab_cache_page Failed " 590 "index=0x%llx\n", _LLU(index)); 591 return NULL; 592 } 593 unlock_page(page); 594 } 595 *uptodate = PageUptodate(page); 596 EXOFS_DBGMSG2("index=0x%lx uptodate=%d\n", index, *uptodate); 597 return page; 598 } else { 599 EXOFS_DBGMSG2("YES that_locked_page index=0x%lx\n", 600 pcol->that_locked_page->index); 601 *uptodate = true; 602 return pcol->that_locked_page; 603 } 604} 605 606static void __r4w_put_page(void *priv, struct page *page) 607{ 608 struct page_collect *pcol = priv; 609 610 if ((pcol->that_locked_page != page) && (ZERO_PAGE(0) != page)) { 611 EXOFS_DBGMSG2("index=0x%lx\n", page->index); 612 put_page(page); 613 return; 614 } 615 EXOFS_DBGMSG2("that_locked_page index=0x%lx\n", 616 ZERO_PAGE(0) == page ? -1 : page->index); 617} 618 619static const struct _ore_r4w_op _r4w_op = { 620 .get_page = &__r4w_get_page, 621 .put_page = &__r4w_put_page, 622}; 623 624static int write_exec(struct page_collect *pcol) 625{ 626 struct exofs_i_info *oi = exofs_i(pcol->inode); 627 struct ore_io_state *ios; 628 struct page_collect *pcol_copy = NULL; 629 int ret; 630 631 if (!pcol->pages) 632 return 0; 633 634 BUG_ON(pcol->ios); 635 ret = ore_get_rw_state(&pcol->sbi->layout, &oi->oc, false, 636 pcol->pg_first << PAGE_SHIFT, 637 pcol->length, &pcol->ios); 638 if (unlikely(ret)) 639 goto err; 640 641 pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL); 642 if (!pcol_copy) { 643 EXOFS_ERR("write_exec: Failed to kmalloc(pcol)\n"); 644 ret = -ENOMEM; 645 goto err; 646 } 647 648 *pcol_copy = *pcol; 649 650 ios = pcol->ios; 651 ios->pages = pcol_copy->pages; 652 ios->done = writepages_done; 653 ios->r4w = &_r4w_op; 654 ios->private = pcol_copy; 655 656 /* pages ownership was passed to pcol_copy */ 657 _pcol_reset(pcol); 658 659 ret = _maybe_not_all_in_one_io(ios, pcol_copy, pcol); 660 if (unlikely(ret)) 661 goto err; 662 663 EXOFS_DBGMSG2("write_exec(0x%lx) offset=0x%llx length=0x%llx\n", 664 pcol->inode->i_ino, _LLU(ios->offset), _LLU(ios->length)); 665 666 ret = ore_write(ios); 667 if (unlikely(ret)) { 668 EXOFS_ERR("write_exec: ore_write() Failed\n"); 669 goto err; 670 } 671 672 atomic_inc(&pcol->sbi->s_curr_pending); 673 return 0; 674 675err: 676 if (!pcol_copy) /* Failed before ownership transfer */ 677 pcol_copy = pcol; 678 _unlock_pcol_pages(pcol_copy, ret, WRITE); 679 pcol_free(pcol_copy); 680 kfree(pcol_copy); 681 682 return ret; 683} 684 685/* writepage_strip is called either directly from writepage() or by the VFS from 686 * within write_cache_pages(), to add one more page to be written to storage. 687 * It will try to collect as many contiguous pages as possible. If a 688 * discontinuity is encountered or it runs out of resources it will submit the 689 * previous segment and will start a new collection. 690 * Eventually caller must submit the last segment if present. 691 */ 692static int writepage_strip(struct page *page, 693 struct writeback_control *wbc_unused, void *data) 694{ 695 struct page_collect *pcol = data; 696 struct inode *inode = pcol->inode; 697 struct exofs_i_info *oi = exofs_i(inode); 698 loff_t i_size = i_size_read(inode); 699 pgoff_t end_index = i_size >> PAGE_SHIFT; 700 size_t len; 701 int ret; 702 703 BUG_ON(!PageLocked(page)); 704 705 ret = wait_obj_created(oi); 706 if (unlikely(ret)) 707 goto fail; 708 709 if (page->index < end_index) 710 /* in this case, the page is within the limits of the file */ 711 len = PAGE_SIZE; 712 else { 713 len = i_size & ~PAGE_MASK; 714 715 if (page->index > end_index || !len) { 716 /* in this case, the page is outside the limits 717 * (truncate in progress) 718 */ 719 ret = write_exec(pcol); 720 if (unlikely(ret)) 721 goto fail; 722 if (PageError(page)) 723 ClearPageError(page); 724 unlock_page(page); 725 EXOFS_DBGMSG("writepage_strip(0x%lx, 0x%lx) " 726 "outside the limits\n", 727 inode->i_ino, page->index); 728 return 0; 729 } 730 } 731 732try_again: 733 734 if (unlikely(pcol->pg_first == -1)) { 735 pcol->pg_first = page->index; 736 } else if (unlikely((pcol->pg_first + pcol->nr_pages) != 737 page->index)) { 738 /* Discontinuity detected, split the request */ 739 ret = write_exec(pcol); 740 if (unlikely(ret)) 741 goto fail; 742 743 EXOFS_DBGMSG("writepage_strip(0x%lx, 0x%lx) Discontinuity\n", 744 inode->i_ino, page->index); 745 goto try_again; 746 } 747 748 if (!pcol->pages) { 749 ret = pcol_try_alloc(pcol); 750 if (unlikely(ret)) 751 goto fail; 752 } 753 754 EXOFS_DBGMSG2(" writepage_strip(0x%lx, 0x%lx) len=0x%zx\n", 755 inode->i_ino, page->index, len); 756 757 ret = pcol_add_page(pcol, page, len); 758 if (unlikely(ret)) { 759 EXOFS_DBGMSG2("Failed pcol_add_page " 760 "nr_pages=%u total_length=0x%lx\n", 761 pcol->nr_pages, pcol->length); 762 763 /* split the request, next loop will start again */ 764 ret = write_exec(pcol); 765 if (unlikely(ret)) { 766 EXOFS_DBGMSG("write_exec failed => %d", ret); 767 goto fail; 768 } 769 770 goto try_again; 771 } 772 773 BUG_ON(PageWriteback(page)); 774 set_page_writeback(page); 775 776 return 0; 777 778fail: 779 EXOFS_DBGMSG("Error: writepage_strip(0x%lx, 0x%lx)=>%d\n", 780 inode->i_ino, page->index, ret); 781 mapping_set_error(page->mapping, -EIO); 782 unlock_page(page); 783 return ret; 784} 785 786static int exofs_writepages(struct address_space *mapping, 787 struct writeback_control *wbc) 788{ 789 struct page_collect pcol; 790 long start, end, expected_pages; 791 int ret; 792 793 start = wbc->range_start >> PAGE_SHIFT; 794 end = (wbc->range_end == LLONG_MAX) ? 795 start + mapping->nrpages : 796 wbc->range_end >> PAGE_SHIFT; 797 798 if (start || end) 799 expected_pages = end - start + 1; 800 else 801 expected_pages = mapping->nrpages; 802 803 if (expected_pages < 32L) 804 expected_pages = 32L; 805 806 EXOFS_DBGMSG2("inode(0x%lx) wbc->start=0x%llx wbc->end=0x%llx " 807 "nrpages=%lu start=0x%lx end=0x%lx expected_pages=%ld\n", 808 mapping->host->i_ino, wbc->range_start, wbc->range_end, 809 mapping->nrpages, start, end, expected_pages); 810 811 _pcol_init(&pcol, expected_pages, mapping->host); 812 813 ret = write_cache_pages(mapping, wbc, writepage_strip, &pcol); 814 if (unlikely(ret)) { 815 EXOFS_ERR("write_cache_pages => %d\n", ret); 816 return ret; 817 } 818 819 ret = write_exec(&pcol); 820 if (unlikely(ret)) 821 return ret; 822 823 if (wbc->sync_mode == WB_SYNC_ALL) { 824 return write_exec(&pcol); /* pump the last reminder */ 825 } else if (pcol.nr_pages) { 826 /* not SYNC let the reminder join the next writeout */ 827 unsigned i; 828 829 for (i = 0; i < pcol.nr_pages; i++) { 830 struct page *page = pcol.pages[i]; 831 832 end_page_writeback(page); 833 set_page_dirty(page); 834 unlock_page(page); 835 } 836 } 837 return 0; 838} 839 840/* 841static int exofs_writepage(struct page *page, struct writeback_control *wbc) 842{ 843 struct page_collect pcol; 844 int ret; 845 846 _pcol_init(&pcol, 1, page->mapping->host); 847 848 ret = writepage_strip(page, NULL, &pcol); 849 if (ret) { 850 EXOFS_ERR("exofs_writepage => %d\n", ret); 851 return ret; 852 } 853 854 return write_exec(&pcol); 855} 856*/ 857/* i_mutex held using inode->i_size directly */ 858static void _write_failed(struct inode *inode, loff_t to) 859{ 860 if (to > inode->i_size) 861 truncate_pagecache(inode, inode->i_size); 862} 863 864int exofs_write_begin(struct file *file, struct address_space *mapping, 865 loff_t pos, unsigned len, unsigned flags, 866 struct page **pagep, void **fsdata) 867{ 868 int ret = 0; 869 struct page *page; 870 871 page = *pagep; 872 if (page == NULL) { 873 page = grab_cache_page_write_begin(mapping, pos >> PAGE_SHIFT, 874 flags); 875 if (!page) { 876 EXOFS_DBGMSG("grab_cache_page_write_begin failed\n"); 877 return -ENOMEM; 878 } 879 *pagep = page; 880 } 881 882 /* read modify write */ 883 if (!PageUptodate(page) && (len != PAGE_SIZE)) { 884 loff_t i_size = i_size_read(mapping->host); 885 pgoff_t end_index = i_size >> PAGE_SHIFT; 886 887 if (page->index > end_index) { 888 clear_highpage(page); 889 SetPageUptodate(page); 890 } else { 891 ret = _readpage(page, true); 892 if (ret) { 893 unlock_page(page); 894 EXOFS_DBGMSG("__readpage failed\n"); 895 } 896 } 897 } 898 return ret; 899} 900 901static int exofs_write_begin_export(struct file *file, 902 struct address_space *mapping, 903 loff_t pos, unsigned len, unsigned flags, 904 struct page **pagep, void **fsdata) 905{ 906 *pagep = NULL; 907 908 return exofs_write_begin(file, mapping, pos, len, flags, pagep, 909 fsdata); 910} 911 912static int exofs_write_end(struct file *file, struct address_space *mapping, 913 loff_t pos, unsigned len, unsigned copied, 914 struct page *page, void *fsdata) 915{ 916 struct inode *inode = mapping->host; 917 loff_t last_pos = pos + copied; 918 919 if (!PageUptodate(page)) { 920 if (copied < len) { 921 _write_failed(inode, pos + len); 922 copied = 0; 923 goto out; 924 } 925 SetPageUptodate(page); 926 } 927 if (last_pos > inode->i_size) { 928 i_size_write(inode, last_pos); 929 mark_inode_dirty(inode); 930 } 931 set_page_dirty(page); 932out: 933 unlock_page(page); 934 put_page(page); 935 return copied; 936} 937 938static int exofs_releasepage(struct page *page, gfp_t gfp) 939{ 940 EXOFS_DBGMSG("page 0x%lx\n", page->index); 941 WARN_ON(1); 942 return 0; 943} 944 945static void exofs_invalidatepage(struct page *page, unsigned int offset, 946 unsigned int length) 947{ 948 EXOFS_DBGMSG("page 0x%lx offset 0x%x length 0x%x\n", 949 page->index, offset, length); 950 WARN_ON(1); 951} 952 953 954 /* TODO: Should be easy enough to do proprly */ 955static ssize_t exofs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) 956{ 957 return 0; 958} 959 960const struct address_space_operations exofs_aops = { 961 .readpage = exofs_readpage, 962 .readpages = exofs_readpages, 963 .writepage = NULL, 964 .writepages = exofs_writepages, 965 .write_begin = exofs_write_begin_export, 966 .write_end = exofs_write_end, 967 .releasepage = exofs_releasepage, 968 .set_page_dirty = __set_page_dirty_nobuffers, 969 .invalidatepage = exofs_invalidatepage, 970 971 /* Not implemented Yet */ 972 .bmap = NULL, /* TODO: use osd's OSD_ACT_READ_MAP */ 973 .direct_IO = exofs_direct_IO, 974 975 /* With these NULL has special meaning or default is not exported */ 976 .migratepage = NULL, 977 .launder_page = NULL, 978 .is_partially_uptodate = NULL, 979 .error_remove_page = NULL, 980}; 981 982/****************************************************************************** 983 * INODE OPERATIONS 984 *****************************************************************************/ 985 986/* 987 * Test whether an inode is a fast symlink. 988 */ 989static inline int exofs_inode_is_fast_symlink(struct inode *inode) 990{ 991 struct exofs_i_info *oi = exofs_i(inode); 992 993 return S_ISLNK(inode->i_mode) && (oi->i_data[0] != 0); 994} 995 996static int _do_truncate(struct inode *inode, loff_t newsize) 997{ 998 struct exofs_i_info *oi = exofs_i(inode); 999 struct exofs_sb_info *sbi = inode->i_sb->s_fs_info; 1000 int ret; 1001 1002 inode->i_mtime = inode->i_ctime = current_time(inode); 1003 1004 ret = ore_truncate(&sbi->layout, &oi->oc, (u64)newsize); 1005 if (likely(!ret)) 1006 truncate_setsize(inode, newsize); 1007 1008 EXOFS_DBGMSG2("(0x%lx) size=0x%llx ret=>%d\n", 1009 inode->i_ino, newsize, ret); 1010 return ret; 1011} 1012 1013/* 1014 * Set inode attributes - update size attribute on OSD if needed, 1015 * otherwise just call generic functions. 1016 */ 1017int exofs_setattr(struct dentry *dentry, struct iattr *iattr) 1018{ 1019 struct inode *inode = d_inode(dentry); 1020 int error; 1021 1022 /* if we are about to modify an object, and it hasn't been 1023 * created yet, wait 1024 */ 1025 error = wait_obj_created(exofs_i(inode)); 1026 if (unlikely(error)) 1027 return error; 1028 1029 error = setattr_prepare(dentry, iattr); 1030 if (unlikely(error)) 1031 return error; 1032 1033 if ((iattr->ia_valid & ATTR_SIZE) && 1034 iattr->ia_size != i_size_read(inode)) { 1035 error = _do_truncate(inode, iattr->ia_size); 1036 if (unlikely(error)) 1037 return error; 1038 } 1039 1040 setattr_copy(inode, iattr); 1041 mark_inode_dirty(inode); 1042 return 0; 1043} 1044 1045static const struct osd_attr g_attr_inode_file_layout = ATTR_DEF( 1046 EXOFS_APAGE_FS_DATA, 1047 EXOFS_ATTR_INODE_FILE_LAYOUT, 1048 0); 1049static const struct osd_attr g_attr_inode_dir_layout = ATTR_DEF( 1050 EXOFS_APAGE_FS_DATA, 1051 EXOFS_ATTR_INODE_DIR_LAYOUT, 1052 0); 1053 1054/* 1055 * Read the Linux inode info from the OSD, and return it as is. In exofs the 1056 * inode info is in an application specific page/attribute of the osd-object. 1057 */ 1058static int exofs_get_inode(struct super_block *sb, struct exofs_i_info *oi, 1059 struct exofs_fcb *inode) 1060{ 1061 struct exofs_sb_info *sbi = sb->s_fs_info; 1062 struct osd_attr attrs[] = { 1063 [0] = g_attr_inode_data, 1064 [1] = g_attr_inode_file_layout, 1065 [2] = g_attr_inode_dir_layout, 1066 }; 1067 struct ore_io_state *ios; 1068 struct exofs_on_disk_inode_layout *layout; 1069 int ret; 1070 1071 ret = ore_get_io_state(&sbi->layout, &oi->oc, &ios); 1072 if (unlikely(ret)) { 1073 EXOFS_ERR("%s: ore_get_io_state failed.\n", __func__); 1074 return ret; 1075 } 1076 1077 attrs[1].len = exofs_on_disk_inode_layout_size(sbi->oc.numdevs); 1078 attrs[2].len = exofs_on_disk_inode_layout_size(sbi->oc.numdevs); 1079 1080 ios->in_attr = attrs; 1081 ios->in_attr_len = ARRAY_SIZE(attrs); 1082 1083 ret = ore_read(ios); 1084 if (unlikely(ret)) { 1085 EXOFS_ERR("object(0x%llx) corrupted, return empty file=>%d\n", 1086 _LLU(oi->one_comp.obj.id), ret); 1087 memset(inode, 0, sizeof(*inode)); 1088 inode->i_mode = 0040000 | (0777 & ~022); 1089 /* If object is lost on target we might as well enable it's 1090 * delete. 1091 */ 1092 ret = 0; 1093 goto out; 1094 } 1095 1096 ret = extract_attr_from_ios(ios, &attrs[0]); 1097 if (ret) { 1098 EXOFS_ERR("%s: extract_attr 0 of inode failed\n", __func__); 1099 goto out; 1100 } 1101 WARN_ON(attrs[0].len != EXOFS_INO_ATTR_SIZE); 1102 memcpy(inode, attrs[0].val_ptr, EXOFS_INO_ATTR_SIZE); 1103 1104 ret = extract_attr_from_ios(ios, &attrs[1]); 1105 if (ret) { 1106 EXOFS_ERR("%s: extract_attr 1 of inode failed\n", __func__); 1107 goto out; 1108 } 1109 if (attrs[1].len) { 1110 layout = attrs[1].val_ptr; 1111 if (layout->gen_func != cpu_to_le16(LAYOUT_MOVING_WINDOW)) { 1112 EXOFS_ERR("%s: unsupported files layout %d\n", 1113 __func__, layout->gen_func); 1114 ret = -ENOTSUPP; 1115 goto out; 1116 } 1117 } 1118 1119 ret = extract_attr_from_ios(ios, &attrs[2]); 1120 if (ret) { 1121 EXOFS_ERR("%s: extract_attr 2 of inode failed\n", __func__); 1122 goto out; 1123 } 1124 if (attrs[2].len) { 1125 layout = attrs[2].val_ptr; 1126 if (layout->gen_func != cpu_to_le16(LAYOUT_MOVING_WINDOW)) { 1127 EXOFS_ERR("%s: unsupported meta-data layout %d\n", 1128 __func__, layout->gen_func); 1129 ret = -ENOTSUPP; 1130 goto out; 1131 } 1132 } 1133 1134out: 1135 ore_put_io_state(ios); 1136 return ret; 1137} 1138 1139static void __oi_init(struct exofs_i_info *oi) 1140{ 1141 init_waitqueue_head(&oi->i_wq); 1142 oi->i_flags = 0; 1143} 1144/* 1145 * Fill in an inode read from the OSD and set it up for use 1146 */ 1147struct inode *exofs_iget(struct super_block *sb, unsigned long ino) 1148{ 1149 struct exofs_i_info *oi; 1150 struct exofs_fcb fcb; 1151 struct inode *inode; 1152 int ret; 1153 1154 inode = iget_locked(sb, ino); 1155 if (!inode) 1156 return ERR_PTR(-ENOMEM); 1157 if (!(inode->i_state & I_NEW)) 1158 return inode; 1159 oi = exofs_i(inode); 1160 __oi_init(oi); 1161 exofs_init_comps(&oi->oc, &oi->one_comp, sb->s_fs_info, 1162 exofs_oi_objno(oi)); 1163 1164 /* read the inode from the osd */ 1165 ret = exofs_get_inode(sb, oi, &fcb); 1166 if (ret) 1167 goto bad_inode; 1168 1169 set_obj_created(oi); 1170 1171 /* copy stuff from on-disk struct to in-memory struct */ 1172 inode->i_mode = le16_to_cpu(fcb.i_mode); 1173 i_uid_write(inode, le32_to_cpu(fcb.i_uid)); 1174 i_gid_write(inode, le32_to_cpu(fcb.i_gid)); 1175 set_nlink(inode, le16_to_cpu(fcb.i_links_count)); 1176 inode->i_ctime.tv_sec = (signed)le32_to_cpu(fcb.i_ctime); 1177 inode->i_atime.tv_sec = (signed)le32_to_cpu(fcb.i_atime); 1178 inode->i_mtime.tv_sec = (signed)le32_to_cpu(fcb.i_mtime); 1179 inode->i_ctime.tv_nsec = 1180 inode->i_atime.tv_nsec = inode->i_mtime.tv_nsec = 0; 1181 oi->i_commit_size = le64_to_cpu(fcb.i_size); 1182 i_size_write(inode, oi->i_commit_size); 1183 inode->i_blkbits = EXOFS_BLKSHIFT; 1184 inode->i_generation = le32_to_cpu(fcb.i_generation); 1185 1186 oi->i_dir_start_lookup = 0; 1187 1188 if ((inode->i_nlink == 0) && (inode->i_mode == 0)) { 1189 ret = -ESTALE; 1190 goto bad_inode; 1191 } 1192 1193 if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) { 1194 if (fcb.i_data[0]) 1195 inode->i_rdev = 1196 old_decode_dev(le32_to_cpu(fcb.i_data[0])); 1197 else 1198 inode->i_rdev = 1199 new_decode_dev(le32_to_cpu(fcb.i_data[1])); 1200 } else { 1201 memcpy(oi->i_data, fcb.i_data, sizeof(fcb.i_data)); 1202 } 1203 1204 if (S_ISREG(inode->i_mode)) { 1205 inode->i_op = &exofs_file_inode_operations; 1206 inode->i_fop = &exofs_file_operations; 1207 inode->i_mapping->a_ops = &exofs_aops; 1208 } else if (S_ISDIR(inode->i_mode)) { 1209 inode->i_op = &exofs_dir_inode_operations; 1210 inode->i_fop = &exofs_dir_operations; 1211 inode->i_mapping->a_ops = &exofs_aops; 1212 } else if (S_ISLNK(inode->i_mode)) { 1213 if (exofs_inode_is_fast_symlink(inode)) { 1214 inode->i_op = &simple_symlink_inode_operations; 1215 inode->i_link = (char *)oi->i_data; 1216 } else { 1217 inode->i_op = &page_symlink_inode_operations; 1218 inode_nohighmem(inode); 1219 inode->i_mapping->a_ops = &exofs_aops; 1220 } 1221 } else { 1222 inode->i_op = &exofs_special_inode_operations; 1223 if (fcb.i_data[0]) 1224 init_special_inode(inode, inode->i_mode, 1225 old_decode_dev(le32_to_cpu(fcb.i_data[0]))); 1226 else 1227 init_special_inode(inode, inode->i_mode, 1228 new_decode_dev(le32_to_cpu(fcb.i_data[1]))); 1229 } 1230 1231 unlock_new_inode(inode); 1232 return inode; 1233 1234bad_inode: 1235 iget_failed(inode); 1236 return ERR_PTR(ret); 1237} 1238 1239int __exofs_wait_obj_created(struct exofs_i_info *oi) 1240{ 1241 if (!obj_created(oi)) { 1242 EXOFS_DBGMSG("!obj_created\n"); 1243 BUG_ON(!obj_2bcreated(oi)); 1244 wait_event(oi->i_wq, obj_created(oi)); 1245 EXOFS_DBGMSG("wait_event done\n"); 1246 } 1247 return unlikely(is_bad_inode(&oi->vfs_inode)) ? -EIO : 0; 1248} 1249 1250/* 1251 * Callback function from exofs_new_inode(). The important thing is that we 1252 * set the obj_created flag so that other methods know that the object exists on 1253 * the OSD. 1254 */ 1255static void create_done(struct ore_io_state *ios, void *p) 1256{ 1257 struct inode *inode = p; 1258 struct exofs_i_info *oi = exofs_i(inode); 1259 struct exofs_sb_info *sbi = inode->i_sb->s_fs_info; 1260 int ret; 1261 1262 ret = ore_check_io(ios, NULL); 1263 ore_put_io_state(ios); 1264 1265 atomic_dec(&sbi->s_curr_pending); 1266 1267 if (unlikely(ret)) { 1268 EXOFS_ERR("object=0x%llx creation failed in pid=0x%llx", 1269 _LLU(exofs_oi_objno(oi)), 1270 _LLU(oi->one_comp.obj.partition)); 1271 /*TODO: When FS is corrupted creation can fail, object already 1272 * exist. Get rid of this asynchronous creation, if exist 1273 * increment the obj counter and try the next object. Until we 1274 * succeed. All these dangling objects will be made into lost 1275 * files by chkfs.exofs 1276 */ 1277 } 1278 1279 set_obj_created(oi); 1280 1281 wake_up(&oi->i_wq); 1282} 1283 1284/* 1285 * Set up a new inode and create an object for it on the OSD 1286 */ 1287struct inode *exofs_new_inode(struct inode *dir, umode_t mode) 1288{ 1289 struct super_block *sb = dir->i_sb; 1290 struct exofs_sb_info *sbi = sb->s_fs_info; 1291 struct inode *inode; 1292 struct exofs_i_info *oi; 1293 struct ore_io_state *ios; 1294 int ret; 1295 1296 inode = new_inode(sb); 1297 if (!inode) 1298 return ERR_PTR(-ENOMEM); 1299 1300 oi = exofs_i(inode); 1301 __oi_init(oi); 1302 1303 set_obj_2bcreated(oi); 1304 1305 inode_init_owner(inode, dir, mode); 1306 inode->i_ino = sbi->s_nextid++; 1307 inode->i_blkbits = EXOFS_BLKSHIFT; 1308 inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode); 1309 oi->i_commit_size = inode->i_size = 0; 1310 spin_lock(&sbi->s_next_gen_lock); 1311 inode->i_generation = sbi->s_next_generation++; 1312 spin_unlock(&sbi->s_next_gen_lock); 1313 insert_inode_hash(inode); 1314 1315 exofs_init_comps(&oi->oc, &oi->one_comp, sb->s_fs_info, 1316 exofs_oi_objno(oi)); 1317 exofs_sbi_write_stats(sbi); /* Make sure new sbi->s_nextid is on disk */ 1318 1319 mark_inode_dirty(inode); 1320 1321 ret = ore_get_io_state(&sbi->layout, &oi->oc, &ios); 1322 if (unlikely(ret)) { 1323 EXOFS_ERR("exofs_new_inode: ore_get_io_state failed\n"); 1324 return ERR_PTR(ret); 1325 } 1326 1327 ios->done = create_done; 1328 ios->private = inode; 1329 1330 ret = ore_create(ios); 1331 if (ret) { 1332 ore_put_io_state(ios); 1333 return ERR_PTR(ret); 1334 } 1335 atomic_inc(&sbi->s_curr_pending); 1336 1337 return inode; 1338} 1339 1340/* 1341 * struct to pass two arguments to update_inode's callback 1342 */ 1343struct updatei_args { 1344 struct exofs_sb_info *sbi; 1345 struct exofs_fcb fcb; 1346}; 1347 1348/* 1349 * Callback function from exofs_update_inode(). 1350 */ 1351static void updatei_done(struct ore_io_state *ios, void *p) 1352{ 1353 struct updatei_args *args = p; 1354 1355 ore_put_io_state(ios); 1356 1357 atomic_dec(&args->sbi->s_curr_pending); 1358 1359 kfree(args); 1360} 1361 1362/* 1363 * Write the inode to the OSD. Just fill up the struct, and set the attribute 1364 * synchronously or asynchronously depending on the do_sync flag. 1365 */ 1366static int exofs_update_inode(struct inode *inode, int do_sync) 1367{ 1368 struct exofs_i_info *oi = exofs_i(inode); 1369 struct super_block *sb = inode->i_sb; 1370 struct exofs_sb_info *sbi = sb->s_fs_info; 1371 struct ore_io_state *ios; 1372 struct osd_attr attr; 1373 struct exofs_fcb *fcb; 1374 struct updatei_args *args; 1375 int ret; 1376 1377 args = kzalloc(sizeof(*args), GFP_KERNEL); 1378 if (!args) { 1379 EXOFS_DBGMSG("Failed kzalloc of args\n"); 1380 return -ENOMEM; 1381 } 1382 1383 fcb = &args->fcb; 1384 1385 fcb->i_mode = cpu_to_le16(inode->i_mode); 1386 fcb->i_uid = cpu_to_le32(i_uid_read(inode)); 1387 fcb->i_gid = cpu_to_le32(i_gid_read(inode)); 1388 fcb->i_links_count = cpu_to_le16(inode->i_nlink); 1389 fcb->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec); 1390 fcb->i_atime = cpu_to_le32(inode->i_atime.tv_sec); 1391 fcb->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec); 1392 oi->i_commit_size = i_size_read(inode); 1393 fcb->i_size = cpu_to_le64(oi->i_commit_size); 1394 fcb->i_generation = cpu_to_le32(inode->i_generation); 1395 1396 if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) { 1397 if (old_valid_dev(inode->i_rdev)) { 1398 fcb->i_data[0] = 1399 cpu_to_le32(old_encode_dev(inode->i_rdev)); 1400 fcb->i_data[1] = 0; 1401 } else { 1402 fcb->i_data[0] = 0; 1403 fcb->i_data[1] = 1404 cpu_to_le32(new_encode_dev(inode->i_rdev)); 1405 fcb->i_data[2] = 0; 1406 } 1407 } else 1408 memcpy(fcb->i_data, oi->i_data, sizeof(fcb->i_data)); 1409 1410 ret = ore_get_io_state(&sbi->layout, &oi->oc, &ios); 1411 if (unlikely(ret)) { 1412 EXOFS_ERR("%s: ore_get_io_state failed.\n", __func__); 1413 goto free_args; 1414 } 1415 1416 attr = g_attr_inode_data; 1417 attr.val_ptr = fcb; 1418 ios->out_attr_len = 1; 1419 ios->out_attr = &attr; 1420 1421 wait_obj_created(oi); 1422 1423 if (!do_sync) { 1424 args->sbi = sbi; 1425 ios->done = updatei_done; 1426 ios->private = args; 1427 } 1428 1429 ret = ore_write(ios); 1430 if (!do_sync && !ret) { 1431 atomic_inc(&sbi->s_curr_pending); 1432 goto out; /* deallocation in updatei_done */ 1433 } 1434 1435 ore_put_io_state(ios); 1436free_args: 1437 kfree(args); 1438out: 1439 EXOFS_DBGMSG("(0x%lx) do_sync=%d ret=>%d\n", 1440 inode->i_ino, do_sync, ret); 1441 return ret; 1442} 1443 1444int exofs_write_inode(struct inode *inode, struct writeback_control *wbc) 1445{ 1446 /* FIXME: fix fsync and use wbc->sync_mode == WB_SYNC_ALL */ 1447 return exofs_update_inode(inode, 1); 1448} 1449 1450/* 1451 * Callback function from exofs_delete_inode() - don't have much cleaning up to 1452 * do. 1453 */ 1454static void delete_done(struct ore_io_state *ios, void *p) 1455{ 1456 struct exofs_sb_info *sbi = p; 1457 1458 ore_put_io_state(ios); 1459 1460 atomic_dec(&sbi->s_curr_pending); 1461} 1462 1463/* 1464 * Called when the refcount of an inode reaches zero. We remove the object 1465 * from the OSD here. We make sure the object was created before we try and 1466 * delete it. 1467 */ 1468void exofs_evict_inode(struct inode *inode) 1469{ 1470 struct exofs_i_info *oi = exofs_i(inode); 1471 struct super_block *sb = inode->i_sb; 1472 struct exofs_sb_info *sbi = sb->s_fs_info; 1473 struct ore_io_state *ios; 1474 int ret; 1475 1476 truncate_inode_pages_final(&inode->i_data); 1477 1478 /* TODO: should do better here */ 1479 if (inode->i_nlink || is_bad_inode(inode)) 1480 goto no_delete; 1481 1482 inode->i_size = 0; 1483 clear_inode(inode); 1484 1485 /* if we are deleting an obj that hasn't been created yet, wait. 1486 * This also makes sure that create_done cannot be called with an 1487 * already evicted inode. 1488 */ 1489 wait_obj_created(oi); 1490 /* ignore the error, attempt a remove anyway */ 1491 1492 /* Now Remove the OSD objects */ 1493 ret = ore_get_io_state(&sbi->layout, &oi->oc, &ios); 1494 if (unlikely(ret)) { 1495 EXOFS_ERR("%s: ore_get_io_state failed\n", __func__); 1496 return; 1497 } 1498 1499 ios->done = delete_done; 1500 ios->private = sbi; 1501 1502 ret = ore_remove(ios); 1503 if (ret) { 1504 EXOFS_ERR("%s: ore_remove failed\n", __func__); 1505 ore_put_io_state(ios); 1506 return; 1507 } 1508 atomic_inc(&sbi->s_curr_pending); 1509 1510 return; 1511 1512no_delete: 1513 clear_inode(inode); 1514}