Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

at 4dfd459b738cf1f65b3eac4e0a9b19bc93cc91c6 1314 lines 32 kB view raw
1/* 2 * Copyright (C) 2005, 2006 3 * Avishay Traeger (avishay@gmail.com) 4 * Copyright (C) 2008, 2009 5 * Boaz Harrosh <bharrosh@panasas.com> 6 * 7 * Copyrights for code taken from ext2: 8 * Copyright (C) 1992, 1993, 1994, 1995 9 * Remy Card (card@masi.ibp.fr) 10 * Laboratoire MASI - Institut Blaise Pascal 11 * Universite Pierre et Marie Curie (Paris VI) 12 * from 13 * linux/fs/minix/inode.c 14 * Copyright (C) 1991, 1992 Linus Torvalds 15 * 16 * This file is part of exofs. 17 * 18 * exofs is free software; you can redistribute it and/or modify 19 * it under the terms of the GNU General Public License as published by 20 * the Free Software Foundation. Since it is based on ext2, and the only 21 * valid version of GPL for the Linux kernel is version 2, the only valid 22 * version of GPL for exofs is version 2. 23 * 24 * exofs is distributed in the hope that it will be useful, 25 * but WITHOUT ANY WARRANTY; without even the implied warranty of 26 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 27 * GNU General Public License for more details. 28 * 29 * You should have received a copy of the GNU General Public License 30 * along with exofs; if not, write to the Free Software 31 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 32 */ 33 34#include <linux/writeback.h> 35#include <linux/buffer_head.h> 36#include <scsi/scsi_device.h> 37 38#include "exofs.h" 39 40#define EXOFS_DBGMSG2(M...) do {} while (0) 41 42enum { BIO_MAX_PAGES_KMALLOC = 43 (PAGE_SIZE - sizeof(struct bio)) / sizeof(struct bio_vec), 44}; 45 46struct page_collect { 47 struct exofs_sb_info *sbi; 48 struct request_queue *req_q; 49 struct inode *inode; 50 unsigned expected_pages; 51 struct exofs_io_state *ios; 52 53 struct bio *bio; 54 unsigned nr_pages; 55 unsigned long length; 56 loff_t pg_first; /* keep 64bit also in 32-arches */ 57}; 58 59static void _pcol_init(struct page_collect *pcol, unsigned expected_pages, 60 struct inode *inode) 61{ 62 struct exofs_sb_info *sbi = inode->i_sb->s_fs_info; 63 64 pcol->sbi = sbi; 65 /* Create master bios on first Q, later on cloning, each clone will be 66 * allocated on it's destination Q 67 */ 68 pcol->req_q = osd_request_queue(sbi->s_ods[0]); 69 pcol->inode = inode; 70 pcol->expected_pages = expected_pages; 71 72 pcol->ios = NULL; 73 pcol->bio = NULL; 74 pcol->nr_pages = 0; 75 pcol->length = 0; 76 pcol->pg_first = -1; 77} 78 79static void _pcol_reset(struct page_collect *pcol) 80{ 81 pcol->expected_pages -= min(pcol->nr_pages, pcol->expected_pages); 82 83 pcol->bio = NULL; 84 pcol->nr_pages = 0; 85 pcol->length = 0; 86 pcol->pg_first = -1; 87 pcol->ios = NULL; 88 89 /* this is probably the end of the loop but in writes 90 * it might not end here. don't be left with nothing 91 */ 92 if (!pcol->expected_pages) 93 pcol->expected_pages = BIO_MAX_PAGES_KMALLOC; 94} 95 96static int pcol_try_alloc(struct page_collect *pcol) 97{ 98 int pages = min_t(unsigned, pcol->expected_pages, 99 BIO_MAX_PAGES_KMALLOC); 100 101 if (!pcol->ios) { /* First time allocate io_state */ 102 int ret = exofs_get_io_state(pcol->sbi, &pcol->ios); 103 104 if (ret) 105 return ret; 106 } 107 108 for (; pages; pages >>= 1) { 109 pcol->bio = bio_kmalloc(GFP_KERNEL, pages); 110 if (likely(pcol->bio)) 111 return 0; 112 } 113 114 EXOFS_ERR("Failed to bio_kmalloc expected_pages=%u\n", 115 pcol->expected_pages); 116 return -ENOMEM; 117} 118 119static void pcol_free(struct page_collect *pcol) 120{ 121 if (pcol->bio) { 122 bio_put(pcol->bio); 123 pcol->bio = NULL; 124 } 125 126 if (pcol->ios) { 127 exofs_put_io_state(pcol->ios); 128 pcol->ios = NULL; 129 } 130} 131 132static int pcol_add_page(struct page_collect *pcol, struct page *page, 133 unsigned len) 134{ 135 int added_len = bio_add_pc_page(pcol->req_q, pcol->bio, page, len, 0); 136 if (unlikely(len != added_len)) 137 return -ENOMEM; 138 139 ++pcol->nr_pages; 140 pcol->length += len; 141 return 0; 142} 143 144static int update_read_page(struct page *page, int ret) 145{ 146 if (ret == 0) { 147 /* Everything is OK */ 148 SetPageUptodate(page); 149 if (PageError(page)) 150 ClearPageError(page); 151 } else if (ret == -EFAULT) { 152 /* In this case we were trying to read something that wasn't on 153 * disk yet - return a page full of zeroes. This should be OK, 154 * because the object should be empty (if there was a write 155 * before this read, the read would be waiting with the page 156 * locked */ 157 clear_highpage(page); 158 159 SetPageUptodate(page); 160 if (PageError(page)) 161 ClearPageError(page); 162 ret = 0; /* recovered error */ 163 EXOFS_DBGMSG("recovered read error\n"); 164 } else /* Error */ 165 SetPageError(page); 166 167 return ret; 168} 169 170static void update_write_page(struct page *page, int ret) 171{ 172 if (ret) { 173 mapping_set_error(page->mapping, ret); 174 SetPageError(page); 175 } 176 end_page_writeback(page); 177} 178 179/* Called at the end of reads, to optionally unlock pages and update their 180 * status. 181 */ 182static int __readpages_done(struct page_collect *pcol, bool do_unlock) 183{ 184 struct bio_vec *bvec; 185 int i; 186 u64 resid; 187 u64 good_bytes; 188 u64 length = 0; 189 int ret = exofs_check_io(pcol->ios, &resid); 190 191 if (likely(!ret)) 192 good_bytes = pcol->length; 193 else 194 good_bytes = pcol->length - resid; 195 196 EXOFS_DBGMSG("readpages_done(0x%lx) good_bytes=0x%llx" 197 " length=0x%lx nr_pages=%u\n", 198 pcol->inode->i_ino, _LLU(good_bytes), pcol->length, 199 pcol->nr_pages); 200 201 __bio_for_each_segment(bvec, pcol->bio, i, 0) { 202 struct page *page = bvec->bv_page; 203 struct inode *inode = page->mapping->host; 204 int page_stat; 205 206 if (inode != pcol->inode) 207 continue; /* osd might add more pages at end */ 208 209 if (likely(length < good_bytes)) 210 page_stat = 0; 211 else 212 page_stat = ret; 213 214 EXOFS_DBGMSG2(" readpages_done(0x%lx, 0x%lx) %s\n", 215 inode->i_ino, page->index, 216 page_stat ? "bad_bytes" : "good_bytes"); 217 218 ret = update_read_page(page, page_stat); 219 if (do_unlock) 220 unlock_page(page); 221 length += bvec->bv_len; 222 } 223 224 pcol_free(pcol); 225 EXOFS_DBGMSG("readpages_done END\n"); 226 return ret; 227} 228 229/* callback of async reads */ 230static void readpages_done(struct exofs_io_state *ios, void *p) 231{ 232 struct page_collect *pcol = p; 233 234 __readpages_done(pcol, true); 235 atomic_dec(&pcol->sbi->s_curr_pending); 236 kfree(pcol); 237} 238 239static void _unlock_pcol_pages(struct page_collect *pcol, int ret, int rw) 240{ 241 struct bio_vec *bvec; 242 int i; 243 244 __bio_for_each_segment(bvec, pcol->bio, i, 0) { 245 struct page *page = bvec->bv_page; 246 247 if (rw == READ) 248 update_read_page(page, ret); 249 else 250 update_write_page(page, ret); 251 252 unlock_page(page); 253 } 254} 255 256static int read_exec(struct page_collect *pcol, bool is_sync) 257{ 258 struct exofs_i_info *oi = exofs_i(pcol->inode); 259 struct exofs_io_state *ios = pcol->ios; 260 struct page_collect *pcol_copy = NULL; 261 int ret; 262 263 if (!pcol->bio) 264 return 0; 265 266 /* see comment in _readpage() about sync reads */ 267 WARN_ON(is_sync && (pcol->nr_pages != 1)); 268 269 ios->bio = pcol->bio; 270 ios->length = pcol->length; 271 ios->offset = pcol->pg_first << PAGE_CACHE_SHIFT; 272 273 if (is_sync) { 274 exofs_oi_read(oi, pcol->ios); 275 return __readpages_done(pcol, false); 276 } 277 278 pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL); 279 if (!pcol_copy) { 280 ret = -ENOMEM; 281 goto err; 282 } 283 284 *pcol_copy = *pcol; 285 ios->done = readpages_done; 286 ios->private = pcol_copy; 287 ret = exofs_oi_read(oi, ios); 288 if (unlikely(ret)) 289 goto err; 290 291 atomic_inc(&pcol->sbi->s_curr_pending); 292 293 EXOFS_DBGMSG("read_exec obj=0x%llx start=0x%llx length=0x%lx\n", 294 ios->obj.id, _LLU(ios->offset), pcol->length); 295 296 /* pages ownership was passed to pcol_copy */ 297 _pcol_reset(pcol); 298 return 0; 299 300err: 301 if (!is_sync) 302 _unlock_pcol_pages(pcol, ret, READ); 303 304 pcol_free(pcol); 305 306 kfree(pcol_copy); 307 return ret; 308} 309 310/* readpage_strip is called either directly from readpage() or by the VFS from 311 * within read_cache_pages(), to add one more page to be read. It will try to 312 * collect as many contiguous pages as posible. If a discontinuity is 313 * encountered, or it runs out of resources, it will submit the previous segment 314 * and will start a new collection. Eventually caller must submit the last 315 * segment if present. 316 */ 317static int readpage_strip(void *data, struct page *page) 318{ 319 struct page_collect *pcol = data; 320 struct inode *inode = pcol->inode; 321 struct exofs_i_info *oi = exofs_i(inode); 322 loff_t i_size = i_size_read(inode); 323 pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; 324 size_t len; 325 int ret; 326 327 /* FIXME: Just for debugging, will be removed */ 328 if (PageUptodate(page)) 329 EXOFS_ERR("PageUptodate(0x%lx, 0x%lx)\n", pcol->inode->i_ino, 330 page->index); 331 332 if (page->index < end_index) 333 len = PAGE_CACHE_SIZE; 334 else if (page->index == end_index) 335 len = i_size & ~PAGE_CACHE_MASK; 336 else 337 len = 0; 338 339 if (!len || !obj_created(oi)) { 340 /* this will be out of bounds, or doesn't exist yet. 341 * Current page is cleared and the request is split 342 */ 343 clear_highpage(page); 344 345 SetPageUptodate(page); 346 if (PageError(page)) 347 ClearPageError(page); 348 349 unlock_page(page); 350 EXOFS_DBGMSG("readpage_strip(0x%lx, 0x%lx) empty page," 351 " splitting\n", inode->i_ino, page->index); 352 353 return read_exec(pcol, false); 354 } 355 356try_again: 357 358 if (unlikely(pcol->pg_first == -1)) { 359 pcol->pg_first = page->index; 360 } else if (unlikely((pcol->pg_first + pcol->nr_pages) != 361 page->index)) { 362 /* Discontinuity detected, split the request */ 363 ret = read_exec(pcol, false); 364 if (unlikely(ret)) 365 goto fail; 366 goto try_again; 367 } 368 369 if (!pcol->bio) { 370 ret = pcol_try_alloc(pcol); 371 if (unlikely(ret)) 372 goto fail; 373 } 374 375 if (len != PAGE_CACHE_SIZE) 376 zero_user(page, len, PAGE_CACHE_SIZE - len); 377 378 EXOFS_DBGMSG2(" readpage_strip(0x%lx, 0x%lx) len=0x%zx\n", 379 inode->i_ino, page->index, len); 380 381 ret = pcol_add_page(pcol, page, len); 382 if (ret) { 383 EXOFS_DBGMSG2("Failed pcol_add_page pages[i]=%p " 384 "this_len=0x%zx nr_pages=%u length=0x%lx\n", 385 page, len, pcol->nr_pages, pcol->length); 386 387 /* split the request, and start again with current page */ 388 ret = read_exec(pcol, false); 389 if (unlikely(ret)) 390 goto fail; 391 392 goto try_again; 393 } 394 395 return 0; 396 397fail: 398 /* SetPageError(page); ??? */ 399 unlock_page(page); 400 return ret; 401} 402 403static int exofs_readpages(struct file *file, struct address_space *mapping, 404 struct list_head *pages, unsigned nr_pages) 405{ 406 struct page_collect pcol; 407 int ret; 408 409 _pcol_init(&pcol, nr_pages, mapping->host); 410 411 ret = read_cache_pages(mapping, pages, readpage_strip, &pcol); 412 if (ret) { 413 EXOFS_ERR("read_cache_pages => %d\n", ret); 414 return ret; 415 } 416 417 return read_exec(&pcol, false); 418} 419 420static int _readpage(struct page *page, bool is_sync) 421{ 422 struct page_collect pcol; 423 int ret; 424 425 _pcol_init(&pcol, 1, page->mapping->host); 426 427 /* readpage_strip might call read_exec(,is_sync==false) at several 428 * places but not if we have a single page. 429 */ 430 ret = readpage_strip(&pcol, page); 431 if (ret) { 432 EXOFS_ERR("_readpage => %d\n", ret); 433 return ret; 434 } 435 436 return read_exec(&pcol, is_sync); 437} 438 439/* 440 * We don't need the file 441 */ 442static int exofs_readpage(struct file *file, struct page *page) 443{ 444 return _readpage(page, false); 445} 446 447/* Callback for osd_write. All writes are asynchronous */ 448static void writepages_done(struct exofs_io_state *ios, void *p) 449{ 450 struct page_collect *pcol = p; 451 struct bio_vec *bvec; 452 int i; 453 u64 resid; 454 u64 good_bytes; 455 u64 length = 0; 456 int ret = exofs_check_io(ios, &resid); 457 458 atomic_dec(&pcol->sbi->s_curr_pending); 459 460 if (likely(!ret)) 461 good_bytes = pcol->length; 462 else 463 good_bytes = pcol->length - resid; 464 465 EXOFS_DBGMSG("writepages_done(0x%lx) good_bytes=0x%llx" 466 " length=0x%lx nr_pages=%u\n", 467 pcol->inode->i_ino, _LLU(good_bytes), pcol->length, 468 pcol->nr_pages); 469 470 __bio_for_each_segment(bvec, pcol->bio, i, 0) { 471 struct page *page = bvec->bv_page; 472 struct inode *inode = page->mapping->host; 473 int page_stat; 474 475 if (inode != pcol->inode) 476 continue; /* osd might add more pages to a bio */ 477 478 if (likely(length < good_bytes)) 479 page_stat = 0; 480 else 481 page_stat = ret; 482 483 update_write_page(page, page_stat); 484 unlock_page(page); 485 EXOFS_DBGMSG2(" writepages_done(0x%lx, 0x%lx) status=%d\n", 486 inode->i_ino, page->index, page_stat); 487 488 length += bvec->bv_len; 489 } 490 491 pcol_free(pcol); 492 kfree(pcol); 493 EXOFS_DBGMSG("writepages_done END\n"); 494} 495 496static int write_exec(struct page_collect *pcol) 497{ 498 struct exofs_i_info *oi = exofs_i(pcol->inode); 499 struct exofs_io_state *ios = pcol->ios; 500 struct page_collect *pcol_copy = NULL; 501 int ret; 502 503 if (!pcol->bio) 504 return 0; 505 506 pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL); 507 if (!pcol_copy) { 508 EXOFS_ERR("write_exec: Faild to kmalloc(pcol)\n"); 509 ret = -ENOMEM; 510 goto err; 511 } 512 513 *pcol_copy = *pcol; 514 515 pcol_copy->bio->bi_rw |= (1 << BIO_RW); /* FIXME: bio_set_dir() */ 516 517 ios->bio = pcol_copy->bio; 518 ios->offset = pcol_copy->pg_first << PAGE_CACHE_SHIFT; 519 ios->length = pcol_copy->length; 520 ios->done = writepages_done; 521 ios->private = pcol_copy; 522 523 ret = exofs_oi_write(oi, ios); 524 if (unlikely(ret)) { 525 EXOFS_ERR("write_exec: exofs_oi_write() Faild\n"); 526 goto err; 527 } 528 529 atomic_inc(&pcol->sbi->s_curr_pending); 530 EXOFS_DBGMSG("write_exec(0x%lx, 0x%llx) start=0x%llx length=0x%lx\n", 531 pcol->inode->i_ino, pcol->pg_first, _LLU(ios->offset), 532 pcol->length); 533 /* pages ownership was passed to pcol_copy */ 534 _pcol_reset(pcol); 535 return 0; 536 537err: 538 _unlock_pcol_pages(pcol, ret, WRITE); 539 pcol_free(pcol); 540 kfree(pcol_copy); 541 542 return ret; 543} 544 545/* writepage_strip is called either directly from writepage() or by the VFS from 546 * within write_cache_pages(), to add one more page to be written to storage. 547 * It will try to collect as many contiguous pages as possible. If a 548 * discontinuity is encountered or it runs out of resources it will submit the 549 * previous segment and will start a new collection. 550 * Eventually caller must submit the last segment if present. 551 */ 552static int writepage_strip(struct page *page, 553 struct writeback_control *wbc_unused, void *data) 554{ 555 struct page_collect *pcol = data; 556 struct inode *inode = pcol->inode; 557 struct exofs_i_info *oi = exofs_i(inode); 558 loff_t i_size = i_size_read(inode); 559 pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; 560 size_t len; 561 int ret; 562 563 BUG_ON(!PageLocked(page)); 564 565 ret = wait_obj_created(oi); 566 if (unlikely(ret)) 567 goto fail; 568 569 if (page->index < end_index) 570 /* in this case, the page is within the limits of the file */ 571 len = PAGE_CACHE_SIZE; 572 else { 573 len = i_size & ~PAGE_CACHE_MASK; 574 575 if (page->index > end_index || !len) { 576 /* in this case, the page is outside the limits 577 * (truncate in progress) 578 */ 579 ret = write_exec(pcol); 580 if (unlikely(ret)) 581 goto fail; 582 if (PageError(page)) 583 ClearPageError(page); 584 unlock_page(page); 585 EXOFS_DBGMSG("writepage_strip(0x%lx, 0x%lx) " 586 "outside the limits\n", 587 inode->i_ino, page->index); 588 return 0; 589 } 590 } 591 592try_again: 593 594 if (unlikely(pcol->pg_first == -1)) { 595 pcol->pg_first = page->index; 596 } else if (unlikely((pcol->pg_first + pcol->nr_pages) != 597 page->index)) { 598 /* Discontinuity detected, split the request */ 599 ret = write_exec(pcol); 600 if (unlikely(ret)) 601 goto fail; 602 603 EXOFS_DBGMSG("writepage_strip(0x%lx, 0x%lx) Discontinuity\n", 604 inode->i_ino, page->index); 605 goto try_again; 606 } 607 608 if (!pcol->bio) { 609 ret = pcol_try_alloc(pcol); 610 if (unlikely(ret)) 611 goto fail; 612 } 613 614 EXOFS_DBGMSG2(" writepage_strip(0x%lx, 0x%lx) len=0x%zx\n", 615 inode->i_ino, page->index, len); 616 617 ret = pcol_add_page(pcol, page, len); 618 if (unlikely(ret)) { 619 EXOFS_DBGMSG("Failed pcol_add_page " 620 "nr_pages=%u total_length=0x%lx\n", 621 pcol->nr_pages, pcol->length); 622 623 /* split the request, next loop will start again */ 624 ret = write_exec(pcol); 625 if (unlikely(ret)) { 626 EXOFS_DBGMSG("write_exec faild => %d", ret); 627 goto fail; 628 } 629 630 goto try_again; 631 } 632 633 BUG_ON(PageWriteback(page)); 634 set_page_writeback(page); 635 636 return 0; 637 638fail: 639 EXOFS_DBGMSG("Error: writepage_strip(0x%lx, 0x%lx)=>%d\n", 640 inode->i_ino, page->index, ret); 641 set_bit(AS_EIO, &page->mapping->flags); 642 unlock_page(page); 643 return ret; 644} 645 646static int exofs_writepages(struct address_space *mapping, 647 struct writeback_control *wbc) 648{ 649 struct page_collect pcol; 650 long start, end, expected_pages; 651 int ret; 652 653 start = wbc->range_start >> PAGE_CACHE_SHIFT; 654 end = (wbc->range_end == LLONG_MAX) ? 655 start + mapping->nrpages : 656 wbc->range_end >> PAGE_CACHE_SHIFT; 657 658 if (start || end) 659 expected_pages = end - start + 1; 660 else 661 expected_pages = mapping->nrpages; 662 663 if (expected_pages < 32L) 664 expected_pages = 32L; 665 666 EXOFS_DBGMSG("inode(0x%lx) wbc->start=0x%llx wbc->end=0x%llx " 667 "nrpages=%lu start=0x%lx end=0x%lx expected_pages=%ld\n", 668 mapping->host->i_ino, wbc->range_start, wbc->range_end, 669 mapping->nrpages, start, end, expected_pages); 670 671 _pcol_init(&pcol, expected_pages, mapping->host); 672 673 ret = write_cache_pages(mapping, wbc, writepage_strip, &pcol); 674 if (ret) { 675 EXOFS_ERR("write_cache_pages => %d\n", ret); 676 return ret; 677 } 678 679 return write_exec(&pcol); 680} 681 682static int exofs_writepage(struct page *page, struct writeback_control *wbc) 683{ 684 struct page_collect pcol; 685 int ret; 686 687 _pcol_init(&pcol, 1, page->mapping->host); 688 689 ret = writepage_strip(page, NULL, &pcol); 690 if (ret) { 691 EXOFS_ERR("exofs_writepage => %d\n", ret); 692 return ret; 693 } 694 695 return write_exec(&pcol); 696} 697 698int exofs_write_begin(struct file *file, struct address_space *mapping, 699 loff_t pos, unsigned len, unsigned flags, 700 struct page **pagep, void **fsdata) 701{ 702 int ret = 0; 703 struct page *page; 704 705 page = *pagep; 706 if (page == NULL) { 707 ret = simple_write_begin(file, mapping, pos, len, flags, pagep, 708 fsdata); 709 if (ret) { 710 EXOFS_DBGMSG("simple_write_begin faild\n"); 711 return ret; 712 } 713 714 page = *pagep; 715 } 716 717 /* read modify write */ 718 if (!PageUptodate(page) && (len != PAGE_CACHE_SIZE)) { 719 ret = _readpage(page, true); 720 if (ret) { 721 /*SetPageError was done by _readpage. Is it ok?*/ 722 unlock_page(page); 723 EXOFS_DBGMSG("__readpage_filler faild\n"); 724 } 725 } 726 727 return ret; 728} 729 730static int exofs_write_begin_export(struct file *file, 731 struct address_space *mapping, 732 loff_t pos, unsigned len, unsigned flags, 733 struct page **pagep, void **fsdata) 734{ 735 *pagep = NULL; 736 737 return exofs_write_begin(file, mapping, pos, len, flags, pagep, 738 fsdata); 739} 740 741static int exofs_write_end(struct file *file, struct address_space *mapping, 742 loff_t pos, unsigned len, unsigned copied, 743 struct page *page, void *fsdata) 744{ 745 struct inode *inode = mapping->host; 746 /* According to comment in simple_write_end i_mutex is held */ 747 loff_t i_size = inode->i_size; 748 int ret; 749 750 ret = simple_write_end(file, mapping,pos, len, copied, page, fsdata); 751 if (i_size != inode->i_size) 752 mark_inode_dirty(inode); 753 return ret; 754} 755 756const struct address_space_operations exofs_aops = { 757 .readpage = exofs_readpage, 758 .readpages = exofs_readpages, 759 .writepage = exofs_writepage, 760 .writepages = exofs_writepages, 761 .write_begin = exofs_write_begin_export, 762 .write_end = exofs_write_end, 763}; 764 765/****************************************************************************** 766 * INODE OPERATIONS 767 *****************************************************************************/ 768 769/* 770 * Test whether an inode is a fast symlink. 771 */ 772static inline int exofs_inode_is_fast_symlink(struct inode *inode) 773{ 774 struct exofs_i_info *oi = exofs_i(inode); 775 776 return S_ISLNK(inode->i_mode) && (oi->i_data[0] != 0); 777} 778 779/* 780 * get_block_t - Fill in a buffer_head 781 * An OSD takes care of block allocation so we just fake an allocation by 782 * putting in the inode's sector_t in the buffer_head. 783 * TODO: What about the case of create==0 and @iblock does not exist in the 784 * object? 785 */ 786static int exofs_get_block(struct inode *inode, sector_t iblock, 787 struct buffer_head *bh_result, int create) 788{ 789 map_bh(bh_result, inode->i_sb, iblock); 790 return 0; 791} 792 793const struct osd_attr g_attr_logical_length = ATTR_DEF( 794 OSD_APAGE_OBJECT_INFORMATION, OSD_ATTR_OI_LOGICAL_LENGTH, 8); 795 796static int _do_truncate(struct inode *inode) 797{ 798 struct exofs_i_info *oi = exofs_i(inode); 799 loff_t isize = i_size_read(inode); 800 int ret; 801 802 inode->i_mtime = inode->i_ctime = CURRENT_TIME; 803 804 nobh_truncate_page(inode->i_mapping, isize, exofs_get_block); 805 806 ret = exofs_oi_truncate(oi, (u64)isize); 807 EXOFS_DBGMSG("(0x%lx) size=0x%llx\n", inode->i_ino, isize); 808 return ret; 809} 810 811/* 812 * Truncate a file to the specified size - all we have to do is set the size 813 * attribute. We make sure the object exists first. 814 */ 815void exofs_truncate(struct inode *inode) 816{ 817 struct exofs_i_info *oi = exofs_i(inode); 818 int ret; 819 820 if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) 821 || S_ISLNK(inode->i_mode))) 822 return; 823 if (exofs_inode_is_fast_symlink(inode)) 824 return; 825 if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) 826 return; 827 828 /* if we are about to truncate an object, and it hasn't been 829 * created yet, wait 830 */ 831 if (unlikely(wait_obj_created(oi))) 832 goto fail; 833 834 ret = _do_truncate(inode); 835 if (ret) 836 goto fail; 837 838out: 839 mark_inode_dirty(inode); 840 return; 841fail: 842 make_bad_inode(inode); 843 goto out; 844} 845 846/* 847 * Set inode attributes - just call generic functions. 848 */ 849int exofs_setattr(struct dentry *dentry, struct iattr *iattr) 850{ 851 struct inode *inode = dentry->d_inode; 852 int error; 853 854 error = inode_change_ok(inode, iattr); 855 if (error) 856 return error; 857 858 error = inode_setattr(inode, iattr); 859 return error; 860} 861 862/* 863 * Read an inode from the OSD, and return it as is. We also return the size 864 * attribute in the 'obj_size' argument. 865 */ 866static int exofs_get_inode(struct super_block *sb, struct exofs_i_info *oi, 867 struct exofs_fcb *inode, uint64_t *obj_size) 868{ 869 struct exofs_sb_info *sbi = sb->s_fs_info; 870 struct osd_attr attrs[2]; 871 struct exofs_io_state *ios; 872 int ret; 873 874 *obj_size = ~0; 875 ret = exofs_get_io_state(sbi, &ios); 876 if (unlikely(ret)) { 877 EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__); 878 return ret; 879 } 880 881 ios->obj.id = exofs_oi_objno(oi); 882 exofs_make_credential(oi->i_cred, &ios->obj); 883 ios->cred = oi->i_cred; 884 885 attrs[0] = g_attr_inode_data; 886 attrs[1] = g_attr_logical_length; 887 ios->in_attr = attrs; 888 ios->in_attr_len = ARRAY_SIZE(attrs); 889 890 ret = exofs_sbi_read(ios); 891 if (ret) 892 goto out; 893 894 ret = extract_attr_from_ios(ios, &attrs[0]); 895 if (ret) { 896 EXOFS_ERR("%s: extract_attr of inode_data failed\n", __func__); 897 goto out; 898 } 899 WARN_ON(attrs[0].len != EXOFS_INO_ATTR_SIZE); 900 memcpy(inode, attrs[0].val_ptr, EXOFS_INO_ATTR_SIZE); 901 902 ret = extract_attr_from_ios(ios, &attrs[1]); 903 if (ret) { 904 EXOFS_ERR("%s: extract_attr of logical_length failed\n", 905 __func__); 906 goto out; 907 } 908 *obj_size = get_unaligned_be64(attrs[1].val_ptr); 909 910out: 911 exofs_put_io_state(ios); 912 return ret; 913} 914 915static void __oi_init(struct exofs_i_info *oi) 916{ 917 init_waitqueue_head(&oi->i_wq); 918 oi->i_flags = 0; 919} 920/* 921 * Fill in an inode read from the OSD and set it up for use 922 */ 923struct inode *exofs_iget(struct super_block *sb, unsigned long ino) 924{ 925 struct exofs_i_info *oi; 926 struct exofs_fcb fcb; 927 struct inode *inode; 928 uint64_t obj_size; 929 int ret; 930 931 inode = iget_locked(sb, ino); 932 if (!inode) 933 return ERR_PTR(-ENOMEM); 934 if (!(inode->i_state & I_NEW)) 935 return inode; 936 oi = exofs_i(inode); 937 __oi_init(oi); 938 939 /* read the inode from the osd */ 940 ret = exofs_get_inode(sb, oi, &fcb, &obj_size); 941 if (ret) 942 goto bad_inode; 943 944 set_obj_created(oi); 945 946 /* copy stuff from on-disk struct to in-memory struct */ 947 inode->i_mode = le16_to_cpu(fcb.i_mode); 948 inode->i_uid = le32_to_cpu(fcb.i_uid); 949 inode->i_gid = le32_to_cpu(fcb.i_gid); 950 inode->i_nlink = le16_to_cpu(fcb.i_links_count); 951 inode->i_ctime.tv_sec = (signed)le32_to_cpu(fcb.i_ctime); 952 inode->i_atime.tv_sec = (signed)le32_to_cpu(fcb.i_atime); 953 inode->i_mtime.tv_sec = (signed)le32_to_cpu(fcb.i_mtime); 954 inode->i_ctime.tv_nsec = 955 inode->i_atime.tv_nsec = inode->i_mtime.tv_nsec = 0; 956 oi->i_commit_size = le64_to_cpu(fcb.i_size); 957 i_size_write(inode, oi->i_commit_size); 958 inode->i_blkbits = EXOFS_BLKSHIFT; 959 inode->i_generation = le32_to_cpu(fcb.i_generation); 960 961 if ((inode->i_size != obj_size) && 962 (!exofs_inode_is_fast_symlink(inode))) { 963 EXOFS_ERR("WARNING: Size of inode=%llu != object=%llu\n", 964 inode->i_size, _LLU(obj_size)); 965 /* FIXME: call exofs_inode_recovery() */ 966 } 967 968 oi->i_dir_start_lookup = 0; 969 970 if ((inode->i_nlink == 0) && (inode->i_mode == 0)) { 971 ret = -ESTALE; 972 goto bad_inode; 973 } 974 975 if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) { 976 if (fcb.i_data[0]) 977 inode->i_rdev = 978 old_decode_dev(le32_to_cpu(fcb.i_data[0])); 979 else 980 inode->i_rdev = 981 new_decode_dev(le32_to_cpu(fcb.i_data[1])); 982 } else { 983 memcpy(oi->i_data, fcb.i_data, sizeof(fcb.i_data)); 984 } 985 986 if (S_ISREG(inode->i_mode)) { 987 inode->i_op = &exofs_file_inode_operations; 988 inode->i_fop = &exofs_file_operations; 989 inode->i_mapping->a_ops = &exofs_aops; 990 } else if (S_ISDIR(inode->i_mode)) { 991 inode->i_op = &exofs_dir_inode_operations; 992 inode->i_fop = &exofs_dir_operations; 993 inode->i_mapping->a_ops = &exofs_aops; 994 } else if (S_ISLNK(inode->i_mode)) { 995 if (exofs_inode_is_fast_symlink(inode)) 996 inode->i_op = &exofs_fast_symlink_inode_operations; 997 else { 998 inode->i_op = &exofs_symlink_inode_operations; 999 inode->i_mapping->a_ops = &exofs_aops; 1000 } 1001 } else { 1002 inode->i_op = &exofs_special_inode_operations; 1003 if (fcb.i_data[0]) 1004 init_special_inode(inode, inode->i_mode, 1005 old_decode_dev(le32_to_cpu(fcb.i_data[0]))); 1006 else 1007 init_special_inode(inode, inode->i_mode, 1008 new_decode_dev(le32_to_cpu(fcb.i_data[1]))); 1009 } 1010 1011 unlock_new_inode(inode); 1012 return inode; 1013 1014bad_inode: 1015 iget_failed(inode); 1016 return ERR_PTR(ret); 1017} 1018 1019int __exofs_wait_obj_created(struct exofs_i_info *oi) 1020{ 1021 if (!obj_created(oi)) { 1022 BUG_ON(!obj_2bcreated(oi)); 1023 wait_event(oi->i_wq, obj_created(oi)); 1024 } 1025 return unlikely(is_bad_inode(&oi->vfs_inode)) ? -EIO : 0; 1026} 1027/* 1028 * Callback function from exofs_new_inode(). The important thing is that we 1029 * set the obj_created flag so that other methods know that the object exists on 1030 * the OSD. 1031 */ 1032static void create_done(struct exofs_io_state *ios, void *p) 1033{ 1034 struct inode *inode = p; 1035 struct exofs_i_info *oi = exofs_i(inode); 1036 struct exofs_sb_info *sbi = inode->i_sb->s_fs_info; 1037 int ret; 1038 1039 ret = exofs_check_io(ios, NULL); 1040 exofs_put_io_state(ios); 1041 1042 atomic_dec(&sbi->s_curr_pending); 1043 1044 if (unlikely(ret)) { 1045 EXOFS_ERR("object=0x%llx creation faild in pid=0x%llx", 1046 _LLU(exofs_oi_objno(oi)), _LLU(sbi->s_pid)); 1047 /*TODO: When FS is corrupted creation can fail, object already 1048 * exist. Get rid of this asynchronous creation, if exist 1049 * increment the obj counter and try the next object. Until we 1050 * succeed. All these dangling objects will be made into lost 1051 * files by chkfs.exofs 1052 */ 1053 } 1054 1055 set_obj_created(oi); 1056 1057 atomic_dec(&inode->i_count); 1058 wake_up(&oi->i_wq); 1059} 1060 1061/* 1062 * Set up a new inode and create an object for it on the OSD 1063 */ 1064struct inode *exofs_new_inode(struct inode *dir, int mode) 1065{ 1066 struct super_block *sb; 1067 struct inode *inode; 1068 struct exofs_i_info *oi; 1069 struct exofs_sb_info *sbi; 1070 struct exofs_io_state *ios; 1071 int ret; 1072 1073 sb = dir->i_sb; 1074 inode = new_inode(sb); 1075 if (!inode) 1076 return ERR_PTR(-ENOMEM); 1077 1078 oi = exofs_i(inode); 1079 __oi_init(oi); 1080 1081 set_obj_2bcreated(oi); 1082 1083 sbi = sb->s_fs_info; 1084 1085 sb->s_dirt = 1; 1086 inode->i_uid = current->cred->fsuid; 1087 if (dir->i_mode & S_ISGID) { 1088 inode->i_gid = dir->i_gid; 1089 if (S_ISDIR(mode)) 1090 mode |= S_ISGID; 1091 } else { 1092 inode->i_gid = current->cred->fsgid; 1093 } 1094 inode->i_mode = mode; 1095 1096 inode->i_ino = sbi->s_nextid++; 1097 inode->i_blkbits = EXOFS_BLKSHIFT; 1098 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 1099 oi->i_commit_size = inode->i_size = 0; 1100 spin_lock(&sbi->s_next_gen_lock); 1101 inode->i_generation = sbi->s_next_generation++; 1102 spin_unlock(&sbi->s_next_gen_lock); 1103 insert_inode_hash(inode); 1104 1105 mark_inode_dirty(inode); 1106 1107 ret = exofs_get_io_state(sbi, &ios); 1108 if (unlikely(ret)) { 1109 EXOFS_ERR("exofs_new_inode: exofs_get_io_state failed\n"); 1110 return ERR_PTR(ret); 1111 } 1112 1113 ios->obj.id = exofs_oi_objno(oi); 1114 exofs_make_credential(oi->i_cred, &ios->obj); 1115 1116 /* increment the refcount so that the inode will still be around when we 1117 * reach the callback 1118 */ 1119 atomic_inc(&inode->i_count); 1120 1121 ios->done = create_done; 1122 ios->private = inode; 1123 ios->cred = oi->i_cred; 1124 ret = exofs_sbi_create(ios); 1125 if (ret) { 1126 atomic_dec(&inode->i_count); 1127 exofs_put_io_state(ios); 1128 return ERR_PTR(ret); 1129 } 1130 atomic_inc(&sbi->s_curr_pending); 1131 1132 return inode; 1133} 1134 1135/* 1136 * struct to pass two arguments to update_inode's callback 1137 */ 1138struct updatei_args { 1139 struct exofs_sb_info *sbi; 1140 struct exofs_fcb fcb; 1141}; 1142 1143/* 1144 * Callback function from exofs_update_inode(). 1145 */ 1146static void updatei_done(struct exofs_io_state *ios, void *p) 1147{ 1148 struct updatei_args *args = p; 1149 1150 exofs_put_io_state(ios); 1151 1152 atomic_dec(&args->sbi->s_curr_pending); 1153 1154 kfree(args); 1155} 1156 1157/* 1158 * Write the inode to the OSD. Just fill up the struct, and set the attribute 1159 * synchronously or asynchronously depending on the do_sync flag. 1160 */ 1161static int exofs_update_inode(struct inode *inode, int do_sync) 1162{ 1163 struct exofs_i_info *oi = exofs_i(inode); 1164 struct super_block *sb = inode->i_sb; 1165 struct exofs_sb_info *sbi = sb->s_fs_info; 1166 struct exofs_io_state *ios; 1167 struct osd_attr attr; 1168 struct exofs_fcb *fcb; 1169 struct updatei_args *args; 1170 int ret; 1171 1172 args = kzalloc(sizeof(*args), GFP_KERNEL); 1173 if (!args) 1174 return -ENOMEM; 1175 1176 fcb = &args->fcb; 1177 1178 fcb->i_mode = cpu_to_le16(inode->i_mode); 1179 fcb->i_uid = cpu_to_le32(inode->i_uid); 1180 fcb->i_gid = cpu_to_le32(inode->i_gid); 1181 fcb->i_links_count = cpu_to_le16(inode->i_nlink); 1182 fcb->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec); 1183 fcb->i_atime = cpu_to_le32(inode->i_atime.tv_sec); 1184 fcb->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec); 1185 oi->i_commit_size = i_size_read(inode); 1186 fcb->i_size = cpu_to_le64(oi->i_commit_size); 1187 fcb->i_generation = cpu_to_le32(inode->i_generation); 1188 1189 if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) { 1190 if (old_valid_dev(inode->i_rdev)) { 1191 fcb->i_data[0] = 1192 cpu_to_le32(old_encode_dev(inode->i_rdev)); 1193 fcb->i_data[1] = 0; 1194 } else { 1195 fcb->i_data[0] = 0; 1196 fcb->i_data[1] = 1197 cpu_to_le32(new_encode_dev(inode->i_rdev)); 1198 fcb->i_data[2] = 0; 1199 } 1200 } else 1201 memcpy(fcb->i_data, oi->i_data, sizeof(fcb->i_data)); 1202 1203 ret = exofs_get_io_state(sbi, &ios); 1204 if (unlikely(ret)) { 1205 EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__); 1206 goto free_args; 1207 } 1208 1209 attr = g_attr_inode_data; 1210 attr.val_ptr = fcb; 1211 ios->out_attr_len = 1; 1212 ios->out_attr = &attr; 1213 1214 if (!obj_created(oi)) { 1215 EXOFS_DBGMSG("!obj_created\n"); 1216 BUG_ON(!obj_2bcreated(oi)); 1217 wait_event(oi->i_wq, obj_created(oi)); 1218 EXOFS_DBGMSG("wait_event done\n"); 1219 } 1220 1221 if (!do_sync) { 1222 args->sbi = sbi; 1223 ios->done = updatei_done; 1224 ios->private = args; 1225 } 1226 1227 ret = exofs_oi_write(oi, ios); 1228 if (!do_sync && !ret) { 1229 atomic_inc(&sbi->s_curr_pending); 1230 goto out; /* deallocation in updatei_done */ 1231 } 1232 1233 exofs_put_io_state(ios); 1234free_args: 1235 kfree(args); 1236out: 1237 EXOFS_DBGMSG("ret=>%d\n", ret); 1238 return ret; 1239} 1240 1241int exofs_write_inode(struct inode *inode, int wait) 1242{ 1243 return exofs_update_inode(inode, wait); 1244} 1245 1246/* 1247 * Callback function from exofs_delete_inode() - don't have much cleaning up to 1248 * do. 1249 */ 1250static void delete_done(struct exofs_io_state *ios, void *p) 1251{ 1252 struct exofs_sb_info *sbi = p; 1253 1254 exofs_put_io_state(ios); 1255 1256 atomic_dec(&sbi->s_curr_pending); 1257} 1258 1259/* 1260 * Called when the refcount of an inode reaches zero. We remove the object 1261 * from the OSD here. We make sure the object was created before we try and 1262 * delete it. 1263 */ 1264void exofs_delete_inode(struct inode *inode) 1265{ 1266 struct exofs_i_info *oi = exofs_i(inode); 1267 struct super_block *sb = inode->i_sb; 1268 struct exofs_sb_info *sbi = sb->s_fs_info; 1269 struct exofs_io_state *ios; 1270 int ret; 1271 1272 truncate_inode_pages(&inode->i_data, 0); 1273 1274 if (is_bad_inode(inode)) 1275 goto no_delete; 1276 1277 mark_inode_dirty(inode); 1278 exofs_update_inode(inode, inode_needs_sync(inode)); 1279 1280 inode->i_size = 0; 1281 if (inode->i_blocks) 1282 exofs_truncate(inode); 1283 1284 clear_inode(inode); 1285 1286 ret = exofs_get_io_state(sbi, &ios); 1287 if (unlikely(ret)) { 1288 EXOFS_ERR("%s: exofs_get_io_state failed\n", __func__); 1289 return; 1290 } 1291 1292 /* if we are deleting an obj that hasn't been created yet, wait */ 1293 if (!obj_created(oi)) { 1294 BUG_ON(!obj_2bcreated(oi)); 1295 wait_event(oi->i_wq, obj_created(oi)); 1296 } 1297 1298 ios->obj.id = exofs_oi_objno(oi); 1299 ios->done = delete_done; 1300 ios->private = sbi; 1301 ios->cred = oi->i_cred; 1302 ret = exofs_sbi_remove(ios); 1303 if (ret) { 1304 EXOFS_ERR("%s: exofs_sbi_remove failed\n", __func__); 1305 exofs_put_io_state(ios); 1306 return; 1307 } 1308 atomic_inc(&sbi->s_curr_pending); 1309 1310 return; 1311 1312no_delete: 1313 clear_inode(inode); 1314}