Merge tag 'ceph-for-4.14-rc1' of git://github.com/ceph/ceph-client

Pull ceph updates from Ilya Dryomov:
"The highlights include:

- a large series of fixes and improvements to the snapshot-handling
code (Zheng Yan)

- individual read/write OSD requests passed down to libceph are now
limited to 16M in size to avoid hitting OSD-side limits (Zheng Yan)

- encode MStatfs v2 message to allow for more accurate space usage
reporting (Douglas Fuller)

- switch to the new writeback error tracking infrastructure (Jeff
Layton)"

* tag 'ceph-for-4.14-rc1' of git://github.com/ceph/ceph-client: (35 commits)
ceph: stop on-going cached readdir if mds revokes FILE_SHARED cap
ceph: wait on writeback after writing snapshot data
ceph: fix capsnap dirty pages accounting
ceph: ignore wbc->range_{start,end} when write back snapshot data
ceph: fix "range cyclic" mode writepages
ceph: cleanup local variables in ceph_writepages_start()
ceph: optimize pagevec iterating in ceph_writepages_start()
ceph: make writepage_nounlock() invalidate page that beyonds EOF
ceph: properly get capsnap's size in get_oldest_context()
ceph: remove stale check in ceph_invalidatepage()
ceph: queue cap snap only when snap realm's context changes
ceph: handle race between vmtruncate and queuing cap snap
ceph: fix message order check in handle_cap_export()
ceph: fix NULL pointer dereference in ceph_flush_snaps()
ceph: adjust 36 checks for NULL pointers
ceph: delete an unnecessary return statement in update_dentry_lease()
ceph: ENOMEM pr_err in __get_or_create_frag() is redundant
ceph: check negative offsets in ceph_llseek()
ceph: more accurate statfs
ceph: properly set snap follows for cap reconnect
...

20 files changed, +420 -351
+1 -1
drivers/block/rbd.c
@@ ... @@
         struct rbd_device *rbd_dev = container_of(to_delayed_work(work),
                                                   struct rbd_device, lock_dwork);
         enum rbd_lock_state lock_state;
-        int ret;
+        int ret = 0;
 
         dout("%s rbd_dev %p\n", __func__, rbd_dev);
 again:
+224 -179
fs/ceph/addr.c
@@ ... @@
 
         ceph_invalidate_fscache_page(inode, page);
 
+        WARN_ON(!PageLocked(page));
         if (!PagePrivate(page))
                 return;
-
-        /*
-         * We can get non-dirty pages here due to races between
-         * set_page_dirty and truncate_complete_page; just spit out a
-         * warning, in case we end up with accounting problems later.
-         */
-        if (!PageDirty(page))
-                pr_err("%p invalidatepage %p page not dirty\n", inode, page);
 
         ClearPageChecked(page);
 
@@ ... @@
         if (rc == 0)
                 goto out;
 
-        if (fsc->mount_options->rsize >= PAGE_SIZE)
-                max = (fsc->mount_options->rsize + PAGE_SIZE - 1)
-                        >> PAGE_SHIFT;
-
-        dout("readpages %p file %p nr_pages %d max %d\n", inode,
-             file, nr_pages,
-             max);
+        max = fsc->mount_options->rsize >> PAGE_SHIFT;
+        dout("readpages %p file %p nr_pages %d max %d\n",
+             inode, file, nr_pages, max);
         while (!list_empty(page_list)) {
                 rc = start_read(inode, page_list, max);
                 if (rc < 0)
@@ ... @@
         return rc;
 }
 
+struct ceph_writeback_ctl
+{
+        loff_t i_size;
+        u64 truncate_size;
+        u32 truncate_seq;
+        bool size_stable;
+        bool head_snapc;
+};
+
 /*
  * Get ref for the oldest snapc for an inode with dirty data... that is, the
  * only snap context we are allowed to write back.
  */
-static struct ceph_snap_context *get_oldest_context(struct inode *inode,
-                                                    loff_t *snap_size,
-                                                    u64 *truncate_size,
-                                                    u32 *truncate_seq)
+static struct ceph_snap_context *
+get_oldest_context(struct inode *inode, struct ceph_writeback_ctl *ctl,
+                   struct ceph_snap_context *page_snapc)
 {
         struct ceph_inode_info *ci = ceph_inode(inode);
         struct ceph_snap_context *snapc = NULL;
@@ ... @@
         list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) {
                 dout(" cap_snap %p snapc %p has %d dirty pages\n", capsnap,
                      capsnap->context, capsnap->dirty_pages);
-                if (capsnap->dirty_pages) {
-                        snapc = ceph_get_snap_context(capsnap->context);
-                        if (snap_size)
-                                *snap_size = capsnap->size;
-                        if (truncate_size)
-                                *truncate_size = capsnap->truncate_size;
-                        if (truncate_seq)
-                                *truncate_seq = capsnap->truncate_seq;
-                        break;
+                if (!capsnap->dirty_pages)
+                        continue;
+
+                /* get i_size, truncate_{seq,size} for page_snapc? */
+                if (snapc && capsnap->context != page_snapc)
+                        continue;
+
+                if (ctl) {
+                        if (capsnap->writing) {
+                                ctl->i_size = i_size_read(inode);
+                                ctl->size_stable = false;
+                        } else {
+                                ctl->i_size = capsnap->size;
+                                ctl->size_stable = true;
+                        }
+                        ctl->truncate_size = capsnap->truncate_size;
+                        ctl->truncate_seq = capsnap->truncate_seq;
+                        ctl->head_snapc = false;
                 }
+
+                if (snapc)
+                        break;
+
+                snapc = ceph_get_snap_context(capsnap->context);
+                if (!page_snapc ||
+                    page_snapc == snapc ||
+                    page_snapc->seq > snapc->seq)
+                        break;
         }
         if (!snapc && ci->i_wrbuffer_ref_head) {
                 snapc = ceph_get_snap_context(ci->i_head_snapc);
                 dout(" head snapc %p has %d dirty pages\n",
                      snapc, ci->i_wrbuffer_ref_head);
-                if (truncate_size)
-                        *truncate_size = ci->i_truncate_size;
-                if (truncate_seq)
-                        *truncate_seq = ci->i_truncate_seq;
+                if (ctl) {
+                        ctl->i_size = i_size_read(inode);
+                        ctl->truncate_size = ci->i_truncate_size;
+                        ctl->truncate_seq = ci->i_truncate_seq;
+                        ctl->size_stable = false;
+                        ctl->head_snapc = true;
+                }
         }
         spin_unlock(&ci->i_ceph_lock);
         return snapc;
+}
+
+static u64 get_writepages_data_length(struct inode *inode,
+                                      struct page *page, u64 start)
+{
+        struct ceph_inode_info *ci = ceph_inode(inode);
+        struct ceph_snap_context *snapc = page_snap_context(page);
+        struct ceph_cap_snap *capsnap = NULL;
+        u64 end = i_size_read(inode);
+
+        if (snapc != ci->i_head_snapc) {
+                bool found = false;
+                spin_lock(&ci->i_ceph_lock);
+                list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) {
+                        if (capsnap->context == snapc) {
+                                if (!capsnap->writing)
+                                        end = capsnap->size;
+                                found = true;
+                                break;
+                        }
+                }
+                spin_unlock(&ci->i_ceph_lock);
+                WARN_ON(!found);
+        }
+        if (end > page_offset(page) + PAGE_SIZE)
+                end = page_offset(page) + PAGE_SIZE;
+        return end > start ? end - start : 0;
 }
@@ ... @@
         struct inode *inode;
         struct ceph_inode_info *ci;
         struct ceph_fs_client *fsc;
-        struct ceph_osd_client *osdc;
         struct ceph_snap_context *snapc, *oldest;
         loff_t page_off = page_offset(page);
-        loff_t snap_size = -1;
         long writeback_stat;
-        u64 truncate_size;
-        u32 truncate_seq;
         int err, len = PAGE_SIZE;
+        struct ceph_writeback_ctl ceph_wbc;
 
         dout("writepage %p idx %lu\n", page, page->index);
 
         inode = page->mapping->host;
         ci = ceph_inode(inode);
         fsc = ceph_inode_to_client(inode);
-        osdc = &fsc->client->osdc;
 
         /* verify this is a writeable snap context */
         snapc = page_snap_context(page);
-        if (snapc == NULL) {
+        if (!snapc) {
                 dout("writepage %p page %p not dirty?\n", inode, page);
                 return 0;
         }
-        oldest = get_oldest_context(inode, &snap_size,
-                                    &truncate_size, &truncate_seq);
+        oldest = get_oldest_context(inode, &ceph_wbc, snapc);
         if (snapc->seq > oldest->seq) {
                 dout("writepage %p page %p snapc %p not writeable - noop\n",
                      inode, page, snapc);
@@ ... @@
         }
         ceph_put_snap_context(oldest);
 
-        if (snap_size == -1)
-                snap_size = i_size_read(inode);
-
         /* is this a partial page at end of file? */
-        if (page_off >= snap_size) {
-                dout("%p page eof %llu\n", page, snap_size);
+        if (page_off >= ceph_wbc.i_size) {
+                dout("%p page eof %llu\n", page, ceph_wbc.i_size);
+                page->mapping->a_ops->invalidatepage(page, 0, PAGE_SIZE);
                 return 0;
         }
 
-        if (snap_size < page_off + len)
-                len = snap_size - page_off;
+        if (ceph_wbc.i_size < page_off + len)
+                len = ceph_wbc.i_size - page_off;
 
-        dout("writepage %p page %p index %lu on %llu~%u snapc %p\n",
-             inode, page, page->index, page_off, len, snapc);
+        dout("writepage %p page %p index %lu on %llu~%u snapc %p seq %lld\n",
+             inode, page, page->index, page_off, len, snapc, snapc->seq);
 
         writeback_stat = atomic_long_inc_return(&fsc->writeback_count);
         if (writeback_stat >
@@ ... @@
                 set_bdi_congested(inode_to_bdi(inode), BLK_RW_ASYNC);
 
         set_page_writeback(page);
-        err = ceph_osdc_writepages(osdc, ceph_vino(inode),
-                                   &ci->i_layout, snapc,
-                                   page_off, len,
-                                   truncate_seq, truncate_size,
+        err = ceph_osdc_writepages(&fsc->client->osdc, ceph_vino(inode),
+                                   &ci->i_layout, snapc, page_off, len,
+                                   ceph_wbc.truncate_seq,
+                                   ceph_wbc.truncate_size,
                                    &inode->i_mtime, &page, 1);
         if (err < 0) {
                 struct writeback_control tmp_wbc;
@@ ... @@
         struct ceph_inode_info *ci = ceph_inode(inode);
         struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
         struct ceph_vino vino = ceph_vino(inode);
-        pgoff_t index, start, end;
-        int range_whole = 0;
-        int should_loop = 1;
-        pgoff_t max_pages = 0, max_pages_ever = 0;
+        pgoff_t index, start_index, end = -1;
         struct ceph_snap_context *snapc = NULL, *last_snapc = NULL, *pgsnapc;
         struct pagevec pvec;
-        int done = 0;
         int rc = 0;
         unsigned int wsize = i_blocksize(inode);
         struct ceph_osd_request *req = NULL;
-        int do_sync = 0;
-        loff_t snap_size, i_size;
-        u64 truncate_size;
-        u32 truncate_seq;
+        struct ceph_writeback_ctl ceph_wbc;
+        bool should_loop, range_whole = false;
+        bool stop, done = false;
 
-        /*
-         * Include a 'sync' in the OSD request if this is a data
-         * integrity write (e.g., O_SYNC write or fsync()), or if our
-         * cap is being revoked.
-         */
-        if ((wbc->sync_mode == WB_SYNC_ALL) ||
-            ceph_caps_revoking(ci, CEPH_CAP_FILE_BUFFER))
-                do_sync = 1;
-        dout("writepages_start %p dosync=%d (mode=%s)\n",
-             inode, do_sync,
+        dout("writepages_start %p (mode=%s)\n", inode,
              wbc->sync_mode == WB_SYNC_NONE ? "NONE" :
              (wbc->sync_mode == WB_SYNC_ALL ? "ALL" : "HOLD"));
@@ ... @@
                 mapping_set_error(mapping, -EIO);
                 return -EIO; /* we're in a forced umount, don't write! */
         }
-        if (fsc->mount_options->wsize && fsc->mount_options->wsize < wsize)
+        if (fsc->mount_options->wsize < wsize)
                 wsize = fsc->mount_options->wsize;
-        if (wsize < PAGE_SIZE)
-                wsize = PAGE_SIZE;
-        max_pages_ever = wsize >> PAGE_SHIFT;
 
         pagevec_init(&pvec, 0);
 
-        /* where to start/end? */
-        if (wbc->range_cyclic) {
-                start = mapping->writeback_index; /* Start from prev offset */
-                end = -1;
-                dout(" cyclic, start at %lu\n", start);
-        } else {
-                start = wbc->range_start >> PAGE_SHIFT;
-                end = wbc->range_end >> PAGE_SHIFT;
-                if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
-                        range_whole = 1;
-                should_loop = 0;
-                dout(" not cyclic, %lu to %lu\n", start, end);
-        }
-        index = start;
+        start_index = wbc->range_cyclic ? mapping->writeback_index : 0;
+        index = start_index;
 
 retry:
         /* find oldest snap context with dirty data */
-        ceph_put_snap_context(snapc);
-        snap_size = -1;
-        snapc = get_oldest_context(inode, &snap_size,
-                                   &truncate_size, &truncate_seq);
+        snapc = get_oldest_context(inode, &ceph_wbc, NULL);
         if (!snapc) {
                 /* hmm, why does writepages get called when there
                    is no dirty data? */
@@ ... @@
         dout(" oldest snapc is %p seq %lld (%d snaps)\n",
              snapc, snapc->seq, snapc->num_snaps);
 
-        i_size = i_size_read(inode);
-
-        if (last_snapc && snapc != last_snapc) {
-                /* if we switched to a newer snapc, restart our scan at the
-                 * start of the original file range. */
-                dout(" snapc differs from last pass, restarting at %lu\n",
-                     index);
-                index = start;
+        should_loop = false;
+        if (ceph_wbc.head_snapc && snapc != last_snapc) {
+                /* where to start/end? */
+                if (wbc->range_cyclic) {
+                        index = start_index;
+                        end = -1;
+                        if (index > 0)
+                                should_loop = true;
+                        dout(" cyclic, start at %lu\n", index);
+                } else {
+                        index = wbc->range_start >> PAGE_SHIFT;
+                        end = wbc->range_end >> PAGE_SHIFT;
+                        if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
+                                range_whole = true;
+                        dout(" not cyclic, %lu to %lu\n", index, end);
+                }
+        } else if (!ceph_wbc.head_snapc) {
+                /* Do not respect wbc->range_{start,end}. Dirty pages
+                 * in that range can be associated with newer snapc.
+                 * They are not writeable until we write all dirty pages
+                 * associated with 'snapc' get written */
+                if (index > 0 || wbc->sync_mode != WB_SYNC_NONE)
+                        should_loop = true;
+                dout(" non-head snapc, range whole\n");
         }
+
+        ceph_put_snap_context(last_snapc);
         last_snapc = snapc;
 
-        while (!done && index <= end) {
-                unsigned i;
-                int first;
-                pgoff_t strip_unit_end = 0;
+        stop = false;
+        while (!stop && index <= end) {
                 int num_ops = 0, op_idx;
-                int pvec_pages, locked_pages = 0;
+                unsigned i, pvec_pages, max_pages, locked_pages = 0;
                 struct page **pages = NULL, **data_pages;
                 mempool_t *pool = NULL; /* Becomes non-null if mempool used */
                 struct page *page;
-                int want;
+                pgoff_t strip_unit_end = 0;
                 u64 offset = 0, len = 0;
 
-                max_pages = max_pages_ever;
+                max_pages = wsize >> PAGE_SHIFT;
 
 get_more_pages:
-                first = -1;
-                want = min(end - index,
-                           min((pgoff_t)PAGEVEC_SIZE,
-                               max_pages - (pgoff_t)locked_pages) - 1)
-                        + 1;
+                pvec_pages = min_t(unsigned, PAGEVEC_SIZE,
+                                   max_pages - locked_pages);
+                if (end - index < (u64)(pvec_pages - 1))
+                        pvec_pages = (unsigned)(end - index) + 1;
+
                 pvec_pages = pagevec_lookup_tag(&pvec, mapping, &index,
                                                 PAGECACHE_TAG_DIRTY,
-                                                want);
+                                                pvec_pages);
                 dout("pagevec_lookup_tag got %d\n", pvec_pages);
                 if (!pvec_pages && !locked_pages)
                         break;
@@ ... @@
                             unlikely(page->mapping != mapping)) {
                                 dout("!dirty or !mapping %p\n", page);
                                 unlock_page(page);
-                                break;
+                                continue;
                         }
-                        if (!wbc->range_cyclic && page->index > end) {
+                        if (page->index > end) {
                                 dout("end of range %p\n", page);
-                                done = 1;
+                                /* can't be range_cyclic (1st pass) because
+                                 * end == -1 in that case. */
+                                stop = true;
+                                if (ceph_wbc.head_snapc)
+                                        done = true;
                                 unlock_page(page);
                                 break;
                         }
@@ ... @@
                                 unlock_page(page);
                                 break;
                         }
-                        if (wbc->sync_mode != WB_SYNC_NONE) {
-                                dout("waiting on writeback %p\n", page);
-                                wait_on_page_writeback(page);
-                        }
-                        if (page_offset(page) >=
-                            (snap_size == -1 ? i_size : snap_size)) {
-                                dout("%p page eof %llu\n", page,
-                                     (snap_size == -1 ? i_size : snap_size));
-                                done = 1;
+                        if (page_offset(page) >= ceph_wbc.i_size) {
+                                dout("%p page eof %llu\n",
+                                     page, ceph_wbc.i_size);
+                                /* not done if range_cyclic */
+                                stop = true;
                                 unlock_page(page);
                                 break;
                         }
                         if (PageWriteback(page)) {
-                                dout("%p under writeback\n", page);
-                                unlock_page(page);
-                                break;
+                                if (wbc->sync_mode == WB_SYNC_NONE) {
+                                        dout("%p under writeback\n", page);
+                                        unlock_page(page);
+                                        continue;
+                                }
+                                dout("waiting on writeback %p\n", page);
+                                wait_on_page_writeback(page);
                         }
 
                         /* only if matching snap context */
                         pgsnapc = page_snap_context(page);
-                        if (pgsnapc->seq > snapc->seq) {
-                                dout("page snapc %p %lld > oldest %p %lld\n",
+                        if (pgsnapc != snapc) {
+                                dout("page snapc %p %lld != oldest %p %lld\n",
                                      pgsnapc, pgsnapc->seq, snapc, snapc->seq);
                                 unlock_page(page);
-                                if (!locked_pages)
-                                        continue; /* keep looking for snap */
-                                break;
+                                continue;
                         }
 
                         if (!clear_page_dirty_for_io(page)) {
                                 dout("%p !clear_page_dirty_for_io\n", page);
                                 unlock_page(page);
-                                break;
+                                continue;
                         }
 
                         /*
@@ ... @@
                                         break;
                                 }
 
-                                num_ops = 1 + do_sync;
+                                num_ops = 1;
                                 strip_unit_end = page->index +
                                         ((len - 1) >> PAGE_SHIFT);
@@ ... @@
                         }
 
                         /* note position of first page in pvec */
-                        if (first < 0)
-                                first = i;
                         dout("%p will write page %p idx %lu\n",
                              inode, page, page->index);
@@ ... @@
                                                     BLK_RW_ASYNC);
                         }
 
-                        pages[locked_pages] = page;
-                        locked_pages++;
+
+                        pages[locked_pages++] = page;
+                        pvec.pages[i] = NULL;
+
                         len += PAGE_SIZE;
                 }
@@ ... @@
                 if (!locked_pages)
                         goto release_pvec_pages;
                 if (i) {
-                        int j;
-                        BUG_ON(!locked_pages || first < 0);
+                        unsigned j, n = 0;
+                        /* shift unused page to beginning of pvec */
+                        for (j = 0; j < pvec_pages; j++) {
+                                if (!pvec.pages[j])
+                                        continue;
+                                if (n < j)
+                                        pvec.pages[n] = pvec.pages[j];
+                                n++;
+                        }
+                        pvec.nr = n;
 
                         if (pvec_pages && i == pvec_pages &&
                             locked_pages < max_pages) {
                                 dout("reached end pvec, trying for more\n");
-                                pagevec_reinit(&pvec);
+                                pagevec_release(&pvec);
                                 goto get_more_pages;
                         }
-
-                        /* shift unused pages over in the pvec...  we
-                         * will need to release them below.  */
-                        for (j = i; j < pvec_pages; j++) {
-                                dout(" pvec leftover page %p\n", pvec.pages[j]);
-                                pvec.pages[j-i+first] = pvec.pages[j];
-                        }
-                        pvec.nr -= i-first;
                 }
 
 new_request:
@@ ... @@
                 req = ceph_osdc_new_request(&fsc->client->osdc,
                                         &ci->i_layout, vino,
                                         offset, &len, 0, num_ops,
-                                        CEPH_OSD_OP_WRITE,
-                                        CEPH_OSD_FLAG_WRITE,
-                                        snapc, truncate_seq,
-                                        truncate_size, false);
+                                        CEPH_OSD_OP_WRITE, CEPH_OSD_FLAG_WRITE,
+                                        snapc, ceph_wbc.truncate_seq,
+                                        ceph_wbc.truncate_size, false);
                 if (IS_ERR(req)) {
                         req = ceph_osdc_new_request(&fsc->client->osdc,
                                                 &ci->i_layout, vino,
@@ ... @@
                                                 CEPH_OSD_SLAB_OPS),
                                                 CEPH_OSD_OP_WRITE,
                                                 CEPH_OSD_FLAG_WRITE,
-                                                snapc, truncate_seq,
-                                                truncate_size, true);
+                                                snapc, ceph_wbc.truncate_seq,
+                                                ceph_wbc.truncate_size, true);
                         BUG_ON(IS_ERR(req));
                 }
                 BUG_ON(len < page_offset(pages[locked_pages - 1]) +
@@ ... @@
                 for (i = 0; i < locked_pages; i++) {
                         u64 cur_offset = page_offset(pages[i]);
                         if (offset + len != cur_offset) {
-                                if (op_idx + do_sync + 1 == req->r_num_ops)
+                                if (op_idx + 1 == req->r_num_ops)
                                         break;
                                 osd_req_op_extent_dup_last(req, op_idx,
                                                            cur_offset - offset);
@@ ... @@
                         len += PAGE_SIZE;
                 }
 
-                if (snap_size != -1) {
-                        len = min(len, snap_size - offset);
+                if (ceph_wbc.size_stable) {
+                        len = min(len, ceph_wbc.i_size - offset);
                 } else if (i == locked_pages) {
                         /* writepages_finish() clears writeback pages
                          * according to the data length, so make sure
                          * data length covers all locked pages */
                         u64 min_len = len + 1 - PAGE_SIZE;
-                        len = min(len, (u64)i_size_read(inode) - offset);
+                        len = get_writepages_data_length(inode, pages[i - 1],
+                                                         offset);
                         len = max(len, min_len);
                 }
                 dout("writepages got pages at %llu~%llu\n", offset, len);
@@ ... @@
                                          0, !!pool, false);
                 osd_req_op_extent_update(req, op_idx, len);
 
-                if (do_sync) {
-                        op_idx++;
-                        osd_req_op_init(req, op_idx, CEPH_OSD_OP_STARTSYNC, 0);
-                }
                 BUG_ON(op_idx + 1 != req->r_num_ops);
 
                 pool = NULL;
                 if (i < locked_pages) {
                         BUG_ON(num_ops <= req->r_num_ops);
                         num_ops -= req->r_num_ops;
-                        num_ops += do_sync;
                         locked_pages -= i;
 
                         /* allocate new pages array for next request */
@@ ... @@
                 if (pages)
                         goto new_request;
 
-                if (wbc->nr_to_write <= 0)
-                        done = 1;
+                /*
+                 * We stop writing back only if we are not doing
+                 * integrity sync. In case of integrity sync we have to
+                 * keep going until we have written all the pages
+                 * we tagged for writeback prior to entering this loop.
+                 */
+                if (wbc->nr_to_write <= 0 && wbc->sync_mode == WB_SYNC_NONE)
+                        done = stop = true;
 
 release_pvec_pages:
                 dout("pagevec_release on %d pages (%p)\n", (int)pvec.nr,
                      pvec.nr ? pvec.pages[0] : NULL);
                 pagevec_release(&pvec);
-
-                if (locked_pages && !done)
-                        goto retry;
         }
 
         if (should_loop && !done) {
                 /* more to do; loop back to beginning of file */
                 dout("writepages looping back to beginning of file\n");
-                should_loop = 0;
+                end = start_index - 1; /* OK even when start_index == 0 */
+
+                /* to write dirty pages associated with next snapc,
+                 * we need to wait until current writes complete */
+                if (wbc->sync_mode != WB_SYNC_NONE &&
+                    start_index == 0 && /* all dirty pages were checked */
+                    !ceph_wbc.head_snapc) {
+                        struct page *page;
+                        unsigned i, nr;
+                        index = 0;
+                        while ((index <= end) &&
+                               (nr = pagevec_lookup_tag(&pvec, mapping, &index,
+                                                        PAGECACHE_TAG_WRITEBACK,
+                                                        PAGEVEC_SIZE))) {
+                                for (i = 0; i < nr; i++) {
+                                        page = pvec.pages[i];
+                                        if (page_snap_context(page) != snapc)
+                                                continue;
+                                        wait_on_page_writeback(page);
+                                }
+                                pagevec_release(&pvec);
+                                cond_resched();
+                        }
+                }
+
+                start_index = 0;
                 index = 0;
                 goto retry;
         }
@@ ... @@
 
 out:
         ceph_osdc_put_request(req);
-        ceph_put_snap_context(snapc);
-        dout("writepages done, rc = %d\n", rc);
+        ceph_put_snap_context(last_snapc);
+        dout("writepages dend - startone, rc = %d\n", rc);
         return rc;
 }
@@ ... @@
 static int context_is_writeable_or_written(struct inode *inode,
                                            struct ceph_snap_context *snapc)
 {
-        struct ceph_snap_context *oldest = get_oldest_context(inode, NULL,
-                                                              NULL, NULL);
+        struct ceph_snap_context *oldest = get_oldest_context(inode, NULL, NULL);
         int ret = !oldest || snapc->seq <= oldest->seq;
 
         ceph_put_snap_context(oldest);
@@ ... @@
          * this page is already dirty in another (older) snap
          * context!  is it writeable now?
          */
-        oldest = get_oldest_context(inode, NULL, NULL, NULL);
-
+        oldest = get_oldest_context(inode, NULL, NULL);
         if (snapc->seq > oldest->seq) {
                 ceph_put_snap_context(oldest);
                 dout(" page %p snapc %p not current or oldest\n",
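
[Editorial aside: the new struct ceph_writeback_ctl above replaces three
separate out-parameters with a single snapshot of size/truncate state taken
under i_ceph_lock and then consulted without the lock. A minimal user-space
sketch of that pattern; the names, lock, and values here are illustrative,
not kernel API:]

#include <pthread.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* illustrative analog of ceph_writeback_ctl: copy all related fields
 * in one locked read, then use the consistent copy lock-free */
struct writeback_ctl {
        uint64_t i_size;
        uint64_t truncate_size;
        uint32_t truncate_seq;
        bool size_stable;
};

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static uint64_t inode_size = 4096, trunc_size;
static uint32_t trunc_seq = 1;

static void snapshot_ctl(struct writeback_ctl *ctl)
{
        pthread_mutex_lock(&lock);
        ctl->i_size = inode_size;        /* one consistent view ... */
        ctl->truncate_size = trunc_size; /* ... of all three fields */
        ctl->truncate_seq = trunc_seq;
        ctl->size_stable = false;
        pthread_mutex_unlock(&lock);
}

int main(void)
{
        struct writeback_ctl ctl;
        snapshot_ctl(&ctl);
        printf("i_size=%llu seq=%u\n",
               (unsigned long long)ctl.i_size, (unsigned)ctl.truncate_seq);
        return 0;
}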
+1 -1
fs/ceph/cache.c
@@ ... @@
         struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
 
         /* No caching for filesystem */
-        if (fsc->fscache == NULL)
+        if (!fsc->fscache)
                 return;
 
         /* Only cache for regular files that are read only */
+23 -17
fs/ceph/caps.c
@@ ... @@
         }
 
         /*
-         * if we are newly issued FILE_SHARED, mark dir not complete; we
-         * don't know what happened to this directory while we didn't
-         * have the cap.
+         * If FILE_SHARED is newly issued, mark dir not complete. We don't
+         * know what happened to this directory while we didn't have the cap.
+         * If FILE_SHARED is being revoked, also mark dir not complete. It
+         * stops on-going cached readdir.
          */
-        if ((issued & CEPH_CAP_FILE_SHARED) &&
-            (had & CEPH_CAP_FILE_SHARED) == 0) {
-                ci->i_shared_gen++;
+        if ((issued & CEPH_CAP_FILE_SHARED) != (had & CEPH_CAP_FILE_SHARED)) {
+                if (issued & CEPH_CAP_FILE_SHARED)
+                        ci->i_shared_gen++;
                 if (S_ISDIR(ci->vfs_inode.i_mode)) {
                         dout(" marking %p NOT complete\n", &ci->vfs_inode);
                         __ceph_dir_clear_complete(ci);
@@ ... @@
         }
 
         if (flags & CEPH_CAP_FLAG_AUTH) {
-                if (ci->i_auth_cap == NULL ||
+                if (!ci->i_auth_cap ||
                     ceph_seq_cmp(ci->i_auth_cap->mseq, mseq) < 0) {
                         ci->i_auth_cap = cap;
                         cap->mds_wanted = wanted;
@@ ... @@
         struct ceph_mds_session *s = cap->session;
 
         spin_lock(&s->s_cap_lock);
-        if (s->s_cap_iterator == NULL) {
+        if (!s->s_cap_iterator) {
                 dout("__touch_cap %p cap %p mds%d\n", &cap->ci->vfs_inode, cap,
                      s->s_mds);
                 list_move_tail(&cap->session_caps, &s->s_caps);
@@ ... @@
         arg.mode = inode->i_mode;
 
         arg.inline_data = ci->i_inline_version != CEPH_INLINE_NONE;
-        arg.flags = 0;
+        if (list_empty(&ci->i_cap_snaps))
+                arg.flags = CEPH_CLIENT_CAPS_NO_CAPSNAP;
+        else
+                arg.flags = CEPH_CLIENT_CAPS_PENDING_CAPSNAP;
         if (sync)
                 arg.flags |= CEPH_CLIENT_CAPS_SYNC;
@@ ... @@
                 goto retry;
         }
 
+        // make sure flushsnap messages are sent in proper order.
+        if (ci->i_ceph_flags & CEPH_I_KICK_FLUSH) {
+                __kick_flushing_caps(mdsc, session, ci, 0);
+                ci->i_ceph_flags &= ~CEPH_I_KICK_FLUSH;
+        }
+
         __ceph_flush_snaps(ci, session);
 out:
         spin_unlock(&ci->i_ceph_lock);
 
         if (psession) {
                 *psession = session;
-        } else {
+        } else if (session) {
                 mutex_unlock(&session->s_mutex);
                 ceph_put_mds_session(session);
         }
@@ ... @@
             (ci->i_ceph_flags &
              (CEPH_I_KICK_FLUSH | CEPH_I_FLUSH_SNAPS))) {
                 if (ci->i_ceph_flags & CEPH_I_KICK_FLUSH) {
-                        spin_lock(&mdsc->cap_dirty_lock);
-                        oldest_flush_tid = __get_oldest_flush_tid(mdsc);
-                        spin_unlock(&mdsc->cap_dirty_lock);
-                        __kick_flushing_caps(mdsc, session, ci,
-                                             oldest_flush_tid);
+                        __kick_flushing_caps(mdsc, session, ci, 0);
                         ci->i_ceph_flags &= ~CEPH_I_KICK_FLUSH;
                 }
                 if (ci->i_ceph_flags & CEPH_I_FLUSH_SNAPS)
@@ ... @@
 
         dout("fsync %p%s\n", inode, datasync ? " datasync" : "");
 
-        ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
+        ret = file_write_and_wait_range(file, start, end);
         if (ret < 0)
                 goto out;
@@ ... @@
         tcap = __get_cap_for_mds(ci, target);
         if (tcap) {
                 /* already have caps from the target */
-                if (tcap->cap_id != t_cap_id ||
+                if (tcap->cap_id == t_cap_id &&
                     ceph_seq_cmp(tcap->seq, t_seq) < 0) {
                         dout(" updating import cap %p mds%d\n", tcap, target);
                         tcap->cap_id = t_cap_id;
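
[Editorial aside: the FILE_SHARED hunk above swaps an "only on grant" test
for an inequality that fires on both grant (0 -> 1) and revoke (1 -> 0) of
the bit, which is what stops an in-flight cached readdir. A tiny runnable
demo of the predicate; the flag value is made up for the demo:]

#include <stdio.h>

#define FILE_SHARED 0x2 /* illustrative value, not the kernel's */

static void check(unsigned had, unsigned issued)
{
        /* fires on any change of the bit, in either direction */
        if ((issued & FILE_SHARED) != (had & FILE_SHARED))
                printf("had=%#x issued=%#x -> dir not complete (%s)\n",
                       had, issued,
                       (issued & FILE_SHARED) ? "grant" : "revoke");
}

int main(void)
{
        check(0x0, FILE_SHARED);         /* newly issued */
        check(FILE_SHARED, 0x0);         /* being revoked */
        check(FILE_SHARED, FILE_SHARED); /* unchanged: no output */
        return 0;
}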
+1 -1
fs/ceph/debugfs.c
@@ ... @@
         struct ceph_fs_client *fsc = s->private;
         struct ceph_mdsmap *mdsmap;
 
-        if (fsc->mdsc == NULL || fsc->mdsc->mdsmap == NULL)
+        if (!fsc->mdsc || !fsc->mdsc->mdsmap)
                 return 0;
         mdsmap = fsc->mdsc->mdsmap;
         seq_printf(s, "epoch %d\n", mdsmap->m_epoch);
+4 -2
fs/ceph/dir.c
@@ ... @@
         }
         /* hints to request -> mds selection code */
         req->r_direct_mode = USE_AUTH_MDS;
-        req->r_direct_hash = ceph_frag_value(frag);
-        __set_bit(CEPH_MDS_R_DIRECT_IS_HASH, &req->r_req_flags);
+        if (op == CEPH_MDS_OP_READDIR) {
+                req->r_direct_hash = ceph_frag_value(frag);
+                __set_bit(CEPH_MDS_R_DIRECT_IS_HASH, &req->r_req_flags);
+        }
         if (fi->last_name) {
                 req->r_path2 = kstrdup(fi->last_name, GFP_KERNEL);
                 if (!req->r_path2) {
+25 -25
fs/ceph/file.c
@@ ... @@
                 dout("init_file %p %p 0%o (regular)\n", inode, file,
                      inode->i_mode);
                 cf = kmem_cache_zalloc(ceph_file_cachep, GFP_KERNEL);
-                if (cf == NULL) {
+                if (!cf) {
                         ceph_put_fmode(ceph_inode(inode), fmode); /* clean up */
                         return -ENOMEM;
                 }
@@ ... @@
         ssize_t ret;
         size_t len = iov_iter_count(to);
 
-        dout("sync_read on file %p %llu~%u %s\n", file, off,
-             (unsigned)len,
+        dout("sync_read on file %p %llu~%u %s\n", file, off, (unsigned)len,
              (file->f_flags & O_DIRECT) ? "O_DIRECT" : "");
 
         if (!len)
@@ ... @@
                 goto out;
         }
 
-        req->r_flags = CEPH_OSD_FLAG_ORDERSNAP | CEPH_OSD_FLAG_WRITE;
+        req->r_flags = /* CEPH_OSD_FLAG_ORDERSNAP | */ CEPH_OSD_FLAG_WRITE;
         ceph_oloc_copy(&req->r_base_oloc, &orig_req->r_base_oloc);
         ceph_oid_copy(&req->r_base_oid, &orig_req->r_base_oid);
@@ ... @@
         }
 
         req->r_ops[0] = orig_req->r_ops[0];
-        osd_req_op_init(req, 1, CEPH_OSD_OP_STARTSYNC, 0);
 
         req->r_mtime = aio_req->mtime;
         req->r_data_offset = req->r_ops[0].extent.offset;
@@ ... @@
         if (write && ceph_snap(file_inode(file)) != CEPH_NOSNAP)
                 return -EROFS;
 
-        dout("sync_direct_read_write (%s) on file %p %lld~%u\n",
-             (write ? "write" : "read"), file, pos, (unsigned)count);
+        dout("sync_direct_%s on file %p %lld~%u snapc %p seq %lld\n",
+             (write ? "write" : "read"), file, pos, (unsigned)count,
+             snapc, snapc->seq);
 
         ret = filemap_write_and_wait_range(inode->i_mapping, pos, pos + count);
         if (ret < 0)
@@ ... @@
                 if (ret2 < 0)
                         dout("invalidate_inode_pages2_range returned %d\n", ret2);
 
-                flags = CEPH_OSD_FLAG_ORDERSNAP | CEPH_OSD_FLAG_WRITE;
+                flags = /* CEPH_OSD_FLAG_ORDERSNAP | */ CEPH_OSD_FLAG_WRITE;
         } else {
                 flags = CEPH_OSD_FLAG_READ;
         }
@@ ... @@
                 vino = ceph_vino(inode);
                 req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
                                             vino, pos, &size, 0,
-                                            /*include a 'startsync' command*/
-                                            write ? 2 : 1,
+                                            1,
                                             write ? CEPH_OSD_OP_WRITE :
                                                     CEPH_OSD_OP_READ,
                                             flags, snapc,
@@ ... @@
                         ret = PTR_ERR(req);
                         break;
                 }
+
+                if (write)
+                        size = min_t(u64, size, fsc->mount_options->wsize);
+                else
+                        size = min_t(u64, size, fsc->mount_options->rsize);
 
                 len = size;
                 pages = dio_get_pages_alloc(iter, len, &start, &num_pages);
@@ ... @@
                         truncate_inode_pages_range(inode->i_mapping, pos,
                                                    (pos+len) | (PAGE_SIZE - 1));
 
-                        osd_req_op_init(req, 1, CEPH_OSD_OP_STARTSYNC, 0);
                         req->r_mtime = mtime;
                 }
@@ ... @@
         if (ceph_snap(file_inode(file)) != CEPH_NOSNAP)
                 return -EROFS;
 
-        dout("sync_write on file %p %lld~%u\n", file, pos, (unsigned)count);
+        dout("sync_write on file %p %lld~%u snapc %p seq %lld\n",
+             file, pos, (unsigned)count, snapc, snapc->seq);
 
         ret = filemap_write_and_wait_range(inode->i_mapping, pos, pos + count);
         if (ret < 0)
@@ ... @@
         if (ret < 0)
                 dout("invalidate_inode_pages2_range returned %d\n", ret);
 
-        flags = CEPH_OSD_FLAG_ORDERSNAP | CEPH_OSD_FLAG_WRITE;
+        flags = /* CEPH_OSD_FLAG_ORDERSNAP | */ CEPH_OSD_FLAG_WRITE;
 
         while ((len = iov_iter_count(from)) > 0) {
                 size_t left;
@@ ... @@
         if (!prealloc_cf)
                 return -ENOMEM;
 
+retry_snap:
         inode_lock(inode);
 
         /* We can write back this queue in page reclaim */
@@ ... @@
                 goto out;
         }
 
-retry_snap:
         /* FIXME: not complete since it doesn't account for being at quota */
         if (ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL)) {
                 err = -ENOSPC;
@@ ... @@
                                                        &prealloc_cf);
                 else
                         written = ceph_sync_write(iocb, &data, pos, snapc);
-                if (written == -EOLDSNAPC) {
-                        dout("aio_write %p %llx.%llx %llu~%u"
-                                "got EOLDSNAPC, retrying\n",
-                                inode, ceph_vinop(inode),
-                                pos, (unsigned)count);
-                        inode_lock(inode);
-                        goto retry_snap;
-                }
                 if (written > 0)
                         iov_iter_advance(from, written);
                 ceph_put_snap_context(snapc);
@@ ... @@
              ceph_cap_string(got));
         ceph_put_cap_refs(ci, got);
 
+        if (written == -EOLDSNAPC) {
+                dout("aio_write %p %llx.%llx %llu~%u" "got EOLDSNAPC, retrying\n",
+                     inode, ceph_vinop(inode), pos, (unsigned)count);
+                goto retry_snap;
+        }
+
         if (written >= 0) {
                 if (ceph_osdmap_flag(osdc, CEPH_OSDMAP_NEARFULL))
                         iocb->ki_flags |= IOCB_DSYNC;
-
                 written = generic_write_sync(iocb, written);
         }
@@ ... @@
                 offset += file->f_pos;
                 break;
         case SEEK_DATA:
-                if (offset >= i_size) {
+                if (offset < 0 || offset >= i_size) {
                         ret = -ENXIO;
                         goto out;
                 }
                 break;
         case SEEK_HOLE:
-                if (offset >= i_size) {
+                if (offset < 0 || offset >= i_size) {
                         ret = -ENXIO;
                         goto out;
                 }
+28 -25
fs/ceph/inode.c
@@ ... @@
         ino_t t = ceph_vino_to_ino(vino);
 
         inode = iget5_locked(sb, t, ceph_ino_compare, ceph_set_ino_cb, &vino);
-        if (inode == NULL)
+        if (!inode)
                 return ERR_PTR(-ENOMEM);
         if (inode->i_state & I_NEW) {
                 dout("get_inode created new inode %p %llx.%llx ino %llx\n",
@@ ... @@
         }
 
         frag = kmalloc(sizeof(*frag), GFP_NOFS);
-        if (!frag) {
-                pr_err("__get_or_create_frag ENOMEM on %p %llx.%llx "
-                       "frag %x\n", &ci->vfs_inode,
-                       ceph_vinop(&ci->vfs_inode), f);
+        if (!frag)
                 return ERR_PTR(-ENOMEM);
-        }
+
         frag->frag = f;
         frag->split_by = 0;
         frag->mds = -1;
@@ ... @@
         spin_unlock(&dentry->d_lock);
         if (old_lease_session)
                 ceph_put_mds_session(old_lease_session);
-        return;
 }
 
 /*
@@ ... @@
                 dn = d_alloc(parent, &dname);
                 dout("d_alloc %p '%.*s' = %p\n", parent,
                      dname.len, dname.name, dn);
-                if (dn == NULL) {
+                if (!dn) {
                         dput(parent);
                         err = -ENOMEM;
                         goto done;
@@ ... @@
         struct dentry *dn;
         struct inode *in;
         int err = 0, skipped = 0, ret, i;
-        struct inode *snapdir = NULL;
         struct ceph_mds_request_head *rhead = req->r_request->front.iov_base;
         u32 frag = le32_to_cpu(rhead->args.readdir.frag);
         u32 last_hash = 0;
@@ ... @@
         }
 
         if (le32_to_cpu(rinfo->head->op) == CEPH_MDS_OP_LSSNAP) {
-                snapdir = ceph_get_snapdir(d_inode(parent));
-                parent = d_find_alias(snapdir);
                 dout("readdir_prepopulate %d items under SNAPDIR dn %p\n",
                      rinfo->dir_nr, parent);
         } else {
@@ ... @@
                      rinfo->dir_nr, parent);
                 if (rinfo->dir_dir)
                         ceph_fill_dirfrag(d_inode(parent), rinfo->dir_dir);
-        }
 
-        if (ceph_frag_is_leftmost(frag) && req->r_readdir_offset == 2 &&
-            !(rinfo->hash_order && last_hash)) {
-                /* note dir version at start of readdir so we can tell
-                 * if any dentries get dropped */
-                req->r_dir_release_cnt = atomic64_read(&ci->i_release_count);
-                req->r_dir_ordered_cnt = atomic64_read(&ci->i_ordered_count);
-                req->r_readdir_cache_idx = 0;
+                if (ceph_frag_is_leftmost(frag) &&
+                    req->r_readdir_offset == 2 &&
+                    !(rinfo->hash_order && last_hash)) {
+                        /* note dir version at start of readdir so we can
+                         * tell if any dentries get dropped */
+                        req->r_dir_release_cnt =
+                                atomic64_read(&ci->i_release_count);
+                        req->r_dir_ordered_cnt =
+                                atomic64_read(&ci->i_ordered_count);
+                        req->r_readdir_cache_idx = 0;
+                }
         }
 
         cache_ctl.index = req->r_readdir_cache_idx;
@@ ... @@
                 dn = d_alloc(parent, &dname);
                 dout("d_alloc %p '%.*s' = %p\n", parent,
                      dname.len, dname.name, dn);
-                if (dn == NULL) {
+                if (!dn) {
                         dout("d_alloc badness\n");
                         err = -ENOMEM;
                         goto out;
@@ ... @@
                 req->r_readdir_cache_idx = cache_ctl.index;
         }
         ceph_readdir_cache_release(&cache_ctl);
-        if (snapdir) {
-                iput(snapdir);
-                dput(parent);
-        }
         dout("readdir_prepopulate done\n");
         return err;
 }
@@ ... @@
          * possibly truncate them.. so write AND block!
          */
         if (ci->i_wrbuffer_ref_head < ci->i_wrbuffer_ref) {
+                struct ceph_cap_snap *capsnap;
+                to = ci->i_truncate_size;
+                list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) {
+                        // MDS should have revoked Frw caps
+                        WARN_ON_ONCE(capsnap->writing);
+                        if (capsnap->dirty_pages && capsnap->size > to)
+                                to = capsnap->size;
+                }
+                spin_unlock(&ci->i_ceph_lock);
                 dout("__do_pending_vmtruncate %p flushing snaps first\n",
                      inode);
-                spin_unlock(&ci->i_ceph_lock);
+
+                truncate_pagecache(inode, to);
+
                 filemap_write_and_wait_range(&inode->i_data, 0,
                                              inode->i_sb->s_maxbytes);
                 goto retry;
+22 -15
fs/ceph/mds_client.c
@@ ... @@
 {
         struct ceph_mds_session *session;
 
-        if (mds >= mdsc->max_sessions || mdsc->sessions[mds] == NULL)
+        if (mds >= mdsc->max_sessions || !mdsc->sessions[mds])
                 return NULL;
         session = mdsc->sessions[mds];
         dout("lookup_mds_session %p %d\n", session,
@@ ... @@
 
                 dout("register_session realloc to %d\n", newmax);
                 sa = kcalloc(newmax, sizeof(void *), GFP_NOFS);
-                if (sa == NULL)
+                if (!sa)
                         goto fail_realloc;
                 if (mdsc->sessions) {
                         memcpy(sa, mdsc->sessions,
@@ ... @@
 
         inode = NULL;
         if (req->r_inode) {
-                inode = req->r_inode;
-                ihold(inode);
-        } else if (req->r_dentry) {
+                if (ceph_snap(req->r_inode) != CEPH_SNAPDIR) {
+                        inode = req->r_inode;
+                        ihold(inode);
+                } else {
+                        /* req->r_dentry is non-null for LSSNAP request.
+                         * fall-thru */
+                        WARN_ON_ONCE(!req->r_dentry);
+                }
+        }
+        if (!inode && req->r_dentry) {
                 /* ignore race with rename; old or new d_parent is okay */
                 struct dentry *parent;
                 struct inode *dir;
@@ ... @@
 
         /* Calculate serialized length of metadata */
         metadata_bytes = 4;  /* map length */
-        for (i = 0; metadata[i][0] != NULL; ++i) {
+        for (i = 0; metadata[i][0]; ++i) {
                 metadata_bytes += 8 + strlen(metadata[i][0]) +
                         strlen(metadata[i][1]);
                 metadata_key_count++;
@@ ... @@
         ceph_encode_32(&p, metadata_key_count);
 
         /* Two length-prefixed strings for each entry in the map */
-        for (i = 0; metadata[i][0] != NULL; ++i) {
+        for (i = 0; metadata[i][0]; ++i) {
                 size_t const key_len = strlen(metadata[i][0]);
                 size_t const val_len = strlen(metadata[i][1]);
@@ ... @@
 
                 spin_lock(&session->s_cap_lock);
                 p = p->next;
-                if (cap->ci == NULL) {
+                if (!cap->ci) {
                         dout("iterate_session_caps finishing cap %p removal\n",
                              cap);
                         BUG_ON(cap->session != session);
@@ ... @@
         int len, pos;
         unsigned seq;
 
-        if (dentry == NULL)
+        if (!dentry)
                 return ERR_PTR(-EINVAL);
 
 retry:
@@ ... @@
         len--;  /* no leading '/' */
 
         path = kmalloc(len+1, GFP_NOFS);
-        if (path == NULL)
+        if (!path)
                 return ERR_PTR(-ENOMEM);
         pos = len;
         path[pos] = 0;  /* trailing null */
@@ ... @@
         }
 
         if (list_empty(&ci->i_cap_snaps)) {
-                snap_follows = 0;
+                snap_follows = ci->i_head_snapc ? ci->i_head_snapc->seq : 0;
         } else {
                 struct ceph_cap_snap *capsnap =
                         list_first_entry(&ci->i_cap_snaps,
@@ ... @@
              newmap->m_epoch, oldmap->m_epoch);
 
         for (i = 0; i < oldmap->m_num_mds && i < mdsc->max_sessions; i++) {
-                if (mdsc->sessions[i] == NULL)
+                if (!mdsc->sessions[i])
                         continue;
                 s = mdsc->sessions[i];
                 oldstate = ceph_mdsmap_get_state(oldmap, i);
@@ ... @@
         mutex_lock(&session->s_mutex);
         session->s_seq++;
 
-        if (inode == NULL) {
+        if (!inode) {
                 dout("handle_lease no inode %llx\n", vino.ino);
                 goto release;
         }
@@ ... @@
 
         for (i = 0; i < mdsc->max_sessions; i++) {
                 struct ceph_mds_session *s = __ceph_lookup_mds_session(mdsc, i);
-                if (s == NULL)
+                if (!s)
                         continue;
                 if (s->s_state == CEPH_MDS_SESSION_CLOSING) {
                         dout("resending session close request for mds%d\n",
@@ ... @@
         fsc->mdsc = mdsc;
         mutex_init(&mdsc->mutex);
         mdsc->mdsmap = kzalloc(sizeof(*mdsc->mdsmap), GFP_NOFS);
-        if (mdsc->mdsmap == NULL) {
+        if (!mdsc->mdsmap) {
                 kfree(mdsc);
                 return -ENOMEM;
         }
+3 -3
fs/ceph/mdsmap.c
@@ ... @@
         u16 mdsmap_ev;
 
         m = kzalloc(sizeof(*m), GFP_NOFS);
-        if (m == NULL)
+        if (!m)
                 return ERR_PTR(-ENOMEM);
 
         ceph_decode_need(p, end, 1 + 1, bad);
@@ ... @@
         m->m_num_mds = m->m_max_mds;
 
         m->m_info = kcalloc(m->m_num_mds, sizeof(*m->m_info), GFP_NOFS);
-        if (m->m_info == NULL)
+        if (!m->m_info)
                 goto nomem;
 
         /* pick out active nodes from mds_info (state > 0) */
@@ ... @@
                 if (num_export_targets) {
                         info->export_targets = kcalloc(num_export_targets,
                                                        sizeof(u32), GFP_NOFS);
-                        if (info->export_targets == NULL)
+                        if (!info->export_targets)
                                 goto nomem;
                         for (j = 0; j < num_export_targets; j++)
                                 info->export_targets[j] =
+16 -21
fs/ceph/snap.c
@@ ... @@
 /*
  * build the snap context for a given realm.
  */
-static int build_snap_context(struct ceph_snap_realm *realm)
+static int build_snap_context(struct ceph_snap_realm *realm,
+                              struct list_head* dirty_realms)
 {
         struct ceph_snap_realm *parent = realm->parent;
         struct ceph_snap_context *snapc;
@@ ... @@
          */
         if (parent) {
                 if (!parent->cached_context) {
-                        err = build_snap_context(parent);
+                        err = build_snap_context(parent, dirty_realms);
                         if (err)
                                 goto fail;
                 }
@@ ... @@
                      " (unchanged)\n",
                      realm->ino, realm, realm->cached_context,
                      realm->cached_context->seq,
-                     (unsigned int) realm->cached_context->num_snaps);
+                     (unsigned int)realm->cached_context->num_snaps);
                 return 0;
         }
@@ ... @@
              realm->ino, realm, snapc, snapc->seq,
              (unsigned int) snapc->num_snaps);
 
-        ceph_put_snap_context(realm->cached_context);
+        if (realm->cached_context) {
+                ceph_put_snap_context(realm->cached_context);
+                /* queue realm for cap_snap creation */
+                list_add_tail(&realm->dirty_item, dirty_realms);
+        }
         realm->cached_context = snapc;
         return 0;
@@ ... @@
 /*
  * rebuild snap context for the given realm and all of its children.
  */
-static void rebuild_snap_realms(struct ceph_snap_realm *realm)
+static void rebuild_snap_realms(struct ceph_snap_realm *realm,
+                                struct list_head *dirty_realms)
 {
         struct ceph_snap_realm *child;
 
         dout("rebuild_snap_realms %llx %p\n", realm->ino, realm);
-        build_snap_context(realm);
+        build_snap_context(realm, dirty_realms);
 
         list_for_each_entry(child, &realm->children, child_item)
-                rebuild_snap_realms(child);
+                rebuild_snap_realms(child, dirty_realms);
 }
@@ ... @@
 {
         struct ceph_inode_info *ci;
         struct inode *lastinode = NULL;
-        struct ceph_snap_realm *child;
 
         dout("queue_realm_cap_snaps %p %llx inodes\n", realm, realm->ino);
 
         spin_lock(&realm->inodes_with_caps_lock);
-        list_for_each_entry(ci, &realm->inodes_with_caps,
-                            i_snap_realm_item) {
+        list_for_each_entry(ci, &realm->inodes_with_caps, i_snap_realm_item) {
                 struct inode *inode = igrab(&ci->vfs_inode);
                 if (!inode)
                         continue;
@@ ... @@
         spin_unlock(&realm->inodes_with_caps_lock);
         iput(lastinode);
 
-        list_for_each_entry(child, &realm->children, child_item) {
-                dout("queue_realm_cap_snaps %p %llx queue child %p %llx\n",
-                     realm, realm->ino, child, child->ino);
-                list_del_init(&child->dirty_item);
-                list_add(&child->dirty_item, &realm->dirty_item);
-        }
-
-        list_del_init(&realm->dirty_item);
         dout("queue_realm_cap_snaps %p %llx done\n", realm, realm->ino);
 }
@@ ... @@
                 if (err < 0)
                         goto fail;
 
-                /* queue realm for cap_snap creation */
-                list_add(&realm->dirty_item, &dirty_realms);
                 if (realm->seq > mdsc->last_snap_seq)
                         mdsc->last_snap_seq = realm->seq;
@@ ... @@
 
                 /* invalidate when we reach the _end_ (root) of the trace */
                 if (invalidate && p >= e)
-                        rebuild_snap_realms(realm);
+                        rebuild_snap_realms(realm, &dirty_realms);
 
                 if (!first_realm)
                         first_realm = realm;
@@ ... @@
         while (!list_empty(&dirty_realms)) {
                 realm = list_first_entry(&dirty_realms, struct ceph_snap_realm,
                                          dirty_item);
+                list_del_init(&realm->dirty_item);
                 queue_realm_cap_snaps(realm);
         }
+42 -34
fs/ceph/super.c
@@ ... @@
         struct ceph_statfs st;
         u64 fsid;
         int err;
+        u64 data_pool;
+
+        if (fsc->mdsc->mdsmap->m_num_data_pg_pools == 1) {
+                data_pool = fsc->mdsc->mdsmap->m_data_pg_pools[0];
+        } else {
+                data_pool = CEPH_NOPOOL;
+        }
 
         dout("statfs\n");
-        err = ceph_monc_do_statfs(&fsc->client->monc, &st);
+        err = ceph_monc_do_statfs(&fsc->client->monc, data_pool, &st);
         if (err < 0)
                 return err;
@@ ... @@
         Opt_rasize,
         Opt_caps_wanted_delay_min,
         Opt_caps_wanted_delay_max,
-        Opt_cap_release_safety,
         Opt_readdir_max_entries,
         Opt_readdir_max_bytes,
         Opt_congestion_kb,
@@ ... @@
         {Opt_rasize, "rasize=%d"},
         {Opt_caps_wanted_delay_min, "caps_wanted_delay_min=%d"},
         {Opt_caps_wanted_delay_max, "caps_wanted_delay_max=%d"},
-        {Opt_cap_release_safety, "cap_release_safety=%d"},
         {Opt_readdir_max_entries, "readdir_max_entries=%d"},
         {Opt_readdir_max_bytes, "readdir_max_bytes=%d"},
         {Opt_congestion_kb, "write_congestion_kb=%d"},
@@ ... @@
                 break;
                 /* misc */
         case Opt_wsize:
-                fsopt->wsize = intval;
+                if (intval < PAGE_SIZE || intval > CEPH_MAX_WRITE_SIZE)
+                        return -EINVAL;
+                fsopt->wsize = ALIGN(intval, PAGE_SIZE);
                 break;
         case Opt_rsize:
-                fsopt->rsize = intval;
+                if (intval < PAGE_SIZE || intval > CEPH_MAX_READ_SIZE)
+                        return -EINVAL;
+                fsopt->rsize = ALIGN(intval, PAGE_SIZE);
                 break;
         case Opt_rasize:
-                fsopt->rasize = intval;
+                if (intval < 0)
+                        return -EINVAL;
+                fsopt->rasize = ALIGN(intval + PAGE_SIZE - 1, PAGE_SIZE);
                 break;
         case Opt_caps_wanted_delay_min:
+                if (intval < 1)
+                        return -EINVAL;
                 fsopt->caps_wanted_delay_min = intval;
                 break;
         case Opt_caps_wanted_delay_max:
+                if (intval < 1)
+                        return -EINVAL;
                 fsopt->caps_wanted_delay_max = intval;
                 break;
         case Opt_readdir_max_entries:
+                if (intval < 1)
+                        return -EINVAL;
                 fsopt->max_readdir = intval;
                 break;
         case Opt_readdir_max_bytes:
+                if (intval < PAGE_SIZE && intval != 0)
+                        return -EINVAL;
                 fsopt->max_readdir_bytes = intval;
                 break;
         case Opt_congestion_kb:
+                if (intval < 1024) /* at least 1M */
+                        return -EINVAL;
                 fsopt->congestion_kb = intval;
                 break;
         case Opt_dirstat:
@@ ... @@
         fsopt->sb_flags = flags;
         fsopt->flags = CEPH_MOUNT_OPT_DEFAULT;
 
-        fsopt->rsize = CEPH_RSIZE_DEFAULT;
+        fsopt->wsize = CEPH_MAX_WRITE_SIZE;
+        fsopt->rsize = CEPH_MAX_READ_SIZE;
         fsopt->rasize = CEPH_RASIZE_DEFAULT;
         fsopt->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL);
         if (!fsopt->snapdir_name) {
@@ ... @@
 
         fsopt->caps_wanted_delay_min = CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT;
         fsopt->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT;
-        fsopt->cap_release_safety = CEPH_CAP_RELEASE_SAFETY_DEFAULT;
         fsopt->max_readdir = CEPH_MAX_READDIR_DEFAULT;
         fsopt->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT;
         fsopt->congestion_kb = default_congestion_kb();
@@ ... @@
                 seq_printf(m, ",mds_namespace=%s", fsopt->mds_namespace);
         if (fsopt->wsize)
                 seq_printf(m, ",wsize=%d", fsopt->wsize);
-        if (fsopt->rsize != CEPH_RSIZE_DEFAULT)
+        if (fsopt->rsize != CEPH_MAX_READ_SIZE)
                 seq_printf(m, ",rsize=%d", fsopt->rsize);
         if (fsopt->rasize != CEPH_RASIZE_DEFAULT)
                 seq_printf(m, ",rasize=%d", fsopt->rasize);
@@ ... @@
         if (fsopt->caps_wanted_delay_max != CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT)
                 seq_printf(m, ",caps_wanted_delay_max=%d",
                            fsopt->caps_wanted_delay_max);
-        if (fsopt->cap_release_safety != CEPH_CAP_RELEASE_SAFETY_DEFAULT)
-                seq_printf(m, ",cap_release_safety=%d",
-                           fsopt->cap_release_safety);
         if (fsopt->max_readdir != CEPH_MAX_READDIR_DEFAULT)
                 seq_printf(m, ",readdir_max_entries=%d", fsopt->max_readdir);
         if (fsopt->max_readdir_bytes != CEPH_MAX_READDIR_BYTES_DEFAULT)
@@ ... @@
         }
         fsc->client->extra_mon_dispatch = extra_mon_dispatch;
 
-        if (fsopt->mds_namespace == NULL) {
+        if (!fsopt->mds_namespace) {
                 ceph_monc_want_map(&fsc->client->monc, CEPH_SUB_MDSMAP,
                                    0, true);
         } else {
@@ ... @@
          * to be processed in parallel, limit concurrency.
          */
         fsc->wb_wq = alloc_workqueue("ceph-writeback", 0, 1);
-        if (fsc->wb_wq == NULL)
+        if (!fsc->wb_wq)
                 goto fail_client;
         fsc->pg_inv_wq = alloc_workqueue("ceph-pg-invalid", 0, 1);
-        if (fsc->pg_inv_wq == NULL)
+        if (!fsc->pg_inv_wq)
                 goto fail_wb_wq;
         fsc->trunc_wq = alloc_workqueue("ceph-trunc", 0, 1);
-        if (fsc->trunc_wq == NULL)
+        if (!fsc->trunc_wq)
                 goto fail_pg_inv_wq;
 
         /* set up mempools */
@@ ... @@
                                               __alignof__(struct ceph_inode_info),
                                               SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|
                                               SLAB_ACCOUNT, ceph_inode_init_once);
-        if (ceph_inode_cachep == NULL)
+        if (!ceph_inode_cachep)
                 return -ENOMEM;
 
         ceph_cap_cachep = KMEM_CACHE(ceph_cap,
                                      SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD);
-        if (ceph_cap_cachep == NULL)
+        if (!ceph_cap_cachep)
                 goto bad_cap;
         ceph_cap_flush_cachep = KMEM_CACHE(ceph_cap_flush,
                                            SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD);
-        if (ceph_cap_flush_cachep == NULL)
+        if (!ceph_cap_flush_cachep)
                 goto bad_cap_flush;
 
         ceph_dentry_cachep = KMEM_CACHE(ceph_dentry_info,
                                         SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD);
-        if (ceph_dentry_cachep == NULL)
+        if (!ceph_dentry_cachep)
                 goto bad_dentry;
 
         ceph_file_cachep = KMEM_CACHE(ceph_file_info, SLAB_MEM_SPREAD);
-
-        if (ceph_file_cachep == NULL)
+        if (!ceph_file_cachep)
                 goto bad_file;
 
         if ((error = ceph_fscache_register()))
@@ ... @@
                 return err;
 
         /* set ra_pages based on rasize mount option? */
-        if (fsc->mount_options->rasize >= PAGE_SIZE)
-                sb->s_bdi->ra_pages =
-                        (fsc->mount_options->rasize + PAGE_SIZE - 1)
-                        >> PAGE_SHIFT;
-        else
-                sb->s_bdi->ra_pages = VM_MAX_READAHEAD * 1024 / PAGE_SIZE;
+        sb->s_bdi->ra_pages = fsc->mount_options->rasize >> PAGE_SHIFT;
 
-        if (fsc->mount_options->rsize > fsc->mount_options->rasize &&
-            fsc->mount_options->rsize >= PAGE_SIZE)
-                sb->s_bdi->io_pages =
-                        (fsc->mount_options->rsize + PAGE_SIZE - 1)
-                        >> PAGE_SHIFT;
-        else if (fsc->mount_options->rsize == 0)
-                sb->s_bdi->io_pages = ULONG_MAX;
+        /* set io_pages based on max osd read size */
+        sb->s_bdi->io_pages = fsc->mount_options->rsize >> PAGE_SHIFT;
 
         return 0;
 }
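
[Editorial aside: the new option parsing rejects out-of-range wsize/rsize and
rounds them up to whole pages with ALIGN(). For a power-of-two alignment a,
ALIGN(x, a) is (x + a - 1) & ~(a - 1). A stand-alone check of that arithmetic;
PAGE_SIZE is assumed 4K here for the demo:]

#include <stdio.h>

#define PAGE_SIZE 4096UL
#define ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
        unsigned long vals[] = { 4096, 5000, 65536 };
        /* expected: 4096->4096, 5000->8192, 65536->65536 */
        for (int i = 0; i < 3; i++)
                printf("wsize=%lu -> %lu\n", vals[i],
                       ALIGN(vals[i], PAGE_SIZE));
        return 0;
}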
+14 -2
fs/ceph/super.h
@@ ... @@
 #define ceph_test_mount_opt(fsc, opt) \
         (!!((fsc)->mount_options->flags & CEPH_MOUNT_OPT_##opt))
 
-#define CEPH_RSIZE_DEFAULT  (64*1024*1024) /* max read size */
+/* max size of osd read request, limited by libceph */
+#define CEPH_MAX_READ_SIZE              CEPH_MSG_MAX_DATA_LEN
+/* osd has a configurable limitaion of max write size.
+ * CEPH_MSG_MAX_DATA_LEN should be small enough. */
+#define CEPH_MAX_WRITE_SIZE             CEPH_MSG_MAX_DATA_LEN
 #define CEPH_RASIZE_DEFAULT (8192*1024)    /* max readahead */
 #define CEPH_MAX_READDIR_DEFAULT        1024
 #define CEPH_MAX_READDIR_BYTES_DEFAULT  (512*1024)
 #define CEPH_SNAPDIRNAME_DEFAULT ".snap"
+
+/*
+ * Delay telling the MDS we no longer want caps, in case we reopen
+ * the file.  Delay a minimum amount of time, even if we send a cap
+ * message for some other reason.  Otherwise, take the oppotunity to
+ * update the mds to avoid sending another message later.
+ */
+#define CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT      5  /* cap release delay */
+#define CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT     60  /* cap release delay */
 
 struct ceph_mount_options {
         int flags;
@@ ... @@
         int rasize;           /* max readahead */
         int congestion_kb;    /* max writeback in flight */
         int caps_wanted_delay_min, caps_wanted_delay_max;
-        int cap_release_safety;
         int max_readdir;       /* max readdir result (entires) */
         int max_readdir_bytes; /* max readdir result (bytes) */
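
[Editorial aside: CEPH_MSG_MAX_DATA_LEN is the 16M libceph cap referenced in
the highlights above; with rsize/wsize clamped to it, a larger read or write
is simply issued as several OSD requests. A hedged user-space sketch of that
splitting; the constant and helper are illustrative, not kernel code:]

#include <stdint.h>
#include <stdio.h>

#define MAX_DATA_LEN (16ULL * 1024 * 1024) /* assumed 16M, per the highlight */

/* hypothetical helper: emit one request per <=16M chunk */
static void issue_in_chunks(uint64_t off, uint64_t len)
{
        while (len) {
                uint64_t n = len < MAX_DATA_LEN ? len : MAX_DATA_LEN;
                printf("osd request: off=%llu len=%llu\n",
                       (unsigned long long)off, (unsigned long long)n);
                off += n;
                len -= n;
        }
}

int main(void)
{
        issue_in_chunks(0, 40ULL << 20); /* 40M -> 16M + 16M + 8M */
        return 0;
}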
+4 -4
fs/ceph/xattr.c
@@ ... @@
         spin_unlock(&ci->i_ceph_lock);
 
         /* security module gets xattr while filling trace */
-        if (current->journal_info != NULL) {
+        if (current->journal_info) {
                 pr_warn_ratelimited("sync getxattr %p "
                                     "during filling trace\n", inode);
                 return -EBUSY;
@@ ... @@
 
         memcpy(value, xattr->val, xattr->val_len);
 
-        if (current->journal_info != NULL &&
+        if (current->journal_info &&
             !strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN))
                 ci->i_ceph_flags |= CEPH_I_SEC_INITED;
 out:
@@ ... @@
         up_read(&mdsc->snap_rwsem);
 
         /* security module set xattr while filling trace */
-        if (current->journal_info != NULL) {
+        if (current->journal_info) {
                 pr_warn_ratelimited("sync setxattr %p "
                                     "during filling trace\n", inode);
                 err = -EBUSY;
@@ ... @@
 {
         struct ceph_inode_info *ci;
         bool ret;
-        if (in->i_security == NULL)
+        if (!in->i_security)
                 return false;
         ci = ceph_inode(in);
         spin_lock(&ci->i_ceph_lock);
+5 -1
include/linux/ceph/ceph_fs.h
@@ ... @@
 struct ceph_mon_statfs {
         struct ceph_mon_request_header monhdr;
         struct ceph_fsid fsid;
+        __u8 contains_data_pool;
+        __le64 data_pool;
 } __attribute__ ((packed));
 
 struct ceph_statfs {
@@ ... @@
 extern const char *ceph_cap_op_name(int op);
 
 /* flags field in client cap messages (version >= 10) */
-#define CEPH_CLIENT_CAPS_SYNC           (0x1)
+#define CEPH_CLIENT_CAPS_SYNC                   (1<<0)
+#define CEPH_CLIENT_CAPS_NO_CAPSNAP             (1<<1)
+#define CEPH_CLIENT_CAPS_PENDING_CAPSNAP        (1<<2);
 
 /*
  * caps message, used for capability callbacks, acks, requests, etc.
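
[Editorial aside: the two fields added to ceph_mon_statfs above are what let
the v2 statfs request name a single data pool (see the mon_client.c hunk
below, which fills them). A stand-alone sketch of just the appended tail;
the surrounding monhdr/fsid and the type name are omitted/illustrative:]

#include <stdint.h>
#include <stdio.h>

struct mon_statfs_v2_tail {
        uint8_t  contains_data_pool; /* 0: cluster-wide, 1: per-pool */
        uint64_t data_pool;          /* valid iff contains_data_pool */
} __attribute__((packed));

int main(void)
{
        struct mon_statfs_v2_tail t = { 1, 42 };
        printf("tail size=%zu pool=%llu\n", sizeof(t),
               (unsigned long long)t.data_pool); /* packed: 9 bytes */
        return 0;
}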
-11
include/linux/ceph/libceph.h
@@ ... @@
 
 #define CEPH_AUTH_NAME_DEFAULT   "guest"
 
-/*
- * Delay telling the MDS we no longer want caps, in case we reopen
- * the file.  Delay a minimum amount of time, even if we send a cap
- * message for some other reason.  Otherwise, take the oppotunity to
- * update the mds to avoid sending another message later.
- */
-#define CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT      5  /* cap release delay */
-#define CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT     60  /* cap release delay */
-
-#define CEPH_CAP_RELEASE_SAFETY_DEFAULT        (CEPH_CAPS_PER_RELEASE * 4)
-
 /* mount state */
 enum {
         CEPH_MOUNT_MOUNTING,
+2 -2
include/linux/ceph/mon_client.h
@@ ... @@
 extern int ceph_monc_wait_osdmap(struct ceph_mon_client *monc, u32 epoch,
                                  unsigned long timeout);
 
-extern int ceph_monc_do_statfs(struct ceph_mon_client *monc,
-                               struct ceph_statfs *buf);
+int ceph_monc_do_statfs(struct ceph_mon_client *monc, u64 data_pool,
+                        struct ceph_statfs *buf);
 
 int ceph_monc_get_version(struct ceph_mon_client *monc, const char *what,
                           u64 *newest);
-1
include/linux/ceph/rados.h
@@ ... @@
                                                                             \
         /* fancy write */                                                   \
         f(APPEND,       __CEPH_OSD_OP(WR, DATA, 6),     "append")           \
-        f(STARTSYNC,    __CEPH_OSD_OP(WR, DATA, 7),     "startsync")        \
         f(SETTRUNC,     __CEPH_OSD_OP(WR, DATA, 8),     "settrunc")         \
         f(TRIMTRUNC,    __CEPH_OSD_OP(WR, DATA, 9),     "trimtrunc")        \
                                                                             \
+5 -1
net/ceph/mon_client.c
@@ ... @@
 /*
  * Do a synchronous statfs().
  */
-int ceph_monc_do_statfs(struct ceph_mon_client *monc, struct ceph_statfs *buf)
+int ceph_monc_do_statfs(struct ceph_mon_client *monc, u64 data_pool,
+                        struct ceph_statfs *buf)
 {
         struct ceph_mon_generic_request *req;
         struct ceph_mon_statfs *h;
@@ ... @@
                 goto out;
 
         req->u.st = buf;
+        req->request->hdr.version = cpu_to_le16(2);
 
         mutex_lock(&monc->mutex);
         register_generic_request(req);
@@ ... @@
         h->monhdr.session_mon = cpu_to_le16(-1);
         h->monhdr.session_mon_tid = 0;
         h->fsid = monc->monmap->fsid;
+        h->contains_data_pool = (data_pool != CEPH_NOPOOL);
+        h->data_pool = cpu_to_le64(data_pool);
         send_generic_request(monc, req);
         mutex_unlock(&monc->mutex);
-5
net/ceph/osd_client.c
@@ ... @@
                 dst->cls.method_len = src->cls.method_len;
                 dst->cls.indata_len = cpu_to_le32(src->cls.indata_len);
                 break;
-        case CEPH_OSD_OP_STARTSYNC:
-                break;
         case CEPH_OSD_OP_WATCH:
                 dst->watch.cookie = cpu_to_le64(src->watch.cookie);
                 dst->watch.ver = cpu_to_le64(0);
@@ ... @@
  * if the file was recently truncated, we include information about its
  * old and new size so that the object can be updated appropriately.  (we
  * avoid synchronously deleting truncated objects because it's slow.)
- *
- * if @do_sync, include a 'startsync' command so that the osd will flush
- * data quickly.
  */
 struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
                                                struct ceph_file_layout *layout,