Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client:
  ceph: avoid picking MDS that is not active
  ceph: avoid immediate cap check after import
  ceph: fix flushing of caps vs cap import
  ceph: fix erroneous cap flush to non-auth mds
  ceph: fix cap_wanted_delay_{min,max} mount option initialization
  ceph: fix xattr rbtree search
  ceph: fix getattr on directory when using norbytes

+54 -14
+37 -6
fs/ceph/caps.c
··· 1560 /* NOTE: no side-effects allowed, until we take s_mutex */ 1561 1562 revoking = cap->implemented & ~cap->issued; 1563 - if (revoking) 1564 - dout(" mds%d revoking %s\n", cap->mds, 1565 - ceph_cap_string(revoking)); 1566 1567 if (cap == ci->i_auth_cap && 1568 (cap->issued & CEPH_CAP_FILE_WR)) { ··· 1659 1660 if (cap == ci->i_auth_cap && ci->i_dirty_caps) 1661 flushing = __mark_caps_flushing(inode, session); 1662 1663 mds = cap->mds; /* remember mds, so we don't repeat */ 1664 sent++; ··· 1940 cap, session->s_mds); 1941 spin_unlock(&inode->i_lock); 1942 } 1943 } 1944 } 1945 ··· 2719 ceph_add_cap(inode, session, cap_id, -1, 2720 issued, wanted, seq, mseq, realmino, CEPH_CAP_FLAG_AUTH, 2721 NULL /* no caps context */); 2722 - try_flush_caps(inode, session, NULL); 2723 up_read(&mdsc->snap_rwsem); 2724 2725 /* make sure we re-request max_size, if necessary */ ··· 2817 case CEPH_CAP_OP_IMPORT: 2818 handle_cap_import(mdsc, inode, h, session, 2819 snaptrace, snaptrace_len); 2820 - ceph_check_caps(ceph_inode(inode), CHECK_CAPS_NODELAY, 2821 - session); 2822 goto done_unlocked; 2823 } 2824
··· 1560 /* NOTE: no side-effects allowed, until we take s_mutex */ 1561 1562 revoking = cap->implemented & ~cap->issued; 1563 + dout(" mds%d cap %p issued %s implemented %s revoking %s\n", 1564 + cap->mds, cap, ceph_cap_string(cap->issued), 1565 + ceph_cap_string(cap->implemented), 1566 + ceph_cap_string(revoking)); 1567 1568 if (cap == ci->i_auth_cap && 1569 (cap->issued & CEPH_CAP_FILE_WR)) { ··· 1658 1659 if (cap == ci->i_auth_cap && ci->i_dirty_caps) 1660 flushing = __mark_caps_flushing(inode, session); 1661 + else 1662 + flushing = 0; 1663 1664 mds = cap->mds; /* remember mds, so we don't repeat */ 1665 sent++; ··· 1937 cap, session->s_mds); 1938 spin_unlock(&inode->i_lock); 1939 } 1940 + } 1941 + } 1942 + 1943 + static void kick_flushing_inode_caps(struct ceph_mds_client *mdsc, 1944 + struct ceph_mds_session *session, 1945 + struct inode *inode) 1946 + { 1947 + struct ceph_inode_info *ci = ceph_inode(inode); 1948 + struct ceph_cap *cap; 1949 + int delayed = 0; 1950 + 1951 + spin_lock(&inode->i_lock); 1952 + cap = ci->i_auth_cap; 1953 + dout("kick_flushing_inode_caps %p flushing %s flush_seq %lld\n", inode, 1954 + ceph_cap_string(ci->i_flushing_caps), ci->i_cap_flush_seq); 1955 + __ceph_flush_snaps(ci, &session, 1); 1956 + if (ci->i_flushing_caps) { 1957 + delayed = __send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH, 1958 + __ceph_caps_used(ci), 1959 + __ceph_caps_wanted(ci), 1960 + cap->issued | cap->implemented, 1961 + ci->i_flushing_caps, NULL); 1962 + if (delayed) { 1963 + spin_lock(&inode->i_lock); 1964 + __cap_delay_requeue(mdsc, ci); 1965 + spin_unlock(&inode->i_lock); 1966 + } 1967 + } else { 1968 + spin_unlock(&inode->i_lock); 1969 } 1970 } 1971 ··· 2687 ceph_add_cap(inode, session, cap_id, -1, 2688 issued, wanted, seq, mseq, realmino, CEPH_CAP_FLAG_AUTH, 2689 NULL /* no caps context */); 2690 + kick_flushing_inode_caps(mdsc, session, inode); 2691 up_read(&mdsc->snap_rwsem); 2692 2693 /* make sure we re-request max_size, if necessary */ ··· 2785 case 
CEPH_CAP_OP_IMPORT: 2786 handle_cap_import(mdsc, inode, h, session, 2787 snaptrace, snaptrace_len); 2788 + ceph_check_caps(ceph_inode(inode), 0, session); 2789 goto done_unlocked; 2790 } 2791
+5 -5
fs/ceph/inode.c
··· 710 ci->i_ceph_flags |= CEPH_I_COMPLETE; 711 ci->i_max_offset = 2; 712 } 713 - 714 - /* it may be better to set st_size in getattr instead? */ 715 - if (ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb), RBYTES)) 716 - inode->i_size = ci->i_rbytes; 717 break; 718 default: 719 pr_err("fill_inode %llx.%llx BAD mode 0%o\n", ··· 1815 else 1816 stat->dev = 0; 1817 if (S_ISDIR(inode->i_mode)) { 1818 - stat->size = ci->i_rbytes; 1819 stat->blocks = 0; 1820 stat->blksize = 65536; 1821 }
··· 710 ci->i_ceph_flags |= CEPH_I_COMPLETE; 711 ci->i_max_offset = 2; 712 } 713 break; 714 default: 715 pr_err("fill_inode %llx.%llx BAD mode 0%o\n", ··· 1819 else 1820 stat->dev = 0; 1821 if (S_ISDIR(inode->i_mode)) { 1822 + if (ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb), 1823 + RBYTES)) 1824 + stat->size = ci->i_rbytes; 1825 + else 1826 + stat->size = ci->i_files + ci->i_subdirs; 1827 stat->blocks = 0; 1828 stat->blksize = 65536; 1829 }
+7 -3
fs/ceph/mds_client.c
··· 693 dout("choose_mds %p %llx.%llx " 694 "frag %u mds%d (%d/%d)\n", 695 inode, ceph_vinop(inode), 696 - frag.frag, frag.mds, 697 (int)r, frag.ndist); 698 - return mds; 699 } 700 701 /* since this file/dir wasn't known to be ··· 710 dout("choose_mds %p %llx.%llx " 711 "frag %u mds%d (auth)\n", 712 inode, ceph_vinop(inode), frag.frag, mds); 713 - return mds; 714 } 715 } 716 }
··· 693 dout("choose_mds %p %llx.%llx " 694 "frag %u mds%d (%d/%d)\n", 695 inode, ceph_vinop(inode), 696 + frag.frag, mds, 697 (int)r, frag.ndist); 698 + if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) >= 699 + CEPH_MDS_STATE_ACTIVE) 700 + return mds; 701 } 702 703 /* since this file/dir wasn't known to be ··· 708 dout("choose_mds %p %llx.%llx " 709 "frag %u mds%d (auth)\n", 710 inode, ceph_vinop(inode), frag.frag, mds); 711 + if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) >= 712 + CEPH_MDS_STATE_ACTIVE) 713 + return mds; 714 } 715 } 716 }
+2
fs/ceph/super.c
··· 290 291 fsopt->rsize = CEPH_MOUNT_RSIZE_DEFAULT; 292 fsopt->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL); 293 fsopt->cap_release_safety = CEPH_CAP_RELEASE_SAFETY_DEFAULT; 294 fsopt->max_readdir = CEPH_MAX_READDIR_DEFAULT; 295 fsopt->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT;
··· 290 291 fsopt->rsize = CEPH_MOUNT_RSIZE_DEFAULT; 292 fsopt->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL); 293 + fsopt->caps_wanted_delay_min = CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT; 294 + fsopt->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT; 295 fsopt->cap_release_safety = CEPH_CAP_RELEASE_SAFETY_DEFAULT; 296 fsopt->max_readdir = CEPH_MAX_READDIR_DEFAULT; 297 fsopt->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT;
+3
fs/ceph/xattr.c
··· 219 struct rb_node **p; 220 struct rb_node *parent = NULL; 221 struct ceph_inode_xattr *xattr = NULL; 222 int c; 223 224 p = &ci->i_xattrs.index.rb_node; ··· 227 parent = *p; 228 xattr = rb_entry(parent, struct ceph_inode_xattr, node); 229 c = strncmp(name, xattr->name, xattr->name_len); 230 if (c < 0) 231 p = &(*p)->rb_left; 232 else if (c > 0)
··· 219 struct rb_node **p; 220 struct rb_node *parent = NULL; 221 struct ceph_inode_xattr *xattr = NULL; 222 + int name_len = strlen(name); 223 int c; 224 225 p = &ci->i_xattrs.index.rb_node; ··· 226 parent = *p; 227 xattr = rb_entry(parent, struct ceph_inode_xattr, node); 228 c = strncmp(name, xattr->name, xattr->name_len); 229 + if (c == 0 && name_len > xattr->name_len) 230 + c = 1; 231 if (c < 0) 232 p = &(*p)->rb_left; 233 else if (c > 0)