ceph: add dir_layout to inode

Add a ceph_dir_layout to the inode, and calculate dentry hash values based
on the parent directory's specified dir_hash function. This is needed
because the old default Linux dcache hash function is extremely weak and
leads to a poor distribution of files among dir fragments.

Signed-off-by: Sage Weil <sage@newdream.net>

Sage Weil 6c0f3af7 3c0eee3f

+41 -4
+20
fs/ceph/dir.c
··· 1216 1216 } 1217 1217 } 1218 1218 1219 + /* 1220 + * Return name hash for a given dentry. This is dependent on 1221 + * the parent directory's hash function. 1222 + */ 1223 + unsigned ceph_dentry_hash(struct dentry *dn) 1224 + { 1225 + struct inode *dir = dn->d_parent->d_inode; 1226 + struct ceph_inode_info *dci = ceph_inode(dir); 1227 + 1228 + switch (dci->i_dir_layout.dl_dir_hash) { 1229 + case 0: /* for backward compat */ 1230 + case CEPH_STR_HASH_LINUX: 1231 + return dn->d_name.hash; 1232 + 1233 + default: 1234 + return ceph_str_hash(dci->i_dir_layout.dl_dir_hash, 1235 + dn->d_name.name, dn->d_name.len); 1236 + } 1237 + } 1238 + 1219 1239 const struct file_operations ceph_dir_fops = { 1220 1240 .read = ceph_read_dir, 1221 1241 .readdir = ceph_readdir,
+1 -1
fs/ceph/export.c
··· 59 59 dout("encode_fh %p connectable\n", dentry); 60 60 cfh->ino = ceph_ino(dentry->d_inode); 61 61 cfh->parent_ino = ceph_ino(parent->d_inode); 62 - cfh->parent_name_hash = parent->d_name.hash; 62 + cfh->parent_name_hash = ceph_dentry_hash(parent); 63 63 *max_len = connected_handle_length; 64 64 type = 2; 65 65 } else if (*max_len >= handle_length) {
+2
fs/ceph/inode.c
··· 297 297 ci->i_release_count = 0; 298 298 ci->i_symlink = NULL; 299 299 300 + memset(&ci->i_dir_layout, 0, sizeof(ci->i_dir_layout)); 301 + 300 302 ci->i_fragtree = RB_ROOT; 301 303 mutex_init(&ci->i_fragtree_mutex); 302 304
+2
fs/ceph/super.h
··· 239 239 unsigned i_ceph_flags; 240 240 unsigned long i_release_count; 241 241 242 + struct ceph_dir_layout i_dir_layout; 242 243 struct ceph_file_layout i_layout; 243 244 char *i_symlink; 244 245 ··· 769 768 extern void ceph_dentry_lru_touch(struct dentry *dn); 770 769 extern void ceph_dentry_lru_del(struct dentry *dn); 771 770 extern void ceph_invalidate_dentry_lease(struct dentry *dentry); 771 + extern unsigned ceph_dentry_hash(struct dentry *dn); 772 772 773 773 /* 774 774 * our d_ops vary depending on whether the inode is live,
+13 -3
include/linux/ceph/ceph_fs.h
··· 43 43 #define CEPH_FEATURE_NOSRCADDR (1<<1) 44 44 #define CEPH_FEATURE_MONCLOCKCHECK (1<<2) 45 45 #define CEPH_FEATURE_FLOCK (1<<3) 46 + #define CEPH_FEATURE_SUBSCRIBE2 (1<<4) 47 + #define CEPH_FEATURE_MONNAMES (1<<5) 48 + #define CEPH_FEATURE_RECONNECT_SEQ (1<<6) 49 + #define CEPH_FEATURE_DIRLAYOUTHASH (1<<7) 46 50 47 51 48 52 /* ··· 59 55 __le32 fl_stripe_count; /* over this many objects */ 60 56 __le32 fl_object_size; /* until objects are this big, then move to 61 57 new objects */ 62 - __le32 fl_cas_hash; /* 0 = none; 1 = sha256 */ 58 + __le32 fl_cas_hash; /* UNUSED. 0 = none; 1 = sha256 */ 63 59 64 60 /* pg -> disk layout */ 65 - __le32 fl_object_stripe_unit; /* for per-object parity, if any */ 61 + __le32 fl_object_stripe_unit; /* UNUSED. for per-object parity, if any */ 66 62 67 63 /* object -> pg layout */ 68 64 __le32 fl_pg_preferred; /* preferred primary for pg (-1 for none) */ ··· 73 69 74 70 int ceph_file_layout_is_valid(const struct ceph_file_layout *layout); 75 71 72 + struct ceph_dir_layout { 73 + __u8 dl_dir_hash; /* see ceph_hash.h for ids */ 74 + __u8 dl_unused1; 75 + __u16 dl_unused2; 76 + __u32 dl_unused3; 77 + } __attribute__ ((packed)); 76 78 77 79 /* crypto algorithms */ 78 80 #define CEPH_CRYPTO_NONE 0x0 ··· 467 457 struct ceph_timespec rctime; 468 458 struct ceph_frag_tree_head fragtree; /* (must be at end of struct) */ 469 459 } __attribute__ ((packed)); 470 - /* followed by frag array, then symlink string, then xattr blob */ 460 + /* followed by frag array, symlink string, dir layout, xattr blob */ 471 461 472 462 /* reply_lease follows dname, and reply_inode */ 473 463 struct ceph_mds_reply_lease {
+3
net/ceph/ceph_hash.c
··· 1 1 2 2 #include <linux/ceph/types.h> 3 + #include <linux/module.h> 3 4 4 5 /* 5 6 * Robert Jenkin's hash function. ··· 105 104 return -1; 106 105 } 107 106 } 107 + EXPORT_SYMBOL(ceph_str_hash); 108 108 109 109 const char *ceph_str_hash_name(int type) 110 110 { ··· 118 116 return "unknown"; 119 117 } 120 118 } 119 + EXPORT_SYMBOL(ceph_str_hash_name);