ocfs2: Cache system inodes of other slots.

During orphan scan, if we are slot 0 and we are replaying
orphan_dir:0001, the general process is that for every file
in this dir:
1. we will iget orphan_dir:0001. Since there is no inode for it,
we will have to create an inode and read it from the disk.
2. do the normal work, such as delete_inode and remove it from
the dir if it is allowed.
3. call iput orphan_dir:0001 when we are done. In this case,
since we have no dcache for this inode, i_count will
reach 0, and VFS will have to call clear_inode and in
ocfs2_clear_inode we will checkpoint the inode which will let
ocfs2_cmt and journald begin to work.
4. We loop back to 1 for the next file.

So you see, actually for every deleted file, we have to read the
orphan dir from the disk and checkpoint the journal. It is very
time consuming and causes a lot of journal checkpoint I/O.
A better solution is that we can have another reference for these
inodes in ocfs2_super. So if there is no other race among
nodes (which would make dlmglue checkpoint the inode), for step 3,
clear_inode won't be called and for step 1, we may only need to
read the inode for the 1st time. This is a big win for us.

So this patch will try to cache system inodes of other slots so
that we will have one more reference for these inodes and avoid
the extra inode read and journal checkpoint.

Signed-off-by: Tao Ma <tao.ma@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>

authored by Tao Ma and committed by Joel Becker b4d693fc a33f13ef

+73 -15
+2 -1
fs/ocfs2/ocfs2.h
··· 284 struct super_block *sb; 285 struct inode *root_inode; 286 struct inode *sys_root_inode; 287 - struct inode *system_inodes[NUM_SYSTEM_INODES]; 288 289 struct ocfs2_slot_info *slot_info; 290
··· 284 struct super_block *sb; 285 struct inode *root_inode; 286 struct inode *sys_root_inode; 287 + struct inode *global_system_inodes[NUM_GLOBAL_SYSTEM_INODES]; 288 + struct inode **local_system_inodes; 289 290 struct ocfs2_slot_info *slot_info; 291
+5
fs/ocfs2/ocfs2_fs.h
··· 309 USER_QUOTA_SYSTEM_INODE, 310 GROUP_QUOTA_SYSTEM_INODE, 311 #define OCFS2_LAST_GLOBAL_SYSTEM_INODE GROUP_QUOTA_SYSTEM_INODE 312 ORPHAN_DIR_SYSTEM_INODE, 313 EXTENT_ALLOC_SYSTEM_INODE, 314 INODE_ALLOC_SYSTEM_INODE, ··· 318 TRUNCATE_LOG_SYSTEM_INODE, 319 LOCAL_USER_QUOTA_SYSTEM_INODE, 320 LOCAL_GROUP_QUOTA_SYSTEM_INODE, 321 NUM_SYSTEM_INODES 322 }; 323 324 static struct ocfs2_system_inode_info ocfs2_system_inodes[NUM_SYSTEM_INODES] = { 325 /* Global system inodes (single copy) */
··· 309 USER_QUOTA_SYSTEM_INODE, 310 GROUP_QUOTA_SYSTEM_INODE, 311 #define OCFS2_LAST_GLOBAL_SYSTEM_INODE GROUP_QUOTA_SYSTEM_INODE 312 + #define OCFS2_FIRST_LOCAL_SYSTEM_INODE ORPHAN_DIR_SYSTEM_INODE 313 ORPHAN_DIR_SYSTEM_INODE, 314 EXTENT_ALLOC_SYSTEM_INODE, 315 INODE_ALLOC_SYSTEM_INODE, ··· 317 TRUNCATE_LOG_SYSTEM_INODE, 318 LOCAL_USER_QUOTA_SYSTEM_INODE, 319 LOCAL_GROUP_QUOTA_SYSTEM_INODE, 320 + #define OCFS2_LAST_LOCAL_SYSTEM_INODE LOCAL_GROUP_QUOTA_SYSTEM_INODE 321 NUM_SYSTEM_INODES 322 }; 323 + #define NUM_GLOBAL_SYSTEM_INODES OCFS2_LAST_GLOBAL_SYSTEM_INODE 324 + #define NUM_LOCAL_SYSTEM_INODES \ 325 + (NUM_SYSTEM_INODES - OCFS2_FIRST_LOCAL_SYSTEM_INODE) 326 327 static struct ocfs2_system_inode_info ocfs2_system_inodes[NUM_SYSTEM_INODES] = { 328 /* Global system inodes (single copy) */
+17 -3
fs/ocfs2/super.c
··· 514 515 mlog_entry_void(); 516 517 - for (i = 0; i < NUM_SYSTEM_INODES; i++) { 518 - inode = osb->system_inodes[i]; 519 if (inode) { 520 iput(inode); 521 - osb->system_inodes[i] = NULL; 522 } 523 } 524 ··· 534 osb->root_inode = NULL; 535 } 536 537 mlog_exit(0); 538 } 539
··· 514 515 mlog_entry_void(); 516 517 + for (i = 0; i < NUM_GLOBAL_SYSTEM_INODES; i++) { 518 + inode = osb->global_system_inodes[i]; 519 if (inode) { 520 iput(inode); 521 + osb->global_system_inodes[i] = NULL; 522 } 523 } 524 ··· 534 osb->root_inode = NULL; 535 } 536 537 + if (!osb->local_system_inodes) 538 + goto out; 539 + 540 + for (i = 0; i < NUM_LOCAL_SYSTEM_INODES * osb->max_slots; i++) { 541 + if (osb->local_system_inodes[i]) { 542 + iput(osb->local_system_inodes[i]); 543 + osb->local_system_inodes[i] = NULL; 544 + } 545 + } 546 + 547 + kfree(osb->local_system_inodes); 548 + osb->local_system_inodes = NULL; 549 + 550 + out: 551 mlog_exit(0); 552 } 553
+49 -11
fs/ocfs2/sysfile.c
··· 44 int type, 45 u32 slot); 46 47 - static inline int is_global_system_inode(int type); 48 - static inline int is_in_system_inode_array(struct ocfs2_super *osb, 49 - int type, 50 - u32 slot); 51 - 52 #ifdef CONFIG_DEBUG_LOCK_ALLOC 53 static struct lock_class_key ocfs2_sysfile_cluster_lock_key[NUM_SYSTEM_INODES]; 54 #endif ··· 54 type <= OCFS2_LAST_GLOBAL_SYSTEM_INODE; 55 } 56 57 - static inline int is_in_system_inode_array(struct ocfs2_super *osb, 58 - int type, 59 - u32 slot) 60 { 61 - return slot == osb->slot_num || is_global_system_inode(type); 62 } 63 64 struct inode *ocfs2_get_system_file_inode(struct ocfs2_super *osb, ··· 110 struct inode **arr = NULL; 111 112 /* avoid the lookup if cached in local system file array */ 113 - if (is_in_system_inode_array(osb, type, slot)) 114 - arr = &(osb->system_inodes[type]); 115 116 if (arr && ((inode = *arr) != NULL)) { 117 /* get a ref in addition to the array ref */
··· 44 int type, 45 u32 slot); 46 47 #ifdef CONFIG_DEBUG_LOCK_ALLOC 48 static struct lock_class_key ocfs2_sysfile_cluster_lock_key[NUM_SYSTEM_INODES]; 49 #endif ··· 59 type <= OCFS2_LAST_GLOBAL_SYSTEM_INODE; 60 } 61 62 + static struct inode **get_local_system_inode(struct ocfs2_super *osb, 63 + int type, 64 + u32 slot) 65 { 66 + int index; 67 + struct inode **local_system_inodes, **free = NULL; 68 + 69 + BUG_ON(slot == OCFS2_INVALID_SLOT); 70 + BUG_ON(type < OCFS2_FIRST_LOCAL_SYSTEM_INODE || 71 + type > OCFS2_LAST_LOCAL_SYSTEM_INODE); 72 + 73 + spin_lock(&osb->osb_lock); 74 + local_system_inodes = osb->local_system_inodes; 75 + spin_unlock(&osb->osb_lock); 76 + 77 + if (unlikely(!local_system_inodes)) { 78 + local_system_inodes = kzalloc(sizeof(struct inode *) * 79 + NUM_LOCAL_SYSTEM_INODES * 80 + osb->max_slots, 81 + GFP_NOFS); 82 + if (!local_system_inodes) { 83 + mlog_errno(-ENOMEM); 84 + /* 85 + * return NULL here so that ocfs2_get_sytem_file_inodes 86 + * will try to create an inode and use it. We will try 87 + * to initialize local_system_inodes next time. 88 + */ 89 + return NULL; 90 + } 91 + 92 + spin_lock(&osb->osb_lock); 93 + if (osb->local_system_inodes) { 94 + /* Someone has initialized it for us. 
*/ 95 + free = local_system_inodes; 96 + local_system_inodes = osb->local_system_inodes; 97 + } else 98 + osb->local_system_inodes = local_system_inodes; 99 + spin_unlock(&osb->osb_lock); 100 + if (unlikely(free)) 101 + kfree(free); 102 + } 103 + 104 + index = (slot * NUM_LOCAL_SYSTEM_INODES) + 105 + (type - OCFS2_FIRST_LOCAL_SYSTEM_INODE); 106 + 107 + return &local_system_inodes[index]; 108 } 109 110 struct inode *ocfs2_get_system_file_inode(struct ocfs2_super *osb, ··· 74 struct inode **arr = NULL; 75 76 /* avoid the lookup if cached in local system file array */ 77 + if (is_global_system_inode(type)) { 78 + arr = &(osb->global_system_inodes[type]); 79 + } else 80 + arr = get_local_system_inode(osb, type, slot); 81 82 if (arr && ((inode = *arr) != NULL)) { 83 /* get a ref in addition to the array ref */