Merge git://git.kernel.org/pub/scm/linux/kernel/git/hch/xfs-icache-races

* git://git.kernel.org/pub/scm/linux/kernel/git/hch/xfs-icache-races:
xfs: fix freeing of inodes not yet added to the inode cache
vfs: add __destroy_inode
vfs: fix inode_init_always calling convention

+99 -103
+24 -16
fs/inode.c
··· 120 120 * These are initializations that need to be done on every inode 121 121 * allocation as the fields are not initialised by slab allocation. 122 122 */ 123 - struct inode *inode_init_always(struct super_block *sb, struct inode *inode) 123 + int inode_init_always(struct super_block *sb, struct inode *inode) 124 124 { 125 125 static const struct address_space_operations empty_aops; 126 126 static struct inode_operations empty_iops; 127 127 static const struct file_operations empty_fops; 128 - 129 128 struct address_space *const mapping = &inode->i_data; 130 129 131 130 inode->i_sb = sb; ··· 151 152 inode->dirtied_when = 0; 152 153 153 154 if (security_inode_alloc(inode)) 154 - goto out_free_inode; 155 + goto out; 155 156 156 157 /* allocate and initialize an i_integrity */ 157 158 if (ima_inode_alloc(inode)) ··· 197 198 inode->i_fsnotify_mask = 0; 198 199 #endif 199 200 200 - return inode; 201 + return 0; 201 202 202 203 out_free_security: 203 204 security_inode_free(inode); 204 - out_free_inode: 205 - if (inode->i_sb->s_op->destroy_inode) 206 - inode->i_sb->s_op->destroy_inode(inode); 207 - else 208 - kmem_cache_free(inode_cachep, (inode)); 209 - return NULL; 205 + out: 206 + return -ENOMEM; 210 207 } 211 208 EXPORT_SYMBOL(inode_init_always); 212 209 ··· 215 220 else 216 221 inode = kmem_cache_alloc(inode_cachep, GFP_KERNEL); 217 222 218 - if (inode) 219 - return inode_init_always(sb, inode); 220 - return NULL; 223 + if (!inode) 224 + return NULL; 225 + 226 + if (unlikely(inode_init_always(sb, inode))) { 227 + if (inode->i_sb->s_op->destroy_inode) 228 + inode->i_sb->s_op->destroy_inode(inode); 229 + else 230 + kmem_cache_free(inode_cachep, inode); 231 + return NULL; 232 + } 233 + 234 + return inode; 221 235 } 222 236 223 - void destroy_inode(struct inode *inode) 237 + void __destroy_inode(struct inode *inode) 224 238 { 225 239 BUG_ON(inode_has_buffers(inode)); 226 240 ima_inode_free(inode); ··· 241 237 if (inode->i_default_acl && inode->i_default_acl != ACL_NOT_CACHED) 242 238 posix_acl_release(inode->i_default_acl); 243 239 #endif 240 + } 241 + EXPORT_SYMBOL(__destroy_inode); 242 + 243 + void destroy_inode(struct inode *inode) 244 + { 245 + __destroy_inode(inode); 244 246 if (inode->i_sb->s_op->destroy_inode) 245 247 inode->i_sb->s_op->destroy_inode(inode); 246 248 else 247 249 kmem_cache_free(inode_cachep, (inode)); 248 250 } 249 - EXPORT_SYMBOL(destroy_inode); 250 - 251 251 252 252 /* 253 253 * These are initializations that only need to be done
+73 -69
fs/xfs/xfs_iget.c
··· 64 64 ip = kmem_zone_alloc(xfs_inode_zone, KM_SLEEP); 65 65 if (!ip) 66 66 return NULL; 67 + if (inode_init_always(mp->m_super, VFS_I(ip))) { 68 + kmem_zone_free(xfs_inode_zone, ip); 69 + return NULL; 70 + } 67 71 68 72 ASSERT(atomic_read(&ip->i_iocount) == 0); 69 73 ASSERT(atomic_read(&ip->i_pincount) == 0); ··· 109 105 #ifdef XFS_DIR2_TRACE 110 106 ip->i_dir_trace = ktrace_alloc(XFS_DIR2_KTRACE_SIZE, KM_NOFS); 111 107 #endif 112 - /* 113 - * Now initialise the VFS inode. We do this after the xfs_inode 114 - * initialisation as internal failures will result in ->destroy_inode 115 - * being called and that will pass down through the reclaim path and 116 - * free the XFS inode. This path requires the XFS inode to already be 117 - * initialised. Hence if this call fails, the xfs_inode has already 118 - * been freed and we should not reference it at all in the error 119 - * handling. 120 - */ 121 - if (!inode_init_always(mp->m_super, VFS_I(ip))) 122 - return NULL; 123 108 124 109 /* prevent anyone from using this yet */ 125 110 VFS_I(ip)->i_state = I_NEW|I_LOCK; 126 111 127 112 return ip; 113 + } 114 + 115 + STATIC void 116 + xfs_inode_free( 117 + struct xfs_inode *ip) 118 + { 119 + switch (ip->i_d.di_mode & S_IFMT) { 120 + case S_IFREG: 121 + case S_IFDIR: 122 + case S_IFLNK: 123 + xfs_idestroy_fork(ip, XFS_DATA_FORK); 124 + break; 125 + } 126 + 127 + if (ip->i_afp) 128 + xfs_idestroy_fork(ip, XFS_ATTR_FORK); 129 + 130 + #ifdef XFS_INODE_TRACE 131 + ktrace_free(ip->i_trace); 132 + #endif 133 + #ifdef XFS_BMAP_TRACE 134 + ktrace_free(ip->i_xtrace); 135 + #endif 136 + #ifdef XFS_BTREE_TRACE 137 + ktrace_free(ip->i_btrace); 138 + #endif 139 + #ifdef XFS_RW_TRACE 140 + ktrace_free(ip->i_rwtrace); 141 + #endif 142 + #ifdef XFS_ILOCK_TRACE 143 + ktrace_free(ip->i_lock_trace); 144 + #endif 145 + #ifdef XFS_DIR2_TRACE 146 + ktrace_free(ip->i_dir_trace); 147 + #endif 148 + 149 + if (ip->i_itemp) { 150 + /* 151 + * Only if we are shutting down the fs will we see an 152 + * inode still in the AIL. If it is there, we should remove 153 + * it to prevent a use-after-free from occurring. 154 + */ 155 + xfs_log_item_t *lip = &ip->i_itemp->ili_item; 156 + struct xfs_ail *ailp = lip->li_ailp; 157 + 158 + ASSERT(((lip->li_flags & XFS_LI_IN_AIL) == 0) || 159 + XFS_FORCED_SHUTDOWN(ip->i_mount)); 160 + if (lip->li_flags & XFS_LI_IN_AIL) { 161 + spin_lock(&ailp->xa_lock); 162 + if (lip->li_flags & XFS_LI_IN_AIL) 163 + xfs_trans_ail_delete(ailp, lip); 164 + else 165 + spin_unlock(&ailp->xa_lock); 166 + } 167 + xfs_inode_item_destroy(ip); 168 + ip->i_itemp = NULL; 169 + } 170 + 171 + /* asserts to verify all state is correct here */ 172 + ASSERT(atomic_read(&ip->i_iocount) == 0); 173 + ASSERT(atomic_read(&ip->i_pincount) == 0); 174 + ASSERT(!spin_is_locked(&ip->i_flags_lock)); 175 + ASSERT(completion_done(&ip->i_flush)); 176 + 177 + kmem_zone_free(xfs_inode_zone, ip); 128 178 } 129 179 130 180 /* ··· 225 167 * errors cleanly, then tag it so it can be set up correctly 226 168 * later. 227 169 */ 228 - if (!inode_init_always(mp->m_super, VFS_I(ip))) { 170 + if (inode_init_always(mp->m_super, VFS_I(ip))) { 229 171 error = ENOMEM; 230 172 goto out_error; 231 173 } ··· 357 299 if (lock_flags) 358 300 xfs_iunlock(ip, lock_flags); 359 301 out_destroy: 360 - xfs_destroy_inode(ip); 302 + __destroy_inode(VFS_I(ip)); 303 + xfs_inode_free(ip); 361 304 return error; 362 305 } 363 306 ··· 563 504 xfs_qm_dqdetach(ip); 564 505 xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); 565 506 566 - switch (ip->i_d.di_mode & S_IFMT) { 567 - case S_IFREG: 568 - case S_IFDIR: 569 - case S_IFLNK: 570 - xfs_idestroy_fork(ip, XFS_DATA_FORK); 571 - break; 572 - } 573 - 574 - if (ip->i_afp) 575 - xfs_idestroy_fork(ip, XFS_ATTR_FORK); 576 - 577 - #ifdef XFS_INODE_TRACE 578 - ktrace_free(ip->i_trace); 579 - #endif 580 - #ifdef XFS_BMAP_TRACE 581 - ktrace_free(ip->i_xtrace); 582 - #endif 583 - #ifdef XFS_BTREE_TRACE 584 - ktrace_free(ip->i_btrace); 585 - #endif 586 - #ifdef XFS_RW_TRACE 587 - ktrace_free(ip->i_rwtrace); 588 - #endif 589 - #ifdef XFS_ILOCK_TRACE 590 - ktrace_free(ip->i_lock_trace); 591 - #endif 592 - #ifdef XFS_DIR2_TRACE 593 - ktrace_free(ip->i_dir_trace); 594 - #endif 595 - if (ip->i_itemp) { 596 - /* 597 - * Only if we are shutting down the fs will we see an 598 - * inode still in the AIL. If it is there, we should remove 599 - * it to prevent a use-after-free from occurring. 600 - */ 601 - xfs_log_item_t *lip = &ip->i_itemp->ili_item; 602 - struct xfs_ail *ailp = lip->li_ailp; 603 - 604 - ASSERT(((lip->li_flags & XFS_LI_IN_AIL) == 0) || 605 - XFS_FORCED_SHUTDOWN(ip->i_mount)); 606 - if (lip->li_flags & XFS_LI_IN_AIL) { 607 - spin_lock(&ailp->xa_lock); 608 - if (lip->li_flags & XFS_LI_IN_AIL) 609 - xfs_trans_ail_delete(ailp, lip); 610 - else 611 - spin_unlock(&ailp->xa_lock); 612 - } 613 - xfs_inode_item_destroy(ip); 614 - ip->i_itemp = NULL; 615 - } 616 - /* asserts to verify all state is correct here */ 617 - ASSERT(atomic_read(&ip->i_iocount) == 0); 618 - ASSERT(atomic_read(&ip->i_pincount) == 0); 619 - ASSERT(!spin_is_locked(&ip->i_flags_lock)); 620 - ASSERT(completion_done(&ip->i_flush)); 621 - kmem_zone_free(xfs_inode_zone, ip); 507 + xfs_inode_free(ip); 622 508 } 623 509 624 510 /*
-17
fs/xfs/xfs_inode.h
··· 310 310 } 311 311 312 312 /* 313 - * Get rid of a partially initialized inode. 314 - * 315 - * We have to go through destroy_inode to make sure allocations 316 - * from init_inode_always like the security data are undone. 317 - * 318 - * We mark the inode bad so that it takes the short cut in 319 - * the reclaim path instead of going through the flush path 320 - * which doesn't make sense for an inode that has never seen the 321 - * light of day. 322 - */ 323 - static inline void xfs_destroy_inode(struct xfs_inode *ip) 324 - { 325 - make_bad_inode(VFS_I(ip)); 326 - return destroy_inode(VFS_I(ip)); 327 - } 328 - 329 - /* 330 313 * i_flags helper functions 331 314 */ 332 315 static inline void
+2 -1
include/linux/fs.h
··· 2137 2137 2138 2138 extern loff_t vfs_llseek(struct file *file, loff_t offset, int origin); 2139 2139 2140 - extern struct inode * inode_init_always(struct super_block *, struct inode *); 2140 + extern int inode_init_always(struct super_block *, struct inode *); 2141 2141 extern void inode_init_once(struct inode *); 2142 2142 extern void inode_add_to_lists(struct super_block *, struct inode *); 2143 2143 extern void iput(struct inode *); ··· 2164 2164 extern void iget_failed(struct inode *); 2165 2165 extern void clear_inode(struct inode *); 2166 2166 extern void destroy_inode(struct inode *); 2167 + extern void __destroy_inode(struct inode *); 2167 2168 extern struct inode *new_inode(struct super_block *); 2168 2169 extern int should_remove_suid(struct dentry *); 2169 2170 extern int file_remove_suid(struct file *);