Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

f2fs: avoid deadlock on init_inode_metadata

Previously, init_inode_metadata did not hold any parent directory's inode
page, so f2fs_init_acl could grab its parent inode page without any problem.
But when we use inline_dentry, that page is already grabbed during
f2fs_add_link, so we can fall into a deadlock condition like the one below.

INFO: task mknod:11006 blocked for more than 120 seconds.
Tainted: G OE 3.17.0-rc1+ #13
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
mknod D ffff88003fc94580 0 11006 11004 0x00000000
ffff880007717b10 0000000000000002 ffff88003c323220 ffff880007717fd8
0000000000014580 0000000000014580 ffff88003daecb30 ffff88003c323220
ffff88003fc94e80 ffff88003ffbb4e8 ffff880007717ba0 0000000000000002
Call Trace:
[<ffffffff8173dc40>] ? bit_wait+0x50/0x50
[<ffffffff8173d4cd>] io_schedule+0x9d/0x130
[<ffffffff8173dc6c>] bit_wait_io+0x2c/0x50
[<ffffffff8173da3b>] __wait_on_bit_lock+0x4b/0xb0
[<ffffffff811640a7>] __lock_page+0x67/0x70
[<ffffffff810acf50>] ? autoremove_wake_function+0x40/0x40
[<ffffffff811652cc>] pagecache_get_page+0x14c/0x1e0
[<ffffffffa029afa9>] get_node_page+0x59/0x130 [f2fs]
[<ffffffffa02a63ad>] read_all_xattrs+0x24d/0x430 [f2fs]
[<ffffffffa02a6ca2>] f2fs_getxattr+0x52/0xe0 [f2fs]
[<ffffffffa02a7481>] f2fs_get_acl+0x41/0x2d0 [f2fs]
[<ffffffff8122d847>] get_acl+0x47/0x70
[<ffffffff8122db5a>] posix_acl_create+0x5a/0x150
[<ffffffffa02a7759>] f2fs_init_acl+0x29/0xcb [f2fs]
[<ffffffffa0286a8d>] init_inode_metadata+0x5d/0x340 [f2fs]
[<ffffffffa029253a>] f2fs_add_inline_entry+0x12a/0x2e0 [f2fs]
[<ffffffffa0286ea5>] __f2fs_add_link+0x45/0x4a0 [f2fs]
[<ffffffffa028b5b6>] ? f2fs_new_inode+0x146/0x220 [f2fs]
[<ffffffffa028b816>] f2fs_mknod+0x86/0xf0 [f2fs]
[<ffffffff811e3ec1>] vfs_mknod+0xe1/0x160
[<ffffffff811e4b26>] SyS_mknod+0x1f6/0x200
[<ffffffff81741d7f>] tracesys+0xe1/0xe6

Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>

+157 -22
+138 -6
fs/f2fs/acl.c
··· 162 162 return ERR_PTR(-EINVAL); 163 163 } 164 164 165 - struct posix_acl *f2fs_get_acl(struct inode *inode, int type) 165 + static struct posix_acl *__f2fs_get_acl(struct inode *inode, int type, 166 + struct page *dpage) 166 167 { 167 168 int name_index = F2FS_XATTR_INDEX_POSIX_ACL_DEFAULT; 168 169 void *value = NULL; ··· 173 172 if (type == ACL_TYPE_ACCESS) 174 173 name_index = F2FS_XATTR_INDEX_POSIX_ACL_ACCESS; 175 174 176 - retval = f2fs_getxattr(inode, name_index, "", NULL, 0); 175 + retval = f2fs_getxattr(inode, name_index, "", NULL, 0, dpage); 177 176 if (retval > 0) { 178 177 value = kmalloc(retval, GFP_F2FS_ZERO); 179 178 if (!value) 180 179 return ERR_PTR(-ENOMEM); 181 - retval = f2fs_getxattr(inode, name_index, "", value, retval); 180 + retval = f2fs_getxattr(inode, name_index, "", value, 181 + retval, dpage); 182 182 } 183 183 184 184 if (retval > 0) ··· 194 192 set_cached_acl(inode, type, acl); 195 193 196 194 return acl; 195 + } 196 + 197 + struct posix_acl *f2fs_get_acl(struct inode *inode, int type) 198 + { 199 + return __f2fs_get_acl(inode, type, NULL); 197 200 } 198 201 199 202 static int __f2fs_set_acl(struct inode *inode, int type, ··· 256 249 return __f2fs_set_acl(inode, type, acl, NULL); 257 250 } 258 251 259 - int f2fs_init_acl(struct inode *inode, struct inode *dir, struct page *ipage) 252 + /* 253 + * Most part of f2fs_acl_clone, f2fs_acl_create_masq, f2fs_acl_create 254 + * are copied from posix_acl.c 255 + */ 256 + static struct posix_acl *f2fs_acl_clone(const struct posix_acl *acl, 257 + gfp_t flags) 260 258 { 261 - struct posix_acl *default_acl, *acl; 259 + struct posix_acl *clone = NULL; 260 + 261 + if (acl) { 262 + int size = sizeof(struct posix_acl) + acl->a_count * 263 + sizeof(struct posix_acl_entry); 264 + clone = kmemdup(acl, size, flags); 265 + if (clone) 266 + atomic_set(&clone->a_refcount, 1); 267 + } 268 + return clone; 269 + } 270 + 271 + static int f2fs_acl_create_masq(struct posix_acl *acl, umode_t *mode_p) 272 + { 273 
+ struct posix_acl_entry *pa, *pe; 274 + struct posix_acl_entry *group_obj = NULL, *mask_obj = NULL; 275 + umode_t mode = *mode_p; 276 + int not_equiv = 0; 277 + 278 + /* assert(atomic_read(acl->a_refcount) == 1); */ 279 + 280 + FOREACH_ACL_ENTRY(pa, acl, pe) { 281 + switch(pa->e_tag) { 282 + case ACL_USER_OBJ: 283 + pa->e_perm &= (mode >> 6) | ~S_IRWXO; 284 + mode &= (pa->e_perm << 6) | ~S_IRWXU; 285 + break; 286 + 287 + case ACL_USER: 288 + case ACL_GROUP: 289 + not_equiv = 1; 290 + break; 291 + 292 + case ACL_GROUP_OBJ: 293 + group_obj = pa; 294 + break; 295 + 296 + case ACL_OTHER: 297 + pa->e_perm &= mode | ~S_IRWXO; 298 + mode &= pa->e_perm | ~S_IRWXO; 299 + break; 300 + 301 + case ACL_MASK: 302 + mask_obj = pa; 303 + not_equiv = 1; 304 + break; 305 + 306 + default: 307 + return -EIO; 308 + } 309 + } 310 + 311 + if (mask_obj) { 312 + mask_obj->e_perm &= (mode >> 3) | ~S_IRWXO; 313 + mode &= (mask_obj->e_perm << 3) | ~S_IRWXG; 314 + } else { 315 + if (!group_obj) 316 + return -EIO; 317 + group_obj->e_perm &= (mode >> 3) | ~S_IRWXO; 318 + mode &= (group_obj->e_perm << 3) | ~S_IRWXG; 319 + } 320 + 321 + *mode_p = (*mode_p & ~S_IRWXUGO) | mode; 322 + return not_equiv; 323 + } 324 + 325 + static int f2fs_acl_create(struct inode *dir, umode_t *mode, 326 + struct posix_acl **default_acl, struct posix_acl **acl, 327 + struct page *dpage) 328 + { 329 + struct posix_acl *p; 330 + int ret; 331 + 332 + if (S_ISLNK(*mode) || !IS_POSIXACL(dir)) 333 + goto no_acl; 334 + 335 + p = __f2fs_get_acl(dir, ACL_TYPE_DEFAULT, dpage); 336 + if (IS_ERR(p)) { 337 + if (p == ERR_PTR(-EOPNOTSUPP)) 338 + goto apply_umask; 339 + return PTR_ERR(p); 340 + } 341 + 342 + if (!p) 343 + goto apply_umask; 344 + 345 + *acl = f2fs_acl_clone(p, GFP_NOFS); 346 + if (!*acl) 347 + return -ENOMEM; 348 + 349 + ret = f2fs_acl_create_masq(*acl, mode); 350 + if (ret < 0) { 351 + posix_acl_release(*acl); 352 + return -ENOMEM; 353 + } 354 + 355 + if (ret == 0) { 356 + posix_acl_release(*acl); 357 + *acl = 
NULL; 358 + } 359 + 360 + if (!S_ISDIR(*mode)) { 361 + posix_acl_release(p); 362 + *default_acl = NULL; 363 + } else { 364 + *default_acl = p; 365 + } 366 + return 0; 367 + 368 + apply_umask: 369 + *mode &= ~current_umask(); 370 + no_acl: 371 + *default_acl = NULL; 372 + *acl = NULL; 373 + return 0; 374 + } 375 + 376 + int f2fs_init_acl(struct inode *inode, struct inode *dir, struct page *ipage, 377 + struct page *dpage) 378 + { 379 + struct posix_acl *default_acl = NULL, *acl = NULL; 262 380 int error = 0; 263 381 264 - error = posix_acl_create(dir, &inode->i_mode, &default_acl, &acl); 382 + error = f2fs_acl_create(dir, &inode->i_mode, &default_acl, &acl, dpage); 265 383 if (error) 266 384 return error; 267 385
+3 -2
fs/f2fs/acl.h
··· 38 38 39 39 extern struct posix_acl *f2fs_get_acl(struct inode *, int); 40 40 extern int f2fs_set_acl(struct inode *inode, struct posix_acl *acl, int type); 41 - extern int f2fs_init_acl(struct inode *, struct inode *, struct page *); 41 + extern int f2fs_init_acl(struct inode *, struct inode *, struct page *, 42 + struct page *); 42 43 #else 43 44 #define f2fs_check_acl NULL 44 45 #define f2fs_get_acl NULL 45 46 #define f2fs_set_acl NULL 46 47 47 48 static inline int f2fs_init_acl(struct inode *inode, struct inode *dir, 48 - struct page *page) 49 + struct page *ipage, struct page *dpage) 49 50 { 50 51 return 0; 51 52 }
+5 -5
fs/f2fs/dir.c
··· 363 363 return 0; 364 364 } 365 365 366 - struct page *init_inode_metadata(struct inode *inode, 367 - struct inode *dir, const struct qstr *name) 366 + struct page *init_inode_metadata(struct inode *inode, struct inode *dir, 367 + const struct qstr *name, struct page *dpage) 368 368 { 369 369 struct page *page; 370 370 int err; ··· 380 380 goto error; 381 381 } 382 382 383 - err = f2fs_init_acl(inode, dir, page); 383 + err = f2fs_init_acl(inode, dir, page, dpage); 384 384 if (err) 385 385 goto put_error; 386 386 ··· 541 541 f2fs_wait_on_page_writeback(dentry_page, DATA); 542 542 543 543 down_write(&F2FS_I(inode)->i_sem); 544 - page = init_inode_metadata(inode, dir, name); 544 + page = init_inode_metadata(inode, dir, name, NULL); 545 545 if (IS_ERR(page)) { 546 546 err = PTR_ERR(page); 547 547 goto fail; ··· 580 580 int err = 0; 581 581 582 582 down_write(&F2FS_I(inode)->i_sem); 583 - page = init_inode_metadata(inode, dir, NULL); 583 + page = init_inode_metadata(inode, dir, NULL, NULL); 584 584 if (IS_ERR(page)) { 585 585 err = PTR_ERR(page); 586 586 goto fail;
+1 -1
fs/f2fs/f2fs.h
··· 1247 1247 struct f2fs_dir_entry *find_target_dentry(struct qstr *, int *, const void *, 1248 1248 struct f2fs_dir_entry *, __u8 (*)[F2FS_SLOT_LEN]); 1249 1249 struct page *init_inode_metadata(struct inode *, struct inode *, 1250 - const struct qstr *); 1250 + const struct qstr *, struct page *); 1251 1251 void update_parent_metadata(struct inode *, struct inode *, unsigned int); 1252 1252 int room_for_filename(const void *, int, int); 1253 1253 void f2fs_drop_nlink(struct inode *, struct inode *, struct page *);
+3 -3
fs/f2fs/inline.c
··· 427 427 goto out; 428 428 } 429 429 430 - f2fs_wait_on_page_writeback(ipage, NODE); 431 - 432 430 down_write(&F2FS_I(inode)->i_sem); 433 - page = init_inode_metadata(inode, dir, name); 431 + page = init_inode_metadata(inode, dir, name, ipage); 434 432 if (IS_ERR(page)) { 435 433 err = PTR_ERR(page); 436 434 goto fail; 437 435 } 436 + 437 + f2fs_wait_on_page_writeback(ipage, NODE); 438 438 de = &dentry_blk->dentry[bit_pos]; 439 439 de->hash_code = name_hash; 440 440 de->name_len = cpu_to_le16(namelen);
+3 -3
fs/f2fs/xattr.c
··· 83 83 } 84 84 if (strcmp(name, "") == 0) 85 85 return -EINVAL; 86 - return f2fs_getxattr(dentry->d_inode, type, name, buffer, size); 86 + return f2fs_getxattr(dentry->d_inode, type, name, buffer, size, NULL); 87 87 } 88 88 89 89 static int f2fs_xattr_generic_set(struct dentry *dentry, const char *name, ··· 398 398 } 399 399 400 400 int f2fs_getxattr(struct inode *inode, int index, const char *name, 401 - void *buffer, size_t buffer_size) 401 + void *buffer, size_t buffer_size, struct page *ipage) 402 402 { 403 403 struct f2fs_xattr_entry *entry; 404 404 void *base_addr; ··· 412 412 if (len > F2FS_NAME_LEN) 413 413 return -ERANGE; 414 414 415 - base_addr = read_all_xattrs(inode, NULL); 415 + base_addr = read_all_xattrs(inode, ipage); 416 416 if (!base_addr) 417 417 return -ENOMEM; 418 418
+4 -2
fs/f2fs/xattr.h
··· 115 115 116 116 extern int f2fs_setxattr(struct inode *, int, const char *, 117 117 const void *, size_t, struct page *, int); 118 - extern int f2fs_getxattr(struct inode *, int, const char *, void *, size_t); 118 + extern int f2fs_getxattr(struct inode *, int, const char *, void *, 119 + size_t, struct page *); 119 120 extern ssize_t f2fs_listxattr(struct dentry *, char *, size_t); 120 121 #else 121 122 ··· 127 126 return -EOPNOTSUPP; 128 127 } 129 128 static inline int f2fs_getxattr(struct inode *inode, int index, 130 - const char *name, void *buffer, size_t buffer_size) 129 + const char *name, void *buffer, 130 + size_t buffer_size, struct page *dpage) 131 131 { 132 132 return -EOPNOTSUPP; 133 133 }