Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs-2.6

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs-2.6:
fs: simplify iget & friends
fs: pull inode->i_lock up out of writeback_single_inode
fs: rename inode_lock to inode_hash_lock
fs: move i_wb_list out from under inode_lock
fs: move i_sb_list out from under inode_lock
fs: remove inode_lock from iput_final and prune_icache
fs: Lock the inode LRU list separately
fs: factor inode disposal
fs: protect inode->i_state with inode->i_lock
autofs4: Do not potentially dereference NULL pointer returned by fget() in autofs_dev_ioctl_setpipefd()
autofs4 - remove autofs4_lock
autofs4 - fix d_manage() return on rcu-walk
autofs4 - fix autofs4_expire_indirect() traversal
autofs4 - fix dentry leak in autofs4_expire_direct()
autofs4 - reinstate last used update on access
vfs - check non-mountpoint dentry might block in __follow_mount_rcu()
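
Taken together, the commits above break the old global inode_lock into finer-grained locks: inode->i_state and the i_count reference are now protected by the per-inode inode->i_lock, the hash chains by inode_hash_lock, and the i_sb_list and i_wb_list by inode_sb_list_lock and inode_wb_list_lock, with the inode LRU getting a separate lock as well. A minimal illustrative sketch of the per-inode rule follows; example_try_grab() is a hypothetical helper that simply mirrors the new igrab() in the fs/inode.c hunk further down, not code from this merge.

#include <linux/fs.h>
#include <linux/spinlock.h>

/*
 * Illustrative only: after this series, checking or changing i_state and
 * taking an i_count reference happen under inode->i_lock rather than the
 * old global inode_lock.
 */
static struct inode *example_try_grab(struct inode *inode)
{
	spin_lock(&inode->i_lock);
	if (inode->i_state & (I_FREEING | I_WILL_FREE)) {
		/* inode is being torn down; do not take a new reference */
		spin_unlock(&inode->i_lock);
		return NULL;
	}
	__iget(inode);		/* reference count taken under i_lock */
	spin_unlock(&inode->i_lock);
	return inode;
}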

+622 -543
+1 -1
Documentation/filesystems/Locking
··· 128 destroy_inode: 129 dirty_inode: (must not sleep) 130 write_inode: 131 - drop_inode: !!!inode_lock!!! 132 evict_inode: 133 put_super: write 134 write_super: read
··· 128 destroy_inode: 129 dirty_inode: (must not sleep) 130 write_inode: 131 + drop_inode: !!!inode->i_lock!!! 132 evict_inode: 133 put_super: write 134 write_super: read
+11 -5
Documentation/filesystems/porting
··· 298 remaining links or not. Caller does *not* evict the pagecache or inode-associated 299 metadata buffers; getting rid of those is responsibility of method, as it had 300 been for ->delete_inode(). 301 - ->drop_inode() returns int now; it's called on final iput() with inode_lock 302 - held and it returns true if filesystems wants the inode to be dropped. As before, 303 - generic_drop_inode() is still the default and it's been updated appropriately. 304 - generic_delete_inode() is also alive and it consists simply of return 1. Note that 305 - all actual eviction work is done by caller after ->drop_inode() returns. 306 clear_inode() is gone; use end_writeback() instead. As before, it must 307 be called exactly once on each call of ->evict_inode() (as it used to be for 308 each call of ->delete_inode()). Unlike before, if you are using inode-associated ··· 397 Currently you can only have FALLOC_FL_PUNCH_HOLE with FALLOC_FL_KEEP_SIZE set, 398 so the i_size should not change when hole punching, even when puching the end of 399 a file off. 400 401 -- 402 [mandatory]
··· 298 remaining links or not. Caller does *not* evict the pagecache or inode-associated 299 metadata buffers; getting rid of those is responsibility of method, as it had 300 been for ->delete_inode(). 301 + 302 + ->drop_inode() returns int now; it's called on final iput() with 303 + inode->i_lock held and it returns true if filesystems wants the inode to be 304 + dropped. As before, generic_drop_inode() is still the default and it's been 305 + updated appropriately. generic_delete_inode() is also alive and it consists 306 + simply of return 1. Note that all actual eviction work is done by caller after 307 + ->drop_inode() returns. 308 + 309 clear_inode() is gone; use end_writeback() instead. As before, it must 310 be called exactly once on each call of ->evict_inode() (as it used to be for 311 each call of ->delete_inode()). Unlike before, if you are using inode-associated ··· 394 Currently you can only have FALLOC_FL_PUNCH_HOLE with FALLOC_FL_KEEP_SIZE set, 395 so the i_size should not change when hole punching, even when puching the end of 396 a file off. 397 + 398 + -- 399 + [mandatory] 400 401 -- 402 [mandatory]
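
For filesystems that implement their own ->drop_inode(), the contract spelled out above (called on the final iput() with inode->i_lock held, must not sleep, non-zero return means the inode should be evicted) looks roughly like the sketch below. "examplefs" is a made-up name and the body simply mirrors generic_drop_inode(); it is not part of this merge.

#include <linux/fs.h>

/*
 * Sketch of a ->drop_inode() under the new rules: runs with
 * inode->i_lock held, must not sleep, and returns non-zero to have
 * the inode evicted on the final iput().
 */
static int examplefs_drop_inode(struct inode *inode)
{
	/* same policy as generic_drop_inode(): drop unlinked or unhashed inodes */
	return !inode->i_nlink || inode_unhashed(inode);
}

static const struct super_operations examplefs_sops = {
	.drop_inode	= examplefs_drop_inode,
	/* other methods omitted */
};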
+1 -1
Documentation/filesystems/vfs.txt
··· 254 should be synchronous or not, not all filesystems check this flag. 255 256 drop_inode: called when the last access to the inode is dropped, 257 - with the inode_lock spinlock held. 258 259 This method should be either NULL (normal UNIX filesystem 260 semantics) or "generic_delete_inode" (for filesystems that do not
··· 254 should be synchronous or not, not all filesystems check this flag. 255 256 drop_inode: called when the last access to the inode is dropped, 257 + with the inode->i_lock spinlock held. 258 259 This method should be either NULL (normal UNIX filesystem 260 semantics) or "generic_delete_inode" (for filesystems that do not
-2
fs/autofs4/autofs_i.h
··· 61 current->pid, __func__, ##args); \ 62 } while (0) 63 64 - extern spinlock_t autofs4_lock; 65 - 66 /* Unified info structure. This is pointed to by both the dentry and 67 inode structures. Each file in the filesystem has an instance of this 68 structure. It holds a reference to the dentry, so dentries are never
··· 61 current->pid, __func__, ##args); \ 62 } while (0) 63 64 /* Unified info structure. This is pointed to by both the dentry and 65 inode structures. Each file in the filesystem has an instance of this 66 structure. It holds a reference to the dentry, so dentries are never
+4
fs/autofs4/dev-ioctl.c
··· 372 return -EBUSY; 373 } else { 374 struct file *pipe = fget(pipefd); 375 if (!pipe->f_op || !pipe->f_op->write) { 376 err = -EPIPE; 377 fput(pipe);
··· 372 return -EBUSY; 373 } else { 374 struct file *pipe = fget(pipefd); 375 + if (!pipe) { 376 + err = -EBADF; 377 + goto out; 378 + } 379 if (!pipe->f_op || !pipe->f_op->write) { 380 err = -EPIPE; 381 fput(pipe);
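
The change above fixes a NULL-pointer dereference: fget() returns NULL when the descriptor is not an open file, so the result must be checked before touching pipe->f_op. Stated in isolation, the pattern the fix enforces looks like this; example_set_pipe() is a made-up helper, not autofs code.

#include <linux/errno.h>
#include <linux/file.h>
#include <linux/fs.h>

static int example_set_pipe(int pipefd)
{
	struct file *pipe = fget(pipefd);

	if (!pipe)
		return -EBADF;		/* bad descriptor: nothing to fput() */
	if (!pipe->f_op || !pipe->f_op->write) {
		fput(pipe);		/* drop the reference fget() took */
		return -EPIPE;		/* not writable, so not a usable pipe */
	}
	/* ... hand the pipe off to its user, keeping the reference ... */
	return 0;
}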
+63 -21
fs/autofs4/expire.c
··· 87 } 88 89 /* 90 * Calculate and dget next entry in top down tree traversal. 91 */ 92 static struct dentry *get_next_positive_dentry(struct dentry *prev, 93 struct dentry *root) 94 { 95 struct list_head *next; 96 struct dentry *p, *ret; 97 98 if (prev == NULL) 99 return dget(root); 100 101 - spin_lock(&autofs4_lock); 102 relock: 103 p = prev; 104 spin_lock(&p->d_lock); ··· 162 163 if (p == root) { 164 spin_unlock(&p->d_lock); 165 - spin_unlock(&autofs4_lock); 166 dput(prev); 167 return NULL; 168 } ··· 192 dget_dlock(ret); 193 spin_unlock(&ret->d_lock); 194 spin_unlock(&p->d_lock); 195 - spin_unlock(&autofs4_lock); 196 197 dput(prev); 198 ··· 342 spin_lock(&sbi->fs_lock); 343 ino = autofs4_dentry_ino(root); 344 /* No point expiring a pending mount */ 345 - if (ino->flags & AUTOFS_INF_PENDING) { 346 - spin_unlock(&sbi->fs_lock); 347 - return NULL; 348 - } 349 - managed_dentry_set_transit(root); 350 if (!autofs4_direct_busy(mnt, root, timeout, do_now)) { 351 struct autofs_info *ino = autofs4_dentry_ino(root); 352 ino->flags |= AUTOFS_INF_EXPIRING; ··· 351 spin_unlock(&sbi->fs_lock); 352 return root; 353 } 354 - managed_dentry_clear_transit(root); 355 spin_unlock(&sbi->fs_lock); 356 dput(root); 357 ··· 385 timeout = sbi->exp_timeout; 386 387 dentry = NULL; 388 - while ((dentry = get_next_positive_dentry(dentry, root))) { 389 spin_lock(&sbi->fs_lock); 390 ino = autofs4_dentry_ino(dentry); 391 /* No point expiring a pending mount */ 392 if (ino->flags & AUTOFS_INF_PENDING) 393 - goto cont; 394 - managed_dentry_set_transit(dentry); 395 396 /* 397 * Case 1: (i) indirect mount or top level pseudo direct mount ··· 450 } 451 } 452 next: 453 - managed_dentry_clear_transit(dentry); 454 - cont: 455 spin_unlock(&sbi->fs_lock); 456 } 457 return NULL; ··· 461 ino->flags |= AUTOFS_INF_EXPIRING; 462 init_completion(&ino->expire_complete); 463 spin_unlock(&sbi->fs_lock); 464 - spin_lock(&autofs4_lock); 465 spin_lock(&expired->d_parent->d_lock); 466 spin_lock_nested(&expired->d_lock, DENTRY_D_LOCK_NESTED); 467 list_move(&expired->d_parent->d_subdirs, &expired->d_u.d_child); 468 spin_unlock(&expired->d_lock); 469 spin_unlock(&expired->d_parent->d_lock); 470 - spin_unlock(&autofs4_lock); 471 return expired; 472 } 473 ··· 530 spin_lock(&sbi->fs_lock); 531 ino = autofs4_dentry_ino(dentry); 532 ino->flags &= ~AUTOFS_INF_EXPIRING; 533 - if (!d_unhashed(dentry)) 534 - managed_dentry_clear_transit(dentry); 535 complete_all(&ino->expire_complete); 536 spin_unlock(&sbi->fs_lock); 537 ··· 557 spin_lock(&sbi->fs_lock); 558 ino->flags &= ~AUTOFS_INF_EXPIRING; 559 spin_lock(&dentry->d_lock); 560 - if (ret) 561 - __managed_dentry_clear_transit(dentry); 562 - else { 563 if ((IS_ROOT(dentry) || 564 (autofs_type_indirect(sbi->type) && 565 IS_ROOT(dentry->d_parent))) &&
··· 87 } 88 89 /* 90 + * Calculate and dget next entry in the subdirs list under root. 91 + */ 92 + static struct dentry *get_next_positive_subdir(struct dentry *prev, 93 + struct dentry *root) 94 + { 95 + struct autofs_sb_info *sbi = autofs4_sbi(root->d_sb); 96 + struct list_head *next; 97 + struct dentry *p, *q; 98 + 99 + spin_lock(&sbi->lookup_lock); 100 + 101 + if (prev == NULL) { 102 + spin_lock(&root->d_lock); 103 + prev = dget_dlock(root); 104 + next = prev->d_subdirs.next; 105 + p = prev; 106 + goto start; 107 + } 108 + 109 + p = prev; 110 + spin_lock(&p->d_lock); 111 + again: 112 + next = p->d_u.d_child.next; 113 + start: 114 + if (next == &root->d_subdirs) { 115 + spin_unlock(&p->d_lock); 116 + spin_unlock(&sbi->lookup_lock); 117 + dput(prev); 118 + return NULL; 119 + } 120 + 121 + q = list_entry(next, struct dentry, d_u.d_child); 122 + 123 + spin_lock_nested(&q->d_lock, DENTRY_D_LOCK_NESTED); 124 + /* Negative dentry - try next */ 125 + if (!simple_positive(q)) { 126 + spin_unlock(&p->d_lock); 127 + p = q; 128 + goto again; 129 + } 130 + dget_dlock(q); 131 + spin_unlock(&q->d_lock); 132 + spin_unlock(&p->d_lock); 133 + spin_unlock(&sbi->lookup_lock); 134 + 135 + dput(prev); 136 + 137 + return q; 138 + } 139 + 140 + /* 141 * Calculate and dget next entry in top down tree traversal. 142 */ 143 static struct dentry *get_next_positive_dentry(struct dentry *prev, 144 struct dentry *root) 145 { 146 + struct autofs_sb_info *sbi = autofs4_sbi(root->d_sb); 147 struct list_head *next; 148 struct dentry *p, *ret; 149 150 if (prev == NULL) 151 return dget(root); 152 153 + spin_lock(&sbi->lookup_lock); 154 relock: 155 p = prev; 156 spin_lock(&p->d_lock); ··· 110 111 if (p == root) { 112 spin_unlock(&p->d_lock); 113 + spin_unlock(&sbi->lookup_lock); 114 dput(prev); 115 return NULL; 116 } ··· 140 dget_dlock(ret); 141 spin_unlock(&ret->d_lock); 142 spin_unlock(&p->d_lock); 143 + spin_unlock(&sbi->lookup_lock); 144 145 dput(prev); 146 ··· 290 spin_lock(&sbi->fs_lock); 291 ino = autofs4_dentry_ino(root); 292 /* No point expiring a pending mount */ 293 + if (ino->flags & AUTOFS_INF_PENDING) 294 + goto out; 295 if (!autofs4_direct_busy(mnt, root, timeout, do_now)) { 296 struct autofs_info *ino = autofs4_dentry_ino(root); 297 ino->flags |= AUTOFS_INF_EXPIRING; ··· 302 spin_unlock(&sbi->fs_lock); 303 return root; 304 } 305 + out: 306 spin_unlock(&sbi->fs_lock); 307 dput(root); 308 ··· 336 timeout = sbi->exp_timeout; 337 338 dentry = NULL; 339 + while ((dentry = get_next_positive_subdir(dentry, root))) { 340 spin_lock(&sbi->fs_lock); 341 ino = autofs4_dentry_ino(dentry); 342 /* No point expiring a pending mount */ 343 if (ino->flags & AUTOFS_INF_PENDING) 344 + goto next; 345 346 /* 347 * Case 1: (i) indirect mount or top level pseudo direct mount ··· 402 } 403 } 404 next: 405 spin_unlock(&sbi->fs_lock); 406 } 407 return NULL; ··· 415 ino->flags |= AUTOFS_INF_EXPIRING; 416 init_completion(&ino->expire_complete); 417 spin_unlock(&sbi->fs_lock); 418 + spin_lock(&sbi->lookup_lock); 419 spin_lock(&expired->d_parent->d_lock); 420 spin_lock_nested(&expired->d_lock, DENTRY_D_LOCK_NESTED); 421 list_move(&expired->d_parent->d_subdirs, &expired->d_u.d_child); 422 spin_unlock(&expired->d_lock); 423 spin_unlock(&expired->d_parent->d_lock); 424 + spin_unlock(&sbi->lookup_lock); 425 return expired; 426 } 427 ··· 484 spin_lock(&sbi->fs_lock); 485 ino = autofs4_dentry_ino(dentry); 486 ino->flags &= ~AUTOFS_INF_EXPIRING; 487 complete_all(&ino->expire_complete); 488 spin_unlock(&sbi->fs_lock); 489 ··· 513 
spin_lock(&sbi->fs_lock); 514 ino->flags &= ~AUTOFS_INF_EXPIRING; 515 spin_lock(&dentry->d_lock); 516 + if (!ret) { 517 if ((IS_ROOT(dentry) || 518 (autofs_type_indirect(sbi->type) && 519 IS_ROOT(dentry->d_parent))) &&
+21 -41
fs/autofs4/root.c
··· 23 24 #include "autofs_i.h" 25 26 - DEFINE_SPINLOCK(autofs4_lock); 27 - 28 static int autofs4_dir_symlink(struct inode *,struct dentry *,const char *); 29 static int autofs4_dir_unlink(struct inode *,struct dentry *); 30 static int autofs4_dir_rmdir(struct inode *,struct dentry *); ··· 123 * autofs file system so just let the libfs routines handle 124 * it. 125 */ 126 - spin_lock(&autofs4_lock); 127 spin_lock(&dentry->d_lock); 128 if (!d_mountpoint(dentry) && list_empty(&dentry->d_subdirs)) { 129 spin_unlock(&dentry->d_lock); 130 - spin_unlock(&autofs4_lock); 131 return -ENOENT; 132 } 133 spin_unlock(&dentry->d_lock); 134 - spin_unlock(&autofs4_lock); 135 136 out: 137 return dcache_dir_open(inode, file); ··· 169 const unsigned char *str = name->name; 170 struct list_head *p, *head; 171 172 - spin_lock(&autofs4_lock); 173 spin_lock(&sbi->lookup_lock); 174 head = &sbi->active_list; 175 list_for_each(p, head) { ··· 201 dget_dlock(active); 202 spin_unlock(&active->d_lock); 203 spin_unlock(&sbi->lookup_lock); 204 - spin_unlock(&autofs4_lock); 205 return active; 206 } 207 next: 208 spin_unlock(&active->d_lock); 209 } 210 spin_unlock(&sbi->lookup_lock); 211 - spin_unlock(&autofs4_lock); 212 213 return NULL; 214 } ··· 221 const unsigned char *str = name->name; 222 struct list_head *p, *head; 223 224 - spin_lock(&autofs4_lock); 225 spin_lock(&sbi->lookup_lock); 226 head = &sbi->expiring_list; 227 list_for_each(p, head) { ··· 253 dget_dlock(expiring); 254 spin_unlock(&expiring->d_lock); 255 spin_unlock(&sbi->lookup_lock); 256 - spin_unlock(&autofs4_lock); 257 return expiring; 258 } 259 next: 260 spin_unlock(&expiring->d_lock); 261 } 262 spin_unlock(&sbi->lookup_lock); 263 - spin_unlock(&autofs4_lock); 264 265 return NULL; 266 } ··· 267 { 268 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); 269 struct autofs_info *ino = autofs4_dentry_ino(dentry); 270 - int status; 271 272 if (ino->flags & AUTOFS_INF_PENDING) { 273 DPRINTK("waiting for mount name=%.*s", 274 dentry->d_name.len, dentry->d_name.name); 275 status = autofs4_wait(sbi, dentry, NFY_MOUNT); 276 DPRINTK("mount wait done status=%d", status); 277 - ino->last_used = jiffies; 278 - return status; 279 } 280 - return 0; 281 } 282 283 static int do_expire_wait(struct dentry *dentry) ··· 310 */ 311 if (autofs_type_indirect(sbi->type) && d_unhashed(dentry)) { 312 struct dentry *parent = dentry->d_parent; 313 struct dentry *new = d_lookup(parent, &dentry->d_name); 314 if (!new) 315 return NULL; 316 dput(path->dentry); 317 path->dentry = new; 318 } ··· 331 332 DPRINTK("dentry=%p %.*s", 333 dentry, dentry->d_name.len, dentry->d_name.name); 334 - 335 - /* 336 - * Someone may have manually umounted this or it was a submount 337 - * that has gone away. 338 - */ 339 - spin_lock(&dentry->d_lock); 340 - if (!d_mountpoint(dentry) && list_empty(&dentry->d_subdirs)) { 341 - if (!(dentry->d_flags & DCACHE_MANAGE_TRANSIT) && 342 - (dentry->d_flags & DCACHE_NEED_AUTOMOUNT)) 343 - __managed_dentry_set_transit(path->dentry); 344 - } 345 - spin_unlock(&dentry->d_lock); 346 347 /* The daemon never triggers a mount. */ 348 if (autofs4_oz_mode(sbi)) ··· 400 done: 401 if (!(ino->flags & AUTOFS_INF_EXPIRING)) { 402 /* 403 - * Any needed mounting has been completed and the path updated 404 - * so turn this into a normal dentry so we don't continually 405 - * call ->d_automount() and ->d_manage(). 
406 - */ 407 - spin_lock(&dentry->d_lock); 408 - __managed_dentry_clear_transit(dentry); 409 - /* 410 * Only clear DMANAGED_AUTOMOUNT for rootless multi-mounts and 411 * symlinks as in all other cases the dentry will be covered by 412 * an actual mount so ->d_automount() won't be called during 413 * the follow. 414 */ 415 if ((!d_mountpoint(dentry) && 416 !list_empty(&dentry->d_subdirs)) || 417 (dentry->d_inode && S_ISLNK(dentry->d_inode->i_mode))) ··· 436 437 /* The daemon never waits. */ 438 if (autofs4_oz_mode(sbi)) { 439 if (!d_mountpoint(dentry)) 440 return -EISDIR; 441 return 0; ··· 595 596 dir->i_mtime = CURRENT_TIME; 597 598 - spin_lock(&autofs4_lock); 599 - autofs4_add_expiring(dentry); 600 spin_lock(&dentry->d_lock); 601 __d_drop(dentry); 602 spin_unlock(&dentry->d_lock); 603 - spin_unlock(&autofs4_lock); 604 605 return 0; 606 } ··· 669 if (!autofs4_oz_mode(sbi)) 670 return -EACCES; 671 672 - spin_lock(&autofs4_lock); 673 spin_lock(&sbi->lookup_lock); 674 spin_lock(&dentry->d_lock); 675 if (!list_empty(&dentry->d_subdirs)) { 676 spin_unlock(&dentry->d_lock); 677 spin_unlock(&sbi->lookup_lock); 678 - spin_unlock(&autofs4_lock); 679 return -ENOTEMPTY; 680 } 681 __autofs4_add_expiring(dentry); 682 - spin_unlock(&sbi->lookup_lock); 683 __d_drop(dentry); 684 spin_unlock(&dentry->d_lock); 685 - spin_unlock(&autofs4_lock); 686 687 if (sbi->version < 5) 688 autofs_clear_leaf_automount_flags(dentry);
··· 23 24 #include "autofs_i.h" 25 26 static int autofs4_dir_symlink(struct inode *,struct dentry *,const char *); 27 static int autofs4_dir_unlink(struct inode *,struct dentry *); 28 static int autofs4_dir_rmdir(struct inode *,struct dentry *); ··· 125 * autofs file system so just let the libfs routines handle 126 * it. 127 */ 128 + spin_lock(&sbi->lookup_lock); 129 spin_lock(&dentry->d_lock); 130 if (!d_mountpoint(dentry) && list_empty(&dentry->d_subdirs)) { 131 spin_unlock(&dentry->d_lock); 132 + spin_unlock(&sbi->lookup_lock); 133 return -ENOENT; 134 } 135 spin_unlock(&dentry->d_lock); 136 + spin_unlock(&sbi->lookup_lock); 137 138 out: 139 return dcache_dir_open(inode, file); ··· 171 const unsigned char *str = name->name; 172 struct list_head *p, *head; 173 174 spin_lock(&sbi->lookup_lock); 175 head = &sbi->active_list; 176 list_for_each(p, head) { ··· 204 dget_dlock(active); 205 spin_unlock(&active->d_lock); 206 spin_unlock(&sbi->lookup_lock); 207 return active; 208 } 209 next: 210 spin_unlock(&active->d_lock); 211 } 212 spin_unlock(&sbi->lookup_lock); 213 214 return NULL; 215 } ··· 226 const unsigned char *str = name->name; 227 struct list_head *p, *head; 228 229 spin_lock(&sbi->lookup_lock); 230 head = &sbi->expiring_list; 231 list_for_each(p, head) { ··· 259 dget_dlock(expiring); 260 spin_unlock(&expiring->d_lock); 261 spin_unlock(&sbi->lookup_lock); 262 return expiring; 263 } 264 next: 265 spin_unlock(&expiring->d_lock); 266 } 267 spin_unlock(&sbi->lookup_lock); 268 269 return NULL; 270 } ··· 275 { 276 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); 277 struct autofs_info *ino = autofs4_dentry_ino(dentry); 278 + int status = 0; 279 280 if (ino->flags & AUTOFS_INF_PENDING) { 281 DPRINTK("waiting for mount name=%.*s", 282 dentry->d_name.len, dentry->d_name.name); 283 status = autofs4_wait(sbi, dentry, NFY_MOUNT); 284 DPRINTK("mount wait done status=%d", status); 285 } 286 + ino->last_used = jiffies; 287 + return status; 288 } 289 290 static int do_expire_wait(struct dentry *dentry) ··· 319 */ 320 if (autofs_type_indirect(sbi->type) && d_unhashed(dentry)) { 321 struct dentry *parent = dentry->d_parent; 322 + struct autofs_info *ino; 323 struct dentry *new = d_lookup(parent, &dentry->d_name); 324 if (!new) 325 return NULL; 326 + ino = autofs4_dentry_ino(new); 327 + ino->last_used = jiffies; 328 dput(path->dentry); 329 path->dentry = new; 330 } ··· 337 338 DPRINTK("dentry=%p %.*s", 339 dentry, dentry->d_name.len, dentry->d_name.name); 340 341 /* The daemon never triggers a mount. */ 342 if (autofs4_oz_mode(sbi)) ··· 418 done: 419 if (!(ino->flags & AUTOFS_INF_EXPIRING)) { 420 /* 421 + * Any needed mounting has been completed and the path 422 + * updated so clear DCACHE_NEED_AUTOMOUNT so we don't 423 + * call ->d_automount() on rootless multi-mounts since 424 + * it can lead to an incorrect ELOOP error return. 425 + * 426 * Only clear DMANAGED_AUTOMOUNT for rootless multi-mounts and 427 * symlinks as in all other cases the dentry will be covered by 428 * an actual mount so ->d_automount() won't be called during 429 * the follow. 430 */ 431 + spin_lock(&dentry->d_lock); 432 if ((!d_mountpoint(dentry) && 433 !list_empty(&dentry->d_subdirs)) || 434 (dentry->d_inode && S_ISLNK(dentry->d_inode->i_mode))) ··· 455 456 /* The daemon never waits. 
*/ 457 if (autofs4_oz_mode(sbi)) { 458 + if (rcu_walk) 459 + return 0; 460 if (!d_mountpoint(dentry)) 461 return -EISDIR; 462 return 0; ··· 612 613 dir->i_mtime = CURRENT_TIME; 614 615 + spin_lock(&sbi->lookup_lock); 616 + __autofs4_add_expiring(dentry); 617 spin_lock(&dentry->d_lock); 618 __d_drop(dentry); 619 spin_unlock(&dentry->d_lock); 620 + spin_unlock(&sbi->lookup_lock); 621 622 return 0; 623 } ··· 686 if (!autofs4_oz_mode(sbi)) 687 return -EACCES; 688 689 spin_lock(&sbi->lookup_lock); 690 spin_lock(&dentry->d_lock); 691 if (!list_empty(&dentry->d_subdirs)) { 692 spin_unlock(&dentry->d_lock); 693 spin_unlock(&sbi->lookup_lock); 694 return -ENOTEMPTY; 695 } 696 __autofs4_add_expiring(dentry); 697 __d_drop(dentry); 698 spin_unlock(&dentry->d_lock); 699 + spin_unlock(&sbi->lookup_lock); 700 701 if (sbi->version < 5) 702 autofs_clear_leaf_automount_flags(dentry);
+3 -3
fs/autofs4/waitq.c
··· 197 198 seq = read_seqbegin(&rename_lock); 199 rcu_read_lock(); 200 - spin_lock(&autofs4_lock); 201 for (tmp = dentry ; tmp != root ; tmp = tmp->d_parent) 202 len += tmp->d_name.len + 1; 203 204 if (!len || --len > NAME_MAX) { 205 - spin_unlock(&autofs4_lock); 206 rcu_read_unlock(); 207 if (read_seqretry(&rename_lock, seq)) 208 goto rename_retry; ··· 218 p -= tmp->d_name.len; 219 strncpy(p, tmp->d_name.name, tmp->d_name.len); 220 } 221 - spin_unlock(&autofs4_lock); 222 rcu_read_unlock(); 223 if (read_seqretry(&rename_lock, seq)) 224 goto rename_retry;
··· 197 198 seq = read_seqbegin(&rename_lock); 199 rcu_read_lock(); 200 + spin_lock(&sbi->fs_lock); 201 for (tmp = dentry ; tmp != root ; tmp = tmp->d_parent) 202 len += tmp->d_name.len + 1; 203 204 if (!len || --len > NAME_MAX) { 205 + spin_unlock(&sbi->fs_lock); 206 rcu_read_unlock(); 207 if (read_seqretry(&rename_lock, seq)) 208 goto rename_retry; ··· 218 p -= tmp->d_name.len; 219 strncpy(p, tmp->d_name.name, tmp->d_name.len); 220 } 221 + spin_unlock(&sbi->fs_lock); 222 rcu_read_unlock(); 223 if (read_seqretry(&rename_lock, seq)) 224 goto rename_retry;
+4 -2
fs/block_dev.c
··· 55 static void bdev_inode_switch_bdi(struct inode *inode, 56 struct backing_dev_info *dst) 57 { 58 - spin_lock(&inode_lock); 59 inode->i_data.backing_dev_info = dst; 60 if (inode->i_state & I_DIRTY) 61 list_move(&inode->i_wb_list, &dst->wb.b_dirty); 62 - spin_unlock(&inode_lock); 63 } 64 65 static sector_t max_block(struct block_device *bdev)
··· 55 static void bdev_inode_switch_bdi(struct inode *inode, 56 struct backing_dev_info *dst) 57 { 58 + spin_lock(&inode_wb_list_lock); 59 + spin_lock(&inode->i_lock); 60 inode->i_data.backing_dev_info = dst; 61 if (inode->i_state & I_DIRTY) 62 list_move(&inode->i_wb_list, &dst->wb.b_dirty); 63 + spin_unlock(&inode->i_lock); 64 + spin_unlock(&inode_wb_list_lock); 65 } 66 67 static sector_t max_block(struct block_device *bdev)
+1 -1
fs/buffer.c
··· 1138 * inode list. 1139 * 1140 * mark_buffer_dirty() is atomic. It takes bh->b_page->mapping->private_lock, 1141 - * mapping->tree_lock and the global inode_lock. 1142 */ 1143 void mark_buffer_dirty(struct buffer_head *bh) 1144 {
··· 1138 * inode list. 1139 * 1140 * mark_buffer_dirty() is atomic. It takes bh->b_page->mapping->private_lock, 1141 + * mapping->tree_lock and mapping->host->i_lock. 1142 */ 1143 void mark_buffer_dirty(struct buffer_head *bh) 1144 {
+11 -7
fs/drop_caches.c
··· 8 #include <linux/writeback.h> 9 #include <linux/sysctl.h> 10 #include <linux/gfp.h> 11 12 /* A global variable is a bit ugly, but it keeps the code simple */ 13 int sysctl_drop_caches; ··· 17 { 18 struct inode *inode, *toput_inode = NULL; 19 20 - spin_lock(&inode_lock); 21 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { 22 - if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) 23 continue; 24 - if (inode->i_mapping->nrpages == 0) 25 - continue; 26 __iget(inode); 27 - spin_unlock(&inode_lock); 28 invalidate_mapping_pages(inode->i_mapping, 0, -1); 29 iput(toput_inode); 30 toput_inode = inode; 31 - spin_lock(&inode_lock); 32 } 33 - spin_unlock(&inode_lock); 34 iput(toput_inode); 35 } 36
··· 8 #include <linux/writeback.h> 9 #include <linux/sysctl.h> 10 #include <linux/gfp.h> 11 + #include "internal.h" 12 13 /* A global variable is a bit ugly, but it keeps the code simple */ 14 int sysctl_drop_caches; ··· 16 { 17 struct inode *inode, *toput_inode = NULL; 18 19 + spin_lock(&inode_sb_list_lock); 20 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { 21 + spin_lock(&inode->i_lock); 22 + if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) || 23 + (inode->i_mapping->nrpages == 0)) { 24 + spin_unlock(&inode->i_lock); 25 continue; 26 + } 27 __iget(inode); 28 + spin_unlock(&inode->i_lock); 29 + spin_unlock(&inode_sb_list_lock); 30 invalidate_mapping_pages(inode->i_mapping, 0, -1); 31 iput(toput_inode); 32 toput_inode = inode; 33 + spin_lock(&inode_sb_list_lock); 34 } 35 + spin_unlock(&inode_sb_list_lock); 36 iput(toput_inode); 37 } 38
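
Because the hunk above is rendered inline, here is the resulting drop_pagecache_sb() laid out conventionally. It shows the per-superblock inode walk the series converges on: nest inode->i_lock inside inode_sb_list_lock, pin the inode with __iget(), then drop both locks before doing anything that can sleep, deferring the iput() of the previously pinned inode. The function signature is not visible in the hunk and is filled in from context, so treat this as a reconstruction rather than an authoritative copy.

static void drop_pagecache_sb(struct super_block *sb, void *unused)
{
	struct inode *inode, *toput_inode = NULL;

	spin_lock(&inode_sb_list_lock);
	list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
		spin_lock(&inode->i_lock);
		if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) ||
		    (inode->i_mapping->nrpages == 0)) {
			spin_unlock(&inode->i_lock);
			continue;
		}
		__iget(inode);
		spin_unlock(&inode->i_lock);
		spin_unlock(&inode_sb_list_lock);

		invalidate_mapping_pages(inode->i_mapping, 0, -1);
		iput(toput_inode);	/* safe to sleep here: no locks held */
		toput_inode = inode;

		spin_lock(&inode_sb_list_lock);
	}
	spin_unlock(&inode_sb_list_lock);
	iput(toput_inode);
}

The hunk also adds #include "internal.h", which is where inode_sb_list_lock is declared now that the list is no longer covered by the public inode_lock.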
+91 -50
fs/fs-writeback.c
··· 176 } 177 178 /* 179 * Redirty an inode: set its when-it-was dirtied timestamp and move it to the 180 * furthest end of its superblock's dirty-inode list. 181 * ··· 199 { 200 struct bdi_writeback *wb = &inode_to_bdi(inode)->wb; 201 202 if (!list_empty(&wb->b_dirty)) { 203 struct inode *tail; 204 ··· 217 { 218 struct bdi_writeback *wb = &inode_to_bdi(inode)->wb; 219 220 list_move(&inode->i_wb_list, &wb->b_more_io); 221 } 222 223 static void inode_sync_complete(struct inode *inode) 224 { 225 /* 226 - * Prevent speculative execution through spin_unlock(&inode_lock); 227 */ 228 smp_mb(); 229 wake_up_bit(&inode->i_state, __I_SYNC); 230 } ··· 301 */ 302 static void queue_io(struct bdi_writeback *wb, unsigned long *older_than_this) 303 { 304 list_splice_init(&wb->b_more_io, &wb->b_io); 305 move_expired_inodes(&wb->b_dirty, &wb->b_io, older_than_this); 306 } ··· 322 wait_queue_head_t *wqh; 323 324 wqh = bit_waitqueue(&inode->i_state, __I_SYNC); 325 - while (inode->i_state & I_SYNC) { 326 - spin_unlock(&inode_lock); 327 __wait_on_bit(wqh, &wq, inode_wait, TASK_UNINTERRUPTIBLE); 328 - spin_lock(&inode_lock); 329 } 330 } 331 332 /* 333 - * Write out an inode's dirty pages. Called under inode_lock. Either the 334 - * caller has ref on the inode (either via __iget or via syscall against an fd) 335 - * or the inode has I_WILL_FREE set (via generic_forget_inode) 336 * 337 * If `wait' is set, wait on the writeout. 338 * 339 * The whole writeout design is quite complex and fragile. We want to avoid 340 * starvation of particular inodes when others are being redirtied, prevent 341 * livelocks, etc. 342 - * 343 - * Called under inode_lock. 344 */ 345 static int 346 writeback_single_inode(struct inode *inode, struct writeback_control *wbc) ··· 348 struct address_space *mapping = inode->i_mapping; 349 unsigned dirty; 350 int ret; 351 352 if (!atomic_read(&inode->i_count)) 353 WARN_ON(!(inode->i_state & (I_WILL_FREE|I_FREEING))); ··· 382 /* Set I_SYNC, reset I_DIRTY_PAGES */ 383 inode->i_state |= I_SYNC; 384 inode->i_state &= ~I_DIRTY_PAGES; 385 - spin_unlock(&inode_lock); 386 387 ret = do_writepages(mapping, wbc); 388 ··· 403 * due to delalloc, clear dirty metadata flags right before 404 * write_inode() 405 */ 406 - spin_lock(&inode_lock); 407 dirty = inode->i_state & I_DIRTY; 408 inode->i_state &= ~(I_DIRTY_SYNC | I_DIRTY_DATASYNC); 409 - spin_unlock(&inode_lock); 410 /* Don't write the inode if only I_DIRTY_PAGES was set */ 411 if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) { 412 int err = write_inode(inode, wbc); ··· 414 ret = err; 415 } 416 417 - spin_lock(&inode_lock); 418 inode->i_state &= ~I_SYNC; 419 if (!(inode->i_state & I_FREEING)) { 420 if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { ··· 527 * kind does not need peridic writeout yet, and for the latter 528 * kind writeout is handled by the freer. 529 */ 530 if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) { 531 requeue_io(inode); 532 continue; 533 } ··· 538 * Was this inode dirtied after sync_sb_inodes was called? 539 * This keeps sync from extra jobs and livelock. 
540 */ 541 - if (inode_dirtied_after(inode, wbc->wb_start)) 542 return 1; 543 544 __iget(inode); 545 pages_skipped = wbc->pages_skipped; 546 writeback_single_inode(inode, wbc); 547 if (wbc->pages_skipped != pages_skipped) { ··· 554 */ 555 redirty_tail(inode); 556 } 557 - spin_unlock(&inode_lock); 558 iput(inode); 559 cond_resched(); 560 - spin_lock(&inode_lock); 561 if (wbc->nr_to_write <= 0) { 562 wbc->more_io = 1; 563 return 1; ··· 577 578 if (!wbc->wb_start) 579 wbc->wb_start = jiffies; /* livelock avoidance */ 580 - spin_lock(&inode_lock); 581 if (!wbc->for_kupdate || list_empty(&wb->b_io)) 582 queue_io(wb, wbc->older_than_this); 583 ··· 595 if (ret) 596 break; 597 } 598 - spin_unlock(&inode_lock); 599 /* Leave any unwritten inodes on b_io */ 600 } 601 ··· 604 { 605 WARN_ON(!rwsem_is_locked(&sb->s_umount)); 606 607 - spin_lock(&inode_lock); 608 if (!wbc->for_kupdate || list_empty(&wb->b_io)) 609 queue_io(wb, wbc->older_than_this); 610 writeback_sb_inodes(sb, wb, wbc, true); 611 - spin_unlock(&inode_lock); 612 } 613 614 /* ··· 747 * become available for writeback. Otherwise 748 * we'll just busyloop. 749 */ 750 - spin_lock(&inode_lock); 751 if (!list_empty(&wb->b_more_io)) { 752 inode = wb_inode(wb->b_more_io.prev); 753 trace_wbc_writeback_wait(&wbc, wb->bdi); 754 inode_wait_for_writeback(inode); 755 } 756 - spin_unlock(&inode_lock); 757 } 758 759 return wrote; ··· 1021 { 1022 struct super_block *sb = inode->i_sb; 1023 struct backing_dev_info *bdi = NULL; 1024 - bool wakeup_bdi = false; 1025 1026 /* 1027 * Don't do this for I_DIRTY_PAGES - that doesn't actually ··· 1044 if (unlikely(block_dump)) 1045 block_dump___mark_inode_dirty(inode); 1046 1047 - spin_lock(&inode_lock); 1048 if ((inode->i_state & flags) != flags) { 1049 const int was_dirty = inode->i_state & I_DIRTY; 1050 ··· 1056 * superblock list, based upon its state. 1057 */ 1058 if (inode->i_state & I_SYNC) 1059 - goto out; 1060 1061 /* 1062 * Only add valid (hashed) inodes to the superblock's ··· 1064 */ 1065 if (!S_ISBLK(inode->i_mode)) { 1066 if (inode_unhashed(inode)) 1067 - goto out; 1068 } 1069 if (inode->i_state & I_FREEING) 1070 - goto out; 1071 1072 /* 1073 * If the inode was already on b_dirty/b_io/b_more_io, don't 1074 * reposition it (that would break b_dirty time-ordering). 1075 */ 1076 if (!was_dirty) { 1077 bdi = inode_to_bdi(inode); 1078 1079 if (bdi_cap_writeback_dirty(bdi)) { ··· 1091 wakeup_bdi = true; 1092 } 1093 1094 inode->dirtied_when = jiffies; 1095 list_move(&inode->i_wb_list, &bdi->wb.b_dirty); 1096 } 1097 } 1098 - out: 1099 - spin_unlock(&inode_lock); 1100 1101 - if (wakeup_bdi) 1102 - bdi_wakeup_thread_delayed(bdi); 1103 } 1104 EXPORT_SYMBOL(__mark_inode_dirty); 1105 ··· 1135 */ 1136 WARN_ON(!rwsem_is_locked(&sb->s_umount)); 1137 1138 - spin_lock(&inode_lock); 1139 1140 /* 1141 * Data integrity sync. Must wait for all pages under writeback, ··· 1145 * we still have to wait for that writeout. 1146 */ 1147 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { 1148 - struct address_space *mapping; 1149 1150 - if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) 1151 continue; 1152 - mapping = inode->i_mapping; 1153 - if (mapping->nrpages == 0) 1154 - continue; 1155 __iget(inode); 1156 - spin_unlock(&inode_lock); 1157 /* 1158 - * We hold a reference to 'inode' so it couldn't have 1159 - * been removed from s_inodes list while we dropped the 1160 - * inode_lock. We cannot iput the inode now as we can 1161 - * be holding the last reference and we cannot iput it 1162 - * under inode_lock. 
So we keep the reference and iput 1163 - * it later. 1164 */ 1165 iput(old_inode); 1166 old_inode = inode; ··· 1172 1173 cond_resched(); 1174 1175 - spin_lock(&inode_lock); 1176 } 1177 - spin_unlock(&inode_lock); 1178 iput(old_inode); 1179 } 1180 ··· 1308 wbc.nr_to_write = 0; 1309 1310 might_sleep(); 1311 - spin_lock(&inode_lock); 1312 ret = writeback_single_inode(inode, &wbc); 1313 - spin_unlock(&inode_lock); 1314 if (sync) 1315 inode_sync_wait(inode); 1316 return ret; ··· 1334 { 1335 int ret; 1336 1337 - spin_lock(&inode_lock); 1338 ret = writeback_single_inode(inode, wbc); 1339 - spin_unlock(&inode_lock); 1340 return ret; 1341 } 1342 EXPORT_SYMBOL(sync_inode);
··· 176 } 177 178 /* 179 + * Remove the inode from the writeback list it is on. 180 + */ 181 + void inode_wb_list_del(struct inode *inode) 182 + { 183 + spin_lock(&inode_wb_list_lock); 184 + list_del_init(&inode->i_wb_list); 185 + spin_unlock(&inode_wb_list_lock); 186 + } 187 + 188 + 189 + /* 190 * Redirty an inode: set its when-it-was dirtied timestamp and move it to the 191 * furthest end of its superblock's dirty-inode list. 192 * ··· 188 { 189 struct bdi_writeback *wb = &inode_to_bdi(inode)->wb; 190 191 + assert_spin_locked(&inode_wb_list_lock); 192 if (!list_empty(&wb->b_dirty)) { 193 struct inode *tail; 194 ··· 205 { 206 struct bdi_writeback *wb = &inode_to_bdi(inode)->wb; 207 208 + assert_spin_locked(&inode_wb_list_lock); 209 list_move(&inode->i_wb_list, &wb->b_more_io); 210 } 211 212 static void inode_sync_complete(struct inode *inode) 213 { 214 /* 215 + * Prevent speculative execution through 216 + * spin_unlock(&inode_wb_list_lock); 217 */ 218 + 219 smp_mb(); 220 wake_up_bit(&inode->i_state, __I_SYNC); 221 } ··· 286 */ 287 static void queue_io(struct bdi_writeback *wb, unsigned long *older_than_this) 288 { 289 + assert_spin_locked(&inode_wb_list_lock); 290 list_splice_init(&wb->b_more_io, &wb->b_io); 291 move_expired_inodes(&wb->b_dirty, &wb->b_io, older_than_this); 292 } ··· 306 wait_queue_head_t *wqh; 307 308 wqh = bit_waitqueue(&inode->i_state, __I_SYNC); 309 + while (inode->i_state & I_SYNC) { 310 + spin_unlock(&inode->i_lock); 311 + spin_unlock(&inode_wb_list_lock); 312 __wait_on_bit(wqh, &wq, inode_wait, TASK_UNINTERRUPTIBLE); 313 + spin_lock(&inode_wb_list_lock); 314 + spin_lock(&inode->i_lock); 315 } 316 } 317 318 /* 319 + * Write out an inode's dirty pages. Called under inode_wb_list_lock and 320 + * inode->i_lock. Either the caller has an active reference on the inode or 321 + * the inode has I_WILL_FREE set. 322 * 323 * If `wait' is set, wait on the writeout. 324 * 325 * The whole writeout design is quite complex and fragile. We want to avoid 326 * starvation of particular inodes when others are being redirtied, prevent 327 * livelocks, etc. 328 */ 329 static int 330 writeback_single_inode(struct inode *inode, struct writeback_control *wbc) ··· 332 struct address_space *mapping = inode->i_mapping; 333 unsigned dirty; 334 int ret; 335 + 336 + assert_spin_locked(&inode_wb_list_lock); 337 + assert_spin_locked(&inode->i_lock); 338 339 if (!atomic_read(&inode->i_count)) 340 WARN_ON(!(inode->i_state & (I_WILL_FREE|I_FREEING))); ··· 363 /* Set I_SYNC, reset I_DIRTY_PAGES */ 364 inode->i_state |= I_SYNC; 365 inode->i_state &= ~I_DIRTY_PAGES; 366 + spin_unlock(&inode->i_lock); 367 + spin_unlock(&inode_wb_list_lock); 368 369 ret = do_writepages(mapping, wbc); 370 ··· 383 * due to delalloc, clear dirty metadata flags right before 384 * write_inode() 385 */ 386 + spin_lock(&inode->i_lock); 387 dirty = inode->i_state & I_DIRTY; 388 inode->i_state &= ~(I_DIRTY_SYNC | I_DIRTY_DATASYNC); 389 + spin_unlock(&inode->i_lock); 390 /* Don't write the inode if only I_DIRTY_PAGES was set */ 391 if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) { 392 int err = write_inode(inode, wbc); ··· 394 ret = err; 395 } 396 397 + spin_lock(&inode_wb_list_lock); 398 + spin_lock(&inode->i_lock); 399 inode->i_state &= ~I_SYNC; 400 if (!(inode->i_state & I_FREEING)) { 401 if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { ··· 506 * kind does not need peridic writeout yet, and for the latter 507 * kind writeout is handled by the freer. 
508 */ 509 + spin_lock(&inode->i_lock); 510 if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) { 511 + spin_unlock(&inode->i_lock); 512 requeue_io(inode); 513 continue; 514 } ··· 515 * Was this inode dirtied after sync_sb_inodes was called? 516 * This keeps sync from extra jobs and livelock. 517 */ 518 + if (inode_dirtied_after(inode, wbc->wb_start)) { 519 + spin_unlock(&inode->i_lock); 520 return 1; 521 + } 522 523 __iget(inode); 524 + 525 pages_skipped = wbc->pages_skipped; 526 writeback_single_inode(inode, wbc); 527 if (wbc->pages_skipped != pages_skipped) { ··· 528 */ 529 redirty_tail(inode); 530 } 531 + spin_unlock(&inode->i_lock); 532 + spin_unlock(&inode_wb_list_lock); 533 iput(inode); 534 cond_resched(); 535 + spin_lock(&inode_wb_list_lock); 536 if (wbc->nr_to_write <= 0) { 537 wbc->more_io = 1; 538 return 1; ··· 550 551 if (!wbc->wb_start) 552 wbc->wb_start = jiffies; /* livelock avoidance */ 553 + spin_lock(&inode_wb_list_lock); 554 if (!wbc->for_kupdate || list_empty(&wb->b_io)) 555 queue_io(wb, wbc->older_than_this); 556 ··· 568 if (ret) 569 break; 570 } 571 + spin_unlock(&inode_wb_list_lock); 572 /* Leave any unwritten inodes on b_io */ 573 } 574 ··· 577 { 578 WARN_ON(!rwsem_is_locked(&sb->s_umount)); 579 580 + spin_lock(&inode_wb_list_lock); 581 if (!wbc->for_kupdate || list_empty(&wb->b_io)) 582 queue_io(wb, wbc->older_than_this); 583 writeback_sb_inodes(sb, wb, wbc, true); 584 + spin_unlock(&inode_wb_list_lock); 585 } 586 587 /* ··· 720 * become available for writeback. Otherwise 721 * we'll just busyloop. 722 */ 723 + spin_lock(&inode_wb_list_lock); 724 if (!list_empty(&wb->b_more_io)) { 725 inode = wb_inode(wb->b_more_io.prev); 726 trace_wbc_writeback_wait(&wbc, wb->bdi); 727 + spin_lock(&inode->i_lock); 728 inode_wait_for_writeback(inode); 729 + spin_unlock(&inode->i_lock); 730 } 731 + spin_unlock(&inode_wb_list_lock); 732 } 733 734 return wrote; ··· 992 { 993 struct super_block *sb = inode->i_sb; 994 struct backing_dev_info *bdi = NULL; 995 996 /* 997 * Don't do this for I_DIRTY_PAGES - that doesn't actually ··· 1016 if (unlikely(block_dump)) 1017 block_dump___mark_inode_dirty(inode); 1018 1019 + spin_lock(&inode->i_lock); 1020 if ((inode->i_state & flags) != flags) { 1021 const int was_dirty = inode->i_state & I_DIRTY; 1022 ··· 1028 * superblock list, based upon its state. 1029 */ 1030 if (inode->i_state & I_SYNC) 1031 + goto out_unlock_inode; 1032 1033 /* 1034 * Only add valid (hashed) inodes to the superblock's ··· 1036 */ 1037 if (!S_ISBLK(inode->i_mode)) { 1038 if (inode_unhashed(inode)) 1039 + goto out_unlock_inode; 1040 } 1041 if (inode->i_state & I_FREEING) 1042 + goto out_unlock_inode; 1043 1044 /* 1045 * If the inode was already on b_dirty/b_io/b_more_io, don't 1046 * reposition it (that would break b_dirty time-ordering). 1047 */ 1048 if (!was_dirty) { 1049 + bool wakeup_bdi = false; 1050 bdi = inode_to_bdi(inode); 1051 1052 if (bdi_cap_writeback_dirty(bdi)) { ··· 1062 wakeup_bdi = true; 1063 } 1064 1065 + spin_unlock(&inode->i_lock); 1066 + spin_lock(&inode_wb_list_lock); 1067 inode->dirtied_when = jiffies; 1068 list_move(&inode->i_wb_list, &bdi->wb.b_dirty); 1069 + spin_unlock(&inode_wb_list_lock); 1070 + 1071 + if (wakeup_bdi) 1072 + bdi_wakeup_thread_delayed(bdi); 1073 + return; 1074 } 1075 } 1076 + out_unlock_inode: 1077 + spin_unlock(&inode->i_lock); 1078 1079 } 1080 EXPORT_SYMBOL(__mark_inode_dirty); 1081 ··· 1101 */ 1102 WARN_ON(!rwsem_is_locked(&sb->s_umount)); 1103 1104 + spin_lock(&inode_sb_list_lock); 1105 1106 /* 1107 * Data integrity sync. 
Must wait for all pages under writeback, ··· 1111 * we still have to wait for that writeout. 1112 */ 1113 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { 1114 + struct address_space *mapping = inode->i_mapping; 1115 1116 + spin_lock(&inode->i_lock); 1117 + if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) || 1118 + (mapping->nrpages == 0)) { 1119 + spin_unlock(&inode->i_lock); 1120 continue; 1121 + } 1122 __iget(inode); 1123 + spin_unlock(&inode->i_lock); 1124 + spin_unlock(&inode_sb_list_lock); 1125 + 1126 /* 1127 + * We hold a reference to 'inode' so it couldn't have been 1128 + * removed from s_inodes list while we dropped the 1129 + * inode_sb_list_lock. We cannot iput the inode now as we can 1130 + * be holding the last reference and we cannot iput it under 1131 + * inode_sb_list_lock. So we keep the reference and iput it 1132 + * later. 1133 */ 1134 iput(old_inode); 1135 old_inode = inode; ··· 1135 1136 cond_resched(); 1137 1138 + spin_lock(&inode_sb_list_lock); 1139 } 1140 + spin_unlock(&inode_sb_list_lock); 1141 iput(old_inode); 1142 } 1143 ··· 1271 wbc.nr_to_write = 0; 1272 1273 might_sleep(); 1274 + spin_lock(&inode_wb_list_lock); 1275 + spin_lock(&inode->i_lock); 1276 ret = writeback_single_inode(inode, &wbc); 1277 + spin_unlock(&inode->i_lock); 1278 + spin_unlock(&inode_wb_list_lock); 1279 if (sync) 1280 inode_sync_wait(inode); 1281 return ret; ··· 1295 { 1296 int ret; 1297 1298 + spin_lock(&inode_wb_list_lock); 1299 + spin_lock(&inode->i_lock); 1300 ret = writeback_single_inode(inode, wbc); 1301 + spin_unlock(&inode->i_lock); 1302 + spin_unlock(&inode_wb_list_lock); 1303 return ret; 1304 } 1305 EXPORT_SYMBOL(sync_inode);
+319 -351
fs/inode.c
··· 26 #include <linux/posix_acl.h> 27 #include <linux/ima.h> 28 #include <linux/cred.h> 29 30 /* 31 * This is needed for the following functions: ··· 92 93 static unsigned int i_hash_mask __read_mostly; 94 static unsigned int i_hash_shift __read_mostly; 95 96 /* 97 * Each inode can be on two separate lists. One is ··· 108 */ 109 110 static LIST_HEAD(inode_lru); 111 - static struct hlist_head *inode_hashtable __read_mostly; 112 113 - /* 114 - * A simple spinlock to protect the list manipulations. 115 - * 116 - * NOTE! You also have to own the lock if you change 117 - * the i_state of an inode while it is in use.. 118 - */ 119 - DEFINE_SPINLOCK(inode_lock); 120 121 /* 122 * iprune_sem provides exclusion between the icache shrinking and the ··· 165 return proc_dointvec(table, write, buffer, lenp, ppos); 166 } 167 #endif 168 - 169 - static void wake_up_inode(struct inode *inode) 170 - { 171 - /* 172 - * Prevent speculative execution through spin_unlock(&inode_lock); 173 - */ 174 - smp_mb(); 175 - wake_up_bit(&inode->i_state, __I_NEW); 176 - } 177 178 /** 179 * inode_init_always - perform inode structure intialisation ··· 356 } 357 358 /* 359 - * inode_lock must be held 360 */ 361 void __iget(struct inode *inode) 362 { ··· 374 375 static void inode_lru_list_add(struct inode *inode) 376 { 377 if (list_empty(&inode->i_lru)) { 378 list_add(&inode->i_lru, &inode_lru); 379 inodes_stat.nr_unused++; 380 } 381 } 382 383 static void inode_lru_list_del(struct inode *inode) 384 { 385 if (!list_empty(&inode->i_lru)) { 386 list_del_init(&inode->i_lru); 387 inodes_stat.nr_unused--; 388 } 389 - } 390 - 391 - static inline void __inode_sb_list_add(struct inode *inode) 392 - { 393 - list_add(&inode->i_sb_list, &inode->i_sb->s_inodes); 394 } 395 396 /** ··· 398 */ 399 void inode_sb_list_add(struct inode *inode) 400 { 401 - spin_lock(&inode_lock); 402 - __inode_sb_list_add(inode); 403 - spin_unlock(&inode_lock); 404 } 405 EXPORT_SYMBOL_GPL(inode_sb_list_add); 406 407 - static inline void __inode_sb_list_del(struct inode *inode) 408 { 409 list_del_init(&inode->i_sb_list); 410 } 411 412 static unsigned long hash(struct super_block *sb, unsigned long hashval) ··· 433 { 434 struct hlist_head *b = inode_hashtable + hash(inode->i_sb, hashval); 435 436 - spin_lock(&inode_lock); 437 hlist_add_head(&inode->i_hash, b); 438 - spin_unlock(&inode_lock); 439 } 440 EXPORT_SYMBOL(__insert_inode_hash); 441 - 442 - /** 443 - * __remove_inode_hash - remove an inode from the hash 444 - * @inode: inode to unhash 445 - * 446 - * Remove an inode from the superblock. 
447 - */ 448 - static void __remove_inode_hash(struct inode *inode) 449 - { 450 - hlist_del_init(&inode->i_hash); 451 - } 452 453 /** 454 * remove_inode_hash - remove an inode from the hash ··· 449 */ 450 void remove_inode_hash(struct inode *inode) 451 { 452 - spin_lock(&inode_lock); 453 hlist_del_init(&inode->i_hash); 454 - spin_unlock(&inode_lock); 455 } 456 EXPORT_SYMBOL(remove_inode_hash); 457 ··· 470 } 471 EXPORT_SYMBOL(end_writeback); 472 473 static void evict(struct inode *inode) 474 { 475 const struct super_operations *op = inode->i_sb->s_op; 476 477 if (op->evict_inode) { 478 op->evict_inode(inode); ··· 504 bd_forget(inode); 505 if (S_ISCHR(inode->i_mode) && inode->i_cdev) 506 cd_forget(inode); 507 } 508 509 /* ··· 531 list_del_init(&inode->i_lru); 532 533 evict(inode); 534 - 535 - spin_lock(&inode_lock); 536 - __remove_inode_hash(inode); 537 - __inode_sb_list_del(inode); 538 - spin_unlock(&inode_lock); 539 - 540 - wake_up_inode(inode); 541 - destroy_inode(inode); 542 } 543 } 544 ··· 548 struct inode *inode, *next; 549 LIST_HEAD(dispose); 550 551 - spin_lock(&inode_lock); 552 list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) { 553 if (atomic_read(&inode->i_count)) 554 continue; 555 - if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) 556 continue; 557 558 inode->i_state |= I_FREEING; 559 - 560 - /* 561 - * Move the inode off the IO lists and LRU once I_FREEING is 562 - * set so that it won't get moved back on there if it is dirty. 563 - */ 564 - list_move(&inode->i_lru, &dispose); 565 - list_del_init(&inode->i_wb_list); 566 - if (!(inode->i_state & (I_DIRTY | I_SYNC))) 567 - inodes_stat.nr_unused--; 568 } 569 - spin_unlock(&inode_lock); 570 571 dispose_list(&dispose); 572 ··· 593 struct inode *inode, *next; 594 LIST_HEAD(dispose); 595 596 - spin_lock(&inode_lock); 597 list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) { 598 - if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) 599 continue; 600 if (inode->i_state & I_DIRTY && !kill_dirty) { 601 busy = 1; 602 continue; 603 } 604 if (atomic_read(&inode->i_count)) { 605 busy = 1; 606 continue; 607 } 608 609 inode->i_state |= I_FREEING; 610 - 611 - /* 612 - * Move the inode off the IO lists and LRU once I_FREEING is 613 - * set so that it won't get moved back on there if it is dirty. 614 - */ 615 - list_move(&inode->i_lru, &dispose); 616 - list_del_init(&inode->i_wb_list); 617 - if (!(inode->i_state & (I_DIRTY | I_SYNC))) 618 - inodes_stat.nr_unused--; 619 } 620 - spin_unlock(&inode_lock); 621 622 dispose_list(&dispose); 623 ··· 638 639 /* 640 * Scan `goal' inodes on the unused list for freeable ones. They are moved to a 641 - * temporary list and then are freed outside inode_lock by dispose_list(). 642 * 643 * Any inodes which are pinned purely because of attached pagecache have their 644 * pagecache removed. If the inode has metadata buffers attached to ··· 659 unsigned long reap = 0; 660 661 down_read(&iprune_sem); 662 - spin_lock(&inode_lock); 663 for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) { 664 struct inode *inode; 665 ··· 669 inode = list_entry(inode_lru.prev, struct inode, i_lru); 670 671 /* 672 * Referenced or dirty inodes are still in use. Give them 673 * another pass through the LRU as we canot reclaim them now. 
674 */ 675 if (atomic_read(&inode->i_count) || 676 (inode->i_state & ~I_REFERENCED)) { 677 list_del_init(&inode->i_lru); 678 inodes_stat.nr_unused--; 679 continue; 680 } 681 682 /* recently referenced inodes get one more pass */ 683 if (inode->i_state & I_REFERENCED) { 684 - list_move(&inode->i_lru, &inode_lru); 685 inode->i_state &= ~I_REFERENCED; 686 continue; 687 } 688 if (inode_has_buffers(inode) || inode->i_data.nrpages) { 689 __iget(inode); 690 - spin_unlock(&inode_lock); 691 if (remove_inode_buffers(inode)) 692 reap += invalidate_mapping_pages(&inode->i_data, 693 0, -1); 694 iput(inode); 695 - spin_lock(&inode_lock); 696 697 if (inode != list_entry(inode_lru.next, 698 struct inode, i_lru)) 699 continue; /* wrong inode or list_empty */ 700 - if (!can_unuse(inode)) 701 continue; 702 } 703 WARN_ON(inode->i_state & I_NEW); 704 inode->i_state |= I_FREEING; 705 706 - /* 707 - * Move the inode off the IO lists and LRU once I_FREEING is 708 - * set so that it won't get moved back on there if it is dirty. 709 - */ 710 list_move(&inode->i_lru, &freeable); 711 - list_del_init(&inode->i_wb_list); 712 inodes_stat.nr_unused--; 713 } 714 if (current_is_kswapd()) 715 __count_vm_events(KSWAPD_INODESTEAL, reap); 716 else 717 __count_vm_events(PGINODESTEAL, reap); 718 - spin_unlock(&inode_lock); 719 720 dispose_list(&freeable); 721 up_read(&iprune_sem); ··· 778 779 repeat: 780 hlist_for_each_entry(inode, node, head, i_hash) { 781 - if (inode->i_sb != sb) 782 continue; 783 - if (!test(inode, data)) 784 continue; 785 if (inode->i_state & (I_FREEING|I_WILL_FREE)) { 786 __wait_on_freeing_inode(inode); 787 goto repeat; 788 } 789 __iget(inode); 790 return inode; 791 } 792 return NULL; ··· 810 811 repeat: 812 hlist_for_each_entry(inode, node, head, i_hash) { 813 - if (inode->i_ino != ino) 814 continue; 815 - if (inode->i_sb != sb) 816 continue; 817 if (inode->i_state & (I_FREEING|I_WILL_FREE)) { 818 __wait_on_freeing_inode(inode); 819 goto repeat; 820 } 821 __iget(inode); 822 return inode; 823 } 824 return NULL; ··· 884 { 885 struct inode *inode; 886 887 - spin_lock_prefetch(&inode_lock); 888 889 inode = alloc_inode(sb); 890 if (inode) { 891 - spin_lock(&inode_lock); 892 - __inode_sb_list_add(inode); 893 inode->i_state = 0; 894 - spin_unlock(&inode_lock); 895 } 896 return inode; 897 } 898 EXPORT_SYMBOL(new_inode); 899 900 void unlock_new_inode(struct inode *inode) 901 { 902 #ifdef CONFIG_DEBUG_LOCK_ALLOC ··· 923 } 924 } 925 #endif 926 - /* 927 - * This is special! We do not need the spinlock when clearing I_NEW, 928 - * because we're guaranteed that nobody else tries to do anything about 929 - * the state of the inode when it is locked, as we just created it (so 930 - * there can be no old holders that haven't tested I_NEW). 931 - * However we must emit the memory barrier so that other CPUs reliably 932 - * see the clearing of I_NEW after the other inode initialisation has 933 - * completed. 934 - */ 935 - smp_mb(); 936 WARN_ON(!(inode->i_state & I_NEW)); 937 inode->i_state &= ~I_NEW; 938 - wake_up_inode(inode); 939 } 940 EXPORT_SYMBOL(unlock_new_inode); 941 942 - /* 943 - * This is called without the inode lock held.. Be careful. 
944 * 945 - * We no longer cache the sb_flags in i_flags - see fs.h 946 - * -- rmk@arm.uk.linux.org 947 */ 948 - static struct inode *get_new_inode(struct super_block *sb, 949 - struct hlist_head *head, 950 - int (*test)(struct inode *, void *), 951 - int (*set)(struct inode *, void *), 952 - void *data) 953 { 954 struct inode *inode; 955 956 inode = alloc_inode(sb); 957 if (inode) { 958 struct inode *old; 959 960 - spin_lock(&inode_lock); 961 /* We released the lock, so.. */ 962 old = find_inode(sb, head, test, data); 963 if (!old) { 964 if (set(inode, data)) 965 goto set_failed; 966 967 - hlist_add_head(&inode->i_hash, head); 968 - __inode_sb_list_add(inode); 969 inode->i_state = I_NEW; 970 - spin_unlock(&inode_lock); 971 972 /* Return the locked inode with I_NEW set, the 973 * caller is responsible for filling in the contents ··· 996 * us. Use the old inode instead of the one we just 997 * allocated. 998 */ 999 - spin_unlock(&inode_lock); 1000 destroy_inode(inode); 1001 inode = old; 1002 wait_on_inode(inode); ··· 1004 return inode; 1005 1006 set_failed: 1007 - spin_unlock(&inode_lock); 1008 destroy_inode(inode); 1009 return NULL; 1010 } 1011 1012 - /* 1013 - * get_new_inode_fast is the fast path version of get_new_inode, see the 1014 - * comment at iget_locked for details. 1015 */ 1016 - static struct inode *get_new_inode_fast(struct super_block *sb, 1017 - struct hlist_head *head, unsigned long ino) 1018 { 1019 struct inode *inode; 1020 1021 inode = alloc_inode(sb); 1022 if (inode) { 1023 struct inode *old; 1024 1025 - spin_lock(&inode_lock); 1026 /* We released the lock, so.. */ 1027 old = find_inode_fast(sb, head, ino); 1028 if (!old) { 1029 inode->i_ino = ino; 1030 - hlist_add_head(&inode->i_hash, head); 1031 - __inode_sb_list_add(inode); 1032 inode->i_state = I_NEW; 1033 - spin_unlock(&inode_lock); 1034 1035 /* Return the locked inode with I_NEW set, the 1036 * caller is responsible for filling in the contents ··· 1063 * us. Use the old inode instead of the one we just 1064 * allocated. 1065 */ 1066 - spin_unlock(&inode_lock); 1067 destroy_inode(inode); 1068 inode = old; 1069 wait_on_inode(inode); 1070 } 1071 return inode; 1072 } 1073 1074 /* 1075 * search the inode cache for a matching inode number. ··· 1085 struct hlist_node *node; 1086 struct inode *inode; 1087 1088 hlist_for_each_entry(inode, node, b, i_hash) { 1089 - if (inode->i_ino == ino && inode->i_sb == sb) 1090 return 0; 1091 } 1092 1093 return 1; 1094 } ··· 1122 static unsigned int counter; 1123 ino_t res; 1124 1125 - spin_lock(&inode_lock); 1126 spin_lock(&iunique_lock); 1127 do { 1128 if (counter <= max_reserved) ··· 1129 res = counter++; 1130 } while (!test_inode_iunique(sb, res)); 1131 spin_unlock(&iunique_lock); 1132 - spin_unlock(&inode_lock); 1133 1134 return res; 1135 } ··· 1136 1137 struct inode *igrab(struct inode *inode) 1138 { 1139 - spin_lock(&inode_lock); 1140 - if (!(inode->i_state & (I_FREEING|I_WILL_FREE))) 1141 __iget(inode); 1142 - else 1143 /* 1144 * Handle the case where s_op->clear_inode is not been 1145 * called yet, and somebody is calling igrab 1146 * while the inode is getting freed. 1147 */ 1148 inode = NULL; 1149 - spin_unlock(&inode_lock); 1150 return inode; 1151 } 1152 EXPORT_SYMBOL(igrab); 1153 - 1154 - /** 1155 - * ifind - internal function, you want ilookup5() or iget5(). 
1156 - * @sb: super block of file system to search 1157 - * @head: the head of the list to search 1158 - * @test: callback used for comparisons between inodes 1159 - * @data: opaque data pointer to pass to @test 1160 - * @wait: if true wait for the inode to be unlocked, if false do not 1161 - * 1162 - * ifind() searches for the inode specified by @data in the inode 1163 - * cache. This is a generalized version of ifind_fast() for file systems where 1164 - * the inode number is not sufficient for unique identification of an inode. 1165 - * 1166 - * If the inode is in the cache, the inode is returned with an incremented 1167 - * reference count. 1168 - * 1169 - * Otherwise NULL is returned. 1170 - * 1171 - * Note, @test is called with the inode_lock held, so can't sleep. 1172 - */ 1173 - static struct inode *ifind(struct super_block *sb, 1174 - struct hlist_head *head, int (*test)(struct inode *, void *), 1175 - void *data, const int wait) 1176 - { 1177 - struct inode *inode; 1178 - 1179 - spin_lock(&inode_lock); 1180 - inode = find_inode(sb, head, test, data); 1181 - if (inode) { 1182 - spin_unlock(&inode_lock); 1183 - if (likely(wait)) 1184 - wait_on_inode(inode); 1185 - return inode; 1186 - } 1187 - spin_unlock(&inode_lock); 1188 - return NULL; 1189 - } 1190 - 1191 - /** 1192 - * ifind_fast - internal function, you want ilookup() or iget(). 1193 - * @sb: super block of file system to search 1194 - * @head: head of the list to search 1195 - * @ino: inode number to search for 1196 - * 1197 - * ifind_fast() searches for the inode @ino in the inode cache. This is for 1198 - * file systems where the inode number is sufficient for unique identification 1199 - * of an inode. 1200 - * 1201 - * If the inode is in the cache, the inode is returned with an incremented 1202 - * reference count. 1203 - * 1204 - * Otherwise NULL is returned. 1205 - */ 1206 - static struct inode *ifind_fast(struct super_block *sb, 1207 - struct hlist_head *head, unsigned long ino) 1208 - { 1209 - struct inode *inode; 1210 - 1211 - spin_lock(&inode_lock); 1212 - inode = find_inode_fast(sb, head, ino); 1213 - if (inode) { 1214 - spin_unlock(&inode_lock); 1215 - wait_on_inode(inode); 1216 - return inode; 1217 - } 1218 - spin_unlock(&inode_lock); 1219 - return NULL; 1220 - } 1221 1222 /** 1223 * ilookup5_nowait - search for an inode in the inode cache ··· 1160 * @test: callback used for comparisons between inodes 1161 * @data: opaque data pointer to pass to @test 1162 * 1163 - * ilookup5() uses ifind() to search for the inode specified by @hashval and 1164 - * @data in the inode cache. This is a generalized version of ilookup() for 1165 - * file systems where the inode number is not sufficient for unique 1166 - * identification of an inode. 1167 - * 1168 * If the inode is in the cache, the inode is returned with an incremented 1169 - * reference count. Note, the inode lock is not waited upon so you have to be 1170 - * very careful what you do with the returned inode. You probably should be 1171 - * using ilookup5() instead. 1172 * 1173 - * Otherwise NULL is returned. 1174 * 1175 - * Note, @test is called with the inode_lock held, so can't sleep. 
1176 */ 1177 struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval, 1178 int (*test)(struct inode *, void *), void *data) 1179 { 1180 struct hlist_head *head = inode_hashtable + hash(sb, hashval); 1181 1182 - return ifind(sb, head, test, data, 0); 1183 } 1184 EXPORT_SYMBOL(ilookup5_nowait); 1185 ··· 1190 * @test: callback used for comparisons between inodes 1191 * @data: opaque data pointer to pass to @test 1192 * 1193 - * ilookup5() uses ifind() to search for the inode specified by @hashval and 1194 - * @data in the inode cache. This is a generalized version of ilookup() for 1195 - * file systems where the inode number is not sufficient for unique 1196 - * identification of an inode. 1197 - * 1198 - * If the inode is in the cache, the inode lock is waited upon and the inode is 1199 * returned with an incremented reference count. 1200 * 1201 - * Otherwise NULL is returned. 1202 * 1203 - * Note, @test is called with the inode_lock held, so can't sleep. 1204 */ 1205 struct inode *ilookup5(struct super_block *sb, unsigned long hashval, 1206 int (*test)(struct inode *, void *), void *data) 1207 { 1208 - struct hlist_head *head = inode_hashtable + hash(sb, hashval); 1209 1210 - return ifind(sb, head, test, data, 1); 1211 } 1212 EXPORT_SYMBOL(ilookup5); 1213 ··· 1216 * @sb: super block of file system to search 1217 * @ino: inode number to search for 1218 * 1219 - * ilookup() uses ifind_fast() to search for the inode @ino in the inode cache. 1220 - * This is for file systems where the inode number is sufficient for unique 1221 - * identification of an inode. 1222 - * 1223 - * If the inode is in the cache, the inode is returned with an incremented 1224 - * reference count. 1225 - * 1226 - * Otherwise NULL is returned. 1227 */ 1228 struct inode *ilookup(struct super_block *sb, unsigned long ino) 1229 { 1230 struct hlist_head *head = inode_hashtable + hash(sb, ino); 1231 1232 - return ifind_fast(sb, head, ino); 1233 } 1234 EXPORT_SYMBOL(ilookup); 1235 - 1236 - /** 1237 - * iget5_locked - obtain an inode from a mounted file system 1238 - * @sb: super block of file system 1239 - * @hashval: hash value (usually inode number) to get 1240 - * @test: callback used for comparisons between inodes 1241 - * @set: callback used to initialize a new struct inode 1242 - * @data: opaque data pointer to pass to @test and @set 1243 - * 1244 - * iget5_locked() uses ifind() to search for the inode specified by @hashval 1245 - * and @data in the inode cache and if present it is returned with an increased 1246 - * reference count. This is a generalized version of iget_locked() for file 1247 - * systems where the inode number is not sufficient for unique identification 1248 - * of an inode. 1249 - * 1250 - * If the inode is not in cache, get_new_inode() is called to allocate a new 1251 - * inode and this is returned locked, hashed, and with the I_NEW flag set. The 1252 - * file system gets to fill it in before unlocking it via unlock_new_inode(). 1253 - * 1254 - * Note both @test and @set are called with the inode_lock held, so can't sleep. 
1255 - */ 1256 - struct inode *iget5_locked(struct super_block *sb, unsigned long hashval, 1257 - int (*test)(struct inode *, void *), 1258 - int (*set)(struct inode *, void *), void *data) 1259 - { 1260 - struct hlist_head *head = inode_hashtable + hash(sb, hashval); 1261 - struct inode *inode; 1262 - 1263 - inode = ifind(sb, head, test, data, 1); 1264 - if (inode) 1265 - return inode; 1266 - /* 1267 - * get_new_inode() will do the right thing, re-trying the search 1268 - * in case it had to block at any point. 1269 - */ 1270 - return get_new_inode(sb, head, test, set, data); 1271 - } 1272 - EXPORT_SYMBOL(iget5_locked); 1273 - 1274 - /** 1275 - * iget_locked - obtain an inode from a mounted file system 1276 - * @sb: super block of file system 1277 - * @ino: inode number to get 1278 - * 1279 - * iget_locked() uses ifind_fast() to search for the inode specified by @ino in 1280 - * the inode cache and if present it is returned with an increased reference 1281 - * count. This is for file systems where the inode number is sufficient for 1282 - * unique identification of an inode. 1283 - * 1284 - * If the inode is not in cache, get_new_inode_fast() is called to allocate a 1285 - * new inode and this is returned locked, hashed, and with the I_NEW flag set. 1286 - * The file system gets to fill it in before unlocking it via 1287 - * unlock_new_inode(). 1288 - */ 1289 - struct inode *iget_locked(struct super_block *sb, unsigned long ino) 1290 - { 1291 - struct hlist_head *head = inode_hashtable + hash(sb, ino); 1292 - struct inode *inode; 1293 - 1294 - inode = ifind_fast(sb, head, ino); 1295 - if (inode) 1296 - return inode; 1297 - /* 1298 - * get_new_inode_fast() will do the right thing, re-trying the search 1299 - * in case it had to block at any point. 1300 - */ 1301 - return get_new_inode_fast(sb, head, ino); 1302 - } 1303 - EXPORT_SYMBOL(iget_locked); 1304 1305 int insert_inode_locked(struct inode *inode) 1306 { ··· 1240 ino_t ino = inode->i_ino; 1241 struct hlist_head *head = inode_hashtable + hash(sb, ino); 1242 1243 - inode->i_state |= I_NEW; 1244 while (1) { 1245 struct hlist_node *node; 1246 struct inode *old = NULL; 1247 - spin_lock(&inode_lock); 1248 hlist_for_each_entry(old, node, head, i_hash) { 1249 if (old->i_ino != ino) 1250 continue; 1251 if (old->i_sb != sb) 1252 continue; 1253 - if (old->i_state & (I_FREEING|I_WILL_FREE)) 1254 continue; 1255 break; 1256 } 1257 if (likely(!node)) { 1258 hlist_add_head(&inode->i_hash, head); 1259 - spin_unlock(&inode_lock); 1260 return 0; 1261 } 1262 __iget(old); 1263 - spin_unlock(&inode_lock); 1264 wait_on_inode(old); 1265 if (unlikely(!inode_unhashed(old))) { 1266 iput(old); ··· 1283 struct super_block *sb = inode->i_sb; 1284 struct hlist_head *head = inode_hashtable + hash(sb, hashval); 1285 1286 - inode->i_state |= I_NEW; 1287 - 1288 while (1) { 1289 struct hlist_node *node; 1290 struct inode *old = NULL; 1291 1292 - spin_lock(&inode_lock); 1293 hlist_for_each_entry(old, node, head, i_hash) { 1294 if (old->i_sb != sb) 1295 continue; 1296 if (!test(old, data)) 1297 continue; 1298 - if (old->i_state & (I_FREEING|I_WILL_FREE)) 1299 continue; 1300 break; 1301 } 1302 if (likely(!node)) { 1303 hlist_add_head(&inode->i_hash, head); 1304 - spin_unlock(&inode_lock); 1305 return 0; 1306 } 1307 __iget(old); 1308 - spin_unlock(&inode_lock); 1309 wait_on_inode(old); 1310 if (unlikely(!inode_unhashed(old))) { 1311 iput(old); ··· 1355 const struct super_operations *op = inode->i_sb->s_op; 1356 int drop; 1357 1358 if (op && op->drop_inode) 1359 drop = 
op->drop_inode(inode); 1360 else 1361 drop = generic_drop_inode(inode); 1362 1363 - if (!drop) { 1364 - if (sb->s_flags & MS_ACTIVE) { 1365 - inode->i_state |= I_REFERENCED; 1366 - if (!(inode->i_state & (I_DIRTY|I_SYNC))) { 1367 - inode_lru_list_add(inode); 1368 - } 1369 - spin_unlock(&inode_lock); 1370 - return; 1371 - } 1372 - WARN_ON(inode->i_state & I_NEW); 1373 - inode->i_state |= I_WILL_FREE; 1374 - spin_unlock(&inode_lock); 1375 - write_inode_now(inode, 1); 1376 - spin_lock(&inode_lock); 1377 - WARN_ON(inode->i_state & I_NEW); 1378 - inode->i_state &= ~I_WILL_FREE; 1379 - __remove_inode_hash(inode); 1380 } 1381 1382 - WARN_ON(inode->i_state & I_NEW); 1383 inode->i_state |= I_FREEING; 1384 - 1385 - /* 1386 - * Move the inode off the IO lists and LRU once I_FREEING is 1387 - * set so that it won't get moved back on there if it is dirty. 1388 - */ 1389 inode_lru_list_del(inode); 1390 - list_del_init(&inode->i_wb_list); 1391 1392 - __inode_sb_list_del(inode); 1393 - spin_unlock(&inode_lock); 1394 evict(inode); 1395 - remove_inode_hash(inode); 1396 - wake_up_inode(inode); 1397 - BUG_ON(inode->i_state != (I_FREEING | I_CLEAR)); 1398 - destroy_inode(inode); 1399 } 1400 1401 /** ··· 1400 if (inode) { 1401 BUG_ON(inode->i_state & I_CLEAR); 1402 1403 - if (atomic_dec_and_lock(&inode->i_count, &inode_lock)) 1404 iput_final(inode); 1405 } 1406 } ··· 1579 * to recheck inode state. 1580 * 1581 * It doesn't matter if I_NEW is not set initially, a call to 1582 - * wake_up_inode() after removing from the hash list will DTRT. 1583 - * 1584 - * This is called with inode_lock held. 1585 */ 1586 static void __wait_on_freeing_inode(struct inode *inode) 1587 { ··· 1588 DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW); 1589 wq = bit_waitqueue(&inode->i_state, __I_NEW); 1590 prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE); 1591 - spin_unlock(&inode_lock); 1592 schedule(); 1593 finish_wait(wq, &wait.wait); 1594 - spin_lock(&inode_lock); 1595 } 1596 1597 static __initdata unsigned long ihash_entries;
··· 26 #include <linux/posix_acl.h> 27 #include <linux/ima.h> 28 #include <linux/cred.h> 29 + #include "internal.h" 30 + 31 + /* 32 + * inode locking rules. 33 + * 34 + * inode->i_lock protects: 35 + * inode->i_state, inode->i_hash, __iget() 36 + * inode_lru_lock protects: 37 + * inode_lru, inode->i_lru 38 + * inode_sb_list_lock protects: 39 + * sb->s_inodes, inode->i_sb_list 40 + * inode_wb_list_lock protects: 41 + * bdi->wb.b_{dirty,io,more_io}, inode->i_wb_list 42 + * inode_hash_lock protects: 43 + * inode_hashtable, inode->i_hash 44 + * 45 + * Lock ordering: 46 + * 47 + * inode_sb_list_lock 48 + * inode->i_lock 49 + * inode_lru_lock 50 + * 51 + * inode_wb_list_lock 52 + * inode->i_lock 53 + * 54 + * inode_hash_lock 55 + * inode_sb_list_lock 56 + * inode->i_lock 57 + * 58 + * iunique_lock 59 + * inode_hash_lock 60 + */ 61 62 /* 63 * This is needed for the following functions: ··· 60 61 static unsigned int i_hash_mask __read_mostly; 62 static unsigned int i_hash_shift __read_mostly; 63 + static struct hlist_head *inode_hashtable __read_mostly; 64 + static __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_hash_lock); 65 66 /* 67 * Each inode can be on two separate lists. One is ··· 74 */ 75 76 static LIST_HEAD(inode_lru); 77 + static DEFINE_SPINLOCK(inode_lru_lock); 78 79 + __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_sb_list_lock); 80 + __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_wb_list_lock); 81 82 /* 83 * iprune_sem provides exclusion between the icache shrinking and the ··· 136 return proc_dointvec(table, write, buffer, lenp, ppos); 137 } 138 #endif 139 140 /** 141 * inode_init_always - perform inode structure intialisation ··· 336 } 337 338 /* 339 + * inode->i_lock must be held 340 */ 341 void __iget(struct inode *inode) 342 { ··· 354 355 static void inode_lru_list_add(struct inode *inode) 356 { 357 + spin_lock(&inode_lru_lock); 358 if (list_empty(&inode->i_lru)) { 359 list_add(&inode->i_lru, &inode_lru); 360 inodes_stat.nr_unused++; 361 } 362 + spin_unlock(&inode_lru_lock); 363 } 364 365 static void inode_lru_list_del(struct inode *inode) 366 { 367 + spin_lock(&inode_lru_lock); 368 if (!list_empty(&inode->i_lru)) { 369 list_del_init(&inode->i_lru); 370 inodes_stat.nr_unused--; 371 } 372 + spin_unlock(&inode_lru_lock); 373 } 374 375 /** ··· 379 */ 380 void inode_sb_list_add(struct inode *inode) 381 { 382 + spin_lock(&inode_sb_list_lock); 383 + list_add(&inode->i_sb_list, &inode->i_sb->s_inodes); 384 + spin_unlock(&inode_sb_list_lock); 385 } 386 EXPORT_SYMBOL_GPL(inode_sb_list_add); 387 388 + static inline void inode_sb_list_del(struct inode *inode) 389 { 390 + spin_lock(&inode_sb_list_lock); 391 list_del_init(&inode->i_sb_list); 392 + spin_unlock(&inode_sb_list_lock); 393 } 394 395 static unsigned long hash(struct super_block *sb, unsigned long hashval) ··· 412 { 413 struct hlist_head *b = inode_hashtable + hash(inode->i_sb, hashval); 414 415 + spin_lock(&inode_hash_lock); 416 + spin_lock(&inode->i_lock); 417 hlist_add_head(&inode->i_hash, b); 418 + spin_unlock(&inode->i_lock); 419 + spin_unlock(&inode_hash_lock); 420 } 421 EXPORT_SYMBOL(__insert_inode_hash); 422 423 /** 424 * remove_inode_hash - remove an inode from the hash ··· 437 */ 438 void remove_inode_hash(struct inode *inode) 439 { 440 + spin_lock(&inode_hash_lock); 441 + spin_lock(&inode->i_lock); 442 hlist_del_init(&inode->i_hash); 443 + spin_unlock(&inode->i_lock); 444 + spin_unlock(&inode_hash_lock); 445 } 446 EXPORT_SYMBOL(remove_inode_hash); 447 ··· 456 } 457 EXPORT_SYMBOL(end_writeback); 458 459 + /* 460 + * 
Free the inode passed in, removing it from the lists it is still connected 461 + * to. We remove any pages still attached to the inode and wait for any IO that 462 + * is still in progress before finally destroying the inode. 463 + * 464 + * An inode must already be marked I_FREEING so that we avoid the inode being 465 + * moved back onto lists if we race with other code that manipulates the lists 466 + * (e.g. writeback_single_inode). The caller is responsible for setting this. 467 + * 468 + * An inode must already be removed from the LRU list before being evicted from 469 + * the cache. This should occur atomically with setting the I_FREEING state 470 + * flag, so no inodes here should ever be on the LRU when being evicted. 471 + */ 472 static void evict(struct inode *inode) 473 { 474 const struct super_operations *op = inode->i_sb->s_op; 475 + 476 + BUG_ON(!(inode->i_state & I_FREEING)); 477 + BUG_ON(!list_empty(&inode->i_lru)); 478 + 479 + inode_wb_list_del(inode); 480 + inode_sb_list_del(inode); 481 482 if (op->evict_inode) { 483 op->evict_inode(inode); ··· 471 bd_forget(inode); 472 if (S_ISCHR(inode->i_mode) && inode->i_cdev) 473 cd_forget(inode); 474 + 475 + remove_inode_hash(inode); 476 + 477 + spin_lock(&inode->i_lock); 478 + wake_up_bit(&inode->i_state, __I_NEW); 479 + BUG_ON(inode->i_state != (I_FREEING | I_CLEAR)); 480 + spin_unlock(&inode->i_lock); 481 + 482 + destroy_inode(inode); 483 } 484 485 /* ··· 489 list_del_init(&inode->i_lru); 490 491 evict(inode); 492 } 493 } 494 ··· 514 struct inode *inode, *next; 515 LIST_HEAD(dispose); 516 517 + spin_lock(&inode_sb_list_lock); 518 list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) { 519 if (atomic_read(&inode->i_count)) 520 continue; 521 + 522 + spin_lock(&inode->i_lock); 523 + if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) { 524 + spin_unlock(&inode->i_lock); 525 continue; 526 + } 527 528 inode->i_state |= I_FREEING; 529 + inode_lru_list_del(inode); 530 + spin_unlock(&inode->i_lock); 531 + list_add(&inode->i_lru, &dispose); 532 } 533 + spin_unlock(&inode_sb_list_lock); 534 535 dispose_list(&dispose); 536 ··· 561 struct inode *inode, *next; 562 LIST_HEAD(dispose); 563 564 + spin_lock(&inode_sb_list_lock); 565 list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) { 566 + spin_lock(&inode->i_lock); 567 + if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) { 568 + spin_unlock(&inode->i_lock); 569 continue; 570 + } 571 if (inode->i_state & I_DIRTY && !kill_dirty) { 572 + spin_unlock(&inode->i_lock); 573 busy = 1; 574 continue; 575 } 576 if (atomic_read(&inode->i_count)) { 577 + spin_unlock(&inode->i_lock); 578 busy = 1; 579 continue; 580 } 581 582 inode->i_state |= I_FREEING; 583 + inode_lru_list_del(inode); 584 + spin_unlock(&inode->i_lock); 585 + list_add(&inode->i_lru, &dispose); 586 } 587 + spin_unlock(&inode_sb_list_lock); 588 589 dispose_list(&dispose); 590 ··· 607 608 /* 609 * Scan `goal' inodes on the unused list for freeable ones. They are moved to a 610 + * temporary list and then are freed outside inode_lru_lock by dispose_list(). 611 * 612 * Any inodes which are pinned purely because of attached pagecache have their 613 * pagecache removed. 
If the inode has metadata buffers attached to ··· 628 unsigned long reap = 0; 629 630 down_read(&iprune_sem); 631 + spin_lock(&inode_lru_lock); 632 for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) { 633 struct inode *inode; 634 ··· 638 inode = list_entry(inode_lru.prev, struct inode, i_lru); 639 640 /* 641 + * we are inverting the inode_lru_lock/inode->i_lock here, 642 + * so use a trylock. If we fail to get the lock, just move the 643 + * inode to the back of the list so we don't spin on it. 644 + */ 645 + if (!spin_trylock(&inode->i_lock)) { 646 + list_move(&inode->i_lru, &inode_lru); 647 + continue; 648 + } 649 + 650 + /* 651 * Referenced or dirty inodes are still in use. Give them 652 * another pass through the LRU as we canot reclaim them now. 653 */ 654 if (atomic_read(&inode->i_count) || 655 (inode->i_state & ~I_REFERENCED)) { 656 list_del_init(&inode->i_lru); 657 + spin_unlock(&inode->i_lock); 658 inodes_stat.nr_unused--; 659 continue; 660 } 661 662 /* recently referenced inodes get one more pass */ 663 if (inode->i_state & I_REFERENCED) { 664 inode->i_state &= ~I_REFERENCED; 665 + list_move(&inode->i_lru, &inode_lru); 666 + spin_unlock(&inode->i_lock); 667 continue; 668 } 669 if (inode_has_buffers(inode) || inode->i_data.nrpages) { 670 __iget(inode); 671 + spin_unlock(&inode->i_lock); 672 + spin_unlock(&inode_lru_lock); 673 if (remove_inode_buffers(inode)) 674 reap += invalidate_mapping_pages(&inode->i_data, 675 0, -1); 676 iput(inode); 677 + spin_lock(&inode_lru_lock); 678 679 if (inode != list_entry(inode_lru.next, 680 struct inode, i_lru)) 681 continue; /* wrong inode or list_empty */ 682 + /* avoid lock inversions with trylock */ 683 + if (!spin_trylock(&inode->i_lock)) 684 continue; 685 + if (!can_unuse(inode)) { 686 + spin_unlock(&inode->i_lock); 687 + continue; 688 + } 689 } 690 WARN_ON(inode->i_state & I_NEW); 691 inode->i_state |= I_FREEING; 692 + spin_unlock(&inode->i_lock); 693 694 list_move(&inode->i_lru, &freeable); 695 inodes_stat.nr_unused--; 696 } 697 if (current_is_kswapd()) 698 __count_vm_events(KSWAPD_INODESTEAL, reap); 699 else 700 __count_vm_events(PGINODESTEAL, reap); 701 + spin_unlock(&inode_lru_lock); 702 703 dispose_list(&freeable); 704 up_read(&iprune_sem); ··· 733 734 repeat: 735 hlist_for_each_entry(inode, node, head, i_hash) { 736 + spin_lock(&inode->i_lock); 737 + if (inode->i_sb != sb) { 738 + spin_unlock(&inode->i_lock); 739 continue; 740 + } 741 + if (!test(inode, data)) { 742 + spin_unlock(&inode->i_lock); 743 continue; 744 + } 745 if (inode->i_state & (I_FREEING|I_WILL_FREE)) { 746 __wait_on_freeing_inode(inode); 747 goto repeat; 748 } 749 __iget(inode); 750 + spin_unlock(&inode->i_lock); 751 return inode; 752 } 753 return NULL; ··· 759 760 repeat: 761 hlist_for_each_entry(inode, node, head, i_hash) { 762 + spin_lock(&inode->i_lock); 763 + if (inode->i_ino != ino) { 764 + spin_unlock(&inode->i_lock); 765 continue; 766 + } 767 + if (inode->i_sb != sb) { 768 + spin_unlock(&inode->i_lock); 769 continue; 770 + } 771 if (inode->i_state & (I_FREEING|I_WILL_FREE)) { 772 __wait_on_freeing_inode(inode); 773 goto repeat; 774 } 775 __iget(inode); 776 + spin_unlock(&inode->i_lock); 777 return inode; 778 } 779 return NULL; ··· 827 { 828 struct inode *inode; 829 830 + spin_lock_prefetch(&inode_sb_list_lock); 831 832 inode = alloc_inode(sb); 833 if (inode) { 834 + spin_lock(&inode->i_lock); 835 inode->i_state = 0; 836 + spin_unlock(&inode->i_lock); 837 + inode_sb_list_add(inode); 838 } 839 return inode; 840 } 841 EXPORT_SYMBOL(new_inode); 842 843 + 
/** 844 + * unlock_new_inode - clear the I_NEW state and wake up any waiters 845 + * @inode: new inode to unlock 846 + * 847 + * Called when the inode is fully initialised to clear the new state of the 848 + * inode and wake up anyone waiting for the inode to finish initialisation. 849 + */ 850 void unlock_new_inode(struct inode *inode) 851 { 852 #ifdef CONFIG_DEBUG_LOCK_ALLOC ··· 859 } 860 } 861 #endif 862 + spin_lock(&inode->i_lock); 863 WARN_ON(!(inode->i_state & I_NEW)); 864 inode->i_state &= ~I_NEW; 865 + wake_up_bit(&inode->i_state, __I_NEW); 866 + spin_unlock(&inode->i_lock); 867 } 868 EXPORT_SYMBOL(unlock_new_inode); 869 870 + /** 871 + * iget5_locked - obtain an inode from a mounted file system 872 + * @sb: super block of file system 873 + * @hashval: hash value (usually inode number) to get 874 + * @test: callback used for comparisons between inodes 875 + * @set: callback used to initialize a new struct inode 876 + * @data: opaque data pointer to pass to @test and @set 877 * 878 + * Search for the inode specified by @hashval and @data in the inode cache, 879 + * and if present it is returned with an increased reference count. This is 880 + * a generalized version of iget_locked() for file systems where the inode 881 + * number is not sufficient for unique identification of an inode. 882 + * 883 + * If the inode is not in cache, allocate a new inode and return it locked, 884 + * hashed, and with the I_NEW flag set. The file system gets to fill it in 885 + * before unlocking it via unlock_new_inode(). 886 + * 887 + * Note both @test and @set are called with the inode_hash_lock held, so can't 888 + * sleep. 889 */ 890 + struct inode *iget5_locked(struct super_block *sb, unsigned long hashval, 891 + int (*test)(struct inode *, void *), 892 + int (*set)(struct inode *, void *), void *data) 893 { 894 + struct hlist_head *head = inode_hashtable + hash(sb, hashval); 895 struct inode *inode; 896 + 897 + spin_lock(&inode_hash_lock); 898 + inode = find_inode(sb, head, test, data); 899 + spin_unlock(&inode_hash_lock); 900 + 901 + if (inode) { 902 + wait_on_inode(inode); 903 + return inode; 904 + } 905 906 inode = alloc_inode(sb); 907 if (inode) { 908 struct inode *old; 909 910 + spin_lock(&inode_hash_lock); 911 /* We released the lock, so.. */ 912 old = find_inode(sb, head, test, data); 913 if (!old) { 914 if (set(inode, data)) 915 goto set_failed; 916 917 + spin_lock(&inode->i_lock); 918 inode->i_state = I_NEW; 919 + hlist_add_head(&inode->i_hash, head); 920 + spin_unlock(&inode->i_lock); 921 + inode_sb_list_add(inode); 922 + spin_unlock(&inode_hash_lock); 923 924 /* Return the locked inode with I_NEW set, the 925 * caller is responsible for filling in the contents ··· 916 * us. Use the old inode instead of the one we just 917 * allocated. 918 */ 919 + spin_unlock(&inode_hash_lock); 920 destroy_inode(inode); 921 inode = old; 922 wait_on_inode(inode); ··· 924 return inode; 925 926 set_failed: 927 + spin_unlock(&inode_hash_lock); 928 destroy_inode(inode); 929 return NULL; 930 } 931 + EXPORT_SYMBOL(iget5_locked); 932 933 + /** 934 + * iget_locked - obtain an inode from a mounted file system 935 + * @sb: super block of file system 936 + * @ino: inode number to get 937 + * 938 + * Search for the inode specified by @ino in the inode cache and if present 939 + * return it with an increased reference count. This is for file systems 940 + * where the inode number is sufficient for unique identification of an inode.
941 + * 942 + * If the inode is not in cache, allocate a new inode and return it locked, 943 + * hashed, and with the I_NEW flag set. The file system gets to fill it in 944 + * before unlocking it via unlock_new_inode(). 945 */ 946 + struct inode *iget_locked(struct super_block *sb, unsigned long ino) 947 { 948 + struct hlist_head *head = inode_hashtable + hash(sb, ino); 949 struct inode *inode; 950 + 951 + spin_lock(&inode_hash_lock); 952 + inode = find_inode_fast(sb, head, ino); 953 + spin_unlock(&inode_hash_lock); 954 + if (inode) { 955 + wait_on_inode(inode); 956 + return inode; 957 + } 958 959 inode = alloc_inode(sb); 960 if (inode) { 961 struct inode *old; 962 963 + spin_lock(&inode_hash_lock); 964 /* We released the lock, so.. */ 965 old = find_inode_fast(sb, head, ino); 966 if (!old) { 967 inode->i_ino = ino; 968 + spin_lock(&inode->i_lock); 969 inode->i_state = I_NEW; 970 + hlist_add_head(&inode->i_hash, head); 971 + spin_unlock(&inode->i_lock); 972 + inode_sb_list_add(inode); 973 + spin_unlock(&inode_hash_lock); 974 975 /* Return the locked inode with I_NEW set, the 976 * caller is responsible for filling in the contents ··· 963 * us. Use the old inode instead of the one we just 964 * allocated. 965 */ 966 + spin_unlock(&inode_hash_lock); 967 destroy_inode(inode); 968 inode = old; 969 wait_on_inode(inode); 970 } 971 return inode; 972 } 973 + EXPORT_SYMBOL(iget_locked); 974 975 /* 976 * search the inode cache for a matching inode number. ··· 984 struct hlist_node *node; 985 struct inode *inode; 986 987 + spin_lock(&inode_hash_lock); 988 hlist_for_each_entry(inode, node, b, i_hash) { 989 + if (inode->i_ino == ino && inode->i_sb == sb) { 990 + spin_unlock(&inode_hash_lock); 991 return 0; 992 + } 993 } 994 + spin_unlock(&inode_hash_lock); 995 996 return 1; 997 } ··· 1017 static unsigned int counter; 1018 ino_t res; 1019 1020 spin_lock(&iunique_lock); 1021 do { 1022 if (counter <= max_reserved) ··· 1025 res = counter++; 1026 } while (!test_inode_iunique(sb, res)); 1027 spin_unlock(&iunique_lock); 1028 1029 return res; 1030 } ··· 1033 1034 struct inode *igrab(struct inode *inode) 1035 { 1036 + spin_lock(&inode->i_lock); 1037 + if (!(inode->i_state & (I_FREEING|I_WILL_FREE))) { 1038 __iget(inode); 1039 + spin_unlock(&inode->i_lock); 1040 + } else { 1041 + spin_unlock(&inode->i_lock); 1042 /* 1043 * Handle the case where s_op->clear_inode is not been 1044 * called yet, and somebody is calling igrab 1045 * while the inode is getting freed. 1046 */ 1047 inode = NULL; 1048 + } 1049 return inode; 1050 } 1051 EXPORT_SYMBOL(igrab); 1052 1053 /** 1054 * ilookup5_nowait - search for an inode in the inode cache ··· 1123 * @test: callback used for comparisons between inodes 1124 * @data: opaque data pointer to pass to @test 1125 * 1126 + * Search for the inode specified by @hashval and @data in the inode cache. 1127 * If the inode is in the cache, the inode is returned with an incremented 1128 + * reference count. 1129 * 1130 + * Note: I_NEW is not waited upon so you have to be very careful what you do 1131 + * with the returned inode. You probably should be using ilookup5() instead. 1132 * 1133 + * Note: @test is called with the inode_hash_lock held, so can't sleep. 
1134 */ 1135 struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval, 1136 int (*test)(struct inode *, void *), void *data) 1137 { 1138 struct hlist_head *head = inode_hashtable + hash(sb, hashval); 1139 + struct inode *inode; 1140 1141 + spin_lock(&inode_hash_lock); 1142 + inode = find_inode(sb, head, test, data); 1143 + spin_unlock(&inode_hash_lock); 1144 + 1145 + return inode; 1146 } 1147 EXPORT_SYMBOL(ilookup5_nowait); 1148 ··· 1153 * @test: callback used for comparisons between inodes 1154 * @data: opaque data pointer to pass to @test 1155 * 1156 + * Search for the inode specified by @hashval and @data in the inode cache, 1157 + * and if the inode is in the cache, return the inode with an incremented 1158 + * reference count. Waits on I_NEW before returning the inode. 1159 * returned with an incremented reference count. 1160 * 1161 + * This is a generalized version of ilookup() for file systems where the 1162 + * inode number is not sufficient for unique identification of an inode. 1163 * 1164 + * Note: @test is called with the inode_hash_lock held, so can't sleep. 1165 */ 1166 struct inode *ilookup5(struct super_block *sb, unsigned long hashval, 1167 int (*test)(struct inode *, void *), void *data) 1168 { 1169 + struct inode *inode = ilookup5_nowait(sb, hashval, test, data); 1170 1171 + if (inode) 1172 + wait_on_inode(inode); 1173 + return inode; 1174 } 1175 EXPORT_SYMBOL(ilookup5); 1176 ··· 1179 * @sb: super block of file system to search 1180 * @ino: inode number to search for 1181 * 1182 + * Search for the inode @ino in the inode cache, and if the inode is in the 1183 + * cache, the inode is returned with an incremented reference count. 1184 */ 1185 struct inode *ilookup(struct super_block *sb, unsigned long ino) 1186 { 1187 struct hlist_head *head = inode_hashtable + hash(sb, ino); 1188 + struct inode *inode; 1189 1190 + spin_lock(&inode_hash_lock); 1191 + inode = find_inode_fast(sb, head, ino); 1192 + spin_unlock(&inode_hash_lock); 1193 + 1194 + if (inode) 1195 + wait_on_inode(inode); 1196 + return inode; 1197 } 1198 EXPORT_SYMBOL(ilookup); 1199 1200 int insert_inode_locked(struct inode *inode) 1201 { ··· 1271 ino_t ino = inode->i_ino; 1272 struct hlist_head *head = inode_hashtable + hash(sb, ino); 1273 1274 while (1) { 1275 struct hlist_node *node; 1276 struct inode *old = NULL; 1277 + spin_lock(&inode_hash_lock); 1278 hlist_for_each_entry(old, node, head, i_hash) { 1279 if (old->i_ino != ino) 1280 continue; 1281 if (old->i_sb != sb) 1282 continue; 1283 + spin_lock(&old->i_lock); 1284 + if (old->i_state & (I_FREEING|I_WILL_FREE)) { 1285 + spin_unlock(&old->i_lock); 1286 continue; 1287 + } 1288 break; 1289 } 1290 if (likely(!node)) { 1291 + spin_lock(&inode->i_lock); 1292 + inode->i_state |= I_NEW; 1293 hlist_add_head(&inode->i_hash, head); 1294 + spin_unlock(&inode->i_lock); 1295 + spin_unlock(&inode_hash_lock); 1296 return 0; 1297 } 1298 __iget(old); 1299 + spin_unlock(&old->i_lock); 1300 + spin_unlock(&inode_hash_lock); 1301 wait_on_inode(old); 1302 if (unlikely(!inode_unhashed(old))) { 1303 iput(old); ··· 1308 struct super_block *sb = inode->i_sb; 1309 struct hlist_head *head = inode_hashtable + hash(sb, hashval); 1310 1311 while (1) { 1312 struct hlist_node *node; 1313 struct inode *old = NULL; 1314 1315 + spin_lock(&inode_hash_lock); 1316 hlist_for_each_entry(old, node, head, i_hash) { 1317 if (old->i_sb != sb) 1318 continue; 1319 if (!test(old, data)) 1320 continue; 1321 + spin_lock(&old->i_lock); 1322 + if (old->i_state & (I_FREEING|I_WILL_FREE)) { 
1323 + spin_unlock(&old->i_lock); 1324 continue; 1325 + } 1326 break; 1327 } 1328 if (likely(!node)) { 1329 + spin_lock(&inode->i_lock); 1330 + inode->i_state |= I_NEW; 1331 hlist_add_head(&inode->i_hash, head); 1332 + spin_unlock(&inode->i_lock); 1333 + spin_unlock(&inode_hash_lock); 1334 return 0; 1335 } 1336 __iget(old); 1337 + spin_unlock(&old->i_lock); 1338 + spin_unlock(&inode_hash_lock); 1339 wait_on_inode(old); 1340 if (unlikely(!inode_unhashed(old))) { 1341 iput(old); ··· 1375 const struct super_operations *op = inode->i_sb->s_op; 1376 int drop; 1377 1378 + WARN_ON(inode->i_state & I_NEW); 1379 + 1380 if (op && op->drop_inode) 1381 drop = op->drop_inode(inode); 1382 else 1383 drop = generic_drop_inode(inode); 1384 1385 + if (!drop && (sb->s_flags & MS_ACTIVE)) { 1386 + inode->i_state |= I_REFERENCED; 1387 + if (!(inode->i_state & (I_DIRTY|I_SYNC))) 1388 + inode_lru_list_add(inode); 1389 + spin_unlock(&inode->i_lock); 1390 + return; 1391 } 1392 1393 + if (!drop) { 1394 + inode->i_state |= I_WILL_FREE; 1395 + spin_unlock(&inode->i_lock); 1396 + write_inode_now(inode, 1); 1397 + spin_lock(&inode->i_lock); 1398 + WARN_ON(inode->i_state & I_NEW); 1399 + inode->i_state &= ~I_WILL_FREE; 1400 + } 1401 + 1402 inode->i_state |= I_FREEING; 1403 inode_lru_list_del(inode); 1404 + spin_unlock(&inode->i_lock); 1405 1406 evict(inode); 1407 } 1408 1409 /** ··· 1432 if (inode) { 1433 BUG_ON(inode->i_state & I_CLEAR); 1434 1435 + if (atomic_dec_and_lock(&inode->i_count, &inode->i_lock)) 1436 iput_final(inode); 1437 } 1438 } ··· 1611 * to recheck inode state. 1612 * 1613 * It doesn't matter if I_NEW is not set initially, a call to 1614 + * wake_up_bit(&inode->i_state, __I_NEW) after removing from the hash list 1615 + * will DTRT. 1616 */ 1617 static void __wait_on_freeing_inode(struct inode *inode) 1618 { ··· 1621 DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW); 1622 wq = bit_waitqueue(&inode->i_state, __I_NEW); 1623 prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE); 1624 + spin_unlock(&inode->i_lock); 1625 + spin_unlock(&inode_hash_lock); 1626 schedule(); 1627 finish_wait(wq, &wait.wait); 1628 + spin_lock(&inode_hash_lock); 1629 } 1630 1631 static __initdata unsigned long ihash_entries;
+7
fs/internal.h
··· 125 /* 126 * inode.c 127 */ 128 extern int get_nr_dirty_inodes(void); 129 extern void evict_inodes(struct super_block *); 130 extern int invalidate_inodes(struct super_block *, bool);
··· 125 /* 126 * inode.c 127 */ 128 + extern spinlock_t inode_sb_list_lock; 129 + 130 + /* 131 + * fs-writeback.c 132 + */ 133 + extern void inode_wb_list_del(struct inode *inode); 134 + 135 extern int get_nr_dirty_inodes(void); 136 extern void evict_inodes(struct super_block *); 137 extern int invalidate_inodes(struct super_block *, bool);
+1 -1
fs/logfs/inode.c
··· 293 return ret; 294 } 295 296 - /* called with inode_lock held */ 297 static int logfs_drop_inode(struct inode *inode) 298 { 299 struct logfs_super *super = logfs_super(inode->i_sb);
··· 293 return ret; 294 } 295 296 + /* called with inode->i_lock held */ 297 static int logfs_drop_inode(struct inode *inode) 298 { 299 struct logfs_super *super = logfs_super(inode->i_sb);
+18 -5
fs/namei.c
··· 992 return 0; 993 } 994 995 /* 996 * Skip to top of mountpoint pile in rcuwalk mode. We abort the rcu-walk if we 997 * meet a managed dentry and we're not walking to "..". True is returned to ··· 1006 static bool __follow_mount_rcu(struct nameidata *nd, struct path *path, 1007 struct inode **inode, bool reverse_transit) 1008 { 1009 - while (d_mountpoint(path->dentry)) { 1010 struct vfsmount *mounted; 1011 - if (unlikely(path->dentry->d_flags & DCACHE_MANAGE_TRANSIT) && 1012 - !reverse_transit && 1013 - path->dentry->d_op->d_manage(path->dentry, true) < 0) 1014 return false; 1015 mounted = __lookup_mnt(path->mnt, path->dentry, 1); 1016 if (!mounted) 1017 break; 1018 path->mnt = mounted; 1019 path->dentry = mounted->mnt_root; 1020 nd->seq = read_seqcount_begin(&path->dentry->d_seq); 1021 - *inode = path->dentry->d_inode; 1022 } 1023 1024 if (unlikely(path->dentry->d_flags & DCACHE_NEED_AUTOMOUNT))
··· 992 return 0; 993 } 994 995 + static inline bool managed_dentry_might_block(struct dentry *dentry) 996 + { 997 + return (dentry->d_flags & DCACHE_MANAGE_TRANSIT && 998 + dentry->d_op->d_manage(dentry, true) < 0); 999 + } 1000 + 1001 /* 1002 * Skip to top of mountpoint pile in rcuwalk mode. We abort the rcu-walk if we 1003 * meet a managed dentry and we're not walking to "..". True is returned to ··· 1000 static bool __follow_mount_rcu(struct nameidata *nd, struct path *path, 1001 struct inode **inode, bool reverse_transit) 1002 { 1003 + for (;;) { 1004 struct vfsmount *mounted; 1005 + /* 1006 + * Don't forget we might have a non-mountpoint managed dentry 1007 + * that wants to block transit. 1008 + */ 1009 + *inode = path->dentry->d_inode; 1010 + if (!reverse_transit && 1011 + unlikely(managed_dentry_might_block(path->dentry))) 1012 return false; 1013 + 1014 + if (!d_mountpoint(path->dentry)) 1015 + break; 1016 + 1017 mounted = __lookup_mnt(path->mnt, path->dentry, 1); 1018 if (!mounted) 1019 break; 1020 path->mnt = mounted; 1021 path->dentry = mounted->mnt_root; 1022 nd->seq = read_seqcount_begin(&path->dentry->d_seq); 1023 } 1024 1025 if (unlikely(path->dentry->d_flags & DCACHE_NEED_AUTOMOUNT))
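The hunk above means d_manage() is now consulted in rcu-walk mode even for managed dentries that are not (yet) mountpoints. A sketch of what such a method typically looks like — myfs_* is an invented name, and returning -ECHILD when rcu_walk is set is the documented way to say "this would block, retry in ref-walk":

#include <linux/dcache.h>
#include <linux/errno.h>

static int myfs_d_manage(struct dentry *dentry, bool rcu_walk)
{
	if (rcu_walk)
		return -ECHILD;		/* would block: force ref-walk */

	/* ref-walk mode: may sleep, e.g. wait for an in-progress mount */
	return 0;
}

static const struct dentry_operations myfs_dentry_operations = {
	.d_manage	= myfs_d_manage,
};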
+25 -17
fs/notify/inode_mark.c
··· 22 #include <linux/module.h> 23 #include <linux/mutex.h> 24 #include <linux/spinlock.h> 25 - #include <linux/writeback.h> /* for inode_lock */ 26 27 #include <asm/atomic.h> 28 29 #include <linux/fsnotify_backend.h> 30 #include "fsnotify.h" 31 32 /* 33 * Recalculate the mask of events relevant to a given inode locked. ··· 238 * fsnotify_unmount_inodes - an sb is unmounting. handle any watched inodes. 239 * @list: list of inodes being unmounted (sb->s_inodes) 240 * 241 - * Called with inode_lock held, protecting the unmounting super block's list 242 - * of inodes, and with iprune_mutex held, keeping shrink_icache_memory() at bay. 243 - * We temporarily drop inode_lock, however, and CAN block. 244 */ 245 void fsnotify_unmount_inodes(struct list_head *list) 246 { 247 struct inode *inode, *next_i, *need_iput = NULL; 248 249 - spin_lock(&inode_lock); 250 list_for_each_entry_safe(inode, next_i, list, i_sb_list) { 251 struct inode *need_iput_tmp; 252 ··· 254 * I_WILL_FREE, or I_NEW which is fine because by that point 255 * the inode cannot have any associated watches. 256 */ 257 - if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) 258 continue; 259 260 /* 261 * If i_count is zero, the inode cannot have any watches and ··· 266 * evict all inodes with zero i_count from icache which is 267 * unnecessarily violent and may in fact be illegal to do. 268 */ 269 - if (!atomic_read(&inode->i_count)) 270 continue; 271 272 need_iput_tmp = need_iput; 273 need_iput = NULL; ··· 279 __iget(inode); 280 else 281 need_iput_tmp = NULL; 282 283 /* In case the dropping of a reference would nuke next_i. */ 284 if ((&next_i->i_sb_list != list) && 285 - atomic_read(&next_i->i_count) && 286 - !(next_i->i_state & (I_FREEING | I_WILL_FREE))) { 287 - __iget(next_i); 288 - need_iput = next_i; 289 } 290 291 /* 292 - * We can safely drop inode_lock here because we hold 293 * references on both inode and next_i. Also no new inodes 294 - * will be added since the umount has begun. Finally, 295 - * iprune_mutex keeps shrink_icache_memory() away. 296 */ 297 - spin_unlock(&inode_lock); 298 299 if (need_iput_tmp) 300 iput(need_iput_tmp); ··· 309 310 iput(inode); 311 312 - spin_lock(&inode_lock); 313 } 314 - spin_unlock(&inode_lock); 315 }
··· 22 #include <linux/module.h> 23 #include <linux/mutex.h> 24 #include <linux/spinlock.h> 25 26 #include <asm/atomic.h> 27 28 #include <linux/fsnotify_backend.h> 29 #include "fsnotify.h" 30 + 31 + #include "../internal.h" 32 33 /* 34 * Recalculate the mask of events relevant to a given inode locked. ··· 237 * fsnotify_unmount_inodes - an sb is unmounting. handle any watched inodes. 238 * @list: list of inodes being unmounted (sb->s_inodes) 239 * 240 + * Called during unmount with no locks held, so needs to be safe against 241 + * concurrent modifiers. We temporarily drop inode_sb_list_lock and CAN block. 242 */ 243 void fsnotify_unmount_inodes(struct list_head *list) 244 { 245 struct inode *inode, *next_i, *need_iput = NULL; 246 247 + spin_lock(&inode_sb_list_lock); 248 list_for_each_entry_safe(inode, next_i, list, i_sb_list) { 249 struct inode *need_iput_tmp; 250 ··· 254 * I_WILL_FREE, or I_NEW which is fine because by that point 255 * the inode cannot have any associated watches. 256 */ 257 + spin_lock(&inode->i_lock); 258 + if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) { 259 + spin_unlock(&inode->i_lock); 260 continue; 261 + } 262 263 /* 264 * If i_count is zero, the inode cannot have any watches and ··· 263 * evict all inodes with zero i_count from icache which is 264 * unnecessarily violent and may in fact be illegal to do. 265 */ 266 + if (!atomic_read(&inode->i_count)) { 267 + spin_unlock(&inode->i_lock); 268 continue; 269 + } 270 271 need_iput_tmp = need_iput; 272 need_iput = NULL; ··· 274 __iget(inode); 275 else 276 need_iput_tmp = NULL; 277 + spin_unlock(&inode->i_lock); 278 279 /* In case the dropping of a reference would nuke next_i. */ 280 if ((&next_i->i_sb_list != list) && 281 + atomic_read(&next_i->i_count)) { 282 + spin_lock(&next_i->i_lock); 283 + if (!(next_i->i_state & (I_FREEING | I_WILL_FREE))) { 284 + __iget(next_i); 285 + need_iput = next_i; 286 + } 287 + spin_unlock(&next_i->i_lock); 288 } 289 290 /* 291 + * We can safely drop inode_sb_list_lock here because we hold 292 * references on both inode and next_i. Also no new inodes 293 + * will be added since the umount has begun. 294 */ 295 + spin_unlock(&inode_sb_list_lock); 296 297 if (need_iput_tmp) 298 iput(need_iput_tmp); ··· 301 302 iput(inode); 303 304 + spin_lock(&inode_sb_list_lock); 305 } 306 + spin_unlock(&inode_sb_list_lock); 307 }
-1
fs/notify/mark.c
··· 91 #include <linux/slab.h> 92 #include <linux/spinlock.h> 93 #include <linux/srcu.h> 94 - #include <linux/writeback.h> /* for inode_lock */ 95 96 #include <asm/atomic.h> 97
··· 91 #include <linux/slab.h> 92 #include <linux/spinlock.h> 93 #include <linux/srcu.h> 94 95 #include <asm/atomic.h> 96
-1
fs/notify/vfsmount_mark.c
··· 23 #include <linux/mount.h> 24 #include <linux/mutex.h> 25 #include <linux/spinlock.h> 26 - #include <linux/writeback.h> /* for inode_lock */ 27 28 #include <asm/atomic.h> 29
··· 23 #include <linux/mount.h> 24 #include <linux/mutex.h> 25 #include <linux/spinlock.h> 26 27 #include <asm/atomic.h> 28
+2 -2
fs/ntfs/inode.c
··· 54 * 55 * Return 1 if the attributes match and 0 if not. 56 * 57 - * NOTE: This function runs with the inode_lock spin lock held so it is not 58 * allowed to sleep. 59 */ 60 int ntfs_test_inode(struct inode *vi, ntfs_attr *na) ··· 98 * 99 * Return 0 on success and -errno on error. 100 * 101 - * NOTE: This function runs with the inode_lock spin lock held so it is not 102 * allowed to sleep. (Hence the GFP_ATOMIC allocation.) 103 */ 104 static int ntfs_init_locked_inode(struct inode *vi, ntfs_attr *na)
··· 54 * 55 * Return 1 if the attributes match and 0 if not. 56 * 57 + * NOTE: This function runs with the inode->i_lock spin lock held so it is not 58 * allowed to sleep. 59 */ 60 int ntfs_test_inode(struct inode *vi, ntfs_attr *na) ··· 98 * 99 * Return 0 on success and -errno on error. 100 * 101 + * NOTE: This function runs with the inode->i_lock spin lock held so it is not 102 * allowed to sleep. (Hence the GFP_ATOMIC allocation.) 103 */ 104 static int ntfs_init_locked_inode(struct inode *vi, ntfs_attr *na)
+23 -18
fs/quota/dquot.c
··· 76 #include <linux/buffer_head.h> 77 #include <linux/capability.h> 78 #include <linux/quotaops.h> 79 - #include <linux/writeback.h> /* for inode_lock, oddly enough.. */ 80 81 #include <asm/uaccess.h> 82 ··· 900 int reserved = 0; 901 #endif 902 903 - spin_lock(&inode_lock); 904 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { 905 - if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) 906 continue; 907 #ifdef CONFIG_QUOTA_DEBUG 908 if (unlikely(inode_get_rsv_space(inode) > 0)) 909 reserved = 1; 910 #endif 911 - if (!atomic_read(&inode->i_writecount)) 912 - continue; 913 - if (!dqinit_needed(inode, type)) 914 - continue; 915 - 916 __iget(inode); 917 - spin_unlock(&inode_lock); 918 919 iput(old_inode); 920 __dquot_initialize(inode, type); 921 - /* We hold a reference to 'inode' so it couldn't have been 922 - * removed from s_inodes list while we dropped the inode_lock. 923 - * We cannot iput the inode now as we can be holding the last 924 - * reference and we cannot iput it under inode_lock. So we 925 - * keep the reference and iput it later. */ 926 old_inode = inode; 927 - spin_lock(&inode_lock); 928 } 929 - spin_unlock(&inode_lock); 930 iput(old_inode); 931 932 #ifdef CONFIG_QUOTA_DEBUG ··· 1012 struct inode *inode; 1013 int reserved = 0; 1014 1015 - spin_lock(&inode_lock); 1016 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { 1017 /* 1018 * We have to scan also I_NEW inodes because they can already ··· 1026 remove_inode_dquot_ref(inode, type, tofree_head); 1027 } 1028 } 1029 - spin_unlock(&inode_lock); 1030 #ifdef CONFIG_QUOTA_DEBUG 1031 if (reserved) { 1032 printk(KERN_WARNING "VFS (%s): Writes happened after quota"
··· 76 #include <linux/buffer_head.h> 77 #include <linux/capability.h> 78 #include <linux/quotaops.h> 79 + #include "../internal.h" /* ugh */ 80 81 #include <asm/uaccess.h> 82 ··· 900 int reserved = 0; 901 #endif 902 903 + spin_lock(&inode_sb_list_lock); 904 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { 905 + spin_lock(&inode->i_lock); 906 + if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) || 907 + !atomic_read(&inode->i_writecount) || 908 + !dqinit_needed(inode, type)) { 909 + spin_unlock(&inode->i_lock); 910 continue; 911 + } 912 #ifdef CONFIG_QUOTA_DEBUG 913 if (unlikely(inode_get_rsv_space(inode) > 0)) 914 reserved = 1; 915 #endif 916 __iget(inode); 917 + spin_unlock(&inode->i_lock); 918 + spin_unlock(&inode_sb_list_lock); 919 920 iput(old_inode); 921 __dquot_initialize(inode, type); 922 + 923 + /* 924 + * We hold a reference to 'inode' so it couldn't have been 925 + * removed from s_inodes list while we dropped the 926 + * inode_sb_list_lock We cannot iput the inode now as we can be 927 + * holding the last reference and we cannot iput it under 928 + * inode_sb_list_lock. So we keep the reference and iput it 929 + * later. 930 + */ 931 old_inode = inode; 932 + spin_lock(&inode_sb_list_lock); 933 } 934 + spin_unlock(&inode_sb_list_lock); 935 iput(old_inode); 936 937 #ifdef CONFIG_QUOTA_DEBUG ··· 1007 struct inode *inode; 1008 int reserved = 0; 1009 1010 + spin_lock(&inode_sb_list_lock); 1011 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { 1012 /* 1013 * We have to scan also I_NEW inodes because they can already ··· 1021 remove_inode_dquot_ref(inode, type, tofree_head); 1022 } 1023 } 1024 + spin_unlock(&inode_sb_list_lock); 1025 #ifdef CONFIG_QUOTA_DEBUG 1026 if (reserved) { 1027 printk(KERN_WARNING "VFS (%s): Writes happened after quota"
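The add_dquot_ref() change above follows the general s_inodes walking pattern this series establishes: validate i_state and grab a reference under inode->i_lock, drop both locks before doing anything that can sleep, and keep the previous inode's reference across the relock so the list cursor stays valid. A generic, hedged sketch of that pattern; it assumes the file lives under fs/ so the inode_sb_list_lock declaration in fs/internal.h is visible, and example_walk_sb_inodes() is an invented name:

#include <linux/fs.h>
#include <linux/spinlock.h>
#include "internal.h"

static void example_walk_sb_inodes(struct super_block *sb,
				   void (*fn)(struct inode *))
{
	struct inode *inode, *old_inode = NULL;

	spin_lock(&inode_sb_list_lock);
	list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
		spin_lock(&inode->i_lock);
		if (inode->i_state & (I_FREEING | I_WILL_FREE | I_NEW)) {
			spin_unlock(&inode->i_lock);
			continue;
		}
		__iget(inode);
		spin_unlock(&inode->i_lock);
		spin_unlock(&inode_sb_list_lock);

		fn(inode);			/* may sleep */

		/*
		 * Keep this inode's reference until the next iteration so
		 * it stays on s_inodes and the cursor remains valid; only
		 * drop the previous one outside inode_sb_list_lock.
		 */
		iput(old_inode);
		old_inode = inode;
		spin_lock(&inode_sb_list_lock);
	}
	spin_unlock(&inode_sb_list_lock);
	iput(old_inode);
}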
+1 -1
include/linux/fs.h
··· 1636 }; 1637 1638 /* 1639 - * Inode state bits. Protected by inode_lock. 1640 * 1641 * Three bits determine the dirty state of the inode, I_DIRTY_SYNC, 1642 * I_DIRTY_DATASYNC and I_DIRTY_PAGES.
··· 1636 }; 1637 1638 /* 1639 + * Inode state bits. Protected by inode->i_lock. 1640 * 1641 * Three bits determine the dirty state of the inode, I_DIRTY_SYNC, 1642 * I_DIRTY_DATASYNC and I_DIRTY_PAGES.
+1 -1
include/linux/quotaops.h
··· 277 /* 278 * Mark inode fully dirty. Since we are allocating blocks, inode 279 * would become fully dirty soon anyway and it reportedly 280 - * reduces inode_lock contention. 281 */ 282 mark_inode_dirty(inode); 283 }
··· 277 /* 278 * Mark inode fully dirty. Since we are allocating blocks, inode 279 * would become fully dirty soon anyway and it reportedly 280 + * reduces lock contention. 281 */ 282 mark_inode_dirty(inode); 283 }
+1 -1
include/linux/writeback.h
··· 9 10 struct backing_dev_info; 11 12 - extern spinlock_t inode_lock; 13 14 /* 15 * fs/fs-writeback.c
··· 9 10 struct backing_dev_info; 11 12 + extern spinlock_t inode_wb_list_lock; 13 14 /* 15 * fs/fs-writeback.c
+4 -4
mm/backing-dev.c
··· 67 struct inode *inode; 68 69 nr_wb = nr_dirty = nr_io = nr_more_io = 0; 70 - spin_lock(&inode_lock); 71 list_for_each_entry(inode, &wb->b_dirty, i_wb_list) 72 nr_dirty++; 73 list_for_each_entry(inode, &wb->b_io, i_wb_list) 74 nr_io++; 75 list_for_each_entry(inode, &wb->b_more_io, i_wb_list) 76 nr_more_io++; 77 - spin_unlock(&inode_lock); 78 79 global_dirty_limits(&background_thresh, &dirty_thresh); 80 bdi_thresh = bdi_dirty_limit(bdi, dirty_thresh); ··· 676 if (bdi_has_dirty_io(bdi)) { 677 struct bdi_writeback *dst = &default_backing_dev_info.wb; 678 679 - spin_lock(&inode_lock); 680 list_splice(&bdi->wb.b_dirty, &dst->b_dirty); 681 list_splice(&bdi->wb.b_io, &dst->b_io); 682 list_splice(&bdi->wb.b_more_io, &dst->b_more_io); 683 - spin_unlock(&inode_lock); 684 } 685 686 bdi_unregister(bdi);
··· 67 struct inode *inode; 68 69 nr_wb = nr_dirty = nr_io = nr_more_io = 0; 70 + spin_lock(&inode_wb_list_lock); 71 list_for_each_entry(inode, &wb->b_dirty, i_wb_list) 72 nr_dirty++; 73 list_for_each_entry(inode, &wb->b_io, i_wb_list) 74 nr_io++; 75 list_for_each_entry(inode, &wb->b_more_io, i_wb_list) 76 nr_more_io++; 77 + spin_unlock(&inode_wb_list_lock); 78 79 global_dirty_limits(&background_thresh, &dirty_thresh); 80 bdi_thresh = bdi_dirty_limit(bdi, dirty_thresh); ··· 676 if (bdi_has_dirty_io(bdi)) { 677 struct bdi_writeback *dst = &default_backing_dev_info.wb; 678 679 + spin_lock(&inode_wb_list_lock); 680 list_splice(&bdi->wb.b_dirty, &dst->b_dirty); 681 list_splice(&bdi->wb.b_io, &dst->b_io); 682 list_splice(&bdi->wb.b_more_io, &dst->b_more_io); 683 + spin_unlock(&inode_wb_list_lock); 684 } 685 686 bdi_unregister(bdi);
+6 -4
mm/filemap.c
··· 80 * ->i_mutex 81 * ->i_alloc_sem (various) 82 * 83 - * ->inode_lock 84 - * ->sb_lock (fs/fs-writeback.c) 85 * ->mapping->tree_lock (__sync_single_inode) 86 * 87 * ->i_mmap_lock ··· 98 * ->zone.lru_lock (check_pte_range->isolate_lru_page) 99 * ->private_lock (page_remove_rmap->set_page_dirty) 100 * ->tree_lock (page_remove_rmap->set_page_dirty) 101 - * ->inode_lock (page_remove_rmap->set_page_dirty) 102 - * ->inode_lock (zap_pte_range->set_page_dirty) 103 * ->private_lock (zap_pte_range->__set_page_dirty_buffers) 104 * 105 * (code doesn't rely on that order, so you could switch it around)
··· 80 * ->i_mutex 81 * ->i_alloc_sem (various) 82 * 83 + * inode_wb_list_lock 84 + * sb_lock (fs/fs-writeback.c) 85 * ->mapping->tree_lock (__sync_single_inode) 86 * 87 * ->i_mmap_lock ··· 98 * ->zone.lru_lock (check_pte_range->isolate_lru_page) 99 * ->private_lock (page_remove_rmap->set_page_dirty) 100 * ->tree_lock (page_remove_rmap->set_page_dirty) 101 + * inode_wb_list_lock (page_remove_rmap->set_page_dirty) 102 + * ->inode->i_lock (page_remove_rmap->set_page_dirty) 103 + * inode_wb_list_lock (zap_pte_range->set_page_dirty) 104 + * ->inode->i_lock (zap_pte_range->set_page_dirty) 105 * ->private_lock (zap_pte_range->__set_page_dirty_buffers) 106 * 107 * (code doesn't rely on that order, so you could switch it around)
+3 -2
mm/rmap.c
··· 31 * swap_lock (in swap_duplicate, swap_info_get) 32 * mmlist_lock (in mmput, drain_mmlist and others) 33 * mapping->private_lock (in __set_page_dirty_buffers) 34 - * inode_lock (in set_page_dirty's __mark_inode_dirty) 35 * sb_lock (within inode_lock in fs/fs-writeback.c) 36 * mapping->tree_lock (widely used, in set_page_dirty, 37 * in arch-dependent flush_dcache_mmap_lock, 38 - * within inode_lock in __sync_single_inode) 39 * 40 * (code doesn't rely on that order so it could be switched around) 41 * ->tasklist_lock
··· 31 * swap_lock (in swap_duplicate, swap_info_get) 32 * mmlist_lock (in mmput, drain_mmlist and others) 33 * mapping->private_lock (in __set_page_dirty_buffers) 34 + * inode->i_lock (in set_page_dirty's __mark_inode_dirty) 35 + * inode_wb_list_lock (in set_page_dirty's __mark_inode_dirty) 36 * sb_lock (within inode_wb_list_lock in fs/fs-writeback.c) 37 * mapping->tree_lock (widely used, in set_page_dirty, 38 * in arch-dependent flush_dcache_mmap_lock, 39 + * within inode_wb_list_lock in __sync_single_inode) 40 * 41 * (code doesn't rely on that order so it could be switched around) 42 * ->tasklist_lock