Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

fs: fs_struct use seqlock

Add a seqcount to fs_struct (seqlock-style, with writers still serialised by
fs->lock) so that readers can take an atomic copy of the complete cwd and root
paths. Use this in the RCU lookup path to avoid taking a thread-shared spinlock.

Multi-threaded apps may now perform path lookups with scalability matching
that of multi-process apps. Operations such as stat(2) become very scalable
for multi-threaded workloads.

Signed-off-by: Nick Piggin <npiggin@kernel.dk>

+34 -13
+10
fs/fs_struct.c
··· 14 14 struct path old_root; 15 15 16 16 spin_lock(&fs->lock); 17 + write_seqcount_begin(&fs->seq); 17 18 old_root = fs->root; 18 19 fs->root = *path; 19 20 path_get(path); 21 + write_seqcount_end(&fs->seq); 20 22 spin_unlock(&fs->lock); 21 23 if (old_root.dentry) 22 24 path_put(&old_root); ··· 33 31 struct path old_pwd; 34 32 35 33 spin_lock(&fs->lock); 34 + write_seqcount_begin(&fs->seq); 36 35 old_pwd = fs->pwd; 37 36 fs->pwd = *path; 38 37 path_get(path); 38 + write_seqcount_end(&fs->seq); 39 39 spin_unlock(&fs->lock); 40 40 41 41 if (old_pwd.dentry) ··· 56 52 fs = p->fs; 57 53 if (fs) { 58 54 spin_lock(&fs->lock); 55 + write_seqcount_begin(&fs->seq); 59 56 if (fs->root.dentry == old_root->dentry 60 57 && fs->root.mnt == old_root->mnt) { 61 58 path_get(new_root); ··· 69 64 fs->pwd = *new_root; 70 65 count++; 71 66 } 67 + write_seqcount_end(&fs->seq); 72 68 spin_unlock(&fs->lock); 73 69 } 74 70 task_unlock(p); ··· 94 88 int kill; 95 89 task_lock(tsk); 96 90 spin_lock(&fs->lock); 91 + write_seqcount_begin(&fs->seq); 97 92 tsk->fs = NULL; 98 93 kill = !--fs->users; 94 + write_seqcount_end(&fs->seq); 99 95 spin_unlock(&fs->lock); 100 96 task_unlock(tsk); 101 97 if (kill) ··· 113 105 fs->users = 1; 114 106 fs->in_exec = 0; 115 107 spin_lock_init(&fs->lock); 108 + seqcount_init(&fs->seq); 116 109 fs->umask = old->umask; 117 110 get_fs_root_and_pwd(old, &fs->root, &fs->pwd); 118 111 } ··· 153 144 struct fs_struct init_fs = { 154 145 .users = 1, 155 146 .lock = __SPIN_LOCK_UNLOCKED(init_fs.lock), 147 + .seq = SEQCNT_ZERO, 156 148 .umask = 0022, 157 149 }; 158 150
+21 -13
fs/namei.c
··· 684 684 { 685 685 if (!nd->root.mnt) { 686 686 struct fs_struct *fs = current->fs; 687 - spin_lock(&fs->lock); 688 - nd->root = fs->root; 689 - spin_unlock(&fs->lock); 687 + unsigned seq; 688 + 689 + do { 690 + seq = read_seqcount_begin(&fs->seq); 691 + nd->root = fs->root; 692 + } while (read_seqcount_retry(&fs->seq, seq)); 690 693 } 691 694 } 692 695 ··· 1372 1369 1373 1370 if (*name=='/') { 1374 1371 struct fs_struct *fs = current->fs; 1372 + unsigned seq; 1375 1373 1376 1374 br_read_lock(vfsmount_lock); 1377 1375 rcu_read_lock(); 1378 1376 1379 - spin_lock(&fs->lock); 1380 - nd->root = fs->root; 1381 - nd->path = nd->root; 1382 - nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq); 1383 - spin_unlock(&fs->lock); 1377 + do { 1378 + seq = read_seqcount_begin(&fs->seq); 1379 + nd->root = fs->root; 1380 + nd->path = nd->root; 1381 + nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); 1382 + } while (read_seqcount_retry(&fs->seq, seq)); 1384 1383 1385 1384 } else if (dfd == AT_FDCWD) { 1386 1385 struct fs_struct *fs = current->fs; 1386 + unsigned seq; 1387 1387 1388 1388 br_read_lock(vfsmount_lock); 1389 1389 rcu_read_lock(); 1390 1390 1391 - spin_lock(&fs->lock); 1392 - nd->path = fs->pwd; 1393 - nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq); 1394 - spin_unlock(&fs->lock); 1391 + do { 1392 + seq = read_seqcount_begin(&fs->seq); 1393 + nd->path = fs->pwd; 1394 + nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); 1395 + } while (read_seqcount_retry(&fs->seq, seq)); 1396 + 1395 1397 } else { 1396 1398 struct dentry *dentry; 1397 1399 ··· 1419 1411 if (fput_needed) 1420 1412 nd->file = file; 1421 1413 1422 - nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq); 1414 + nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); 1423 1415 br_read_lock(vfsmount_lock); 1424 1416 rcu_read_lock(); 1425 1417 }
+3
include/linux/fs_struct.h
··· 2 2 #define _LINUX_FS_STRUCT_H 3 3 4 4 #include <linux/path.h> 5 + #include <linux/spinlock.h> 6 + #include <linux/seqlock.h> 5 7 6 8 struct fs_struct { 7 9 int users; 8 10 spinlock_t lock; 11 + seqcount_t seq; 9 12 int umask; 10 13 int in_exec; 11 14 struct path root, pwd;