Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

NFS: Convert lookups of the open context to RCU

Reduce contention on inode->i_lock by using RCU, rather than the
spinlock, when looking up the NFS open context.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>

+57 -39
+6 -5
fs/nfs/delegation.c
··· 136 136 int err; 137 137 138 138 again: 139 - spin_lock(&inode->i_lock); 140 - list_for_each_entry(ctx, &nfsi->open_files, list) { 139 + rcu_read_lock(); 140 + list_for_each_entry_rcu(ctx, &nfsi->open_files, list) { 141 141 state = ctx->state; 142 142 if (state == NULL) 143 143 continue; ··· 147 147 continue; 148 148 if (!nfs4_stateid_match(&state->stateid, stateid)) 149 149 continue; 150 - get_nfs_open_context(ctx); 151 - spin_unlock(&inode->i_lock); 150 + if (!get_nfs_open_context(ctx)) 151 + continue; 152 + rcu_read_unlock(); 152 153 sp = state->owner; 153 154 /* Block nfs4_proc_unlck */ 154 155 mutex_lock(&sp->so_delegreturn_mutex); ··· 165 164 return err; 166 165 goto again; 167 166 } 168 - spin_unlock(&inode->i_lock); 167 + rcu_read_unlock(); 169 168 return 0; 170 169 } 171 170
+16 -21
fs/nfs/inode.c
··· 977 977 978 978 struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx) 979 979 { 980 - if (ctx != NULL) 981 - refcount_inc(&ctx->lock_context.count); 982 - return ctx; 980 + if (ctx != NULL && refcount_inc_not_zero(&ctx->lock_context.count)) 981 + return ctx; 982 + return NULL; 983 983 } 984 984 EXPORT_SYMBOL_GPL(get_nfs_open_context); 985 985 ··· 988 988 struct inode *inode = d_inode(ctx->dentry); 989 989 struct super_block *sb = ctx->dentry->d_sb; 990 990 991 - if (!list_empty(&ctx->list)) { 992 - if (!refcount_dec_and_lock(&ctx->lock_context.count, &inode->i_lock)) 993 - return; 994 - list_del(&ctx->list); 995 - spin_unlock(&inode->i_lock); 996 - } else if (!refcount_dec_and_test(&ctx->lock_context.count)) 991 + if (!refcount_dec_and_test(&ctx->lock_context.count)) 997 992 return; 993 + if (!list_empty(&ctx->list)) { 994 + spin_lock(&inode->i_lock); 995 + list_del_rcu(&ctx->list); 996 + spin_unlock(&inode->i_lock); 997 + } 998 998 if (inode != NULL) 999 999 NFS_PROTO(inode)->close_context(ctx, is_sync); 1000 1000 if (ctx->cred != NULL) ··· 1002 1002 dput(ctx->dentry); 1003 1003 nfs_sb_deactive(sb); 1004 1004 kfree(ctx->mdsthreshold); 1005 - kfree(ctx); 1005 + kfree_rcu(ctx, rcu_head); 1006 1006 } 1007 1007 1008 1008 void put_nfs_open_context(struct nfs_open_context *ctx) ··· 1026 1026 struct nfs_inode *nfsi = NFS_I(inode); 1027 1027 1028 1028 spin_lock(&inode->i_lock); 1029 - if (ctx->mode & FMODE_WRITE) 1030 - list_add(&ctx->list, &nfsi->open_files); 1031 - else 1032 - list_add_tail(&ctx->list, &nfsi->open_files); 1029 + list_add_tail_rcu(&ctx->list, &nfsi->open_files); 1033 1030 spin_unlock(&inode->i_lock); 1034 1031 } 1035 1032 EXPORT_SYMBOL_GPL(nfs_inode_attach_open_context); ··· 1047 1050 struct nfs_inode *nfsi = NFS_I(inode); 1048 1051 struct nfs_open_context *pos, *ctx = NULL; 1049 1052 1050 - spin_lock(&inode->i_lock); 1051 - list_for_each_entry(pos, &nfsi->open_files, list) { 1053 + rcu_read_lock(); 1054 + 
list_for_each_entry_rcu(pos, &nfsi->open_files, list) { 1052 1055 if (cred != NULL && pos->cred != cred) 1053 1056 continue; 1054 1057 if ((pos->mode & (FMODE_READ|FMODE_WRITE)) != mode) 1055 1058 continue; 1056 1059 ctx = get_nfs_open_context(pos); 1057 - break; 1060 + if (ctx) 1061 + break; 1058 1062 } 1059 - spin_unlock(&inode->i_lock); 1063 + rcu_read_unlock(); 1060 1064 return ctx; 1061 1065 } 1062 1066 ··· 1075 1077 if (ctx->error < 0) 1076 1078 invalidate_inode_pages2(inode->i_mapping); 1077 1079 filp->private_data = NULL; 1078 - spin_lock(&inode->i_lock); 1079 - list_move_tail(&ctx->list, &NFS_I(inode)->open_files); 1080 - spin_unlock(&inode->i_lock); 1081 1080 put_nfs_open_context_sync(ctx); 1082 1081 } 1083 1082 }
+24 -6
fs/nfs/nfs4proc.c
··· 1933 1933 return ret; 1934 1934 } 1935 1935 1936 - static struct nfs_open_context *nfs4_state_find_open_context(struct nfs4_state *state) 1936 + static struct nfs_open_context * 1937 + nfs4_state_find_open_context_mode(struct nfs4_state *state, fmode_t mode) 1937 1938 { 1938 1939 struct nfs_inode *nfsi = NFS_I(state->inode); 1939 1940 struct nfs_open_context *ctx; 1940 1941 1941 - spin_lock(&state->inode->i_lock); 1942 - list_for_each_entry(ctx, &nfsi->open_files, list) { 1942 + rcu_read_lock(); 1943 + list_for_each_entry_rcu(ctx, &nfsi->open_files, list) { 1943 1944 if (ctx->state != state) 1944 1945 continue; 1945 - get_nfs_open_context(ctx); 1946 - spin_unlock(&state->inode->i_lock); 1946 + if ((ctx->mode & mode) != mode) 1947 + continue; 1948 + if (!get_nfs_open_context(ctx)) 1949 + continue; 1950 + rcu_read_unlock(); 1947 1951 return ctx; 1948 1952 } 1949 - spin_unlock(&state->inode->i_lock); 1953 + rcu_read_unlock(); 1950 1954 return ERR_PTR(-ENOENT); 1955 + } 1956 + 1957 + static struct nfs_open_context * 1958 + nfs4_state_find_open_context(struct nfs4_state *state) 1959 + { 1960 + struct nfs_open_context *ctx; 1961 + 1962 + ctx = nfs4_state_find_open_context_mode(state, FMODE_READ|FMODE_WRITE); 1963 + if (!IS_ERR(ctx)) 1964 + return ctx; 1965 + ctx = nfs4_state_find_open_context_mode(state, FMODE_WRITE); 1966 + if (!IS_ERR(ctx)) 1967 + return ctx; 1968 + return nfs4_state_find_open_context_mode(state, FMODE_READ); 1951 1969 } 1952 1970 1953 1971 static struct nfs4_opendata *nfs4_open_recoverdata_alloc(struct nfs_open_context *ctx,
+6 -6
fs/nfs/nfs4state.c
··· 1437 1437 struct nfs4_state *state; 1438 1438 bool found = false; 1439 1439 1440 - spin_lock(&inode->i_lock); 1441 - list_for_each_entry(ctx, &nfsi->open_files, list) { 1440 + rcu_read_lock(); 1441 + list_for_each_entry_rcu(ctx, &nfsi->open_files, list) { 1442 1442 state = ctx->state; 1443 1443 if (state == NULL) 1444 1444 continue; ··· 1456 1456 nfs4_state_mark_reclaim_nograce(clp, state)) 1457 1457 found = true; 1458 1458 } 1459 - spin_unlock(&inode->i_lock); 1459 + rcu_read_unlock(); 1460 1460 1461 1461 nfs_inode_find_delegation_state_and_recover(inode, stateid); 1462 1462 if (found) ··· 1469 1469 struct nfs_inode *nfsi = NFS_I(inode); 1470 1470 struct nfs_open_context *ctx; 1471 1471 1472 - spin_lock(&inode->i_lock); 1473 - list_for_each_entry(ctx, &nfsi->open_files, list) { 1472 + rcu_read_lock(); 1473 + list_for_each_entry_rcu(ctx, &nfsi->open_files, list) { 1474 1474 if (ctx->state != state) 1475 1475 continue; 1476 1476 set_bit(NFS_CONTEXT_BAD, &ctx->flags); 1477 1477 } 1478 - spin_unlock(&inode->i_lock); 1478 + rcu_read_unlock(); 1479 1479 } 1480 1480 1481 1481 static void nfs4_state_mark_recovery_failed(struct nfs4_state *state, int error)
+4 -1
fs/nfs/pnfs.c
··· 1339 1339 if (!nfs_have_layout(ino)) 1340 1340 return false; 1341 1341 retry: 1342 + rcu_read_lock(); 1342 1343 spin_lock(&ino->i_lock); 1343 1344 lo = nfsi->layout; 1344 1345 if (!lo || !pnfs_layout_is_valid(lo) || ··· 1350 1349 pnfs_get_layout_hdr(lo); 1351 1350 if (test_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags)) { 1352 1351 spin_unlock(&ino->i_lock); 1352 + rcu_read_unlock(); 1353 1353 wait_on_bit(&lo->plh_flags, NFS_LAYOUT_RETURN, 1354 1354 TASK_UNINTERRUPTIBLE); 1355 1355 pnfs_put_layout_hdr(lo); ··· 1364 1362 skip_read = true; 1365 1363 } 1366 1364 1367 - list_for_each_entry(ctx, &nfsi->open_files, list) { 1365 + list_for_each_entry_rcu(ctx, &nfsi->open_files, list) { 1368 1366 state = ctx->state; 1369 1367 if (state == NULL) 1370 1368 continue; ··· 1412 1410 1413 1411 out_noroc: 1414 1412 spin_unlock(&ino->i_lock); 1413 + rcu_read_unlock(); 1415 1414 pnfs_layoutcommit_inode(ino, true); 1416 1415 if (roc) { 1417 1416 struct pnfs_layoutdriver_type *ld = NFS_SERVER(ino)->pnfs_curr_ld;
+1
include/linux/nfs_fs.h
··· 83 83 84 84 struct list_head list; 85 85 struct nfs4_threshold *mdsthreshold; 86 + struct rcu_head rcu_head; 86 87 }; 87 88 88 89 struct nfs_open_dir_context {