Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

SUNRPC/auth: async tasks mustn't block waiting for memory

When memory is short, new worker threads cannot be created, and we depend
on the single rpciod thread that is guaranteed to exist to handle
everything. That thread must therefore not block waiting for memory.

Mempools are a particular problem, as memory can only be released back
to the mempool by a running async RPC task. If all available workqueue
threads are waiting on the mempool, no thread is available to return
anything.

lookup_cred() can block on a mempool or kmalloc, and this can cause
deadlocks. So add a new RPCAUTH_LOOKUP_ASYNC flag for async lookups and
don't block on memory when it is set. If the -ENOMEM gets back to
call_refreshresult(), wait a short while and try again. HZ>>4 is chosen
as it is used elsewhere for -ENOMEM retries.

Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>

authored by

NeilBrown and committed by
Trond Myklebust
a41b05ed c487216b

+21 -3
+1
include/linux/sunrpc/auth.h
··· 99 99 100 100 /* Flags for rpcauth_lookupcred() */ 101 101 #define RPCAUTH_LOOKUP_NEW 0x01 /* Accept an uninitialised cred */ 102 + #define RPCAUTH_LOOKUP_ASYNC 0x02 /* Don't block waiting for memory */ 102 103 103 104 /* 104 105 * Client authentication ops
+5 -1
net/sunrpc/auth.c
··· 615 615 }; 616 616 struct rpc_cred *ret; 617 617 618 + if (RPC_IS_ASYNC(task)) 619 + lookupflags |= RPCAUTH_LOOKUP_ASYNC; 618 620 ret = auth->au_ops->lookup_cred(auth, &acred, lookupflags); 619 621 put_cred(acred.cred); 620 622 return ret; ··· 633 631 634 632 if (!acred.principal) 635 633 return NULL; 634 + if (RPC_IS_ASYNC(task)) 635 + lookupflags |= RPCAUTH_LOOKUP_ASYNC; 636 636 return auth->au_ops->lookup_cred(auth, &acred, lookupflags); 637 637 } 638 638 ··· 658 654 }; 659 655 660 656 if (flags & RPC_TASK_ASYNC) 661 - lookupflags |= RPCAUTH_LOOKUP_NEW; 657 + lookupflags |= RPCAUTH_LOOKUP_NEW | RPCAUTH_LOOKUP_ASYNC; 662 658 if (task->tk_op_cred) 663 659 /* Task must use exactly this rpc_cred */ 664 660 new = get_rpccred(task->tk_op_cred);
+5 -1
net/sunrpc/auth_gss/auth_gss.c
··· 1341 1341 static struct rpc_cred * 1342 1342 gss_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) 1343 1343 { 1344 - return rpcauth_lookup_credcache(auth, acred, flags, GFP_KERNEL); 1344 + gfp_t gfp = GFP_KERNEL; 1345 + 1346 + if (flags & RPCAUTH_LOOKUP_ASYNC) 1347 + gfp = GFP_NOWAIT | __GFP_NOWARN; 1348 + return rpcauth_lookup_credcache(auth, acred, flags, gfp); 1345 1349 } 1346 1350 1347 1351 static struct rpc_cred *
+7 -1
net/sunrpc/auth_unix.c
··· 43 43 static struct rpc_cred * 44 44 unx_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) 45 45 { 46 - struct rpc_cred *ret = mempool_alloc(unix_pool, GFP_KERNEL); 46 + gfp_t gfp = GFP_KERNEL; 47 + struct rpc_cred *ret; 47 48 49 + if (flags & RPCAUTH_LOOKUP_ASYNC) 50 + gfp = GFP_NOWAIT | __GFP_NOWARN; 51 + ret = mempool_alloc(unix_pool, gfp); 52 + if (!ret) 53 + return ERR_PTR(-ENOMEM); 48 54 rpcauth_init_cred(ret, acred, auth, &unix_credops); 49 55 ret->cr_flags = 1UL << RPCAUTH_CRED_UPTODATE; 50 56 return ret;
+3
net/sunrpc/clnt.c
··· 1745 1745 task->tk_cred_retry--; 1746 1746 trace_rpc_retry_refresh_status(task); 1747 1747 return; 1748 + case -ENOMEM: 1749 + rpc_delay(task, HZ >> 4); 1750 + return; 1748 1751 } 1749 1752 trace_rpc_refresh_status(task); 1750 1753 rpc_call_rpcerror(task, status);