Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

slab: allow NUMA restricted allocations to use percpu sheaves

Currently, allocations asking for a specific node — either explicitly or via
a mempolicy in strict_numa mode — bypass percpu sheaves. Since sheaves
contain mostly local objects, we can try allocating from them if the
local node happens to be the requested node or is allowed by the mempolicy.
If we find that the object from the percpu sheaves is not from the expected
node, we skip the sheaves — this should be rare.

Reviewed-by: Harry Yoo <harry.yoo@oracle.com>
Reviewed-by: Suren Baghdasaryan <surenb@google.com>
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>

+46 -7
mm/slub.c
@@ -4882,12 +4882,37 @@
 }
 
 static __fastpath_inline
-void *alloc_from_pcs(struct kmem_cache *s, gfp_t gfp)
+void *alloc_from_pcs(struct kmem_cache *s, gfp_t gfp, int node)
 {
 	struct slub_percpu_sheaves *pcs;
+	bool node_requested;
 	void *object;
 
 #ifdef CONFIG_NUMA
-	if (static_branch_unlikely(&strict_numa)) {
-		if (current->mempolicy)
-			return NULL;
+	if (static_branch_unlikely(&strict_numa) &&
+	    node == NUMA_NO_NODE) {
+
+		struct mempolicy *mpol = current->mempolicy;
+
+		if (mpol) {
+			/*
+			 * Special BIND rule support. If the local node
+			 * is in permitted set then do not redirect
+			 * to a particular node.
+			 * Otherwise we apply the memory policy to get
+			 * the node we need to allocate on.
+			 */
+			if (mpol->mode != MPOL_BIND ||
+			    !node_isset(numa_mem_id(), mpol->nodes))
+
+				node = mempolicy_slab_node();
+		}
 	}
 #endif
+
+	node_requested = IS_ENABLED(CONFIG_NUMA) && node != NUMA_NO_NODE;
+
+	/*
+	 * We assume the percpu sheaves contain only local objects although it's
+	 * not completely guaranteed, so we verify later.
+	 */
+	if (unlikely(node_requested && node != numa_mem_id()))
+		return NULL;
 
 	if (!local_trylock(&s->cpu_sheaves->lock))
 		return NULL;
@@ -4905,7 +4930,21 @@
 		return NULL;
 	}
 
-	object = pcs->main->objects[--pcs->main->size];
+	object = pcs->main->objects[pcs->main->size - 1];
+
+	if (unlikely(node_requested)) {
+		/*
+		 * Verify that the object was from the node we want. This could
+		 * be false because of cpu migration during an unlocked part of
+		 * the current allocation or previous freeing process.
+		 */
+		if (folio_nid(virt_to_folio(object)) != node) {
+			local_unlock(&s->cpu_sheaves->lock);
+			return NULL;
+		}
+	}
+
+	pcs->main->size--;
 
 	local_unlock(&s->cpu_sheaves->lock);
 
@@ -5005,8 +5044,8 @@
 	if (unlikely(object))
 		goto out;
 
-	if (s->cpu_sheaves && node == NUMA_NO_NODE)
-		object = alloc_from_pcs(s, gfpflags);
+	if (s->cpu_sheaves)
+		object = alloc_from_pcs(s, gfpflags, node);
 
 	if (!object)
 		object = __slab_alloc_node(s, gfpflags, node, addr, orig_size);