Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

mm/mempolicy: use unified 'nodes' for bind/interleave/prefer policies

Current structure 'mempolicy' uses a union to store the node info for
bind/interleave/prefer policies.

union {
short preferred_node; /* preferred */
nodemask_t nodes; /* interleave/bind */
/* undefined for default */
} v;

Since a preferred node can also be represented by a nodemask_t with only one
bit set, unify these policies by using a single nodemask_t 'nodes', which
removes the union, simplifies the code, and makes it easier to support node
info for future new policies.

Link: https://lore.kernel.org/r/20200630212517.308045-7-ben.widawsky@intel.com
Link: https://lkml.kernel.org/r/1623399825-75651-1-git-send-email-feng.tang@intel.com
Co-developed-by: Feng Tang <feng.tang@intel.com>
Signed-off-by: Ben Widawsky <ben.widawsky@intel.com>
Signed-off-by: Feng Tang <feng.tang@intel.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

authored by

Ben Widawsky and committed by
Linus Torvalds
269fbe72 e5947d23

+46 -57
+2 -5
include/linux/mempolicy.h
··· 46 46 atomic_t refcnt; 47 47 unsigned short mode; /* See MPOL_* above */ 48 48 unsigned short flags; /* See set_mempolicy() MPOL_F_* above */ 49 - union { 50 - short preferred_node; /* preferred */ 51 - nodemask_t nodes; /* interleave/bind */ 52 - /* undefined for default */ 53 - } v; 49 + nodemask_t nodes; /* interleave/bind/perfer */ 50 + 54 51 union { 55 52 nodemask_t cpuset_mems_allowed; /* relative to these nodes */ 56 53 nodemask_t user_nodemask; /* nodemask passed by user */
+44 -52
mm/mempolicy.c
··· 193 193 { 194 194 if (nodes_empty(*nodes)) 195 195 return -EINVAL; 196 - pol->v.nodes = *nodes; 196 + pol->nodes = *nodes; 197 197 return 0; 198 198 } 199 199 ··· 201 201 { 202 202 if (nodes_empty(*nodes)) 203 203 return -EINVAL; 204 - pol->v.preferred_node = first_node(*nodes); 204 + 205 + nodes_clear(pol->nodes); 206 + node_set(first_node(*nodes), pol->nodes); 205 207 return 0; 206 208 } 207 209 ··· 211 209 { 212 210 if (nodes_empty(*nodes)) 213 211 return -EINVAL; 214 - pol->v.nodes = *nodes; 212 + pol->nodes = *nodes; 215 213 return 0; 216 214 } 217 215 ··· 326 324 else if (pol->flags & MPOL_F_RELATIVE_NODES) 327 325 mpol_relative_nodemask(&tmp, &pol->w.user_nodemask, nodes); 328 326 else { 329 - nodes_remap(tmp, pol->v.nodes, pol->w.cpuset_mems_allowed, 327 + nodes_remap(tmp, pol->nodes, pol->w.cpuset_mems_allowed, 330 328 *nodes); 331 329 pol->w.cpuset_mems_allowed = *nodes; 332 330 } ··· 334 332 if (nodes_empty(tmp)) 335 333 tmp = *nodes; 336 334 337 - pol->v.nodes = tmp; 335 + pol->nodes = tmp; 338 336 } 339 337 340 338 static void mpol_rebind_preferred(struct mempolicy *pol, ··· 899 897 switch (p->mode) { 900 898 case MPOL_BIND: 901 899 case MPOL_INTERLEAVE: 902 - *nodes = p->v.nodes; 900 + case MPOL_PREFERRED: 901 + *nodes = p->nodes; 903 902 break; 904 903 case MPOL_LOCAL: 905 904 /* return empty node mask for local allocation */ 906 - break; 907 - 908 - case MPOL_PREFERRED: 909 - node_set(p->v.preferred_node, *nodes); 910 905 break; 911 906 default: 912 907 BUG(); ··· 988 989 *policy = err; 989 990 } else if (pol == current->mempolicy && 990 991 pol->mode == MPOL_INTERLEAVE) { 991 - *policy = next_node_in(current->il_prev, pol->v.nodes); 992 + *policy = next_node_in(current->il_prev, pol->nodes); 992 993 } else { 993 994 err = -EINVAL; 994 995 goto out; ··· 1856 1857 BUG_ON(dynamic_policy_zone == ZONE_MOVABLE); 1857 1858 1858 1859 /* 1859 - * if policy->v.nodes has movable memory only, 1860 + * if policy->nodes has movable memory only, 1860 1861 * 
we apply policy when gfp_zone(gfp) = ZONE_MOVABLE only. 1861 1862 * 1862 - * policy->v.nodes is intersect with node_states[N_MEMORY]. 1863 + * policy->nodes is intersect with node_states[N_MEMORY]. 1863 1864 * so if the following test fails, it implies 1864 - * policy->v.nodes has movable memory only. 1865 + * policy->nodes has movable memory only. 1865 1866 */ 1866 - if (!nodes_intersects(policy->v.nodes, node_states[N_HIGH_MEMORY])) 1867 + if (!nodes_intersects(policy->nodes, node_states[N_HIGH_MEMORY])) 1867 1868 dynamic_policy_zone = ZONE_MOVABLE; 1868 1869 1869 1870 return zone >= dynamic_policy_zone; ··· 1878 1879 /* Lower zones don't get a nodemask applied for MPOL_BIND */ 1879 1880 if (unlikely(policy->mode == MPOL_BIND) && 1880 1881 apply_policy_zone(policy, gfp_zone(gfp)) && 1881 - cpuset_nodemask_valid_mems_allowed(&policy->v.nodes)) 1882 - return &policy->v.nodes; 1882 + cpuset_nodemask_valid_mems_allowed(&policy->nodes)) 1883 + return &policy->nodes; 1883 1884 1884 1885 return NULL; 1885 1886 } ··· 1888 1889 static int policy_node(gfp_t gfp, struct mempolicy *policy, int nd) 1889 1890 { 1890 1891 if (policy->mode == MPOL_PREFERRED) { 1891 - nd = policy->v.preferred_node; 1892 + nd = first_node(policy->nodes); 1892 1893 } else { 1893 1894 /* 1894 1895 * __GFP_THISNODE shouldn't even be used with the bind policy ··· 1907 1908 unsigned next; 1908 1909 struct task_struct *me = current; 1909 1910 1910 - next = next_node_in(me->il_prev, policy->v.nodes); 1911 + next = next_node_in(me->il_prev, policy->nodes); 1911 1912 if (next < MAX_NUMNODES) 1912 1913 me->il_prev = next; 1913 1914 return next; ··· 1931 1932 1932 1933 switch (policy->mode) { 1933 1934 case MPOL_PREFERRED: 1934 - return policy->v.preferred_node; 1935 + return first_node(policy->nodes); 1935 1936 1936 1937 case MPOL_INTERLEAVE: 1937 1938 return interleave_nodes(policy); ··· 1947 1948 enum zone_type highest_zoneidx = gfp_zone(GFP_KERNEL); 1948 1949 zonelist = 
&NODE_DATA(node)->node_zonelists[ZONELIST_FALLBACK]; 1949 1950 z = first_zones_zonelist(zonelist, highest_zoneidx, 1950 - &policy->v.nodes); 1951 + &policy->nodes); 1951 1952 return z->zone ? zone_to_nid(z->zone) : node; 1952 1953 } 1953 1954 case MPOL_LOCAL: ··· 1960 1961 1961 1962 /* 1962 1963 * Do static interleaving for a VMA with known offset @n. Returns the n'th 1963 - * node in pol->v.nodes (starting from n=0), wrapping around if n exceeds the 1964 + * node in pol->nodes (starting from n=0), wrapping around if n exceeds the 1964 1965 * number of present nodes. 1965 1966 */ 1966 1967 static unsigned offset_il_node(struct mempolicy *pol, unsigned long n) 1967 1968 { 1968 - unsigned nnodes = nodes_weight(pol->v.nodes); 1969 + unsigned nnodes = nodes_weight(pol->nodes); 1969 1970 unsigned target; 1970 1971 int i; 1971 1972 int nid; ··· 1973 1974 if (!nnodes) 1974 1975 return numa_node_id(); 1975 1976 target = (unsigned int)n % nnodes; 1976 - nid = first_node(pol->v.nodes); 1977 + nid = first_node(pol->nodes); 1977 1978 for (i = 0; i < target; i++) 1978 - nid = next_node(nid, pol->v.nodes); 1979 + nid = next_node(nid, pol->nodes); 1979 1980 return nid; 1980 1981 } 1981 1982 ··· 2031 2032 } else { 2032 2033 nid = policy_node(gfp_flags, *mpol, numa_node_id()); 2033 2034 if ((*mpol)->mode == MPOL_BIND) 2034 - *nodemask = &(*mpol)->v.nodes; 2035 + *nodemask = &(*mpol)->nodes; 2035 2036 } 2036 2037 return nid; 2037 2038 } ··· 2055 2056 bool init_nodemask_of_mempolicy(nodemask_t *mask) 2056 2057 { 2057 2058 struct mempolicy *mempolicy; 2058 - int nid; 2059 2059 2060 2060 if (!(mask && current->mempolicy)) 2061 2061 return false; ··· 2063 2065 mempolicy = current->mempolicy; 2064 2066 switch (mempolicy->mode) { 2065 2067 case MPOL_PREFERRED: 2066 - nid = mempolicy->v.preferred_node; 2067 - init_nodemask_of_node(mask, nid); 2068 - break; 2069 - 2070 2068 case MPOL_BIND: 2071 2069 case MPOL_INTERLEAVE: 2072 - *mask = mempolicy->v.nodes; 2070 + *mask = mempolicy->nodes; 
2073 2071 break; 2074 2072 2075 2073 case MPOL_LOCAL: 2076 - nid = numa_node_id(); 2077 - init_nodemask_of_node(mask, nid); 2074 + init_nodemask_of_node(mask, numa_node_id()); 2078 2075 break; 2079 2076 2080 2077 default: ··· 2103 2110 task_lock(tsk); 2104 2111 mempolicy = tsk->mempolicy; 2105 2112 if (mempolicy && mempolicy->mode == MPOL_BIND) 2106 - ret = nodes_intersects(mempolicy->v.nodes, *mask); 2113 + ret = nodes_intersects(mempolicy->nodes, *mask); 2107 2114 task_unlock(tsk); 2108 2115 2109 2116 return ret; ··· 2177 2184 * node in its nodemask, we allocate the standard way. 2178 2185 */ 2179 2186 if (pol->mode == MPOL_PREFERRED) 2180 - hpage_node = pol->v.preferred_node; 2187 + hpage_node = first_node(pol->nodes); 2181 2188 2182 2189 nmask = policy_nodemask(gfp, pol); 2183 2190 if (!nmask || node_isset(hpage_node, *nmask)) { ··· 2310 2317 switch (a->mode) { 2311 2318 case MPOL_BIND: 2312 2319 case MPOL_INTERLEAVE: 2313 - return !!nodes_equal(a->v.nodes, b->v.nodes); 2314 2320 case MPOL_PREFERRED: 2315 - return a->v.preferred_node == b->v.preferred_node; 2321 + return !!nodes_equal(a->nodes, b->nodes); 2316 2322 case MPOL_LOCAL: 2317 2323 return true; 2318 2324 default: ··· 2451 2459 break; 2452 2460 2453 2461 case MPOL_PREFERRED: 2454 - polnid = pol->v.preferred_node; 2462 + polnid = first_node(pol->nodes); 2455 2463 break; 2456 2464 2457 2465 case MPOL_LOCAL: ··· 2461 2469 case MPOL_BIND: 2462 2470 /* Optimize placement among multiple nodes via NUMA balancing */ 2463 2471 if (pol->flags & MPOL_F_MORON) { 2464 - if (node_isset(thisnid, pol->v.nodes)) 2472 + if (node_isset(thisnid, pol->nodes)) 2465 2473 break; 2466 2474 goto out; 2467 2475 } ··· 2472 2480 * else select nearest allowed node, if any. 2473 2481 * If no allowed nodes, use current [!misplaced]. 
2474 2482 */ 2475 - if (node_isset(curnid, pol->v.nodes)) 2483 + if (node_isset(curnid, pol->nodes)) 2476 2484 goto out; 2477 2485 z = first_zones_zonelist( 2478 2486 node_zonelist(numa_node_id(), GFP_HIGHUSER), 2479 2487 gfp_zone(GFP_HIGHUSER), 2480 - &pol->v.nodes); 2488 + &pol->nodes); 2481 2489 polnid = zone_to_nid(z->zone); 2482 2490 break; 2483 2491 ··· 2680 2688 vma->vm_pgoff, 2681 2689 sz, npol ? npol->mode : -1, 2682 2690 npol ? npol->flags : -1, 2683 - npol ? nodes_addr(npol->v.nodes)[0] : NUMA_NO_NODE); 2691 + npol ? nodes_addr(npol->nodes)[0] : NUMA_NO_NODE); 2684 2692 2685 2693 if (npol) { 2686 2694 new = sp_alloc(vma->vm_pgoff, vma->vm_pgoff + sz, npol); ··· 2778 2786 .refcnt = ATOMIC_INIT(1), 2779 2787 .mode = MPOL_PREFERRED, 2780 2788 .flags = MPOL_F_MOF | MPOL_F_MORON, 2781 - .v = { .preferred_node = nid, }, 2789 + .nodes = nodemask_of_node(nid), 2782 2790 }; 2783 2791 } 2784 2792 ··· 2937 2945 * Save nodes for mpol_to_str() to show the tmpfs mount options 2938 2946 * for /proc/mounts, /proc/pid/mounts and /proc/pid/mountinfo. 2939 2947 */ 2940 - if (mode != MPOL_PREFERRED) 2941 - new->v.nodes = nodes; 2942 - else if (nodelist) 2943 - new->v.preferred_node = first_node(nodes); 2944 - else 2948 + if (mode != MPOL_PREFERRED) { 2949 + new->nodes = nodes; 2950 + } else if (nodelist) { 2951 + nodes_clear(new->nodes); 2952 + node_set(first_node(nodes), new->nodes); 2953 + } else { 2945 2954 new->mode = MPOL_LOCAL; 2955 + } 2946 2956 2947 2957 /* 2948 2958 * Save nodes for contextualization: this will be used to "clone" ··· 2993 2999 case MPOL_LOCAL: 2994 3000 break; 2995 3001 case MPOL_PREFERRED: 2996 - node_set(pol->v.preferred_node, nodes); 2997 - break; 2998 3002 case MPOL_BIND: 2999 3003 case MPOL_INTERLEAVE: 3000 - nodes = pol->v.nodes; 3004 + nodes = pol->nodes; 3001 3005 break; 3002 3006 default: 3003 3007 WARN_ON_ONCE(1);