Merge branch 'irq-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull irq affinity fixes from Thomas Gleixner:

- Fix error path handling in the affinity spreading code

- Make affinity spreading smarter to avoid issues on systems which
claim to have hotpluggable CPUs while in fact they can't hotplug
anything.

So instead of trying to spread the vectors (and thereby the
associated device queues) to all possible CPUs, spread them on all
present CPUs first. If there are left over vectors after that first
step they are spread among the possible, but not present CPUs which
keeps the code backwards compatible for virtual devices and NVME
which allocate a queue per possible CPU, but makes the spreading
smarter for devices which have fewer queues than possible or present
CPUs.

* 'irq-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
genirq/affinity: Spread irq vectors among present CPUs as far as possible
genirq/affinity: Allow irq spreading from a given starting point
genirq/affinity: Move actual irq vector spreading into a helper function
genirq/affinity: Rename *node_to_possible_cpumask as *node_to_cpumask
genirq/affinity: Don't return with empty affinity masks on error

+106 -56
+106 -56
kernel/irq/affinity.c
··· 39 } 40 } 41 42 - static cpumask_var_t *alloc_node_to_possible_cpumask(void) 43 { 44 cpumask_var_t *masks; 45 int node; ··· 62 return NULL; 63 } 64 65 - static void free_node_to_possible_cpumask(cpumask_var_t *masks) 66 { 67 int node; 68 ··· 71 kfree(masks); 72 } 73 74 - static void build_node_to_possible_cpumask(cpumask_var_t *masks) 75 { 76 int cpu; 77 ··· 79 cpumask_set_cpu(cpu, masks[cpu_to_node(cpu)]); 80 } 81 82 - static int get_nodes_in_cpumask(cpumask_var_t *node_to_possible_cpumask, 83 const struct cpumask *mask, nodemask_t *nodemsk) 84 { 85 int n, nodes = 0; 86 87 /* Calculate the number of nodes in the supplied affinity mask */ 88 for_each_node(n) { 89 - if (cpumask_intersects(mask, node_to_possible_cpumask[n])) { 90 node_set(n, *nodemsk); 91 nodes++; 92 } ··· 94 return nodes; 95 } 96 97 - /** 98 - * irq_create_affinity_masks - Create affinity masks for multiqueue spreading 99 - * @nvecs: The total number of vectors 100 - * @affd: Description of the affinity requirements 101 - * 102 - * Returns the masks pointer or NULL if allocation failed. 103 - */ 104 - struct cpumask * 105 - irq_create_affinity_masks(int nvecs, const struct irq_affinity *affd) 106 { 107 - int n, nodes, cpus_per_vec, extra_vecs, curvec; 108 - int affv = nvecs - affd->pre_vectors - affd->post_vectors; 109 - int last_affv = affv + affd->pre_vectors; 110 nodemask_t nodemsk = NODE_MASK_NONE; 111 - struct cpumask *masks; 112 - cpumask_var_t nmsk, *node_to_possible_cpumask; 113 114 - /* 115 - * If there aren't any vectors left after applying the pre/post 116 - * vectors don't bother with assigning affinity. 
117 - */ 118 - if (!affv) 119 - return NULL; 120 121 - if (!zalloc_cpumask_var(&nmsk, GFP_KERNEL)) 122 - return NULL; 123 - 124 - masks = kcalloc(nvecs, sizeof(*masks), GFP_KERNEL); 125 - if (!masks) 126 - goto out; 127 - 128 - node_to_possible_cpumask = alloc_node_to_possible_cpumask(); 129 - if (!node_to_possible_cpumask) 130 - goto out; 131 - 132 - /* Fill out vectors at the beginning that don't need affinity */ 133 - for (curvec = 0; curvec < affd->pre_vectors; curvec++) 134 - cpumask_copy(masks + curvec, irq_default_affinity); 135 - 136 - /* Stabilize the cpumasks */ 137 - get_online_cpus(); 138 - build_node_to_possible_cpumask(node_to_possible_cpumask); 139 - nodes = get_nodes_in_cpumask(node_to_possible_cpumask, cpu_possible_mask, 140 - &nodemsk); 141 142 /* 143 * If the number of nodes in the mask is greater than or equal the 144 * number of vectors we just spread the vectors across the nodes. 145 */ 146 - if (affv <= nodes) { 147 for_each_node_mask(n, nodemsk) { 148 - cpumask_copy(masks + curvec, 149 - node_to_possible_cpumask[n]); 150 - if (++curvec == last_affv) 151 break; 152 } 153 - goto done; 154 } 155 156 for_each_node_mask(n, nodemsk) { 157 int ncpus, v, vecs_to_assign, vecs_per_node; 158 159 /* Spread the vectors per node */ 160 - vecs_per_node = (affv - (curvec - affd->pre_vectors)) / nodes; 161 162 /* Get the cpus on this node which are in the mask */ 163 - cpumask_and(nmsk, cpu_possible_mask, node_to_possible_cpumask[n]); 164 165 /* Calculate the number of cpus per vector */ 166 ncpus = cpumask_weight(nmsk); ··· 154 irq_spread_init_one(masks + curvec, nmsk, cpus_per_vec); 155 } 156 157 - if (curvec >= last_affv) 158 break; 159 --nodes; 160 } 161 162 - done: 163 put_online_cpus(); 164 165 /* Fill out vectors at the end that don't need affinity */ 166 for (; curvec < nvecs; curvec++) 167 cpumask_copy(masks + curvec, irq_default_affinity); 168 - free_node_to_possible_cpumask(node_to_possible_cpumask); 169 - out: 170 free_cpumask_var(nmsk); 171 
return masks; 172 }
··· 39 } 40 } 41 42 + static cpumask_var_t *alloc_node_to_cpumask(void) 43 { 44 cpumask_var_t *masks; 45 int node; ··· 62 return NULL; 63 } 64 65 + static void free_node_to_cpumask(cpumask_var_t *masks) 66 { 67 int node; 68 ··· 71 kfree(masks); 72 } 73 74 + static void build_node_to_cpumask(cpumask_var_t *masks) 75 { 76 int cpu; 77 ··· 79 cpumask_set_cpu(cpu, masks[cpu_to_node(cpu)]); 80 } 81 82 + static int get_nodes_in_cpumask(cpumask_var_t *node_to_cpumask, 83 const struct cpumask *mask, nodemask_t *nodemsk) 84 { 85 int n, nodes = 0; 86 87 /* Calculate the number of nodes in the supplied affinity mask */ 88 for_each_node(n) { 89 + if (cpumask_intersects(mask, node_to_cpumask[n])) { 90 node_set(n, *nodemsk); 91 nodes++; 92 } ··· 94 return nodes; 95 } 96 97 + static int irq_build_affinity_masks(const struct irq_affinity *affd, 98 + int startvec, int numvecs, 99 + cpumask_var_t *node_to_cpumask, 100 + const struct cpumask *cpu_mask, 101 + struct cpumask *nmsk, 102 + struct cpumask *masks) 103 { 104 + int n, nodes, cpus_per_vec, extra_vecs, done = 0; 105 + int last_affv = affd->pre_vectors + numvecs; 106 + int curvec = startvec; 107 nodemask_t nodemsk = NODE_MASK_NONE; 108 109 + if (!cpumask_weight(cpu_mask)) 110 + return 0; 111 112 + nodes = get_nodes_in_cpumask(node_to_cpumask, cpu_mask, &nodemsk); 113 114 /* 115 * If the number of nodes in the mask is greater than or equal the 116 * number of vectors we just spread the vectors across the nodes. 
117 */ 118 + if (numvecs <= nodes) { 119 for_each_node_mask(n, nodemsk) { 120 + cpumask_copy(masks + curvec, node_to_cpumask[n]); 121 + if (++done == numvecs) 122 break; 123 + if (++curvec == last_affv) 124 + curvec = affd->pre_vectors; 125 } 126 + goto out; 127 } 128 129 for_each_node_mask(n, nodemsk) { 130 int ncpus, v, vecs_to_assign, vecs_per_node; 131 132 /* Spread the vectors per node */ 133 + vecs_per_node = (numvecs - (curvec - affd->pre_vectors)) / nodes; 134 135 /* Get the cpus on this node which are in the mask */ 136 + cpumask_and(nmsk, cpu_mask, node_to_cpumask[n]); 137 138 /* Calculate the number of cpus per vector */ 139 ncpus = cpumask_weight(nmsk); ··· 181 irq_spread_init_one(masks + curvec, nmsk, cpus_per_vec); 182 } 183 184 + done += v; 185 + if (done >= numvecs) 186 break; 187 + if (curvec >= last_affv) 188 + curvec = affd->pre_vectors; 189 --nodes; 190 } 191 192 + out: 193 + return done; 194 + } 195 + 196 + /** 197 + * irq_create_affinity_masks - Create affinity masks for multiqueue spreading 198 + * @nvecs: The total number of vectors 199 + * @affd: Description of the affinity requirements 200 + * 201 + * Returns the masks pointer or NULL if allocation failed. 202 + */ 203 + struct cpumask * 204 + irq_create_affinity_masks(int nvecs, const struct irq_affinity *affd) 205 + { 206 + int affvecs = nvecs - affd->pre_vectors - affd->post_vectors; 207 + int curvec, usedvecs; 208 + cpumask_var_t nmsk, npresmsk, *node_to_cpumask; 209 + struct cpumask *masks = NULL; 210 + 211 + /* 212 + * If there aren't any vectors left after applying the pre/post 213 + * vectors don't bother with assigning affinity. 
214 + */ 215 + if (nvecs == affd->pre_vectors + affd->post_vectors) 216 + return NULL; 217 + 218 + if (!zalloc_cpumask_var(&nmsk, GFP_KERNEL)) 219 + return NULL; 220 + 221 + if (!zalloc_cpumask_var(&npresmsk, GFP_KERNEL)) 222 + goto outcpumsk; 223 + 224 + node_to_cpumask = alloc_node_to_cpumask(); 225 + if (!node_to_cpumask) 226 + goto outnpresmsk; 227 + 228 + masks = kcalloc(nvecs, sizeof(*masks), GFP_KERNEL); 229 + if (!masks) 230 + goto outnodemsk; 231 + 232 + /* Fill out vectors at the beginning that don't need affinity */ 233 + for (curvec = 0; curvec < affd->pre_vectors; curvec++) 234 + cpumask_copy(masks + curvec, irq_default_affinity); 235 + 236 + /* Stabilize the cpumasks */ 237 + get_online_cpus(); 238 + build_node_to_cpumask(node_to_cpumask); 239 + 240 + /* Spread on present CPUs starting from affd->pre_vectors */ 241 + usedvecs = irq_build_affinity_masks(affd, curvec, affvecs, 242 + node_to_cpumask, cpu_present_mask, 243 + nmsk, masks); 244 + 245 + /* 246 + * Spread on non present CPUs starting from the next vector to be 247 + * handled. If the spreading of present CPUs already exhausted the 248 + * vector space, assign the non present CPUs to the already spread 249 + * out vectors. 
250 + */ 251 + if (usedvecs >= affvecs) 252 + curvec = affd->pre_vectors; 253 + else 254 + curvec = affd->pre_vectors + usedvecs; 255 + cpumask_andnot(npresmsk, cpu_possible_mask, cpu_present_mask); 256 + usedvecs += irq_build_affinity_masks(affd, curvec, affvecs, 257 + node_to_cpumask, npresmsk, 258 + nmsk, masks); 259 put_online_cpus(); 260 261 /* Fill out vectors at the end that don't need affinity */ 262 + if (usedvecs >= affvecs) 263 + curvec = affd->pre_vectors + affvecs; 264 + else 265 + curvec = affd->pre_vectors + usedvecs; 266 for (; curvec < nvecs; curvec++) 267 cpumask_copy(masks + curvec, irq_default_affinity); 268 + 269 + outnodemsk: 270 + free_node_to_cpumask(node_to_cpumask); 271 + outnpresmsk: 272 + free_cpumask_var(npresmsk); 273 + outcpumsk: 274 free_cpumask_var(nmsk); 275 return masks; 276 }