Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

lib: cpu_rmap: CPU affinity reverse-mapping

When initiating I/O on a multiqueue and multi-IRQ device, we may want
to select a queue for which the response will be handled on the same
or a nearby CPU. This requires a reverse-map of IRQ affinity. Add
library functions to support a generic reverse-mapping from CPUs to
objects with affinity and the specific case where the objects are
IRQs.

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

Authored by Ben Hutchings and committed by David S. Miller
c39649c3 c2df88cb

4 files changed, 348 insertions(+)

include/linux/cpu_rmap.h (new file, +73 lines)
/*
 * cpu_rmap.c: CPU affinity reverse-map support
 * Copyright 2011 Solarflare Communications Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published
 * by the Free Software Foundation, incorporated herein by reference.
 */

#include <linux/cpumask.h>
#include <linux/gfp.h>
#include <linux/slab.h>

/**
 * struct cpu_rmap - CPU affinity reverse-map
 * @size: Number of objects to be reverse-mapped
 * @used: Number of objects added
 * @obj: Pointer to array of object pointers
 * @near: For each CPU, the index and distance to the nearest object,
 *      based on affinity masks
 */
struct cpu_rmap {
        u16             size, used;
        void            **obj;
        struct {
                u16     index;
                u16     dist;
        }               near[0];
};
#define CPU_RMAP_DIST_INF 0xffff

extern struct cpu_rmap *alloc_cpu_rmap(unsigned int size, gfp_t flags);

/**
 * free_cpu_rmap - free CPU affinity reverse-map
 * @rmap: Reverse-map allocated with alloc_cpu_rmap(), or %NULL
 */
static inline void free_cpu_rmap(struct cpu_rmap *rmap)
{
        kfree(rmap);
}

extern int cpu_rmap_add(struct cpu_rmap *rmap, void *obj);
extern int cpu_rmap_update(struct cpu_rmap *rmap, u16 index,
                           const struct cpumask *affinity);

static inline u16 cpu_rmap_lookup_index(struct cpu_rmap *rmap, unsigned int cpu)
{
        return rmap->near[cpu].index;
}

static inline void *cpu_rmap_lookup_obj(struct cpu_rmap *rmap, unsigned int cpu)
{
        return rmap->obj[rmap->near[cpu].index];
}

#ifdef CONFIG_GENERIC_HARDIRQS

/**
 * alloc_irq_cpu_rmap - allocate CPU affinity reverse-map for IRQs
 * @size: Number of objects to be mapped
 *
 * Must be called in process context.
 */
static inline struct cpu_rmap *alloc_irq_cpu_rmap(unsigned int size)
{
        return alloc_cpu_rmap(size, GFP_KERNEL);
}
extern void free_irq_cpu_rmap(struct cpu_rmap *rmap);

extern int irq_cpu_rmap_add(struct cpu_rmap *rmap, int irq);

#endif
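
For context, a minimal sketch of how a multiqueue driver might consume this API, assuming one IRQ per queue; struct my_priv, nr_queues, queue_irq[] and my_init_cpu_rmap() are hypothetical names, not part of this patch:

/* Hypothetical driver setup (not part of this patch): build a reverse-map
 * with one entry per queue IRQ.  irq_cpu_rmap_add() registers an affinity
 * notifier on each IRQ, so later affinity changes update the map.
 */
#include <linux/cpu_rmap.h>
#include <linux/errno.h>

#define MY_MAX_QUEUES 16                        /* illustrative limit */

struct my_priv {
        struct cpu_rmap *rmap;                  /* hypothetical per-device state */
        unsigned int nr_queues;
        int queue_irq[MY_MAX_QUEUES];           /* IRQ number for each queue */
};

static int my_init_cpu_rmap(struct my_priv *priv)
{
        unsigned int i;
        int rc;

        priv->rmap = alloc_irq_cpu_rmap(priv->nr_queues);
        if (!priv->rmap)
                return -ENOMEM;

        for (i = 0; i < priv->nr_queues; i++) {
                rc = irq_cpu_rmap_add(priv->rmap, priv->queue_irq[i]);
                if (rc)
                        goto fail;
        }
        return 0;

fail:
        free_irq_cpu_rmap(priv->rmap);
        priv->rmap = NULL;
        return rc;
}

Per the kernel-doc above, the map must be built and torn down in process context, and free_irq_cpu_rmap() must run before the IRQs themselves are freed.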
lib/Kconfig (+4 lines)
         bool "Disable obsolete cpumask functions" if DEBUG_PER_CPU_MAPS
         depends on EXPERIMENTAL && BROKEN
 
+config CPU_RMAP
+        bool
+        depends on SMP
+
 #
 # Netlink attribute parsing support is select'ed if needed
 #
lib/Makefile (+2 lines)
 obj-$(CONFIG_AVERAGE) += average.o
 
+obj-$(CONFIG_CPU_RMAP) += cpu_rmap.o
+
 hostprogs-y := gen_crc32table
 clean-files := crc32table.h
lib/cpu_rmap.c (new file, +269 lines)
/*
 * cpu_rmap.c: CPU affinity reverse-map support
 * Copyright 2011 Solarflare Communications Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published
 * by the Free Software Foundation, incorporated herein by reference.
 */

#include <linux/cpu_rmap.h>
#ifdef CONFIG_GENERIC_HARDIRQS
#include <linux/interrupt.h>
#endif
#include <linux/module.h>

/*
 * These functions maintain a mapping from CPUs to some ordered set of
 * objects with CPU affinities.  This can be seen as a reverse-map of
 * CPU affinity.  However, we do not assume that the object affinities
 * cover all CPUs in the system.  For those CPUs not directly covered
 * by object affinities, we attempt to find a nearest object based on
 * CPU topology.
 */

/**
 * alloc_cpu_rmap - allocate CPU affinity reverse-map
 * @size: Number of objects to be mapped
 * @flags: Allocation flags e.g. %GFP_KERNEL
 */
struct cpu_rmap *alloc_cpu_rmap(unsigned int size, gfp_t flags)
{
        struct cpu_rmap *rmap;
        unsigned int cpu;
        size_t obj_offset;

        /* This is a silly number of objects, and we use u16 indices. */
        if (size > 0xffff)
                return NULL;

        /* Offset of object pointer array from base structure */
        obj_offset = ALIGN(offsetof(struct cpu_rmap, near[nr_cpu_ids]),
                           sizeof(void *));

        rmap = kzalloc(obj_offset + size * sizeof(rmap->obj[0]), flags);
        if (!rmap)
                return NULL;

        rmap->obj = (void **)((char *)rmap + obj_offset);

        /* Initially assign CPUs to objects on a rota, since we have
         * no idea where the objects are.  Use infinite distance, so
         * any object with known distance is preferable.  Include the
         * CPUs that are not present/online, since we definitely want
         * any newly-hotplugged CPUs to have some object assigned.
         */
        for_each_possible_cpu(cpu) {
                rmap->near[cpu].index = cpu % size;
                rmap->near[cpu].dist = CPU_RMAP_DIST_INF;
        }

        rmap->size = size;
        return rmap;
}
EXPORT_SYMBOL(alloc_cpu_rmap);

/* Reevaluate nearest object for given CPU, comparing with the given
 * neighbours at the given distance.
 */
static bool cpu_rmap_copy_neigh(struct cpu_rmap *rmap, unsigned int cpu,
                                const struct cpumask *mask, u16 dist)
{
        int neigh;

        for_each_cpu(neigh, mask) {
                if (rmap->near[cpu].dist > dist &&
                    rmap->near[neigh].dist <= dist) {
                        rmap->near[cpu].index = rmap->near[neigh].index;
                        rmap->near[cpu].dist = dist;
                        return true;
                }
        }
        return false;
}

#ifdef DEBUG
static void debug_print_rmap(const struct cpu_rmap *rmap, const char *prefix)
{
        unsigned index;
        unsigned int cpu;

        pr_info("cpu_rmap %p, %s:\n", rmap, prefix);

        for_each_possible_cpu(cpu) {
                index = rmap->near[cpu].index;
                pr_info("cpu %d -> obj %u (distance %u)\n",
                        cpu, index, rmap->near[cpu].dist);
        }
}
#else
static inline void
debug_print_rmap(const struct cpu_rmap *rmap, const char *prefix)
{
}
#endif

/**
 * cpu_rmap_add - add object to a rmap
 * @rmap: CPU rmap allocated with alloc_cpu_rmap()
 * @obj: Object to add to rmap
 *
 * Return index of object.
 */
int cpu_rmap_add(struct cpu_rmap *rmap, void *obj)
{
        u16 index;

        BUG_ON(rmap->used >= rmap->size);
        index = rmap->used++;
        rmap->obj[index] = obj;
        return index;
}
EXPORT_SYMBOL(cpu_rmap_add);

/**
 * cpu_rmap_update - update CPU rmap following a change of object affinity
 * @rmap: CPU rmap to update
 * @index: Index of object whose affinity changed
 * @affinity: New CPU affinity of object
 */
int cpu_rmap_update(struct cpu_rmap *rmap, u16 index,
                    const struct cpumask *affinity)
{
        cpumask_var_t update_mask;
        unsigned int cpu;

        if (unlikely(!zalloc_cpumask_var(&update_mask, GFP_KERNEL)))
                return -ENOMEM;

        /* Invalidate distance for all CPUs for which this used to be
         * the nearest object.  Mark those CPUs for update.
         */
        for_each_online_cpu(cpu) {
                if (rmap->near[cpu].index == index) {
                        rmap->near[cpu].dist = CPU_RMAP_DIST_INF;
                        cpumask_set_cpu(cpu, update_mask);
                }
        }

        debug_print_rmap(rmap, "after invalidating old distances");

        /* Set distance to 0 for all CPUs in the new affinity mask.
         * Mark all CPUs within their NUMA nodes for update.
         */
        for_each_cpu(cpu, affinity) {
                rmap->near[cpu].index = index;
                rmap->near[cpu].dist = 0;
                cpumask_or(update_mask, update_mask,
                           cpumask_of_node(cpu_to_node(cpu)));
        }

        debug_print_rmap(rmap, "after updating neighbours");

        /* Update distances based on topology */
        for_each_cpu(cpu, update_mask) {
                if (cpu_rmap_copy_neigh(rmap, cpu,
                                        topology_thread_cpumask(cpu), 1))
                        continue;
                if (cpu_rmap_copy_neigh(rmap, cpu,
                                        topology_core_cpumask(cpu), 2))
                        continue;
                if (cpu_rmap_copy_neigh(rmap, cpu,
                                        cpumask_of_node(cpu_to_node(cpu)), 3))
                        continue;
                /* We could continue into NUMA node distances, but for now
                 * we give up.
                 */
        }

        debug_print_rmap(rmap, "after copying neighbours");

        free_cpumask_var(update_mask);
        return 0;
}
EXPORT_SYMBOL(cpu_rmap_update);

#ifdef CONFIG_GENERIC_HARDIRQS

/* Glue between IRQ affinity notifiers and CPU rmaps */

struct irq_glue {
        struct irq_affinity_notify notify;
        struct cpu_rmap *rmap;
        u16 index;
};

/**
 * free_irq_cpu_rmap - free a CPU affinity reverse-map used for IRQs
 * @rmap: Reverse-map allocated with alloc_irq_cpu_rmap(), or %NULL
 *
 * Must be called in process context, before freeing the IRQs, and
 * without holding any locks required by global workqueue items.
 */
void free_irq_cpu_rmap(struct cpu_rmap *rmap)
{
        struct irq_glue *glue;
        u16 index;

        if (!rmap)
                return;

        for (index = 0; index < rmap->used; index++) {
                glue = rmap->obj[index];
                irq_set_affinity_notifier(glue->notify.irq, NULL);
        }
        irq_run_affinity_notifiers();

        kfree(rmap);
}
EXPORT_SYMBOL(free_irq_cpu_rmap);

static void
irq_cpu_rmap_notify(struct irq_affinity_notify *notify, const cpumask_t *mask)
{
        struct irq_glue *glue =
                container_of(notify, struct irq_glue, notify);
        int rc;

        rc = cpu_rmap_update(glue->rmap, glue->index, mask);
        if (rc)
                pr_warning("irq_cpu_rmap_notify: update failed: %d\n", rc);
}

static void irq_cpu_rmap_release(struct kref *ref)
{
        struct irq_glue *glue =
                container_of(ref, struct irq_glue, notify.kref);
        kfree(glue);
}

/**
 * irq_cpu_rmap_add - add an IRQ to a CPU affinity reverse-map
 * @rmap: The reverse-map
 * @irq: The IRQ number
 *
 * This adds an IRQ affinity notifier that will update the reverse-map
 * automatically.
 *
 * Must be called in process context, after the IRQ is allocated but
 * before it is bound with request_irq().
 */
int irq_cpu_rmap_add(struct cpu_rmap *rmap, int irq)
{
        struct irq_glue *glue = kzalloc(sizeof(*glue), GFP_KERNEL);
        int rc;

        if (!glue)
                return -ENOMEM;
        glue->notify.notify = irq_cpu_rmap_notify;
        glue->notify.release = irq_cpu_rmap_release;
        glue->rmap = rmap;
        glue->index = cpu_rmap_add(rmap, glue);
        rc = irq_set_affinity_notifier(irq, &glue->notify);
        if (rc)
                kfree(glue);
        return rc;
}
EXPORT_SYMBOL(irq_cpu_rmap_add);

#endif /* CONFIG_GENERIC_HARDIRQS */
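
To close the loop on the commit message's use case, here is a hedged sketch of the lookup side: choosing a queue at I/O submission time so that the completion IRQ lands on or near the submitting CPU. It assumes the hypothetical setup sketched earlier, with the rmap index equal to the queue number; my_select_queue() is illustrative only and not part of this patch.

/* Hypothetical submission-path lookup (not part of this patch): return the
 * index of the object (here, a queue) whose IRQ affinity is nearest to the
 * CPU we are currently running on.  Assumes queue IRQs were added to the
 * rmap in queue order, so the rmap index is the queue number.
 */
#include <linux/cpu_rmap.h>
#include <linux/smp.h>

static u16 my_select_queue(struct cpu_rmap *rmap)
{
        /* alloc_cpu_rmap() seeds near[] on a rota, so once all queues have
         * been added every lookup yields an in-range index, even before the
         * first affinity notification arrives.
         */
        return cpu_rmap_lookup_index(rmap, raw_smp_processor_id());
}

Because the affinity notifiers keep near[] current, the lookup itself is a single array read, cheap enough for a per-request fast path; raw_smp_processor_id() is sufficient here since the result is only a locality hint and a stale CPU number does no harm.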