Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

flex_array: avoid divisions when accessing elements

On most architectures division is an expensive operation and accessing an
element currently requires four of them. This performance penalty
effectively precludes flex arrays from being used on any kind of fast
path. However, two of these divisions can be handled at creation time and
the others can be replaced by a reciprocal divide, completely avoiding
real divisions on access.

[eparis@redhat.com: rebase on top of changes to support 0 len elements]
[eparis@redhat.com: initialize part_nr when array fits entirely in base]
Signed-off-by: Jesse Gross <jesse@nicira.com>
Signed-off-by: Eric Paris <eparis@redhat.com>
Cc: Dave Hansen <dave@linux.vnet.ibm.com>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Authored by Jesse Gross and committed by Linus Torvalds
704f15dd 5bf54a97

+31 -22
+2
include/linux/flex_array.h
··· 21 21 struct { 22 22 int element_size; 23 23 int total_nr_elements; 24 + int elems_per_part; 25 + u32 reciprocal_elems; 24 26 struct flex_array_part *parts[]; 25 27 }; 26 28 /*
+29 -22
lib/flex_array.c
··· 24 24 #include <linux/slab.h> 25 25 #include <linux/stddef.h> 26 26 #include <linux/module.h> 27 + #include <linux/reciprocal_div.h> 27 28 28 29 struct flex_array_part { 29 30 char elements[FLEX_ARRAY_PART_SIZE]; ··· 71 70 * Element size | Objects | Objects | 72 71 * PAGE_SIZE=4k | 32-bit | 64-bit | 73 72 * ---------------------------------| 74 - * 1 bytes | 4186112 | 2093056 | 75 - * 2 bytes | 2093056 | 1046528 | 76 - * 3 bytes | 1395030 | 697515 | 77 - * 4 bytes | 1046528 | 523264 | 78 - * 32 bytes | 130816 | 65408 | 79 - * 33 bytes | 126728 | 63364 | 80 - * 2048 bytes | 2044 | 1022 | 81 - * 2049 bytes | 1022 | 511 | 82 - * void * | 1046528 | 261632 | 73 + * 1 bytes | 4177920 | 2088960 | 74 + * 2 bytes | 2088960 | 1044480 | 75 + * 3 bytes | 1392300 | 696150 | 76 + * 4 bytes | 1044480 | 522240 | 77 + * 32 bytes | 130560 | 65408 | 78 + * 33 bytes | 126480 | 63240 | 79 + * 2048 bytes | 2040 | 1020 | 80 + * 2049 bytes | 1020 | 510 | 81 + * void * | 1044480 | 261120 | 83 82 * 84 83 * Since 64-bit pointers are twice the size, we lose half the 85 84 * capacity in the base structure. 
Also note that no effort is made ··· 89 88 gfp_t flags) 90 89 { 91 90 struct flex_array *ret; 91 + int elems_per_part = 0; 92 + int reciprocal_elems = 0; 92 93 int max_size = 0; 93 94 94 - if (element_size) 95 - max_size = FLEX_ARRAY_NR_BASE_PTRS * 96 - FLEX_ARRAY_ELEMENTS_PER_PART(element_size); 95 + if (element_size) { 96 + elems_per_part = FLEX_ARRAY_ELEMENTS_PER_PART(element_size); 97 + reciprocal_elems = reciprocal_value(elems_per_part); 98 + max_size = FLEX_ARRAY_NR_BASE_PTRS * elems_per_part; 99 + } 97 100 98 101 /* max_size will end up 0 if element_size > PAGE_SIZE */ 99 102 if (total > max_size) ··· 107 102 return NULL; 108 103 ret->element_size = element_size; 109 104 ret->total_nr_elements = total; 105 + ret->elems_per_part = elems_per_part; 106 + ret->reciprocal_elems = reciprocal_elems; 110 107 if (elements_fit_in_base(ret) && !(flags & __GFP_ZERO)) 111 108 memset(&ret->parts[0], FLEX_ARRAY_FREE, 112 109 FLEX_ARRAY_BASE_BYTES_LEFT); ··· 119 112 static int fa_element_to_part_nr(struct flex_array *fa, 120 113 unsigned int element_nr) 121 114 { 122 - return element_nr / FLEX_ARRAY_ELEMENTS_PER_PART(fa->element_size); 115 + return reciprocal_divide(element_nr, fa->reciprocal_elems); 123 116 } 124 117 125 118 /** ··· 148 141 EXPORT_SYMBOL(flex_array_free); 149 142 150 143 static unsigned int index_inside_part(struct flex_array *fa, 151 - unsigned int element_nr) 144 + unsigned int element_nr, 145 + unsigned int part_nr) 152 146 { 153 147 unsigned int part_offset; 154 148 155 - part_offset = element_nr % 156 - FLEX_ARRAY_ELEMENTS_PER_PART(fa->element_size); 149 + part_offset = element_nr - part_nr * fa->elems_per_part; 157 150 return part_offset * fa->element_size; 158 151 } 159 152 ··· 193 186 int flex_array_put(struct flex_array *fa, unsigned int element_nr, void *src, 194 187 gfp_t flags) 195 188 { 196 - int part_nr; 189 + int part_nr = 0; 197 190 struct flex_array_part *part; 198 191 void *dst; 199 192 ··· 209 202 if (!part) 210 203 return -ENOMEM; 211 
204 } 212 - dst = &part->elements[index_inside_part(fa, element_nr)]; 205 + dst = &part->elements[index_inside_part(fa, element_nr, part_nr)]; 213 206 memcpy(dst, src, fa->element_size); 214 207 return 0; 215 208 } ··· 224 217 */ 225 218 int flex_array_clear(struct flex_array *fa, unsigned int element_nr) 226 219 { 227 - int part_nr; 220 + int part_nr = 0; 228 221 struct flex_array_part *part; 229 222 void *dst; 230 223 ··· 240 233 if (!part) 241 234 return -EINVAL; 242 235 } 243 - dst = &part->elements[index_inside_part(fa, element_nr)]; 236 + dst = &part->elements[index_inside_part(fa, element_nr, part_nr)]; 244 237 memset(dst, FLEX_ARRAY_FREE, fa->element_size); 245 238 return 0; 246 239 } ··· 309 302 */ 310 303 void *flex_array_get(struct flex_array *fa, unsigned int element_nr) 311 304 { 312 - int part_nr; 305 + int part_nr = 0; 313 306 struct flex_array_part *part; 314 307 315 308 if (!fa->element_size) ··· 324 317 if (!part) 325 318 return NULL; 326 319 } 327 - return &part->elements[index_inside_part(fa, element_nr)]; 320 + return &part->elements[index_inside_part(fa, element_nr, part_nr)]; 328 321 } 329 322 EXPORT_SYMBOL(flex_array_get); 330 323