Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

genalloc: make it possible to use a custom allocation algorithm

Permit use of an algorithm other than the default first-fit one. For
example a custom algorithm could be used to manage alignment requirements.

As I can't predict all the possible requirements/needs for all allocation
use cases, I add a "free" field 'void *data' to pass any needed
information to the allocation function. For example 'data' could be used
to handle a structure where you store the alignment, the expected memory
bank, the requester device, or any information that could influence the
allocation algorithm.

A usage example may look like this:
struct my_pool_constraints {
int align;
int bank;
...
};

unsigned long my_custom_algo(unsigned long *map, unsigned long size,
unsigned long start, unsigned int nr, void *data)
{
struct my_pool_constraints *constraints = data;
...
deal with allocation constraints
...
return the index in bitmap where perform the allocation
}

void create_my_pool()
{
struct my_pool_constraints c;
struct gen_pool *pool = gen_pool_create(...);
gen_pool_add(pool, ...);
gen_pool_set_algo(pool, my_custom_algo, &c);
}

Add of best-fit algorithm function:
most of the time best-fit is slower than first-fit but memory fragmentation
is lower. The random buffer allocation/free tests don't show any arithmetic
relation between the allocation time and fragmentation but the
best-fit algorithm
is sometimes able to perform the allocation when the first-fit can't.

This new algorithm helps to remove static allocations on ESRAM, a small but
fast on-chip RAM of a few KB, used for high-performance use cases like DMA
linked lists, graphic accelerators, encoders/decoders. On the Ux500
(in the ARM tree) we have defined 5 ESRAM banks of 128 KB each and use of
static allocations becomes unmaintainable:
cd arch/arm/mach-ux500 && grep -r ESRAM .
./include/mach/db8500-regs.h:/* Base address and bank offsets for ESRAM */
./include/mach/db8500-regs.h:#define U8500_ESRAM_BASE 0x40000000
./include/mach/db8500-regs.h:#define U8500_ESRAM_BANK_SIZE 0x00020000
./include/mach/db8500-regs.h:#define U8500_ESRAM_BANK0 U8500_ESRAM_BASE
./include/mach/db8500-regs.h:#define U8500_ESRAM_BANK1 (U8500_ESRAM_BASE + U8500_ESRAM_BANK_SIZE)
./include/mach/db8500-regs.h:#define U8500_ESRAM_BANK2 (U8500_ESRAM_BANK1 + U8500_ESRAM_BANK_SIZE)
./include/mach/db8500-regs.h:#define U8500_ESRAM_BANK3 (U8500_ESRAM_BANK2 + U8500_ESRAM_BANK_SIZE)
./include/mach/db8500-regs.h:#define U8500_ESRAM_BANK4 (U8500_ESRAM_BANK3 + U8500_ESRAM_BANK_SIZE)
./include/mach/db8500-regs.h:#define U8500_ESRAM_DMA_LCPA_OFFSET 0x10000
./include/mach/db8500-regs.h:#define U8500_DMA_LCPA_BASE
(U8500_ESRAM_BANK0 + U8500_ESRAM_DMA_LCPA_OFFSET)
./include/mach/db8500-regs.h:#define U8500_DMA_LCLA_BASE U8500_ESRAM_BANK4

I want to use genalloc to do dynamic allocations but I need to be able to
fine tune the allocation algorithm. In my case the best-fit algorithm gives
better results than first-fit, but it will not be true for every use case.

Signed-off-by: Benjamin Gaignard <benjamin.gaignard@stericsson.com>
Cc: Huang Ying <ying.huang@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

authored by

Benjamin Gaignard and committed by
Linus Torvalds
ca279cf1 e9687567

+111 -4
+27
include/linux/genalloc.h
··· 29 29 30 30 #ifndef __GENALLOC_H__ 31 31 #define __GENALLOC_H__ 32 + /** 33 + * Allocation callback function type definition 34 + * @map: Pointer to bitmap 35 + * @size: The bitmap size in bits 36 + * @start: The bitnumber to start searching at 37 + * @nr: The number of zeroed bits we're looking for 38 + * @data: optional additional data used by @genpool_algo_t 39 + */ 40 + typedef unsigned long (*genpool_algo_t)(unsigned long *map, 41 + unsigned long size, 42 + unsigned long start, 43 + unsigned int nr, 44 + void *data); 45 + 32 46 /* 33 47 * General purpose special memory pool descriptor. 34 48 */ ··· 50 36 spinlock_t lock; 51 37 struct list_head chunks; /* list of chunks in this pool */ 52 38 int min_alloc_order; /* minimum allocation order */ 39 + 40 + genpool_algo_t algo; /* allocation function */ 41 + void *data; 53 42 }; 54 43 55 44 /* ··· 95 78 void (*)(struct gen_pool *, struct gen_pool_chunk *, void *), void *); 96 79 extern size_t gen_pool_avail(struct gen_pool *); 97 80 extern size_t gen_pool_size(struct gen_pool *); 81 + 82 + extern void gen_pool_set_algo(struct gen_pool *pool, genpool_algo_t algo, 83 + void *data); 84 + 85 + extern unsigned long gen_pool_first_fit(unsigned long *map, unsigned long size, 86 + unsigned long start, unsigned int nr, void *data); 87 + 88 + extern unsigned long gen_pool_best_fit(unsigned long *map, unsigned long size, 89 + unsigned long start, unsigned int nr, void *data); 90 + 98 91 #endif /* __GENALLOC_H__ */
+84 -4
lib/genalloc.c
··· 152 152 spin_lock_init(&pool->lock); 153 153 INIT_LIST_HEAD(&pool->chunks); 154 154 pool->min_alloc_order = min_alloc_order; 155 + pool->algo = gen_pool_first_fit; 156 + pool->data = NULL; 155 157 } 156 158 return pool; 157 159 } ··· 257 255 * @size: number of bytes to allocate from the pool 258 256 * 259 257 * Allocate the requested number of bytes from the specified pool. 260 - * Uses a first-fit algorithm. Can not be used in NMI handler on 261 - * architectures without NMI-safe cmpxchg implementation. 258 + * Uses the pool allocation function (with first-fit algorithm by default). 259 + * Can not be used in NMI handler on architectures without 260 + * NMI-safe cmpxchg implementation. 262 261 */ 263 262 unsigned long gen_pool_alloc(struct gen_pool *pool, size_t size) 264 263 { ··· 283 280 284 281 end_bit = (chunk->end_addr - chunk->start_addr) >> order; 285 282 retry: 286 - start_bit = bitmap_find_next_zero_area(chunk->bits, end_bit, 287 - start_bit, nbits, 0); 283 + start_bit = pool->algo(chunk->bits, end_bit, start_bit, nbits, 284 + pool->data); 288 285 if (start_bit >= end_bit) 289 286 continue; 290 287 remain = bitmap_set_ll(chunk->bits, start_bit, nbits); ··· 403 400 return size; 404 401 } 405 402 EXPORT_SYMBOL_GPL(gen_pool_size); 403 + 404 + /** 405 + * gen_pool_set_algo - set the allocation algorithm 406 + * @pool: pool to change allocation algorithm 407 + * @algo: custom algorithm function 408 + * @data: additional data used by @algo 409 + * 410 + * Call @algo for each memory allocation in the pool. 411 + * If @algo is NULL use gen_pool_first_fit as default 412 + * memory allocation function. 
413 + */ 414 + void gen_pool_set_algo(struct gen_pool *pool, genpool_algo_t algo, void *data) 415 + { 416 + rcu_read_lock(); 417 + 418 + pool->algo = algo; 419 + if (!pool->algo) 420 + pool->algo = gen_pool_first_fit; 421 + 422 + pool->data = data; 423 + 424 + rcu_read_unlock(); 425 + } 426 + EXPORT_SYMBOL(gen_pool_set_algo); 427 + 428 + /** 429 + * gen_pool_first_fit - find the first available region 430 + * of memory matching the size requirement (no alignment constraint) 431 + * @map: The address to base the search on 432 + * @size: The bitmap size in bits 433 + * @start: The bitnumber to start searching at 434 + * @nr: The number of zeroed bits we're looking for 435 + * @data: additional data - unused 436 + */ 437 + unsigned long gen_pool_first_fit(unsigned long *map, unsigned long size, 438 + unsigned long start, unsigned int nr, void *data) 439 + { 440 + return bitmap_find_next_zero_area(map, size, start, nr, 0); 441 + } 442 + EXPORT_SYMBOL(gen_pool_first_fit); 443 + 444 + /** 445 + * gen_pool_best_fit - find the best fitting region of memory 446 + * macthing the size requirement (no alignment constraint) 447 + * @map: The address to base the search on 448 + * @size: The bitmap size in bits 449 + * @start: The bitnumber to start searching at 450 + * @nr: The number of zeroed bits we're looking for 451 + * @data: additional data - unused 452 + * 453 + * Iterate over the bitmap to find the smallest free region 454 + * which we can allocate the memory. 
455 + */ 456 + unsigned long gen_pool_best_fit(unsigned long *map, unsigned long size, 457 + unsigned long start, unsigned int nr, void *data) 458 + { 459 + unsigned long start_bit = size; 460 + unsigned long len = size + 1; 461 + unsigned long index; 462 + 463 + index = bitmap_find_next_zero_area(map, size, start, nr, 0); 464 + 465 + while (index < size) { 466 + int next_bit = find_next_bit(map, size, index + nr); 467 + if ((next_bit - index) < len) { 468 + len = next_bit - index; 469 + start_bit = index; 470 + if (len == nr) 471 + return start_bit; 472 + } 473 + index = bitmap_find_next_zero_area(map, size, 474 + next_bit + 1, nr, 0); 475 + } 476 + 477 + return start_bit; 478 + } 479 + EXPORT_SYMBOL(gen_pool_best_fit);