Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/buddy: Add KUnit tests for allocator performance under fragmentation

Add KUnit test cases that create severe memory fragmentation and
measure allocation/free performance.

The tests simulate two scenarios:

1. Allocation under severe fragmentation
- Allocate the entire 4 GiB space as 8 KiB blocks with 64 KiB alignment,
split them into two groups and free with mixed flags to block coalescing.
- Repeatedly allocate and free 64 KiB blocks while timing the loop.
- Freelist runtime: 76475 ms (76.5 seconds), soft-lockup triggered.
RB-tree runtime: 186 ms.

2. Reverse free order under fragmentation
- Create a similarly fragmented space, free half the blocks, reverse
the order of the remainder, and release them with the cleared flag.
- Freelist runtime: 85620 ms (85.6 seconds).
RB-tree runtime: 114 ms.

Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Link: https://lore.kernel.org/r/20251006095124.1663-3-Arunpravin.PaneerSelvam@amd.com

+105
+105
drivers/gpu/drm/tests/drm_buddy_test.c
··· 21 21 return (1 << order) * chunk_size; 22 22 } 23 23 24 + static void drm_test_buddy_fragmentation_performance(struct kunit *test) 25 + { 26 + struct drm_buddy_block *block, *tmp; 27 + int num_blocks, i, ret, count = 0; 28 + LIST_HEAD(allocated_blocks); 29 + unsigned long elapsed_ms; 30 + LIST_HEAD(reverse_list); 31 + LIST_HEAD(test_blocks); 32 + LIST_HEAD(clear_list); 33 + LIST_HEAD(dirty_list); 34 + LIST_HEAD(free_list); 35 + struct drm_buddy mm; 36 + u64 mm_size = SZ_4G; 37 + ktime_t start, end; 38 + 39 + /* 40 + * Allocation under severe fragmentation 41 + * 42 + * Create severe fragmentation by allocating the entire 4 GiB address space 43 + * as tiny 8 KiB blocks but forcing a 64 KiB alignment. The resulting pattern 44 + * leaves many scattered holes. Split the allocations into two groups and 45 + * return them with different flags to block coalescing, then repeatedly 46 + * allocate and free 64 KiB blocks while timing the loop. This stresses how 47 + * quickly the allocator can satisfy larger, aligned requests from a pool of 48 + * highly fragmented space. 
49 + */ 50 + KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_init(&mm, mm_size, SZ_4K), 51 + "buddy_init failed\n"); 52 + 53 + num_blocks = mm_size / SZ_64K; 54 + 55 + start = ktime_get(); 56 + /* Allocate with maximum fragmentation - 8K blocks with 64K alignment */ 57 + for (i = 0; i < num_blocks; i++) 58 + KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, SZ_8K, SZ_64K, 59 + &allocated_blocks, 0), 60 + "buddy_alloc hit an error size=%u\n", SZ_8K); 61 + 62 + list_for_each_entry_safe(block, tmp, &allocated_blocks, link) { 63 + if (count % 4 == 0 || count % 4 == 3) 64 + list_move_tail(&block->link, &clear_list); 65 + else 66 + list_move_tail(&block->link, &dirty_list); 67 + count++; 68 + } 69 + 70 + /* Free with different flags to ensure no coalescing */ 71 + drm_buddy_free_list(&mm, &clear_list, DRM_BUDDY_CLEARED); 72 + drm_buddy_free_list(&mm, &dirty_list, 0); 73 + 74 + for (i = 0; i < num_blocks; i++) 75 + KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, SZ_64K, SZ_64K, 76 + &test_blocks, 0), 77 + "buddy_alloc hit an error size=%u\n", SZ_64K); 78 + drm_buddy_free_list(&mm, &test_blocks, 0); 79 + 80 + end = ktime_get(); 81 + elapsed_ms = ktime_to_ms(ktime_sub(end, start)); 82 + 83 + kunit_info(test, "Fragmented allocation took %lu ms\n", elapsed_ms); 84 + 85 + drm_buddy_fini(&mm); 86 + 87 + /* 88 + * Reverse free order under fragmentation 89 + * 90 + * Construct a fragmented 4 GiB space by allocating every 8 KiB block with 91 + * 64 KiB alignment, creating a dense scatter of small regions. Half of the 92 + * blocks are selectively freed to form sparse gaps, while the remaining 93 + * allocations are preserved, reordered in reverse, and released back with 94 + * the cleared flag. 
This models a pathological reverse-ordered free pattern 95 + * and measures how quickly the allocator can merge and reclaim space when 96 + * deallocation occurs in the opposite order of allocation, exposing the 97 + * cost difference between a linear freelist scan and an ordered tree lookup. 98 + */ 99 + ret = drm_buddy_init(&mm, mm_size, SZ_4K); 100 + KUNIT_ASSERT_EQ(test, ret, 0); 101 + 102 + start = ktime_get(); 103 + /* Allocate maximum fragmentation */ 104 + for (i = 0; i < num_blocks; i++) 105 + KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, SZ_8K, SZ_64K, 106 + &allocated_blocks, 0), 107 + "buddy_alloc hit an error size=%u\n", SZ_8K); 108 + 109 + list_for_each_entry_safe(block, tmp, &allocated_blocks, link) { 110 + if (count % 2 == 0) 111 + list_move_tail(&block->link, &free_list); 112 + count++; 113 + } 114 + drm_buddy_free_list(&mm, &free_list, DRM_BUDDY_CLEARED); 115 + 116 + list_for_each_entry_safe_reverse(block, tmp, &allocated_blocks, link) 117 + list_move(&block->link, &reverse_list); 118 + drm_buddy_free_list(&mm, &reverse_list, DRM_BUDDY_CLEARED); 119 + 120 + end = ktime_get(); 121 + elapsed_ms = ktime_to_ms(ktime_sub(end, start)); 122 + 123 + kunit_info(test, "Reverse-ordered free took %lu ms\n", elapsed_ms); 124 + 125 + drm_buddy_fini(&mm); 126 + } 127 + 24 128 static void drm_test_buddy_alloc_range_bias(struct kunit *test) 25 129 { 26 130 u32 mm_size, size, ps, bias_size, bias_start, bias_end, bias_rem; ··· 876 772 KUNIT_CASE(drm_test_buddy_alloc_contiguous), 877 773 KUNIT_CASE(drm_test_buddy_alloc_clear), 878 774 KUNIT_CASE(drm_test_buddy_alloc_range_bias), 775 + KUNIT_CASE(drm_test_buddy_fragmentation_performance), 879 776 {} 880 777 }; 881 778