Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

ext4: Give symbolic names to mballoc criteria

mballoc criteria have historically been referred to by numbers
like CR0, CR1... however, this makes it confusing to understand
what each criterion is about.

Change these criteria from numbers to symbolic names and add
relevant comments. While we are at it, also reformat and add some
comments to ext4_seq_mb_stats_show() for better readability.

Additionally, define CR_FAST, which signifies the criteria
below which we can make quicker decisions like:
* quitting early if (free blocks < requested len)
* avoiding scanning free extents smaller than the required len
* avoiding initializing the buddy cache and working with the existing cache
* limiting prefetches

Suggested-by: Jan Kara <jack@suse.cz>
Signed-off-by: Ojaswin Mujoo <ojaswin@linux.ibm.com>
Link: https://lore.kernel.org/r/a2dc6ec5aea5e5e68cf8e788c2a964ffead9c8b0.1685449706.git.ojaswin@linux.ibm.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>

authored by

Ojaswin Mujoo and committed by
Theodore Ts'o
f52f3d2b 7e170922

+210 -146
+42 -13
fs/ext4/ext4.h
··· 135 135 */ 136 136 #define EXT4_MB_NUM_CRS 5 137 137 /* 138 - * All possible allocation criterias for mballoc 138 + * All possible allocation criterias for mballoc. Lower are faster. 139 139 */ 140 140 enum criteria { 141 - CR0, 142 - CR1, 143 - CR1_5, 144 - CR2, 145 - CR3, 141 + /* 142 + * Used when number of blocks needed is a power of 2. This doesn't 143 + * trigger any disk IO except prefetch and is the fastest criteria. 144 + */ 145 + CR_POWER2_ALIGNED, 146 + 147 + /* 148 + * Tries to lookup in-memory data structures to find the most suitable 149 + * group that satisfies goal request. No disk IO except block prefetch. 150 + */ 151 + CR_GOAL_LEN_FAST, 152 + 153 + /* 154 + * Same as CR_GOAL_LEN_FAST but is allowed to reduce the goal length to 155 + * the best available length for faster allocation. 156 + */ 157 + CR_BEST_AVAIL_LEN, 158 + 159 + /* 160 + * Reads each block group sequentially, performing disk IO if necessary, to 161 + * find find_suitable block group. Tries to allocate goal length but might trim 162 + * the request if nothing is found after enough tries. 163 + */ 164 + CR_GOAL_LEN_SLOW, 165 + 166 + /* 167 + * Finds the first free set of blocks and allocates those. This is only 168 + * used in rare cases when CR_GOAL_LEN_SLOW also fails to allocate 169 + * anything. 170 + */ 171 + CR_ANY_FREE, 146 172 }; 173 + 174 + /* criteria below which we use fast block scanning and avoid unnecessary IO */ 175 + #define CR_FAST CR_GOAL_LEN_SLOW 147 176 148 177 /* 149 178 * Flags used in mballoc's allocation_context flags field. 
··· 212 183 /* Do strict check for free blocks while retrying block allocation */ 213 184 #define EXT4_MB_STRICT_CHECK 0x4000 214 185 /* Large fragment size list lookup succeeded at least once for cr = 0 */ 215 - #define EXT4_MB_CR0_OPTIMIZED 0x8000 186 + #define EXT4_MB_CR_POWER2_ALIGNED_OPTIMIZED 0x8000 216 187 /* Avg fragment size rb tree lookup succeeded at least once for cr = 1 */ 217 - #define EXT4_MB_CR1_OPTIMIZED 0x00010000 188 + #define EXT4_MB_CR_GOAL_LEN_FAST_OPTIMIZED 0x00010000 218 189 /* Avg fragment size rb tree lookup succeeded at least once for cr = 1.5 */ 219 - #define EXT4_MB_CR1_5_OPTIMIZED 0x00020000 190 + #define EXT4_MB_CR_BEST_AVAIL_LEN_OPTIMIZED 0x00020000 220 191 221 192 struct ext4_allocation_request { 222 193 /* target inode for block we're allocating */ ··· 1582 1553 unsigned long s_mb_last_start; 1583 1554 unsigned int s_mb_prefetch; 1584 1555 unsigned int s_mb_prefetch_limit; 1585 - unsigned int s_mb_cr1_5_max_trim_order; 1556 + unsigned int s_mb_best_avail_max_trim_order; 1586 1557 1587 1558 /* stats for buddy allocator */ 1588 1559 atomic_t s_bal_reqs; /* number of reqs with len > 1 */ ··· 1595 1566 atomic_t s_bal_len_goals; /* len goal hits */ 1596 1567 atomic_t s_bal_breaks; /* too long searches */ 1597 1568 atomic_t s_bal_2orders; /* 2^order hits */ 1598 - atomic_t s_bal_cr0_bad_suggestions; 1599 - atomic_t s_bal_cr1_bad_suggestions; 1600 - atomic_t s_bal_cr1_5_bad_suggestions; 1569 + atomic_t s_bal_p2_aligned_bad_suggestions; 1570 + atomic_t s_bal_goal_fast_bad_suggestions; 1571 + atomic_t s_bal_best_avail_bad_suggestions; 1601 1572 atomic64_t s_bal_cX_groups_considered[EXT4_MB_NUM_CRS]; 1602 1573 atomic64_t s_bal_cX_hits[EXT4_MB_NUM_CRS]; 1603 1574 atomic64_t s_bal_cX_failed[EXT4_MB_NUM_CRS]; /* cX loop didn't find blocks */
+150 -115
fs/ext4/mballoc.c
··· 154 154 * structures to decide the order in which groups are to be traversed for 155 155 * fulfilling an allocation request. 156 156 * 157 - * At CR0 , we look for groups which have the largest_free_order >= the order 158 - * of the request. We directly look at the largest free order list in the data 159 - * structure (1) above where largest_free_order = order of the request. If that 160 - * list is empty, we look at remaining list in the increasing order of 161 - * largest_free_order. This allows us to perform CR0 lookup in O(1) time. 157 + * At CR_POWER2_ALIGNED , we look for groups which have the largest_free_order 158 + * >= the order of the request. We directly look at the largest free order list 159 + * in the data structure (1) above where largest_free_order = order of the 160 + * request. If that list is empty, we look at remaining list in the increasing 161 + * order of largest_free_order. This allows us to perform CR_POWER2_ALIGNED 162 + * lookup in O(1) time. 162 163 * 163 - * At CR1, we only consider groups where average fragment size > request 164 - * size. So, we lookup a group which has average fragment size just above or 165 - * equal to request size using our average fragment size group lists (data 166 - * structure 2) in O(1) time. 164 + * At CR_GOAL_LEN_FAST, we only consider groups where 165 + * average fragment size > request size. So, we lookup a group which has average 166 + * fragment size just above or equal to request size using our average fragment 167 + * size group lists (data structure 2) in O(1) time. 167 168 * 168 - * At CR1.5 (aka CR1_5), we aim to optimize allocations which can't be satisfied 169 - * in CR1. The fact that we couldn't find a group in CR1 suggests that there is 170 - * no BG that has average fragment size > goal length. So before falling to the 171 - * slower CR2, in CR1.5 we proactively trim goal length and then use the same 172 - * fragment lists as CR1 to find a BG with a big enough average fragment size. 
173 - * This increases the chances of finding a suitable block group in O(1) time and 174 - * results * in faster allocation at the cost of reduced size of allocation. 169 + * At CR_BEST_AVAIL_LEN, we aim to optimize allocations which can't be satisfied 170 + * in CR_GOAL_LEN_FAST. The fact that we couldn't find a group in 171 + * CR_GOAL_LEN_FAST suggests that there is no BG that has avg 172 + * fragment size > goal length. So before falling to the slower 173 + * CR_GOAL_LEN_SLOW, in CR_BEST_AVAIL_LEN we proactively trim goal length and 174 + * then use the same fragment lists as CR_GOAL_LEN_FAST to find a BG with a big 175 + * enough average fragment size. This increases the chances of finding a 176 + * suitable block group in O(1) time and results in faster allocation at the 177 + * cost of reduced size of allocation. 175 178 * 176 179 * If "mb_optimize_scan" mount option is not set, mballoc traverses groups in 177 - * linear order which requires O(N) search time for each CR0 and CR1 phase. 180 + * linear order which requires O(N) search time for each CR_POWER2_ALIGNED and 181 + * CR_GOAL_LEN_FAST phase. 178 182 * 179 183 * The regular allocator (using the buddy cache) supports a few tunables. 180 184 * ··· 363 359 * - bitlock on a group (group) 364 360 * - object (inode/locality) (object) 365 361 * - per-pa lock (pa) 366 - * - cr0 lists lock (cr0) 367 - * - cr1 tree lock (cr1) 362 + * - cr_power2_aligned lists lock (cr_power2_aligned) 363 + * - cr_goal_len_fast lists lock (cr_goal_len_fast) 368 364 * 369 365 * Paths: 370 366 * - new pa ··· 396 392 * 397 393 * - allocation path (ext4_mb_regular_allocator) 398 394 * group 399 - * cr0/cr1 395 + * cr_power2_aligned/cr_goal_len_fast 400 396 */ 401 397 static struct kmem_cache *ext4_pspace_cachep; 402 398 static struct kmem_cache *ext4_ac_cachep; ··· 870 866 * Choose next group by traversing largest_free_order lists. Updates *new_cr if 871 867 * cr level needs an update. 
872 868 */ 873 - static void ext4_mb_choose_next_group_cr0(struct ext4_allocation_context *ac, 869 + static void ext4_mb_choose_next_group_p2_aligned(struct ext4_allocation_context *ac, 874 870 enum criteria *new_cr, ext4_group_t *group, ext4_group_t ngroups) 875 871 { 876 872 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); ··· 880 876 if (ac->ac_status == AC_STATUS_FOUND) 881 877 return; 882 878 883 - if (unlikely(sbi->s_mb_stats && ac->ac_flags & EXT4_MB_CR0_OPTIMIZED)) 884 - atomic_inc(&sbi->s_bal_cr0_bad_suggestions); 879 + if (unlikely(sbi->s_mb_stats && ac->ac_flags & EXT4_MB_CR_POWER2_ALIGNED_OPTIMIZED)) 880 + atomic_inc(&sbi->s_bal_p2_aligned_bad_suggestions); 885 881 886 882 grp = NULL; 887 883 for (i = ac->ac_2order; i < MB_NUM_ORDERS(ac->ac_sb); i++) { ··· 896 892 list_for_each_entry(iter, &sbi->s_mb_largest_free_orders[i], 897 893 bb_largest_free_order_node) { 898 894 if (sbi->s_mb_stats) 899 - atomic64_inc(&sbi->s_bal_cX_groups_considered[CR0]); 900 - if (likely(ext4_mb_good_group(ac, iter->bb_group, CR0))) { 895 + atomic64_inc(&sbi->s_bal_cX_groups_considered[CR_POWER2_ALIGNED]); 896 + if (likely(ext4_mb_good_group(ac, iter->bb_group, CR_POWER2_ALIGNED))) { 901 897 grp = iter; 902 898 break; 903 899 } ··· 909 905 910 906 if (!grp) { 911 907 /* Increment cr and search again */ 912 - *new_cr = CR1; 908 + *new_cr = CR_GOAL_LEN_FAST; 913 909 } else { 914 910 *group = grp->bb_group; 915 - ac->ac_flags |= EXT4_MB_CR0_OPTIMIZED; 911 + ac->ac_flags |= EXT4_MB_CR_POWER2_ALIGNED_OPTIMIZED; 916 912 } 917 913 } 918 914 ··· 951 947 * Choose next group by traversing average fragment size list of suitable 952 948 * order. Updates *new_cr if cr level needs an update. 
953 949 */ 954 - static void ext4_mb_choose_next_group_cr1(struct ext4_allocation_context *ac, 950 + static void ext4_mb_choose_next_group_goal_fast(struct ext4_allocation_context *ac, 955 951 enum criteria *new_cr, ext4_group_t *group, ext4_group_t ngroups) 956 952 { 957 953 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); 958 954 struct ext4_group_info *grp = NULL; 959 955 int i; 960 956 961 - if (unlikely(ac->ac_flags & EXT4_MB_CR1_OPTIMIZED)) { 957 + if (unlikely(ac->ac_flags & EXT4_MB_CR_GOAL_LEN_FAST_OPTIMIZED)) { 962 958 if (sbi->s_mb_stats) 963 - atomic_inc(&sbi->s_bal_cr1_bad_suggestions); 959 + atomic_inc(&sbi->s_bal_goal_fast_bad_suggestions); 964 960 } 965 961 966 962 for (i = mb_avg_fragment_size_order(ac->ac_sb, ac->ac_g_ex.fe_len); ··· 972 968 973 969 if (grp) { 974 970 *group = grp->bb_group; 975 - ac->ac_flags |= EXT4_MB_CR1_OPTIMIZED; 971 + ac->ac_flags |= EXT4_MB_CR_GOAL_LEN_FAST_OPTIMIZED; 976 972 } else { 977 - *new_cr = CR1_5; 973 + *new_cr = CR_BEST_AVAIL_LEN; 978 974 } 979 975 } 980 976 981 977 /* 982 - * We couldn't find a group in CR1 so try to find the highest free fragment 978 + * We couldn't find a group in CR_GOAL_LEN_FAST so try to find the highest free fragment 983 979 * order we have and proactively trim the goal request length to that order to 984 980 * find a suitable group faster. 985 981 * 986 982 * This optimizes allocation speed at the cost of slightly reduced 987 983 * preallocations. However, we make sure that we don't trim the request too 988 - * much and fall to CR2 in that case. 984 + * much and fall to CR_GOAL_LEN_SLOW in that case. 
989 985 */ 990 - static void ext4_mb_choose_next_group_cr1_5(struct ext4_allocation_context *ac, 986 + static void ext4_mb_choose_next_group_best_avail(struct ext4_allocation_context *ac, 991 987 enum criteria *new_cr, ext4_group_t *group, ext4_group_t ngroups) 992 988 { 993 989 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); ··· 995 991 int i, order, min_order; 996 992 unsigned long num_stripe_clusters = 0; 997 993 998 - if (unlikely(ac->ac_flags & EXT4_MB_CR1_5_OPTIMIZED)) { 994 + if (unlikely(ac->ac_flags & EXT4_MB_CR_BEST_AVAIL_LEN_OPTIMIZED)) { 999 995 if (sbi->s_mb_stats) 1000 - atomic_inc(&sbi->s_bal_cr1_5_bad_suggestions); 996 + atomic_inc(&sbi->s_bal_best_avail_bad_suggestions); 1001 997 } 1002 998 1003 999 /* ··· 1007 1003 * goal length. 1008 1004 */ 1009 1005 order = fls(ac->ac_g_ex.fe_len); 1010 - min_order = order - sbi->s_mb_cr1_5_max_trim_order; 1006 + min_order = order - sbi->s_mb_best_avail_max_trim_order; 1011 1007 if (min_order < 0) 1012 1008 min_order = 0; 1013 1009 ··· 1055 1051 1056 1052 if (grp) { 1057 1053 *group = grp->bb_group; 1058 - ac->ac_flags |= EXT4_MB_CR1_5_OPTIMIZED; 1054 + ac->ac_flags |= EXT4_MB_CR_BEST_AVAIL_LEN_OPTIMIZED; 1059 1055 } else { 1060 - /* Reset goal length to original goal length before falling into CR2 */ 1056 + /* Reset goal length to original goal length before falling into CR_GOAL_LEN_SLOW */ 1061 1057 ac->ac_g_ex.fe_len = ac->ac_orig_goal_len; 1062 - *new_cr = CR2; 1058 + *new_cr = CR_GOAL_LEN_SLOW; 1063 1059 } 1064 1060 } 1065 1061 ··· 1067 1063 { 1068 1064 if (unlikely(!test_opt2(ac->ac_sb, MB_OPTIMIZE_SCAN))) 1069 1065 return 0; 1070 - if (ac->ac_criteria >= CR2) 1066 + if (ac->ac_criteria >= CR_GOAL_LEN_SLOW) 1071 1067 return 0; 1072 1068 if (!ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)) 1073 1069 return 0; ··· 1121 1117 return; 1122 1118 } 1123 1119 1124 - if (*new_cr == CR0) { 1125 - ext4_mb_choose_next_group_cr0(ac, new_cr, group, ngroups); 1126 - } else if (*new_cr == CR1) { 1127 - 
ext4_mb_choose_next_group_cr1(ac, new_cr, group, ngroups); 1128 - } else if (*new_cr == CR1_5) { 1129 - ext4_mb_choose_next_group_cr1_5(ac, new_cr, group, ngroups); 1120 + if (*new_cr == CR_POWER2_ALIGNED) { 1121 + ext4_mb_choose_next_group_p2_aligned(ac, new_cr, group, ngroups); 1122 + } else if (*new_cr == CR_GOAL_LEN_FAST) { 1123 + ext4_mb_choose_next_group_goal_fast(ac, new_cr, group, ngroups); 1124 + } else if (*new_cr == CR_BEST_AVAIL_LEN) { 1125 + ext4_mb_choose_next_group_best_avail(ac, new_cr, group, ngroups); 1130 1126 } else { 1131 1127 /* 1132 1128 * TODO: For CR=2, we can arrange groups in an rb tree sorted by ··· 2448 2444 break; 2449 2445 } 2450 2446 2451 - if (ac->ac_criteria < CR2) { 2447 + if (ac->ac_criteria < CR_FAST) { 2452 2448 /* 2453 - * In CR1 and CR1_5, we are sure that this group will 2454 - * have a large enough continuous free extent, so skip 2455 - * over the smaller free extents 2449 + * In CR_GOAL_LEN_FAST and CR_BEST_AVAIL_LEN, we are 2450 + * sure that this group will have a large enough 2451 + * continuous free extent, so skip over the smaller free 2452 + * extents 2456 2453 */ 2457 2454 j = mb_find_next_bit(bitmap, 2458 2455 EXT4_CLUSTERS_PER_GROUP(sb), i); ··· 2549 2544 int flex_size = ext4_flex_bg_size(EXT4_SB(ac->ac_sb)); 2550 2545 struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group); 2551 2546 2552 - BUG_ON(cr < CR0 || cr >= EXT4_MB_NUM_CRS); 2547 + BUG_ON(cr < CR_POWER2_ALIGNED || cr >= EXT4_MB_NUM_CRS); 2553 2548 2554 2549 if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(grp) || !grp)) 2555 2550 return false; ··· 2563 2558 return false; 2564 2559 2565 2560 switch (cr) { 2566 - case CR0: 2561 + case CR_POWER2_ALIGNED: 2567 2562 BUG_ON(ac->ac_2order == 0); 2568 2563 2569 2564 /* Avoid using the first bg of a flexgroup for data files */ ··· 2582 2577 return false; 2583 2578 2584 2579 return true; 2585 - case CR1: 2586 - case CR1_5: 2580 + case CR_GOAL_LEN_FAST: 2581 + case CR_BEST_AVAIL_LEN: 2587 2582 if ((free / 
fragments) >= ac->ac_g_ex.fe_len) 2588 2583 return true; 2589 2584 break; 2590 - case CR2: 2585 + case CR_GOAL_LEN_SLOW: 2591 2586 if (free >= ac->ac_g_ex.fe_len) 2592 2587 return true; 2593 2588 break; 2594 - case CR3: 2589 + case CR_ANY_FREE: 2595 2590 return true; 2596 2591 default: 2597 2592 BUG(); ··· 2632 2627 free = grp->bb_free; 2633 2628 if (free == 0) 2634 2629 goto out; 2635 - if (cr <= CR2 && free < ac->ac_g_ex.fe_len) 2630 + if (cr <= CR_FAST && free < ac->ac_g_ex.fe_len) 2636 2631 goto out; 2637 2632 if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(grp))) 2638 2633 goto out; ··· 2647 2642 ext4_get_group_desc(sb, group, NULL); 2648 2643 int ret; 2649 2644 2650 - /* cr=CR0/CR1 is a very optimistic search to find large 2651 - * good chunks almost for free. If buddy data is not 2652 - * ready, then this optimization makes no sense. But 2653 - * we never skip the first block group in a flex_bg, 2654 - * since this gets used for metadata block allocation, 2655 - * and we want to make sure we locate metadata blocks 2656 - * in the first block group in the flex_bg if possible. 2645 + /* 2646 + * cr=CR_POWER2_ALIGNED/CR_GOAL_LEN_FAST is a very optimistic 2647 + * search to find large good chunks almost for free. If buddy 2648 + * data is not ready, then this optimization makes no sense. But 2649 + * we never skip the first block group in a flex_bg, since this 2650 + * gets used for metadata block allocation, and we want to make 2651 + * sure we locate metadata blocks in the first block group in 2652 + * the flex_bg if possible. 2657 2653 */ 2658 - if (cr < CR2 && 2654 + if (cr < CR_FAST && 2659 2655 (!sbi->s_log_groups_per_flex || 2660 2656 ((group & ((1 << sbi->s_log_groups_per_flex) - 1)) != 0)) && 2661 2657 !(ext4_has_group_desc_csum(sb) && ··· 2816 2810 } 2817 2811 2818 2812 /* Let's just scan groups to find more-less suitable blocks */ 2819 - cr = ac->ac_2order ? CR0 : CR1; 2813 + cr = ac->ac_2order ? 
CR_POWER2_ALIGNED : CR_GOAL_LEN_FAST; 2820 2814 /* 2821 - * cr == CR0 try to get exact allocation, 2822 - * cr == CR3 try to get anything 2815 + * cr == CR_POWER2_ALIGNED try to get exact allocation, 2816 + * cr == CR_ANY_FREE try to get anything 2823 2817 */ 2824 2818 repeat: 2825 2819 for (; cr < EXT4_MB_NUM_CRS && ac->ac_status == AC_STATUS_CONTINUE; cr++) { ··· 2849 2843 * spend a lot of time loading imperfect groups 2850 2844 */ 2851 2845 if ((prefetch_grp == group) && 2852 - (cr > CR1_5 || 2846 + (cr >= CR_FAST || 2853 2847 prefetch_ios < sbi->s_mb_prefetch_limit)) { 2854 2848 nr = sbi->s_mb_prefetch; 2855 2849 if (ext4_has_feature_flex_bg(sb)) { ··· 2887 2881 } 2888 2882 2889 2883 ac->ac_groups_scanned++; 2890 - if (cr == CR0) 2884 + if (cr == CR_POWER2_ALIGNED) 2891 2885 ext4_mb_simple_scan_group(ac, &e4b); 2892 - else if ((cr == CR1 || cr == CR1_5) && sbi->s_stripe && 2886 + else if ((cr == CR_GOAL_LEN_FAST || 2887 + cr == CR_BEST_AVAIL_LEN) && 2888 + sbi->s_stripe && 2893 2889 !(ac->ac_g_ex.fe_len % 2894 2890 EXT4_B2C(sbi, sbi->s_stripe))) 2895 2891 ext4_mb_scan_aligned(ac, &e4b); ··· 2908 2900 if (sbi->s_mb_stats && i == ngroups) 2909 2901 atomic64_inc(&sbi->s_bal_cX_failed[cr]); 2910 2902 2911 - if (i == ngroups && ac->ac_criteria == CR1_5) 2903 + if (i == ngroups && ac->ac_criteria == CR_BEST_AVAIL_LEN) 2912 2904 /* Reset goal length to original goal length before 2913 - * falling into CR2 */ 2905 + * falling into CR_GOAL_LEN_SLOW */ 2914 2906 ac->ac_g_ex.fe_len = ac->ac_orig_goal_len; 2915 2907 } 2916 2908 ··· 2937 2929 ac->ac_b_ex.fe_len = 0; 2938 2930 ac->ac_status = AC_STATUS_CONTINUE; 2939 2931 ac->ac_flags |= EXT4_MB_HINT_FIRST; 2940 - cr = CR3; 2932 + cr = CR_ANY_FREE; 2941 2933 goto repeat; 2942 2934 } 2943 2935 } ··· 3053 3045 seq_puts(seq, "mballoc:\n"); 3054 3046 if (!sbi->s_mb_stats) { 3055 3047 seq_puts(seq, "\tmb stats collection turned off.\n"); 3056 - seq_puts(seq, "\tTo enable, please write \"1\" to sysfs file mb_stats.\n"); 3048 + 
seq_puts( 3049 + seq, 3050 + "\tTo enable, please write \"1\" to sysfs file mb_stats.\n"); 3057 3051 return 0; 3058 3052 } 3059 3053 seq_printf(seq, "\treqs: %u\n", atomic_read(&sbi->s_bal_reqs)); 3060 3054 seq_printf(seq, "\tsuccess: %u\n", atomic_read(&sbi->s_bal_success)); 3061 3055 3062 - seq_printf(seq, "\tgroups_scanned: %u\n", atomic_read(&sbi->s_bal_groups_scanned)); 3056 + seq_printf(seq, "\tgroups_scanned: %u\n", 3057 + atomic_read(&sbi->s_bal_groups_scanned)); 3063 3058 3064 - seq_puts(seq, "\tcr0_stats:\n"); 3065 - seq_printf(seq, "\t\thits: %llu\n", atomic64_read(&sbi->s_bal_cX_hits[CR0])); 3066 - seq_printf(seq, "\t\tgroups_considered: %llu\n", 3067 - atomic64_read(&sbi->s_bal_cX_groups_considered[CR0])); 3068 - seq_printf(seq, "\t\textents_scanned: %u\n", atomic_read(&sbi->s_bal_cX_ex_scanned[CR0])); 3059 + /* CR_POWER2_ALIGNED stats */ 3060 + seq_puts(seq, "\tcr_p2_aligned_stats:\n"); 3061 + seq_printf(seq, "\t\thits: %llu\n", 3062 + atomic64_read(&sbi->s_bal_cX_hits[CR_POWER2_ALIGNED])); 3063 + seq_printf( 3064 + seq, "\t\tgroups_considered: %llu\n", 3065 + atomic64_read( 3066 + &sbi->s_bal_cX_groups_considered[CR_POWER2_ALIGNED])); 3067 + seq_printf(seq, "\t\textents_scanned: %u\n", 3068 + atomic_read(&sbi->s_bal_cX_ex_scanned[CR_POWER2_ALIGNED])); 3069 3069 seq_printf(seq, "\t\tuseless_loops: %llu\n", 3070 - atomic64_read(&sbi->s_bal_cX_failed[CR0])); 3070 + atomic64_read(&sbi->s_bal_cX_failed[CR_POWER2_ALIGNED])); 3071 3071 seq_printf(seq, "\t\tbad_suggestions: %u\n", 3072 - atomic_read(&sbi->s_bal_cr0_bad_suggestions)); 3072 + atomic_read(&sbi->s_bal_p2_aligned_bad_suggestions)); 3073 3073 3074 - seq_puts(seq, "\tcr1_stats:\n"); 3075 - seq_printf(seq, "\t\thits: %llu\n", atomic64_read(&sbi->s_bal_cX_hits[CR1])); 3074 + /* CR_GOAL_LEN_FAST stats */ 3075 + seq_puts(seq, "\tcr_goal_fast_stats:\n"); 3076 + seq_printf(seq, "\t\thits: %llu\n", 3077 + atomic64_read(&sbi->s_bal_cX_hits[CR_GOAL_LEN_FAST])); 3076 3078 seq_printf(seq, 
"\t\tgroups_considered: %llu\n", 3077 - atomic64_read(&sbi->s_bal_cX_groups_considered[CR1])); 3078 - seq_printf(seq, "\t\textents_scanned: %u\n", atomic_read(&sbi->s_bal_cX_ex_scanned[CR1])); 3079 + atomic64_read( 3080 + &sbi->s_bal_cX_groups_considered[CR_GOAL_LEN_FAST])); 3081 + seq_printf(seq, "\t\textents_scanned: %u\n", 3082 + atomic_read(&sbi->s_bal_cX_ex_scanned[CR_GOAL_LEN_FAST])); 3079 3083 seq_printf(seq, "\t\tuseless_loops: %llu\n", 3080 - atomic64_read(&sbi->s_bal_cX_failed[CR1])); 3084 + atomic64_read(&sbi->s_bal_cX_failed[CR_GOAL_LEN_FAST])); 3081 3085 seq_printf(seq, "\t\tbad_suggestions: %u\n", 3082 - atomic_read(&sbi->s_bal_cr1_bad_suggestions)); 3086 + atomic_read(&sbi->s_bal_goal_fast_bad_suggestions)); 3083 3087 3084 - seq_puts(seq, "\tcr1.5_stats:\n"); 3085 - seq_printf(seq, "\t\thits: %llu\n", atomic64_read(&sbi->s_bal_cX_hits[CR1_5])); 3086 - seq_printf(seq, "\t\tgroups_considered: %llu\n", 3087 - atomic64_read(&sbi->s_bal_cX_groups_considered[CR1_5])); 3088 - seq_printf(seq, "\t\textents_scanned: %u\n", atomic_read(&sbi->s_bal_cX_ex_scanned[CR1_5])); 3088 + /* CR_BEST_AVAIL_LEN stats */ 3089 + seq_puts(seq, "\tcr_best_avail_stats:\n"); 3090 + seq_printf(seq, "\t\thits: %llu\n", 3091 + atomic64_read(&sbi->s_bal_cX_hits[CR_BEST_AVAIL_LEN])); 3092 + seq_printf( 3093 + seq, "\t\tgroups_considered: %llu\n", 3094 + atomic64_read( 3095 + &sbi->s_bal_cX_groups_considered[CR_BEST_AVAIL_LEN])); 3096 + seq_printf(seq, "\t\textents_scanned: %u\n", 3097 + atomic_read(&sbi->s_bal_cX_ex_scanned[CR_BEST_AVAIL_LEN])); 3089 3098 seq_printf(seq, "\t\tuseless_loops: %llu\n", 3090 - atomic64_read(&sbi->s_bal_cX_failed[CR1_5])); 3099 + atomic64_read(&sbi->s_bal_cX_failed[CR_BEST_AVAIL_LEN])); 3091 3100 seq_printf(seq, "\t\tbad_suggestions: %u\n", 3092 - atomic_read(&sbi->s_bal_cr1_5_bad_suggestions)); 3101 + atomic_read(&sbi->s_bal_best_avail_bad_suggestions)); 3093 3102 3094 - seq_puts(seq, "\tcr2_stats:\n"); 3095 - seq_printf(seq, "\t\thits: %llu\n", 
atomic64_read(&sbi->s_bal_cX_hits[CR2])); 3103 + /* CR_GOAL_LEN_SLOW stats */ 3104 + seq_puts(seq, "\tcr_goal_slow_stats:\n"); 3105 + seq_printf(seq, "\t\thits: %llu\n", 3106 + atomic64_read(&sbi->s_bal_cX_hits[CR_GOAL_LEN_SLOW])); 3096 3107 seq_printf(seq, "\t\tgroups_considered: %llu\n", 3097 - atomic64_read(&sbi->s_bal_cX_groups_considered[CR2])); 3098 - seq_printf(seq, "\t\textents_scanned: %u\n", atomic_read(&sbi->s_bal_cX_ex_scanned[CR2])); 3108 + atomic64_read( 3109 + &sbi->s_bal_cX_groups_considered[CR_GOAL_LEN_SLOW])); 3110 + seq_printf(seq, "\t\textents_scanned: %u\n", 3111 + atomic_read(&sbi->s_bal_cX_ex_scanned[CR_GOAL_LEN_SLOW])); 3099 3112 seq_printf(seq, "\t\tuseless_loops: %llu\n", 3100 - atomic64_read(&sbi->s_bal_cX_failed[CR2])); 3113 + atomic64_read(&sbi->s_bal_cX_failed[CR_GOAL_LEN_SLOW])); 3101 3114 3102 - seq_puts(seq, "\tcr3_stats:\n"); 3103 - seq_printf(seq, "\t\thits: %llu\n", atomic64_read(&sbi->s_bal_cX_hits[CR3])); 3104 - seq_printf(seq, "\t\tgroups_considered: %llu\n", 3105 - atomic64_read(&sbi->s_bal_cX_groups_considered[CR3])); 3106 - seq_printf(seq, "\t\textents_scanned: %u\n", atomic_read(&sbi->s_bal_cX_ex_scanned[CR3])); 3115 + /* CR_ANY_FREE stats */ 3116 + seq_puts(seq, "\tcr_any_free_stats:\n"); 3117 + seq_printf(seq, "\t\thits: %llu\n", 3118 + atomic64_read(&sbi->s_bal_cX_hits[CR_ANY_FREE])); 3119 + seq_printf( 3120 + seq, "\t\tgroups_considered: %llu\n", 3121 + atomic64_read(&sbi->s_bal_cX_groups_considered[CR_ANY_FREE])); 3122 + seq_printf(seq, "\t\textents_scanned: %u\n", 3123 + atomic_read(&sbi->s_bal_cX_ex_scanned[CR_ANY_FREE])); 3107 3124 seq_printf(seq, "\t\tuseless_loops: %llu\n", 3108 - atomic64_read(&sbi->s_bal_cX_failed[CR3])); 3109 - seq_printf(seq, "\textents_scanned: %u\n", atomic_read(&sbi->s_bal_ex_scanned)); 3125 + atomic64_read(&sbi->s_bal_cX_failed[CR_ANY_FREE])); 3126 + 3127 + /* Aggregates */ 3128 + seq_printf(seq, "\textents_scanned: %u\n", 3129 + atomic_read(&sbi->s_bal_ex_scanned)); 3110 3130 
seq_printf(seq, "\t\tgoal_hits: %u\n", atomic_read(&sbi->s_bal_goals)); 3111 - seq_printf(seq, "\t\tlen_goal_hits: %u\n", atomic_read(&sbi->s_bal_len_goals)); 3131 + seq_printf(seq, "\t\tlen_goal_hits: %u\n", 3132 + atomic_read(&sbi->s_bal_len_goals)); 3112 3133 seq_printf(seq, "\t\t2^n_hits: %u\n", atomic_read(&sbi->s_bal_2orders)); 3113 3134 seq_printf(seq, "\t\tbreaks: %u\n", atomic_read(&sbi->s_bal_breaks)); 3114 3135 seq_printf(seq, "\t\tlost: %u\n", atomic_read(&sbi->s_mb_lost_chunks)); 3115 - 3116 3136 seq_printf(seq, "\tbuddies_generated: %u/%u\n", 3117 3137 atomic_read(&sbi->s_mb_buddies_generated), 3118 3138 ext4_get_groups_count(sb)); ··· 3148 3112 atomic64_read(&sbi->s_mb_generation_time)); 3149 3113 seq_printf(seq, "\tpreallocated: %u\n", 3150 3114 atomic_read(&sbi->s_mb_preallocated)); 3151 - seq_printf(seq, "\tdiscarded: %u\n", 3152 - atomic_read(&sbi->s_mb_discarded)); 3115 + seq_printf(seq, "\tdiscarded: %u\n", atomic_read(&sbi->s_mb_discarded)); 3153 3116 return 0; 3154 3117 } 3155 3118 ··· 3635 3600 sbi->s_mb_stats = MB_DEFAULT_STATS; 3636 3601 sbi->s_mb_stream_request = MB_DEFAULT_STREAM_THRESHOLD; 3637 3602 sbi->s_mb_order2_reqs = MB_DEFAULT_ORDER2_REQS; 3638 - sbi->s_mb_cr1_5_max_trim_order = MB_DEFAULT_CR1_5_TRIM_ORDER; 3603 + sbi->s_mb_best_avail_max_trim_order = MB_DEFAULT_BEST_AVAIL_TRIM_ORDER; 3639 3604 3640 3605 /* 3641 3606 * The default group preallocation is 512, which for 4k block
+4 -4
fs/ext4/mballoc.h
··· 86 86 #define MB_DEFAULT_LINEAR_SCAN_THRESHOLD 16 87 87 88 88 /* 89 - * The maximum order upto which CR1.5 can trim a particular allocation request. 90 - * Example, if we have an order 7 request and max trim order of 3, CR1.5 can 91 - * trim this upto order 4. 89 + * The maximum order upto which CR_BEST_AVAIL_LEN can trim a particular 90 + * allocation request. Example, if we have an order 7 request and max trim order 91 + * of 3, we can trim this request upto order 4. 92 92 */ 93 - #define MB_DEFAULT_CR1_5_TRIM_ORDER 3 93 + #define MB_DEFAULT_BEST_AVAIL_TRIM_ORDER 3 94 94 95 95 /* 96 96 * Number of valid buddy orders
+2 -2
fs/ext4/sysfs.c
··· 223 223 EXT4_RW_ATTR_SBI_UI(warning_ratelimit_burst, s_warning_ratelimit_state.burst); 224 224 EXT4_RW_ATTR_SBI_UI(msg_ratelimit_interval_ms, s_msg_ratelimit_state.interval); 225 225 EXT4_RW_ATTR_SBI_UI(msg_ratelimit_burst, s_msg_ratelimit_state.burst); 226 - EXT4_RW_ATTR_SBI_UI(mb_cr1_5_max_trim_order, s_mb_cr1_5_max_trim_order); 226 + EXT4_RW_ATTR_SBI_UI(mb_best_avail_max_trim_order, s_mb_best_avail_max_trim_order); 227 227 #ifdef CONFIG_EXT4_DEBUG 228 228 EXT4_RW_ATTR_SBI_UL(simulate_fail, s_simulate_fail); 229 229 #endif ··· 274 274 ATTR_LIST(warning_ratelimit_burst), 275 275 ATTR_LIST(msg_ratelimit_interval_ms), 276 276 ATTR_LIST(msg_ratelimit_burst), 277 - ATTR_LIST(mb_cr1_5_max_trim_order), 277 + ATTR_LIST(mb_best_avail_max_trim_order), 278 278 ATTR_LIST(errors_count), 279 279 ATTR_LIST(warning_count), 280 280 ATTR_LIST(msg_count),
+12 -12
include/trace/events/ext4.h
··· 120 120 { EXT4_FC_REASON_INODE_JOURNAL_DATA, "INODE_JOURNAL_DATA"}, \ 121 121 { EXT4_FC_REASON_ENCRYPTED_FILENAME, "ENCRYPTED_FILENAME"}) 122 122 123 - TRACE_DEFINE_ENUM(CR0); 124 - TRACE_DEFINE_ENUM(CR1); 125 - TRACE_DEFINE_ENUM(CR1_5); 126 - TRACE_DEFINE_ENUM(CR2); 127 - TRACE_DEFINE_ENUM(CR3); 123 + TRACE_DEFINE_ENUM(CR_POWER2_ALIGNED); 124 + TRACE_DEFINE_ENUM(CR_GOAL_LEN_FAST); 125 + TRACE_DEFINE_ENUM(CR_BEST_AVAIL_LEN); 126 + TRACE_DEFINE_ENUM(CR_GOAL_LEN_SLOW); 127 + TRACE_DEFINE_ENUM(CR_ANY_FREE); 128 128 129 - #define show_criteria(cr) \ 130 - __print_symbolic(cr, \ 131 - { CR0, "CR0" }, \ 132 - { CR1, "CR1" }, \ 133 - { CR1_5, "CR1.5" } \ 134 - { CR2, "CR2" }, \ 135 - { CR3, "CR3" }) 129 + #define show_criteria(cr) \ 130 + __print_symbolic(cr, \ 131 + { CR_POWER2_ALIGNED, "CR_POWER2_ALIGNED" }, \ 132 + { CR_GOAL_LEN_FAST, "CR_GOAL_LEN_FAST" }, \ 133 + { CR_BEST_AVAIL_LEN, "CR_BEST_AVAIL_LEN" }, \ 134 + { CR_GOAL_LEN_SLOW, "CR_GOAL_LEN_SLOW" }, \ 135 + { CR_ANY_FREE, "CR_ANY_FREE" }) 136 136 137 137 TRACE_EVENT(ext4_other_inode_update_time, 138 138 TP_PROTO(struct inode *inode, ino_t orig_ino),