Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

selftests/resctrl: Add Cache Allocation Technology (CAT) selftest

The Cache Allocation Technology (CAT) selftest allocates a portion of
the last level cache and starts a benchmark that reads each cache
line in this portion of the cache. It measures the cache misses with
perf; the number of misses should be equal to the number of cache
lines in this portion of the cache.

We don't use CQM to calculate cache usage because some CAT-enabled
platforms don't have CQM.

Co-developed-by: Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>
Signed-off-by: Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>
Co-developed-by: Babu Moger <babu.moger@amd.com>
Signed-off-by: Babu Moger <babu.moger@amd.com>
Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>

authored by

Fenghua Yu and committed by
Shuah Khan
790bf585 78941183

+451 -5
+174 -1
tools/testing/selftests/resctrl/cache.c
··· 10 10 } values[2]; 11 11 }; 12 12 13 + static struct perf_event_attr pea_llc_miss; 14 + static struct read_format rf_cqm; 15 + static int fd_lm; 13 16 char llc_occup_path[1024]; 17 + 18 + static void initialize_perf_event_attr(void) 19 + { 20 + pea_llc_miss.type = PERF_TYPE_HARDWARE; 21 + pea_llc_miss.size = sizeof(struct perf_event_attr); 22 + pea_llc_miss.read_format = PERF_FORMAT_GROUP; 23 + pea_llc_miss.exclude_kernel = 1; 24 + pea_llc_miss.exclude_hv = 1; 25 + pea_llc_miss.exclude_idle = 1; 26 + pea_llc_miss.exclude_callchain_kernel = 1; 27 + pea_llc_miss.inherit = 1; 28 + pea_llc_miss.exclude_guest = 1; 29 + pea_llc_miss.disabled = 1; 30 + } 31 + 32 + static void ioctl_perf_event_ioc_reset_enable(void) 33 + { 34 + ioctl(fd_lm, PERF_EVENT_IOC_RESET, 0); 35 + ioctl(fd_lm, PERF_EVENT_IOC_ENABLE, 0); 36 + } 37 + 38 + static int perf_event_open_llc_miss(pid_t pid, int cpu_no) 39 + { 40 + fd_lm = perf_event_open(&pea_llc_miss, pid, cpu_no, -1, 41 + PERF_FLAG_FD_CLOEXEC); 42 + if (fd_lm == -1) { 43 + perror("Error opening leader"); 44 + ctrlc_handler(0, NULL, NULL); 45 + return -1; 46 + } 47 + 48 + return 0; 49 + } 50 + 51 + static int initialize_llc_perf(void) 52 + { 53 + memset(&pea_llc_miss, 0, sizeof(struct perf_event_attr)); 54 + memset(&rf_cqm, 0, sizeof(struct read_format)); 55 + 56 + /* Initialize perf_event_attr structures for HW_CACHE_MISSES */ 57 + initialize_perf_event_attr(); 58 + 59 + pea_llc_miss.config = PERF_COUNT_HW_CACHE_MISSES; 60 + 61 + rf_cqm.nr = 1; 62 + 63 + return 0; 64 + } 65 + 66 + static int reset_enable_llc_perf(pid_t pid, int cpu_no) 67 + { 68 + int ret = 0; 69 + 70 + ret = perf_event_open_llc_miss(pid, cpu_no); 71 + if (ret < 0) 72 + return ret; 73 + 74 + /* Start counters to log values */ 75 + ioctl_perf_event_ioc_reset_enable(); 76 + 77 + return 0; 78 + } 79 + 80 + /* 81 + * get_llc_perf: llc cache miss through perf events 82 + * @cpu_no: CPU number that the benchmark PID is binded to 83 + * 84 + * Perf events like 
HW_CACHE_MISSES could be used to validate number of 85 + * cache lines allocated. 86 + * 87 + * Return: =0 on success. <0 on failure. 88 + */ 89 + static int get_llc_perf(unsigned long *llc_perf_miss) 90 + { 91 + __u64 total_misses; 92 + 93 + /* Stop counters after one span to get miss rate */ 94 + 95 + ioctl(fd_lm, PERF_EVENT_IOC_DISABLE, 0); 96 + 97 + if (read(fd_lm, &rf_cqm, sizeof(struct read_format)) == -1) { 98 + perror("Could not get llc misses through perf"); 99 + 100 + return -1; 101 + } 102 + 103 + total_misses = rf_cqm.values[0].value; 104 + 105 + close(fd_lm); 106 + 107 + *llc_perf_miss = total_misses; 108 + 109 + return 0; 110 + } 14 111 15 112 /* 16 113 * Get LLC Occupancy as reported by RESCTRL FS ··· 176 79 177 80 int measure_cache_vals(struct resctrl_val_param *param, int bm_pid) 178 81 { 179 - unsigned long llc_occu_resc = 0, llc_value = 0; 82 + unsigned long llc_perf_miss = 0, llc_occu_resc = 0, llc_value = 0; 180 83 int ret; 84 + 85 + /* 86 + * Measure cache miss from perf. 87 + */ 88 + if (!strcmp(param->resctrl_val, "cat")) { 89 + ret = get_llc_perf(&llc_perf_miss); 90 + if (ret < 0) 91 + return ret; 92 + llc_value = llc_perf_miss; 93 + } 181 94 182 95 /* 183 96 * Measure llc occupancy from resctrl. ··· 203 96 return ret; 204 97 205 98 return 0; 99 + } 100 + 101 + /* 102 + * cache_val: execute benchmark and measure LLC occupancy resctrl 103 + * and perf cache miss for the benchmark 104 + * @param: parameters passed to cache_val() 105 + * 106 + * Return: 0 on success. non-zero on failure. 
107 + */ 108 + int cat_val(struct resctrl_val_param *param) 109 + { 110 + int malloc_and_init_memory = 1, memflush = 1, operation = 0, ret = 0; 111 + char *resctrl_val = param->resctrl_val; 112 + pid_t bm_pid; 113 + 114 + if (strcmp(param->filename, "") == 0) 115 + sprintf(param->filename, "stdio"); 116 + 117 + bm_pid = getpid(); 118 + 119 + /* Taskset benchmark to specified cpu */ 120 + ret = taskset_benchmark(bm_pid, param->cpu_no); 121 + if (ret) 122 + return ret; 123 + 124 + /* Write benchmark to specified con_mon grp, mon_grp in resctrl FS*/ 125 + ret = write_bm_pid_to_resctrl(bm_pid, param->ctrlgrp, param->mongrp, 126 + resctrl_val); 127 + if (ret) 128 + return ret; 129 + 130 + if ((strcmp(resctrl_val, "cat") == 0)) { 131 + ret = initialize_llc_perf(); 132 + if (ret) 133 + return ret; 134 + } 135 + 136 + /* Test runs until the callback setup() tells the test to stop. */ 137 + while (1) { 138 + if (strcmp(resctrl_val, "cat") == 0) { 139 + ret = param->setup(1, param); 140 + if (ret) { 141 + ret = 0; 142 + break; 143 + } 144 + ret = reset_enable_llc_perf(bm_pid, param->cpu_no); 145 + if (ret) 146 + break; 147 + 148 + if (run_fill_buf(param->span, malloc_and_init_memory, 149 + memflush, operation, resctrl_val)) { 150 + fprintf(stderr, "Error-running fill buffer\n"); 151 + ret = -1; 152 + break; 153 + } 154 + 155 + sleep(1); 156 + ret = measure_cache_vals(param, bm_pid); 157 + if (ret) 158 + break; 159 + } else { 160 + break; 161 + } 162 + } 163 + 164 + return ret; 206 165 }
+250
tools/testing/selftests/resctrl/cat_test.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* 3 + * Cache Allocation Technology (CAT) test 4 + * 5 + * Copyright (C) 2018 Intel Corporation 6 + * 7 + * Authors: 8 + * Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>, 9 + * Fenghua Yu <fenghua.yu@intel.com> 10 + */ 11 + #include "resctrl.h" 12 + #include <unistd.h> 13 + 14 + #define RESULT_FILE_NAME1 "result_cat1" 15 + #define RESULT_FILE_NAME2 "result_cat2" 16 + #define NUM_OF_RUNS 5 17 + #define MAX_DIFF_PERCENT 4 18 + #define MAX_DIFF 1000000 19 + 20 + int count_of_bits; 21 + char cbm_mask[256]; 22 + unsigned long long_mask; 23 + unsigned long cache_size; 24 + 25 + /* 26 + * Change schemata. Write schemata to specified 27 + * con_mon grp, mon_grp in resctrl FS. 28 + * Run 5 times in order to get average values. 29 + */ 30 + static int cat_setup(int num, ...) 31 + { 32 + struct resctrl_val_param *p; 33 + char schemata[64]; 34 + va_list param; 35 + int ret = 0; 36 + 37 + va_start(param, num); 38 + p = va_arg(param, struct resctrl_val_param *); 39 + va_end(param); 40 + 41 + /* Run NUM_OF_RUNS times */ 42 + if (p->num_of_runs >= NUM_OF_RUNS) 43 + return -1; 44 + 45 + if (p->num_of_runs == 0) { 46 + sprintf(schemata, "%lx", p->mask); 47 + ret = write_schemata(p->ctrlgrp, schemata, p->cpu_no, 48 + p->resctrl_val); 49 + } 50 + p->num_of_runs++; 51 + 52 + return ret; 53 + } 54 + 55 + static void show_cache_info(unsigned long sum_llc_perf_miss, int no_of_bits, 56 + unsigned long span) 57 + { 58 + unsigned long allocated_cache_lines = span / 64; 59 + unsigned long avg_llc_perf_miss = 0; 60 + float diff_percent; 61 + 62 + avg_llc_perf_miss = sum_llc_perf_miss / (NUM_OF_RUNS - 1); 63 + diff_percent = ((float)allocated_cache_lines - avg_llc_perf_miss) / 64 + allocated_cache_lines * 100; 65 + 66 + printf("%sok CAT: cache miss rate within %d%%\n", 67 + abs((int)diff_percent) > MAX_DIFF_PERCENT ? 
"not " : "", 68 + MAX_DIFF_PERCENT); 69 + tests_run++; 70 + printf("# Percent diff=%d\n", abs((int)diff_percent)); 71 + printf("# Number of bits: %d\n", no_of_bits); 72 + printf("# Avg_llc_perf_miss: %lu\n", avg_llc_perf_miss); 73 + printf("# Allocated cache lines: %lu\n", allocated_cache_lines); 74 + } 75 + 76 + static int check_results(struct resctrl_val_param *param) 77 + { 78 + char *token_array[8], temp[512]; 79 + unsigned long sum_llc_perf_miss = 0; 80 + int runs = 0, no_of_bits = 0; 81 + FILE *fp; 82 + 83 + printf("# Checking for pass/fail\n"); 84 + fp = fopen(param->filename, "r"); 85 + if (!fp) { 86 + perror("# Cannot open file"); 87 + 88 + return errno; 89 + } 90 + 91 + while (fgets(temp, sizeof(temp), fp)) { 92 + char *token = strtok(temp, ":\t"); 93 + int fields = 0; 94 + 95 + while (token) { 96 + token_array[fields++] = token; 97 + token = strtok(NULL, ":\t"); 98 + } 99 + /* 100 + * Discard the first value which is inaccurate due to monitoring 101 + * setup transition phase. 
102 + */ 103 + if (runs > 0) 104 + sum_llc_perf_miss += strtoul(token_array[3], NULL, 0); 105 + runs++; 106 + } 107 + 108 + fclose(fp); 109 + no_of_bits = count_bits(param->mask); 110 + 111 + show_cache_info(sum_llc_perf_miss, no_of_bits, param->span); 112 + 113 + return 0; 114 + } 115 + 116 + void cat_test_cleanup(void) 117 + { 118 + remove(RESULT_FILE_NAME1); 119 + remove(RESULT_FILE_NAME2); 120 + } 121 + 122 + int cat_perf_miss_val(int cpu_no, int n, char *cache_type) 123 + { 124 + unsigned long l_mask, l_mask_1; 125 + int ret, pipefd[2], sibling_cpu_no; 126 + char pipe_message; 127 + pid_t bm_pid; 128 + 129 + cache_size = 0; 130 + 131 + ret = remount_resctrlfs(true); 132 + if (ret) 133 + return ret; 134 + 135 + if (!validate_resctrl_feature_request("cat")) 136 + return -1; 137 + 138 + /* Get default cbm mask for L3/L2 cache */ 139 + ret = get_cbm_mask(cache_type); 140 + if (ret) 141 + return ret; 142 + 143 + long_mask = strtoul(cbm_mask, NULL, 16); 144 + 145 + /* Get L3/L2 cache size */ 146 + ret = get_cache_size(cpu_no, cache_type, &cache_size); 147 + if (ret) 148 + return ret; 149 + printf("cache size :%lu\n", cache_size); 150 + 151 + /* Get max number of bits from default-cabm mask */ 152 + count_of_bits = count_bits(long_mask); 153 + 154 + if (n < 1 || n > count_of_bits - 1) { 155 + printf("Invalid input value for no_of_bits n!\n"); 156 + printf("Please Enter value in range 1 to %d\n", 157 + count_of_bits - 1); 158 + return -1; 159 + } 160 + 161 + /* Get core id from same socket for running another thread */ 162 + sibling_cpu_no = get_core_sibling(cpu_no); 163 + if (sibling_cpu_no < 0) 164 + return -1; 165 + 166 + struct resctrl_val_param param = { 167 + .resctrl_val = "cat", 168 + .cpu_no = cpu_no, 169 + .mum_resctrlfs = 0, 170 + .setup = cat_setup, 171 + }; 172 + 173 + l_mask = long_mask >> n; 174 + l_mask_1 = ~l_mask & long_mask; 175 + 176 + /* Set param values for parent thread which will be allocated bitmask 177 + * with (max_bits - n) bits 178 + */ 
179 + param.span = cache_size * (count_of_bits - n) / count_of_bits; 180 + strcpy(param.ctrlgrp, "c2"); 181 + strcpy(param.mongrp, "m2"); 182 + strcpy(param.filename, RESULT_FILE_NAME2); 183 + param.mask = l_mask; 184 + param.num_of_runs = 0; 185 + 186 + if (pipe(pipefd)) { 187 + perror("# Unable to create pipe"); 188 + return errno; 189 + } 190 + 191 + bm_pid = fork(); 192 + 193 + /* Set param values for child thread which will be allocated bitmask 194 + * with n bits 195 + */ 196 + if (bm_pid == 0) { 197 + param.mask = l_mask_1; 198 + strcpy(param.ctrlgrp, "c1"); 199 + strcpy(param.mongrp, "m1"); 200 + param.span = cache_size * n / count_of_bits; 201 + strcpy(param.filename, RESULT_FILE_NAME1); 202 + param.num_of_runs = 0; 203 + param.cpu_no = sibling_cpu_no; 204 + } 205 + 206 + remove(param.filename); 207 + 208 + ret = cat_val(&param); 209 + if (ret) 210 + return ret; 211 + 212 + ret = check_results(&param); 213 + if (ret) 214 + return ret; 215 + 216 + if (bm_pid == 0) { 217 + /* Tell parent that child is ready */ 218 + close(pipefd[0]); 219 + pipe_message = 1; 220 + if (write(pipefd[1], &pipe_message, sizeof(pipe_message)) < 221 + sizeof(pipe_message)) { 222 + close(pipefd[1]); 223 + perror("# failed signaling parent process"); 224 + return errno; 225 + } 226 + 227 + close(pipefd[1]); 228 + while (1) 229 + ; 230 + } else { 231 + /* Parent waits for child to be ready. */ 232 + close(pipefd[1]); 233 + pipe_message = 0; 234 + while (pipe_message != 1) { 235 + if (read(pipefd[0], &pipe_message, 236 + sizeof(pipe_message)) < sizeof(pipe_message)) { 237 + perror("# failed reading from child process"); 238 + break; 239 + } 240 + } 241 + close(pipefd[0]); 242 + kill(bm_pid, SIGKILL); 243 + } 244 + 245 + cat_test_cleanup(); 246 + if (bm_pid) 247 + umount_resctrlfs(); 248 + 249 + return 0; 250 + }
+8 -2
tools/testing/selftests/resctrl/fill_buf.c
··· 113 113 int ret = 0; 114 114 FILE *fp; 115 115 116 - while (1) 116 + while (1) { 117 117 ret = fill_one_span_read(start_ptr, end_ptr); 118 + if (!strcmp(resctrl_val, "cat")) 119 + break; 120 + } 118 121 119 122 /* Consume read result so that reading memory is not optimized out. */ 120 123 fp = fopen("/dev/null", "w"); ··· 132 129 static int fill_cache_write(unsigned char *start_ptr, unsigned char *end_ptr, 133 130 char *resctrl_val) 134 131 { 135 - while (1) 132 + while (1) { 136 133 fill_one_span_write(start_ptr, end_ptr); 134 + if (!strcmp(resctrl_val, "cat")) 135 + break; 136 + } 137 137 138 138 return 0; 139 139 }
+3
tools/testing/selftests/resctrl/resctrl.h
··· 94 94 int get_cbm_mask(char *cache_type); 95 95 int get_cache_size(int cpu_no, char *cache_type, unsigned long *cache_size); 96 96 void ctrlc_handler(int signum, siginfo_t *info, void *ptr); 97 + int cat_val(struct resctrl_val_param *param); 98 + void cat_test_cleanup(void); 99 + int cat_perf_miss_val(int cpu_no, int no_of_bits, char *cache_type); 97 100 int cqm_resctrl_val(int cpu_no, int n, char **benchmark_cmd); 98 101 unsigned int count_bits(unsigned long n); 99 102 void cqm_test_cleanup(void);
+14 -1
tools/testing/selftests/resctrl/resctrl_tests.c
··· 19 19 printf("\t-b benchmark_cmd [options]: run specified benchmark for MBM, MBA and CQM"); 20 20 printf("\t default benchmark is builtin fill_buf\n"); 21 21 printf("\t-t test list: run tests specified in the test list, "); 22 - printf("e.g. -t mbm, mba, cqm\n"); 22 + printf("e.g. -t mbm, mba, cqm, cat\n"); 23 23 printf("\t-n no_of_bits: run cache tests using specified no of bits in cache bit mask\n"); 24 24 printf("\t-p cpu_no: specify CPU number to run the test. 1 is default\n"); 25 25 printf("\t-h: help\n"); ··· 30 30 mbm_test_cleanup(); 31 31 mba_test_cleanup(); 32 32 cqm_test_cleanup(); 33 + cat_test_cleanup(); 33 34 } 34 35 35 36 int main(int argc, char **argv) ··· 40 39 char *benchmark_cmd[BENCHMARK_ARGS], bw_report[64], bm_type[64]; 41 40 char benchmark_cmd_area[BENCHMARK_ARGS][BENCHMARK_ARG_SIZE]; 42 41 int ben_ind, ben_count; 42 + bool cat_test = true; 43 43 44 44 for (i = 0; i < argc; i++) { 45 45 if (strcmp(argv[i], "-b") == 0) { ··· 62 60 mbm_test = false; 63 61 mba_test = false; 64 62 cqm_test = false; 63 + cat_test = false; 65 64 while (token) { 66 65 if (!strcmp(token, "mbm")) { 67 66 mbm_test = true; ··· 70 67 mba_test = true; 71 68 } else if (!strcmp(token, "cqm")) { 72 69 cqm_test = true; 70 + } else if (!strcmp(token, "cat")) { 71 + cat_test = true; 73 72 } else { 74 73 printf("invalid argument\n"); 75 74 ··· 162 157 printf("%sok CQM: test\n", res ? "not " : ""); 163 158 cqm_test_cleanup(); 164 159 tests_run++; 160 + } 161 + 162 + if (cat_test) { 163 + printf("# Starting CAT test ...\n"); 164 + res = cat_perf_miss_val(cpu_no, no_of_bits, "L3"); 165 + printf("%sok CAT: test\n", res ? "not " : ""); 166 + tests_run++; 167 + cat_test_cleanup(); 165 168 } 166 169 167 170 printf("1..%d\n", tests_run);
+2 -1
tools/testing/selftests/resctrl/resctrlfs.c
··· 501 501 FILE *fp; 502 502 503 503 if ((strcmp(resctrl_val, "mba") != 0) && 504 + (strcmp(resctrl_val, "cat") != 0) && 504 505 (strcmp(resctrl_val, "cqm") != 0)) 505 506 return -ENOENT; 506 507 ··· 523 522 else 524 523 sprintf(controlgroup, "%s/schemata", RESCTRL_PATH); 525 524 526 - if (!strcmp(resctrl_val, "cqm")) 525 + if (!strcmp(resctrl_val, "cat") || !strcmp(resctrl_val, "cqm")) 527 526 sprintf(schema, "%s%d%c%s", "L3:", resource_id, '=', schemata); 528 527 if (strcmp(resctrl_val, "mba") == 0) 529 528 sprintf(schema, "%s%d%c%s", "MB:", resource_id, '=', schemata);