Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

mm, memcg: cg2 memory{.swap,}.peak write tests

Extend two existing tests to cover extracting memory usage through the
newly mutable memory.peak and memory.swap.peak handlers.

In particular, make sure to exercise adding and removing watchers with
overlapping lifetimes so the less-trivial logic gets tested.

The new/updated tests detect a missing write handler by fstat'ing the
memory.peak and memory.swap.peak files and checking the owner-write bit,
and skip themselves if the handler is absent. They also skip if the file
doesn't exist at all.

[davidf@vimeo.com: update tests]
Link: https://lkml.kernel.org/r/20240730231304.761942-3-davidf@vimeo.com
Link: https://lkml.kernel.org/r/20240729143743.34236-3-davidf@vimeo.com
Signed-off-by: David Finkel <davidf@vimeo.com>
Acked-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Michal Koutný <mkoutny@suse.com>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Shakeel Butt <shakeel.butt@linux.dev>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Waiman Long <longman@redhat.com>
Cc: Zefan Li <lizefan.x@bytedance.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

Authored by David Finkel and committed by Andrew Morton
d075bcce c6f53ed8

+280 -8
+22
tools/testing/selftests/cgroup/cgroup_util.c
··· 141 141 return atol(buf); 142 142 } 143 143 144 + long cg_read_long_fd(int fd) 145 + { 146 + char buf[128]; 147 + 148 + if (pread(fd, buf, sizeof(buf), 0) <= 0) 149 + return -1; 150 + 151 + return atol(buf); 152 + } 153 + 144 154 long cg_read_key_long(const char *cgroup, const char *control, const char *key) 145 155 { 146 156 char buf[PAGE_SIZE]; ··· 191 181 snprintf(path, sizeof(path), "%s/%s", cgroup, control); 192 182 ret = write_text(path, buf, len); 193 183 return ret == len ? 0 : ret; 184 + } 185 + 186 + /* 187 + * Returns fd on success, or -1 on failure. 188 + * (fd should be closed with close() as usual) 189 + */ 190 + int cg_open(const char *cgroup, const char *control, int flags) 191 + { 192 + char path[PATH_MAX]; 193 + 194 + snprintf(path, sizeof(path), "%s/%s", cgroup, control); 195 + return open(path, flags); 194 196 } 195 197 196 198 int cg_write_numeric(const char *cgroup, const char *control, long value)
+2
tools/testing/selftests/cgroup/cgroup_util.h
··· 34 34 extern int cg_read_strstr(const char *cgroup, const char *control, 35 35 const char *needle); 36 36 extern long cg_read_long(const char *cgroup, const char *control); 37 + extern long cg_read_long_fd(int fd); 37 38 long cg_read_key_long(const char *cgroup, const char *control, const char *key); 38 39 extern long cg_read_lc(const char *cgroup, const char *control); 39 40 extern int cg_write(const char *cgroup, const char *control, char *buf); 41 + extern int cg_open(const char *cgroup, const char *control, int flags); 40 42 int cg_write_numeric(const char *cgroup, const char *control, long value); 41 43 extern int cg_run(const char *cgroup, 42 44 int (*fn)(const char *cgroup, void *arg),
+256 -8
tools/testing/selftests/cgroup/test_memcontrol.c
··· 161 161 /* 162 162 * This test create a memory cgroup, allocates 163 163 * some anonymous memory and some pagecache 164 - * and check memory.current and some memory.stat values. 164 + * and checks memory.current, memory.peak, and some memory.stat values. 165 165 */ 166 - static int test_memcg_current(const char *root) 166 + static int test_memcg_current_peak(const char *root) 167 167 { 168 168 int ret = KSFT_FAIL; 169 - long current; 169 + long current, peak, peak_reset; 170 170 char *memcg; 171 + bool fd2_closed = false, fd3_closed = false, fd4_closed = false; 172 + int peak_fd = -1, peak_fd2 = -1, peak_fd3 = -1, peak_fd4 = -1; 173 + struct stat ss; 171 174 172 175 memcg = cg_name(root, "memcg_test"); 173 176 if (!memcg) ··· 183 180 if (current != 0) 184 181 goto cleanup; 185 182 183 + peak = cg_read_long(memcg, "memory.peak"); 184 + if (peak != 0) 185 + goto cleanup; 186 + 186 187 if (cg_run(memcg, alloc_anon_50M_check, NULL)) 188 + goto cleanup; 189 + 190 + peak = cg_read_long(memcg, "memory.peak"); 191 + if (peak < MB(50)) 192 + goto cleanup; 193 + 194 + /* 195 + * We'll open a few FDs for the same memory.peak file to exercise the free-path 196 + * We need at least three to be closed in a different order than writes occurred to test 197 + * the linked-list handling. 198 + */ 199 + peak_fd = cg_open(memcg, "memory.peak", O_RDWR | O_APPEND | O_CLOEXEC); 200 + 201 + if (peak_fd == -1) { 202 + if (errno == ENOENT) 203 + ret = KSFT_SKIP; 204 + goto cleanup; 205 + } 206 + 207 + /* 208 + * Before we try to use memory.peak's fd, try to figure out whether 209 + * this kernel supports writing to that file in the first place. 
(by 210 + * checking the writable bit on the file's st_mode) 211 + */ 212 + if (fstat(peak_fd, &ss)) 213 + goto cleanup; 214 + 215 + if ((ss.st_mode & S_IWUSR) == 0) { 216 + ret = KSFT_SKIP; 217 + goto cleanup; 218 + } 219 + 220 + peak_fd2 = cg_open(memcg, "memory.peak", O_RDWR | O_APPEND | O_CLOEXEC); 221 + 222 + if (peak_fd2 == -1) 223 + goto cleanup; 224 + 225 + peak_fd3 = cg_open(memcg, "memory.peak", O_RDWR | O_APPEND | O_CLOEXEC); 226 + 227 + if (peak_fd3 == -1) 228 + goto cleanup; 229 + 230 + /* any non-empty string resets, but make it clear */ 231 + static const char reset_string[] = "reset\n"; 232 + 233 + peak_reset = write(peak_fd, reset_string, sizeof(reset_string)); 234 + if (peak_reset != sizeof(reset_string)) 235 + goto cleanup; 236 + 237 + peak_reset = write(peak_fd2, reset_string, sizeof(reset_string)); 238 + if (peak_reset != sizeof(reset_string)) 239 + goto cleanup; 240 + 241 + peak_reset = write(peak_fd3, reset_string, sizeof(reset_string)); 242 + if (peak_reset != sizeof(reset_string)) 243 + goto cleanup; 244 + 245 + /* Make sure a completely independent read isn't affected by our FD-local reset above*/ 246 + peak = cg_read_long(memcg, "memory.peak"); 247 + if (peak < MB(50)) 248 + goto cleanup; 249 + 250 + fd2_closed = true; 251 + if (close(peak_fd2)) 252 + goto cleanup; 253 + 254 + peak_fd4 = cg_open(memcg, "memory.peak", O_RDWR | O_APPEND | O_CLOEXEC); 255 + 256 + if (peak_fd4 == -1) 257 + goto cleanup; 258 + 259 + peak_reset = write(peak_fd4, reset_string, sizeof(reset_string)); 260 + if (peak_reset != sizeof(reset_string)) 261 + goto cleanup; 262 + 263 + peak = cg_read_long_fd(peak_fd); 264 + if (peak > MB(30) || peak < 0) 187 265 goto cleanup; 188 266 189 267 if (cg_run(memcg, alloc_pagecache_50M_check, NULL)) 190 268 goto cleanup; 191 269 270 + peak = cg_read_long(memcg, "memory.peak"); 271 + if (peak < MB(50)) 272 + goto cleanup; 273 + 274 + /* Make sure everything is back to normal */ 275 + peak = cg_read_long_fd(peak_fd); 276 + if 
(peak < MB(50)) 277 + goto cleanup; 278 + 279 + peak = cg_read_long_fd(peak_fd4); 280 + if (peak < MB(50)) 281 + goto cleanup; 282 + 283 + fd3_closed = true; 284 + if (close(peak_fd3)) 285 + goto cleanup; 286 + 287 + fd4_closed = true; 288 + if (close(peak_fd4)) 289 + goto cleanup; 290 + 192 291 ret = KSFT_PASS; 193 292 194 293 cleanup: 294 + close(peak_fd); 295 + if (!fd2_closed) 296 + close(peak_fd2); 297 + if (!fd3_closed) 298 + close(peak_fd3); 299 + if (!fd4_closed) 300 + close(peak_fd4); 195 301 cg_destroy(memcg); 196 302 free(memcg); 197 303 ··· 929 817 930 818 /* 931 819 * This test checks that memory.swap.max limits the amount of 932 - * anonymous memory which can be swapped out. 820 + * anonymous memory which can be swapped out. Additionally, it verifies that 821 + * memory.swap.peak reflects the high watermark and can be reset. 933 822 */ 934 - static int test_memcg_swap_max(const char *root) 823 + static int test_memcg_swap_max_peak(const char *root) 935 824 { 936 825 int ret = KSFT_FAIL; 937 826 char *memcg; 938 - long max; 827 + long max, peak; 828 + struct stat ss; 829 + int swap_peak_fd = -1, mem_peak_fd = -1; 830 + 831 + /* any non-empty string resets */ 832 + static const char reset_string[] = "foobarbaz"; 939 833 940 834 if (!is_swap_enabled()) 941 835 return KSFT_SKIP; ··· 957 839 ret = KSFT_SKIP; 958 840 goto cleanup; 959 841 } 842 + 843 + swap_peak_fd = cg_open(memcg, "memory.swap.peak", 844 + O_RDWR | O_APPEND | O_CLOEXEC); 845 + 846 + if (swap_peak_fd == -1) { 847 + if (errno == ENOENT) 848 + ret = KSFT_SKIP; 849 + goto cleanup; 850 + } 851 + 852 + /* 853 + * Before we try to use memory.swap.peak's fd, try to figure out 854 + * whether this kernel supports writing to that file in the first 855 + * place. 
(by checking the writable bit on the file's st_mode) 856 + */ 857 + if (fstat(swap_peak_fd, &ss)) 858 + goto cleanup; 859 + 860 + if ((ss.st_mode & S_IWUSR) == 0) { 861 + ret = KSFT_SKIP; 862 + goto cleanup; 863 + } 864 + 865 + mem_peak_fd = cg_open(memcg, "memory.peak", O_RDWR | O_APPEND | O_CLOEXEC); 866 + 867 + if (mem_peak_fd == -1) 868 + goto cleanup; 869 + 870 + if (cg_read_long(memcg, "memory.swap.peak")) 871 + goto cleanup; 872 + 873 + if (cg_read_long_fd(swap_peak_fd)) 874 + goto cleanup; 875 + 876 + /* switch the swap and mem fds into local-peak tracking mode*/ 877 + int peak_reset = write(swap_peak_fd, reset_string, sizeof(reset_string)); 878 + 879 + if (peak_reset != sizeof(reset_string)) 880 + goto cleanup; 881 + 882 + if (cg_read_long_fd(swap_peak_fd)) 883 + goto cleanup; 884 + 885 + if (cg_read_long(memcg, "memory.peak")) 886 + goto cleanup; 887 + 888 + if (cg_read_long_fd(mem_peak_fd)) 889 + goto cleanup; 890 + 891 + peak_reset = write(mem_peak_fd, reset_string, sizeof(reset_string)); 892 + if (peak_reset != sizeof(reset_string)) 893 + goto cleanup; 894 + 895 + if (cg_read_long_fd(mem_peak_fd)) 896 + goto cleanup; 960 897 961 898 if (cg_read_strcmp(memcg, "memory.max", "max\n")) 962 899 goto cleanup; ··· 1035 862 if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1) 1036 863 goto cleanup; 1037 864 865 + peak = cg_read_long(memcg, "memory.peak"); 866 + if (peak < MB(29)) 867 + goto cleanup; 868 + 869 + peak = cg_read_long(memcg, "memory.swap.peak"); 870 + if (peak < MB(29)) 871 + goto cleanup; 872 + 873 + peak = cg_read_long_fd(mem_peak_fd); 874 + if (peak < MB(29)) 875 + goto cleanup; 876 + 877 + peak = cg_read_long_fd(swap_peak_fd); 878 + if (peak < MB(29)) 879 + goto cleanup; 880 + 881 + /* 882 + * open, reset and close the peak swap on another FD to make sure 883 + * multiple extant fds don't corrupt the linked-list 884 + */ 885 + peak_reset = cg_write(memcg, "memory.swap.peak", (char *)reset_string); 886 + if (peak_reset) 887 + goto 
cleanup; 888 + 889 + peak_reset = cg_write(memcg, "memory.peak", (char *)reset_string); 890 + if (peak_reset) 891 + goto cleanup; 892 + 893 + /* actually reset on the fds */ 894 + peak_reset = write(swap_peak_fd, reset_string, sizeof(reset_string)); 895 + if (peak_reset != sizeof(reset_string)) 896 + goto cleanup; 897 + 898 + peak_reset = write(mem_peak_fd, reset_string, sizeof(reset_string)); 899 + if (peak_reset != sizeof(reset_string)) 900 + goto cleanup; 901 + 902 + peak = cg_read_long_fd(swap_peak_fd); 903 + if (peak > MB(10)) 904 + goto cleanup; 905 + 906 + /* 907 + * The cgroup is now empty, but there may be a page or two associated 908 + * with the open FD accounted to it. 909 + */ 910 + peak = cg_read_long_fd(mem_peak_fd); 911 + if (peak > MB(1)) 912 + goto cleanup; 913 + 914 + if (cg_read_long(memcg, "memory.peak") < MB(29)) 915 + goto cleanup; 916 + 917 + if (cg_read_long(memcg, "memory.swap.peak") < MB(29)) 918 + goto cleanup; 919 + 1038 920 if (cg_run(memcg, alloc_anon_50M_check_swap, (void *)MB(30))) 1039 921 goto cleanup; 1040 922 ··· 1097 869 if (max <= 0) 1098 870 goto cleanup; 1099 871 872 + peak = cg_read_long(memcg, "memory.peak"); 873 + if (peak < MB(29)) 874 + goto cleanup; 875 + 876 + peak = cg_read_long(memcg, "memory.swap.peak"); 877 + if (peak < MB(29)) 878 + goto cleanup; 879 + 880 + peak = cg_read_long_fd(mem_peak_fd); 881 + if (peak < MB(29)) 882 + goto cleanup; 883 + 884 + peak = cg_read_long_fd(swap_peak_fd); 885 + if (peak < MB(19)) 886 + goto cleanup; 887 + 1100 888 ret = KSFT_PASS; 1101 889 1102 890 cleanup: 891 + if (mem_peak_fd != -1 && close(mem_peak_fd)) 892 + ret = KSFT_FAIL; 893 + if (swap_peak_fd != -1 && close(swap_peak_fd)) 894 + ret = KSFT_FAIL; 1103 895 cg_destroy(memcg); 1104 896 free(memcg); 1105 897 ··· 1543 1295 const char *name; 1544 1296 } tests[] = { 1545 1297 T(test_memcg_subtree_control), 1546 - T(test_memcg_current), 1298 + T(test_memcg_current_peak), 1547 1299 T(test_memcg_min), 1548 1300 T(test_memcg_low), 
1549 1301 T(test_memcg_high), ··· 1551 1303 T(test_memcg_max), 1552 1304 T(test_memcg_reclaim), 1553 1305 T(test_memcg_oom_events), 1554 - T(test_memcg_swap_max), 1306 + T(test_memcg_swap_max_peak), 1555 1307 T(test_memcg_sock), 1556 1308 T(test_memcg_oom_group_leaf_events), 1557 1309 T(test_memcg_oom_group_parent_events),