Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

samples/cgroup: introduce memcg memory.events listener

This is a simple listener for memory events that handles counter changes
at runtime. It can be set up for a specific memory cgroup v2.

The output example:
=====
$ /tmp/memcg_event_listener test
Initialized MEMCG events with counters:
MEMCG events:
low: 0
high: 0
max: 0
oom: 0
oom_kill: 0
oom_group_kill: 0
Started monitoring memory events from '/sys/fs/cgroup/test/memory.events'...
Received event in /sys/fs/cgroup/test/memory.events:
*** 1 MEMCG oom_kill event, change counter 0 => 1
Received event in /sys/fs/cgroup/test/memory.events:
*** 1 MEMCG oom_kill event, change counter 1 => 2
Received event in /sys/fs/cgroup/test/memory.events:
*** 1 MEMCG oom_kill event, change counter 2 => 3
Received event in /sys/fs/cgroup/test/memory.events:
*** 1 MEMCG oom_kill event, change counter 3 => 4
Received event in /sys/fs/cgroup/test/memory.events:
*** 2 MEMCG max events, change counter 0 => 2
Received event in /sys/fs/cgroup/test/memory.events:
*** 8 MEMCG max events, change counter 2 => 10
*** 1 MEMCG oom event, change counter 0 => 1
Received event in /sys/fs/cgroup/test/memory.events:
*** 1 MEMCG oom_kill event, change counter 4 => 5
^CExiting memcg event listener...
=====

Link: https://lkml.kernel.org/r/20231123071945.25811-3-ddrokosov@salutedevices.com
Signed-off-by: Dmitry Rokosov <ddrokosov@salutedevices.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Shakeel Butt <shakeelb@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

authored by

Dmitry Rokosov and committed by
Andrew Morton
becf6529 60433a9d

+331 -1
+1 -1
samples/cgroup/Makefile
··· 1 1 # SPDX-License-Identifier: GPL-2.0 2 2 3 - userprogs-always-y += cgroup_event_listener 3 + userprogs-always-y += cgroup_event_listener memcg_event_listener 4 4 5 5 userccflags += -I usr/include
+330
samples/cgroup/memcg_event_listener.c
// SPDX-License-Identifier: GPL-2.0
/*
 * memcg_event_listener.c - Simple listener of memcg memory.events
 *
 * Copyright (c) 2023, SaluteDevices. All Rights Reserved.
 *
 * Author: Dmitry Rokosov <ddrokosov@salutedevices.com>
 */

#include <err.h>
#include <errno.h>
#include <limits.h>
#include <poll.h>
#include <signal.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/inotify.h>
#include <unistd.h>

#define MEMCG_EVENTS "memory.events"

/* Size of buffer to use when reading inotify events */
#define INOTIFY_BUFFER_SIZE 8192

#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))

/* Last known values of the counters reported in memory.events. */
struct memcg_counters {
	long low;
	long high;
	long max;
	long oom;
	long oom_kill;
	long oom_group_kill;
};

/* State of one monitored memory.events file. */
struct memcg_events {
	struct memcg_counters counters;	/* counters seen on the last read */
	char path[PATH_MAX];		/* full path to memory.events */
	int inotify_fd;			/* inotify instance descriptor */
	int inotify_wd;			/* watch descriptor for @path */
};

/* Set from the signal handler to ask monitor_events() to return. */
static volatile sig_atomic_t stop_requested;

/* Signal handler: only records that termination was requested. */
static void handle_stop_signal(int signo)
{
	(void)signo;
	stop_requested = 1;
}

/*
 * Install handle_stop_signal() for SIGINT and SIGTERM.
 *
 * Returns 0 on success, -1 with errno set by sigaction() on failure.
 */
static int install_stop_handlers(void)
{
	struct sigaction sa;

	memset(&sa, 0, sizeof(sa));
	sa.sa_handler = handle_stop_signal;
	sigemptyset(&sa.sa_mask);

	if (sigaction(SIGINT, &sa, NULL))
		return -1;

	return sigaction(SIGTERM, &sa, NULL);
}

/* Print every counter of @counters to stdout, one per line. */
static void print_memcg_counters(const struct memcg_counters *counters)
{
	printf("MEMCG events:\n");
	printf("\tlow: %ld\n", counters->low);
	printf("\thigh: %ld\n", counters->high);
	printf("\tmax: %ld\n", counters->max);
	printf("\toom: %ld\n", counters->oom);
	printf("\toom_kill: %ld\n", counters->oom_kill);
	printf("\toom_group_kill: %ld\n", counters->oom_group_kill);
}

/*
 * Parse one "<name> <value>" line.
 *
 * @line:    NUL-terminated line, optionally ending with a single '\n'
 * @name:    counter name the line is expected to start with
 * @counter: receives the parsed value; written only on success
 *
 * Returns 0 on success, -EINVAL when the name, the delimiter or the number
 * is malformed, -ERANGE when strtol() reports a conversion error.
 */
static int get_memcg_counter(const char *line, const char *name, long *counter)
{
	size_t len = strlen(name);
	const char *value;
	char *endptr;
	long tmp;

	/*
	 * strncmp() stops at the line terminator, so a short line is never
	 * over-read. Requiring the delimiter right after the name keeps
	 * "oom" from matching an "oom_kill" line.
	 */
	if (strncmp(line, name, len) || line[len] != ' ') {
		warnx("Counter line %s has wrong name, %s is expected",
		      line, name);
		return -EINVAL;
	}

	/* skip the whitespace delimiter */
	value = &line[len + 1];

	errno = 0;
	tmp = strtol(value, &endptr, 10);
	if (errno) {
		warnx("Failed to parse: %s", value);
		return -ERANGE;
	}

	if (endptr == value) {
		warnx("Not digits were found in line %s", value);
		return -EINVAL;
	}

	/* Only an optional single trailing newline may follow the number. */
	if (*endptr == '\n')
		endptr++;

	if (*endptr != '\0') {
		warnx("Further characters after number: %s", endptr);
		return -EINVAL;
	}

	*counter = tmp;

	return 0;
}

/*
 * Re-read events->path and fold any increased counters into
 * events->counters.
 *
 * @events:    state holding the path and the previously seen counters
 * @show_diff: when true, print every increase (or a note that nothing
 *             changed) to stdout
 *
 * Returns 0 on success, -EBADF when the file can't be opened, -EIO on a
 * read error, or the error returned by get_memcg_counter().
 */
static int read_memcg_events(struct memcg_events *events, bool show_diff)
{
	FILE *fp = fopen(events->path, "re");
	size_t i;
	int ret = 0;
	bool any_new_events = false;
	char *line = NULL;
	size_t len = 0;
	struct memcg_counters *counters = &events->counters;
	/*
	 * Start from the known values: if the file ends before every counter
	 * has been read, the missing ones compare equal to the old values
	 * instead of being read uninitialized.
	 */
	struct memcg_counters new_counters = *counters;
	struct {
		const char *name;
		long *new;
		long *old;
	} map[] = {
		{
			.name = "low",
			.new = &new_counters.low,
			.old = &counters->low,
		},
		{
			.name = "high",
			.new = &new_counters.high,
			.old = &counters->high,
		},
		{
			.name = "max",
			.new = &new_counters.max,
			.old = &counters->max,
		},
		{
			.name = "oom",
			.new = &new_counters.oom,
			.old = &counters->oom,
		},
		{
			.name = "oom_kill",
			.new = &new_counters.oom_kill,
			.old = &counters->oom_kill,
		},
		{
			.name = "oom_group_kill",
			.new = &new_counters.oom_group_kill,
			.old = &counters->oom_group_kill,
		},
	};

	if (!fp) {
		warn("Failed to open memcg events file %s", events->path);
		return -EBADF;
	}

	/* Read new values for memcg counters, in the order listed in map[] */
	for (i = 0; i < ARRAY_SIZE(map); ++i) {
		ssize_t nread;

		/* getline() returns -1 for both EOF and error; errno tells. */
		errno = 0;
		nread = getline(&line, &len, fp);
		if (nread == -1) {
			if (errno) {
				warn("Failed to read line for counter %s",
				     map[i].name);
				ret = -EIO;
				goto exit;
			}

			/* Plain EOF: the remaining counters are not reported */
			break;
		}

		ret = get_memcg_counter(line, map[i].name, map[i].new);
		if (ret) {
			warnx("Failed to get counter value from line %s", line);
			goto exit;
		}
	}

	for (i = 0; i < ARRAY_SIZE(map); ++i) {
		long diff;

		if (*map[i].new <= *map[i].old)
			continue;

		diff = *map[i].new - *map[i].old;

		if (show_diff)
			printf("*** %ld MEMCG %s event%s, "
			       "change counter %ld => %ld\n",
			       diff, map[i].name,
			       (diff == 1) ? "" : "s",
			       *map[i].old, *map[i].new);

		*map[i].old = *map[i].new;
		any_new_events = true;
	}

	if (show_diff && !any_new_events)
		printf("*** No new untracked memcg events available\n");

exit:
	free(line);
	fclose(fp);

	return ret;
}

/*
 * Handle a single inotify record: verify that it belongs to our watch and
 * carries IN_MODIFY, then re-read the counters and print what changed.
 */
static void process_memcg_events(struct memcg_events *events,
				 const struct inotify_event *event)
{
	int ret;

	if (events->inotify_wd != event->wd) {
		warnx("Unknown inotify event %d, should be %d", event->wd,
		      events->inotify_wd);
		return;
	}

	printf("Received event in %s:\n", events->path);

	if (!(event->mask & IN_MODIFY)) {
		warnx("No IN_MODIFY event, skip it");
		return;
	}

	ret = read_memcg_events(events, /* show_diff = */true);
	if (ret)
		warnx("Can't read memcg events");
}

/*
 * Walk the inotify records stored in the first @length bytes of @buffer and
 * pass each complete one to process_memcg_events().
 *
 * Records are variable-sized: a fixed header followed by event->len bytes
 * of name, so the cursor advances by the full record size.
 */
static void dispatch_inotify_events(struct memcg_events *events,
				    const char *buffer, size_t length)
{
	size_t offset = 0;

	while (length - offset >= sizeof(struct inotify_event)) {
		const struct inotify_event *event =
			(const struct inotify_event *)(buffer + offset);
		size_t record = sizeof(*event) + event->len;

		/* Never hand out a record that is cut off by the buffer end */
		if (record > length - offset)
			break;

		process_memcg_events(events, event);
		offset += record;
	}
}

/*
 * Block on the inotify descriptor and process events until a stop signal
 * is received. Exits the process on an unrecoverable poll() error.
 */
static void monitor_events(struct memcg_events *events)
{
	/* The buffer is accessed as struct inotify_event, so align it */
	char buffer[INOTIFY_BUFFER_SIZE]
		__attribute__((aligned(__alignof__(struct inotify_event))));
	struct pollfd fds[1];

	printf("Started monitoring memory events from '%s'...\n", events->path);

	fds[0].fd = events->inotify_fd;
	fds[0].events = POLLIN;

	/*
	 * A signal delivered between this check and poll() is only noticed
	 * after the next wakeup; acceptable for a sample tool.
	 */
	while (!stop_requested) {
		ssize_t length;
		int ret;

		fds[0].revents = 0;

		ret = poll(fds, ARRAY_SIZE(fds), -1);
		if (ret < 0) {
			/* Interrupted or transient: re-check the stop flag */
			if (errno == EINTR || errno == EAGAIN)
				continue;

			err(EXIT_FAILURE, "Can't poll memcg events (%d)", ret);
		}

		/* errno is meaningless here, hence errx() and not err() */
		if (fds[0].revents & POLLERR)
			errx(EXIT_FAILURE, "Got POLLERR during monitor events");

		if (!(fds[0].revents & POLLIN))
			continue;

		length = read(fds[0].fd, buffer, sizeof(buffer));
		if (length <= 0)
			continue;

		dispatch_inotify_events(events, buffer, (size_t)length);
	}
}

/*
 * Prepare @events for monitoring the memory.events file of @cgroup.
 *
 * Builds the path under /sys/fs/cgroup, reads the initial counter values,
 * creates an inotify instance watching the file for IN_MODIFY and prints
 * the initial counters.
 *
 * Returns 0 on success or a negative error code; no descriptor is left
 * open on failure.
 */
static int initialize_memcg_events(struct memcg_events *events,
				   const char *cgroup)
{
	int ret;

	memset(events, 0, sizeof(struct memcg_events));

	ret = snprintf(events->path, PATH_MAX,
		       "/sys/fs/cgroup/%s/" MEMCG_EVENTS, cgroup);
	if (ret >= PATH_MAX) {
		warnx("Path to cgroup memory.events is too long");
		return -EMSGSIZE;
	} else if (ret < 0) {
		warn("Can't generate cgroup event full name");
		return ret;
	}

	ret = read_memcg_events(events, /* show_diff = */false);
	if (ret) {
		warnx("Failed to read initial memcg events state (%d)", ret);
		return ret;
	}

	events->inotify_fd = inotify_init();
	if (events->inotify_fd < 0) {
		warn("Failed to setup new inotify device");
		return -EMFILE;
	}

	events->inotify_wd = inotify_add_watch(events->inotify_fd,
					       events->path, IN_MODIFY);
	if (events->inotify_wd < 0) {
		/* Report first: close() may overwrite errno */
		warn("Couldn't add monitor in dir %s", events->path);
		close(events->inotify_fd);
		return -EIO;
	}

	printf("Initialized MEMCG events with counters:\n");
	print_memcg_counters(&events->counters);

	return 0;
}

/* Remove the watch and close the inotify instance owned by @events. */
static void cleanup_memcg_events(struct memcg_events *events)
{
	inotify_rm_watch(events->inotify_fd, events->inotify_wd);
	close(events->inotify_fd);
}

/*
 * Usage: memcg_event_listener <cgroup>
 *
 * <cgroup> is a path relative to /sys/fs/cgroup. Runs until SIGINT or
 * SIGTERM is received.
 */
int main(int argc, const char **argv)
{
	struct memcg_events events;
	int ret;

	if (argc != 2)
		errx(EXIT_FAILURE, "Usage: %s <cgroup>", argv[0]);

	ret = initialize_memcg_events(&events, argv[1]);
	if (ret)
		errx(EXIT_FAILURE, "Can't initialize memcg events (%d)", ret);

	/* Best effort: without handlers the default signal action applies */
	if (install_stop_handlers())
		warn("Can't install termination signal handlers");

	monitor_events(&events);

	cleanup_memcg_events(&events);

	printf("Exiting memcg event listener...\n");

	return EXIT_SUCCESS;
}