Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

perf mmap: Lazily initialize zstd streams to save memory when not using it

Zstd streams create dictionaries that can require significant RAM,
especially when there is one per CPU. Tools like 'perf record' won't use
the streams without the -z option, so creating them up front is pure
overhead. Switch to creating the streams on first use.

Committer notes:

ssize_t comes from sys/types.h, size_t from stddef.h. This worked on
glibc because stdlib.h includes both, but not on musl libc. So do what
'man size_t' says and include sys/types.h and stddef.h instead of
stdlib.h.

Signed-off-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Athira Jajeev <atrajeev@linux.vnet.ibm.com>
Cc: Changbin Du <changbin.du@huawei.com>
Cc: Colin Ian King <colin.i.king@gmail.com>
Cc: Dmitrii Dolgov <9erthalion6@gmail.com>
Cc: German Gomez <german.gomez@arm.com>
Cc: Huacai Chen <chenhuacai@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: K Prateek Nayak <kprateek.nayak@amd.com>
Cc: Kajol Jain <kjain@linux.ibm.com>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Li Dong <lidong@vivo.com>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Cc: Miguel Ojeda <ojeda@kernel.org>
Cc: Ming Wang <wangming01@loongson.cn>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Nick Terrell <terrelln@fb.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ravi Bangoria <ravi.bangoria@amd.com>
Cc: Sandipan Das <sandipan.das@amd.com>
Cc: Sean Christopherson <seanjc@google.com>
Cc: Steinar H. Gunderson <sesse@google.com>
Cc: Vincent Whitchurch <vincent.whitchurch@axis.com>
Cc: Wenyu Liu <liuwenyu7@huawei.com>
Cc: Yang Jihong <yangjihong1@huawei.com>
Link: https://lore.kernel.org/r/20231102175735.2272696-5-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

Authored by Ian Rogers and committed by Arnaldo Carvalho de Melo
5940a20a d60469d7

+59 -43
+19 -7
tools/perf/builtin-record.c
··· 270 270 271 271 static int record__aio_enabled(struct record *rec); 272 272 static int record__comp_enabled(struct record *rec); 273 - static size_t zstd_compress(struct perf_session *session, struct mmap *map, 273 + static ssize_t zstd_compress(struct perf_session *session, struct mmap *map, 274 274 void *dst, size_t dst_size, void *src, size_t src_size); 275 275 276 276 #ifdef HAVE_AIO_SUPPORT ··· 405 405 */ 406 406 407 407 if (record__comp_enabled(aio->rec)) { 408 - size = zstd_compress(aio->rec->session, NULL, aio->data + aio->size, 409 - mmap__mmap_len(map) - aio->size, 410 - buf, size); 408 + ssize_t compressed = zstd_compress(aio->rec->session, NULL, aio->data + aio->size, 409 + mmap__mmap_len(map) - aio->size, 410 + buf, size); 411 + if (compressed < 0) 412 + return (int)compressed; 413 + 414 + size = compressed; 411 415 } else { 412 416 memcpy(aio->data + aio->size, buf, size); 413 417 } ··· 637 633 struct record *rec = to; 638 634 639 635 if (record__comp_enabled(rec)) { 640 - size = zstd_compress(rec->session, map, map->data, mmap__mmap_len(map), bf, size); 636 + ssize_t compressed = zstd_compress(rec->session, map, map->data, 637 + mmap__mmap_len(map), bf, size); 638 + 639 + if (compressed < 0) 640 + return (int)compressed; 641 + 642 + size = compressed; 641 643 bf = map->data; 642 644 } 643 645 ··· 1537 1527 return size; 1538 1528 } 1539 1529 1540 - static size_t zstd_compress(struct perf_session *session, struct mmap *map, 1530 + static ssize_t zstd_compress(struct perf_session *session, struct mmap *map, 1541 1531 void *dst, size_t dst_size, void *src, size_t src_size) 1542 1532 { 1543 - size_t compressed; 1533 + ssize_t compressed; 1544 1534 size_t max_record_size = PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_record_compressed) - 1; 1545 1535 struct zstd_data *zstd_data = &session->zstd_data; 1546 1536 ··· 1549 1539 1550 1540 compressed = zstd_compress_stream_to_records(zstd_data, dst, dst_size, src, src_size, 1551 1541 max_record_size, 
process_comp_header); 1542 + if (compressed < 0) 1543 + return compressed; 1552 1544 1553 1545 if (map && map->file) { 1554 1546 thread->bytes_transferred += src_size;
+5 -2
tools/perf/util/compress.h
··· 3 3 #define PERF_COMPRESS_H 4 4 5 5 #include <stdbool.h> 6 + #include <stddef.h> 7 + #include <sys/types.h> 6 8 #ifdef HAVE_ZSTD_SUPPORT 7 9 #include <zstd.h> 8 10 #endif ··· 23 21 #ifdef HAVE_ZSTD_SUPPORT 24 22 ZSTD_CStream *cstream; 25 23 ZSTD_DStream *dstream; 24 + int comp_level; 26 25 #endif 27 26 }; 28 27 ··· 32 29 int zstd_init(struct zstd_data *data, int level); 33 30 int zstd_fini(struct zstd_data *data); 34 31 35 - size_t zstd_compress_stream_to_records(struct zstd_data *data, void *dst, size_t dst_size, 32 + ssize_t zstd_compress_stream_to_records(struct zstd_data *data, void *dst, size_t dst_size, 36 33 void *src, size_t src_size, size_t max_record_size, 37 34 size_t process_header(void *record, size_t increment)); 38 35 ··· 51 48 } 52 49 53 50 static inline 54 - size_t zstd_compress_stream_to_records(struct zstd_data *data __maybe_unused, 51 + ssize_t zstd_compress_stream_to_records(struct zstd_data *data __maybe_unused, 55 52 void *dst __maybe_unused, size_t dst_size __maybe_unused, 56 53 void *src __maybe_unused, size_t src_size __maybe_unused, 57 54 size_t max_record_size __maybe_unused,
+2 -3
tools/perf/util/mmap.c
··· 295 295 296 296 map->core.flush = mp->flush; 297 297 298 - map->comp_level = mp->comp_level; 299 298 #ifndef PYTHON_PERF 300 - if (zstd_init(&map->zstd_data, map->comp_level)) { 299 + if (zstd_init(&map->zstd_data, mp->comp_level)) { 301 300 pr_debug2("failed to init mmap compressor, error %d\n", errno); 302 301 return -1; 303 302 } 304 303 #endif 305 304 306 - if (map->comp_level && !perf_mmap__aio_enabled(map)) { 305 + if (mp->comp_level && !perf_mmap__aio_enabled(map)) { 307 306 map->data = mmap(NULL, mmap__mmap_len(map), PROT_READ|PROT_WRITE, 308 307 MAP_PRIVATE|MAP_ANONYMOUS, 0, 0); 309 308 if (map->data == MAP_FAILED) {
-1
tools/perf/util/mmap.h
··· 39 39 #endif 40 40 struct mmap_cpu_mask affinity_mask; 41 41 void *data; 42 - int comp_level; 43 42 struct perf_data_file *file; 44 43 struct zstd_data zstd_data; 45 44 };
+33 -30
tools/perf/util/zstd.c
··· 7 7 8 8 int zstd_init(struct zstd_data *data, int level) 9 9 { 10 - size_t ret; 11 - 12 - data->dstream = ZSTD_createDStream(); 13 - if (data->dstream == NULL) { 14 - pr_err("Couldn't create decompression stream.\n"); 15 - return -1; 16 - } 17 - 18 - ret = ZSTD_initDStream(data->dstream); 19 - if (ZSTD_isError(ret)) { 20 - pr_err("Failed to initialize decompression stream: %s\n", ZSTD_getErrorName(ret)); 21 - return -1; 22 - } 23 - 24 - if (!level) 25 - return 0; 26 - 27 - data->cstream = ZSTD_createCStream(); 28 - if (data->cstream == NULL) { 29 - pr_err("Couldn't create compression stream.\n"); 30 - return -1; 31 - } 32 - 33 - ret = ZSTD_initCStream(data->cstream, level); 34 - if (ZSTD_isError(ret)) { 35 - pr_err("Failed to initialize compression stream: %s\n", ZSTD_getErrorName(ret)); 36 - return -1; 37 - } 38 - 10 + data->comp_level = level; 11 + data->dstream = NULL; 12 + data->cstream = NULL; 39 13 return 0; 40 14 } 41 15 ··· 28 54 return 0; 29 55 } 30 56 31 - size_t zstd_compress_stream_to_records(struct zstd_data *data, void *dst, size_t dst_size, 57 + ssize_t zstd_compress_stream_to_records(struct zstd_data *data, void *dst, size_t dst_size, 32 58 void *src, size_t src_size, size_t max_record_size, 33 59 size_t process_header(void *record, size_t increment)) 34 60 { ··· 36 62 ZSTD_inBuffer input = { src, src_size, 0 }; 37 63 ZSTD_outBuffer output; 38 64 void *record; 65 + 66 + if (!data->cstream) { 67 + data->cstream = ZSTD_createCStream(); 68 + if (data->cstream == NULL) { 69 + pr_err("Couldn't create compression stream.\n"); 70 + return -1; 71 + } 72 + 73 + ret = ZSTD_initCStream(data->cstream, data->comp_level); 74 + if (ZSTD_isError(ret)) { 75 + pr_err("Failed to initialize compression stream: %s\n", 76 + ZSTD_getErrorName(ret)); 77 + return -1; 78 + } 79 + } 39 80 40 81 while (input.pos < input.size) { 41 82 record = dst; ··· 85 96 ZSTD_inBuffer input = { src, src_size, 0 }; 86 97 ZSTD_outBuffer output = { dst, dst_size, 0 }; 87 98 99 + if 
(!data->dstream) { 100 + data->dstream = ZSTD_createDStream(); 101 + if (data->dstream == NULL) { 102 + pr_err("Couldn't create decompression stream.\n"); 103 + return 0; 104 + } 105 + 106 + ret = ZSTD_initDStream(data->dstream); 107 + if (ZSTD_isError(ret)) { 108 + pr_err("Failed to initialize decompression stream: %s\n", 109 + ZSTD_getErrorName(ret)); 110 + return 0; 111 + } 112 + } 88 113 while (input.pos < input.size) { 89 114 ret = ZSTD_decompressStream(data->dstream, &output, &input); 90 115 if (ZSTD_isError(ret)) {