1
linux/tools/perf/util/zstd.c

115 lines
2.7 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0
#include <string.h>
#include "util/compress.h"
#include "util/debug.h"
int zstd_init(struct zstd_data *data, int level)
{
perf mmap: Lazily initialize zstd streams to save memory when not using it Zstd streams create dictionaries that can require significant RAM, especially when there is one per-CPU. Tools like 'perf record' won't use the streams without the -z option, and so the creation of the streams is pure overhead. Switch to creating the streams on first use. Committer notes: ssize_t comes from sys/types.h, size_t from stddef.h. This worked on glibc as stdlib.h includes both, but not on musl libc. So do what 'man size_t' says and include sys/types.h and stddef.h instead of stdlib.h Signed-off-by: Ian Rogers <irogers@google.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Andi Kleen <ak@linux.intel.com> Cc: Athira Jajeev <atrajeev@linux.vnet.ibm.com> Cc: Changbin Du <changbin.du@huawei.com> Cc: Colin Ian King <colin.i.king@gmail.com> Cc: Dmitrii Dolgov <9erthalion6@gmail.com> Cc: German Gomez <german.gomez@arm.com> Cc: Huacai Chen <chenhuacai@kernel.org> Cc: Ingo Molnar <mingo@redhat.com> Cc: James Clark <james.clark@arm.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: K Prateek Nayak <kprateek.nayak@amd.com> Cc: Kajol Jain <kjain@linux.ibm.com> Cc: Kan Liang <kan.liang@linux.intel.com> Cc: Leo Yan <leo.yan@linaro.org> Cc: Li Dong <lidong@vivo.com> Cc: Liam Howlett <liam.howlett@oracle.com> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Masami Hiramatsu (Google) <mhiramat@kernel.org> Cc: Miguel Ojeda <ojeda@kernel.org> Cc: Ming Wang <wangming01@loongson.cn> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Nick Terrell <terrelln@fb.com> Cc: Paolo Bonzini <pbonzini@redhat.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Ravi Bangoria <ravi.bangoria@amd.com> Cc: Sandipan Das <sandipan.das@amd.com> Cc: Sean Christopherson <seanjc@google.com> Cc: Steinar H. Gunderson <sesse@google.com> Cc: Vincent Whitchurch <vincent.whitchurch@axis.com> Cc: Wenyu Liu <liuwenyu7@huawei.com> Cc: Yang Jihong <yangjihong1@huawei.com> Link: https://lore.kernel.org/r/20231102175735.2272696-5-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2023-11-02 10:56:46 -07:00
data->comp_level = level;
data->dstream = NULL;
data->cstream = NULL;
return 0;
}
int zstd_fini(struct zstd_data *data)
{
perf report: Implement perf.data record decompression zstd_init(, comp_level = 0) initializes decompression part of API only hat now consists of zstd_decompress_stream() function. The perf.data PERF_RECORD_COMPRESSED records are decompressed using zstd_decompress_stream() function into a linked list of mmaped memory regions of mmap_comp_len size (struct decomp). After decompression of one COMPRESSED record its content is iterated and fetched for usual processing. The mmaped memory regions with decompressed events are kept in the linked list till the tool process termination. When dumping raw records (e.g., perf report -D --header) file offsets of events from compressed records are printed as zero. Committer notes: Since now we have support for processing PERF_RECORD_COMPRESSED, we see none, in raw form, like we saw in the previous patch commiter notes, they were decompressed into the usual PERF_RECORD_{FORK,MMAP,COMM,etc} records, we only see the stats for those PERF_RECORD_COMPRESSED events, and since I used the file generated in the commiter notes for the previous patch, there they are, 2 compressed records: $ perf report --header-only | grep cmdline # cmdline : /home/acme/bin/perf record -z2 sleep 1 $ perf report -D | grep COMPRESS COMPRESSED events: 2 COMPRESSED events: 0 $ perf report --stdio # To display the perf.data header info, please use --header/--header-only options. # # # Total Lost Samples: 0 # # Samples: 15 of event 'cycles:u' # Event count (approx.): 962227 # # Overhead Command Shared Object Symbol # ........ ....... ................ ........................... # 46.99% sleep libc-2.28.so [.] _dl_addr 29.24% sleep [unknown] [k] 0xffffffffaea00a67 16.45% sleep libc-2.28.so [.] __GI__IO_un_link.part.1 5.92% sleep ld-2.28.so [.] _dl_setup_hash 1.40% sleep libc-2.28.so [.] __nanosleep 0.00% sleep [unknown] [k] 0xffffffffaea00163 # # (Tip: To see callchains in a more compact form: perf report -g folded) # $ Signed-off-by: Alexey Budankov <alexey.budankov@linux.intel.com> Reviewed-by: Jiri Olsa <jolsa@kernel.org> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Andi Kleen <ak@linux.intel.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Link: http://lkml.kernel.org/r/304b0a59-942c-3fe1-da02-aa749f87108b@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2019-03-18 10:45:11 -07:00
if (data->dstream) {
ZSTD_freeDStream(data->dstream);
data->dstream = NULL;
}
if (data->cstream) {
ZSTD_freeCStream(data->cstream);
data->cstream = NULL;
}
return 0;
}
perf mmap: Lazily initialize zstd streams to save memory when not using it Zstd streams create dictionaries that can require significant RAM, especially when there is one per-CPU. Tools like 'perf record' won't use the streams without the -z option, and so the creation of the streams is pure overhead. Switch to creating the streams on first use. Committer notes: ssize_t comes from sys/types.h, size_t from stddef.h. This worked on glibc as stdlib.h includes both, but not on musl libc. So do what 'man size_t' says and include sys/types.h and stddef.h instead of stdlib.h Signed-off-by: Ian Rogers <irogers@google.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Andi Kleen <ak@linux.intel.com> Cc: Athira Jajeev <atrajeev@linux.vnet.ibm.com> Cc: Changbin Du <changbin.du@huawei.com> Cc: Colin Ian King <colin.i.king@gmail.com> Cc: Dmitrii Dolgov <9erthalion6@gmail.com> Cc: German Gomez <german.gomez@arm.com> Cc: Huacai Chen <chenhuacai@kernel.org> Cc: Ingo Molnar <mingo@redhat.com> Cc: James Clark <james.clark@arm.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: K Prateek Nayak <kprateek.nayak@amd.com> Cc: Kajol Jain <kjain@linux.ibm.com> Cc: Kan Liang <kan.liang@linux.intel.com> Cc: Leo Yan <leo.yan@linaro.org> Cc: Li Dong <lidong@vivo.com> Cc: Liam Howlett <liam.howlett@oracle.com> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Masami Hiramatsu (Google) <mhiramat@kernel.org> Cc: Miguel Ojeda <ojeda@kernel.org> Cc: Ming Wang <wangming01@loongson.cn> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Nick Terrell <terrelln@fb.com> Cc: Paolo Bonzini <pbonzini@redhat.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Ravi Bangoria <ravi.bangoria@amd.com> Cc: Sandipan Das <sandipan.das@amd.com> Cc: Sean Christopherson <seanjc@google.com> Cc: Steinar H. Gunderson <sesse@google.com> Cc: Vincent Whitchurch <vincent.whitchurch@axis.com> Cc: Wenyu Liu <liuwenyu7@huawei.com> Cc: Yang Jihong <yangjihong1@huawei.com> Link: https://lore.kernel.org/r/20231102175735.2272696-5-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2023-11-02 10:56:46 -07:00
ssize_t zstd_compress_stream_to_records(struct zstd_data *data, void *dst, size_t dst_size,
void *src, size_t src_size, size_t max_record_size,
size_t process_header(void *record, size_t increment))
{
size_t ret, size, compressed = 0;
ZSTD_inBuffer input = { src, src_size, 0 };
ZSTD_outBuffer output;
void *record;
perf mmap: Lazily initialize zstd streams to save memory when not using it Zstd streams create dictionaries that can require significant RAM, especially when there is one per-CPU. Tools like 'perf record' won't use the streams without the -z option, and so the creation of the streams is pure overhead. Switch to creating the streams on first use. Committer notes: ssize_t comes from sys/types.h, size_t from stddef.h. This worked on glibc as stdlib.h includes both, but not on musl libc. So do what 'man size_t' says and include sys/types.h and stddef.h instead of stdlib.h Signed-off-by: Ian Rogers <irogers@google.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Andi Kleen <ak@linux.intel.com> Cc: Athira Jajeev <atrajeev@linux.vnet.ibm.com> Cc: Changbin Du <changbin.du@huawei.com> Cc: Colin Ian King <colin.i.king@gmail.com> Cc: Dmitrii Dolgov <9erthalion6@gmail.com> Cc: German Gomez <german.gomez@arm.com> Cc: Huacai Chen <chenhuacai@kernel.org> Cc: Ingo Molnar <mingo@redhat.com> Cc: James Clark <james.clark@arm.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: K Prateek Nayak <kprateek.nayak@amd.com> Cc: Kajol Jain <kjain@linux.ibm.com> Cc: Kan Liang <kan.liang@linux.intel.com> Cc: Leo Yan <leo.yan@linaro.org> Cc: Li Dong <lidong@vivo.com> Cc: Liam Howlett <liam.howlett@oracle.com> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Masami Hiramatsu (Google) <mhiramat@kernel.org> Cc: Miguel Ojeda <ojeda@kernel.org> Cc: Ming Wang <wangming01@loongson.cn> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Nick Terrell <terrelln@fb.com> Cc: Paolo Bonzini <pbonzini@redhat.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Ravi Bangoria <ravi.bangoria@amd.com> Cc: Sandipan Das <sandipan.das@amd.com> Cc: Sean Christopherson <seanjc@google.com> Cc: Steinar H. Gunderson <sesse@google.com> Cc: Vincent Whitchurch <vincent.whitchurch@axis.com> Cc: Wenyu Liu <liuwenyu7@huawei.com> Cc: Yang Jihong <yangjihong1@huawei.com> Link: https://lore.kernel.org/r/20231102175735.2272696-5-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2023-11-02 10:56:46 -07:00
if (!data->cstream) {
data->cstream = ZSTD_createCStream();
if (data->cstream == NULL) {
pr_err("Couldn't create compression stream.\n");
return -1;
}
ret = ZSTD_initCStream(data->cstream, data->comp_level);
if (ZSTD_isError(ret)) {
pr_err("Failed to initialize compression stream: %s\n",
ZSTD_getErrorName(ret));
return -1;
}
}
while (input.pos < input.size) {
record = dst;
size = process_header(record, 0);
compressed += size;
dst += size;
dst_size -= size;
output = (ZSTD_outBuffer){ dst, (dst_size > max_record_size) ?
max_record_size : dst_size, 0 };
ret = ZSTD_compressStream(data->cstream, &output, &input);
ZSTD_flushStream(data->cstream, &output);
if (ZSTD_isError(ret)) {
pr_err("failed to compress %ld bytes: %s\n",
(long)src_size, ZSTD_getErrorName(ret));
memcpy(dst, src, src_size);
return src_size;
}
size = output.pos;
size = process_header(record, size);
compressed += size;
dst += size;
dst_size -= size;
}
return compressed;
}
perf report: Implement perf.data record decompression zstd_init(, comp_level = 0) initializes decompression part of API only hat now consists of zstd_decompress_stream() function. The perf.data PERF_RECORD_COMPRESSED records are decompressed using zstd_decompress_stream() function into a linked list of mmaped memory regions of mmap_comp_len size (struct decomp). After decompression of one COMPRESSED record its content is iterated and fetched for usual processing. The mmaped memory regions with decompressed events are kept in the linked list till the tool process termination. When dumping raw records (e.g., perf report -D --header) file offsets of events from compressed records are printed as zero. Committer notes: Since now we have support for processing PERF_RECORD_COMPRESSED, we see none, in raw form, like we saw in the previous patch commiter notes, they were decompressed into the usual PERF_RECORD_{FORK,MMAP,COMM,etc} records, we only see the stats for those PERF_RECORD_COMPRESSED events, and since I used the file generated in the commiter notes for the previous patch, there they are, 2 compressed records: $ perf report --header-only | grep cmdline # cmdline : /home/acme/bin/perf record -z2 sleep 1 $ perf report -D | grep COMPRESS COMPRESSED events: 2 COMPRESSED events: 0 $ perf report --stdio # To display the perf.data header info, please use --header/--header-only options. # # # Total Lost Samples: 0 # # Samples: 15 of event 'cycles:u' # Event count (approx.): 962227 # # Overhead Command Shared Object Symbol # ........ ....... ................ ........................... # 46.99% sleep libc-2.28.so [.] _dl_addr 29.24% sleep [unknown] [k] 0xffffffffaea00a67 16.45% sleep libc-2.28.so [.] __GI__IO_un_link.part.1 5.92% sleep ld-2.28.so [.] _dl_setup_hash 1.40% sleep libc-2.28.so [.] __nanosleep 0.00% sleep [unknown] [k] 0xffffffffaea00163 # # (Tip: To see callchains in a more compact form: perf report -g folded) # $ Signed-off-by: Alexey Budankov <alexey.budankov@linux.intel.com> Reviewed-by: Jiri Olsa <jolsa@kernel.org> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Andi Kleen <ak@linux.intel.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Link: http://lkml.kernel.org/r/304b0a59-942c-3fe1-da02-aa749f87108b@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2019-03-18 10:45:11 -07:00
size_t zstd_decompress_stream(struct zstd_data *data, void *src, size_t src_size,
void *dst, size_t dst_size)
{
size_t ret;
ZSTD_inBuffer input = { src, src_size, 0 };
ZSTD_outBuffer output = { dst, dst_size, 0 };
perf mmap: Lazily initialize zstd streams to save memory when not using it Zstd streams create dictionaries that can require significant RAM, especially when there is one per-CPU. Tools like 'perf record' won't use the streams without the -z option, and so the creation of the streams is pure overhead. Switch to creating the streams on first use. Committer notes: ssize_t comes from sys/types.h, size_t from stddef.h. This worked on glibc as stdlib.h includes both, but not on musl libc. So do what 'man size_t' says and include sys/types.h and stddef.h instead of stdlib.h Signed-off-by: Ian Rogers <irogers@google.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Andi Kleen <ak@linux.intel.com> Cc: Athira Jajeev <atrajeev@linux.vnet.ibm.com> Cc: Changbin Du <changbin.du@huawei.com> Cc: Colin Ian King <colin.i.king@gmail.com> Cc: Dmitrii Dolgov <9erthalion6@gmail.com> Cc: German Gomez <german.gomez@arm.com> Cc: Huacai Chen <chenhuacai@kernel.org> Cc: Ingo Molnar <mingo@redhat.com> Cc: James Clark <james.clark@arm.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: K Prateek Nayak <kprateek.nayak@amd.com> Cc: Kajol Jain <kjain@linux.ibm.com> Cc: Kan Liang <kan.liang@linux.intel.com> Cc: Leo Yan <leo.yan@linaro.org> Cc: Li Dong <lidong@vivo.com> Cc: Liam Howlett <liam.howlett@oracle.com> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Masami Hiramatsu (Google) <mhiramat@kernel.org> Cc: Miguel Ojeda <ojeda@kernel.org> Cc: Ming Wang <wangming01@loongson.cn> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Nick Terrell <terrelln@fb.com> Cc: Paolo Bonzini <pbonzini@redhat.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Ravi Bangoria <ravi.bangoria@amd.com> Cc: Sandipan Das <sandipan.das@amd.com> Cc: Sean Christopherson <seanjc@google.com> Cc: Steinar H. Gunderson <sesse@google.com> Cc: Vincent Whitchurch <vincent.whitchurch@axis.com> Cc: Wenyu Liu <liuwenyu7@huawei.com> Cc: Yang Jihong <yangjihong1@huawei.com> Link: https://lore.kernel.org/r/20231102175735.2272696-5-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2023-11-02 10:56:46 -07:00
if (!data->dstream) {
data->dstream = ZSTD_createDStream();
if (data->dstream == NULL) {
pr_err("Couldn't create decompression stream.\n");
return 0;
}
ret = ZSTD_initDStream(data->dstream);
if (ZSTD_isError(ret)) {
pr_err("Failed to initialize decompression stream: %s\n",
ZSTD_getErrorName(ret));
return 0;
}
}
perf report: Implement perf.data record decompression zstd_init(, comp_level = 0) initializes decompression part of API only hat now consists of zstd_decompress_stream() function. The perf.data PERF_RECORD_COMPRESSED records are decompressed using zstd_decompress_stream() function into a linked list of mmaped memory regions of mmap_comp_len size (struct decomp). After decompression of one COMPRESSED record its content is iterated and fetched for usual processing. The mmaped memory regions with decompressed events are kept in the linked list till the tool process termination. When dumping raw records (e.g., perf report -D --header) file offsets of events from compressed records are printed as zero. Committer notes: Since now we have support for processing PERF_RECORD_COMPRESSED, we see none, in raw form, like we saw in the previous patch commiter notes, they were decompressed into the usual PERF_RECORD_{FORK,MMAP,COMM,etc} records, we only see the stats for those PERF_RECORD_COMPRESSED events, and since I used the file generated in the commiter notes for the previous patch, there they are, 2 compressed records: $ perf report --header-only | grep cmdline # cmdline : /home/acme/bin/perf record -z2 sleep 1 $ perf report -D | grep COMPRESS COMPRESSED events: 2 COMPRESSED events: 0 $ perf report --stdio # To display the perf.data header info, please use --header/--header-only options. # # # Total Lost Samples: 0 # # Samples: 15 of event 'cycles:u' # Event count (approx.): 962227 # # Overhead Command Shared Object Symbol # ........ ....... ................ ........................... # 46.99% sleep libc-2.28.so [.] _dl_addr 29.24% sleep [unknown] [k] 0xffffffffaea00a67 16.45% sleep libc-2.28.so [.] __GI__IO_un_link.part.1 5.92% sleep ld-2.28.so [.] _dl_setup_hash 1.40% sleep libc-2.28.so [.] __nanosleep 0.00% sleep [unknown] [k] 0xffffffffaea00163 # # (Tip: To see callchains in a more compact form: perf report -g folded) # $ Signed-off-by: Alexey Budankov <alexey.budankov@linux.intel.com> Reviewed-by: Jiri Olsa <jolsa@kernel.org> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Andi Kleen <ak@linux.intel.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Link: http://lkml.kernel.org/r/304b0a59-942c-3fe1-da02-aa749f87108b@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2019-03-18 10:45:11 -07:00
while (input.pos < input.size) {
ret = ZSTD_decompressStream(data->dstream, &output, &input);
if (ZSTD_isError(ret)) {
pr_err("failed to decompress (B): %zd -> %zd, dst_size %zd : %s\n",
src_size, output.size, dst_size, ZSTD_getErrorName(ret));
perf report: Implement perf.data record decompression zstd_init(, comp_level = 0) initializes decompression part of API only hat now consists of zstd_decompress_stream() function. The perf.data PERF_RECORD_COMPRESSED records are decompressed using zstd_decompress_stream() function into a linked list of mmaped memory regions of mmap_comp_len size (struct decomp). After decompression of one COMPRESSED record its content is iterated and fetched for usual processing. The mmaped memory regions with decompressed events are kept in the linked list till the tool process termination. When dumping raw records (e.g., perf report -D --header) file offsets of events from compressed records are printed as zero. Committer notes: Since now we have support for processing PERF_RECORD_COMPRESSED, we see none, in raw form, like we saw in the previous patch commiter notes, they were decompressed into the usual PERF_RECORD_{FORK,MMAP,COMM,etc} records, we only see the stats for those PERF_RECORD_COMPRESSED events, and since I used the file generated in the commiter notes for the previous patch, there they are, 2 compressed records: $ perf report --header-only | grep cmdline # cmdline : /home/acme/bin/perf record -z2 sleep 1 $ perf report -D | grep COMPRESS COMPRESSED events: 2 COMPRESSED events: 0 $ perf report --stdio # To display the perf.data header info, please use --header/--header-only options. # # # Total Lost Samples: 0 # # Samples: 15 of event 'cycles:u' # Event count (approx.): 962227 # # Overhead Command Shared Object Symbol # ........ ....... ................ ........................... # 46.99% sleep libc-2.28.so [.] _dl_addr 29.24% sleep [unknown] [k] 0xffffffffaea00a67 16.45% sleep libc-2.28.so [.] __GI__IO_un_link.part.1 5.92% sleep ld-2.28.so [.] _dl_setup_hash 1.40% sleep libc-2.28.so [.] __nanosleep 0.00% sleep [unknown] [k] 0xffffffffaea00163 # # (Tip: To see callchains in a more compact form: perf report -g folded) # $ Signed-off-by: Alexey Budankov <alexey.budankov@linux.intel.com> Reviewed-by: Jiri Olsa <jolsa@kernel.org> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Andi Kleen <ak@linux.intel.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Link: http://lkml.kernel.org/r/304b0a59-942c-3fe1-da02-aa749f87108b@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2019-03-18 10:45:11 -07:00
break;
}
output.dst = dst + output.pos;
output.size = dst_size - output.pos;
}
return output.pos;
}