使用 Google Benchmark 对 N 个函数 x M 个数据集进行基准测试
N functions x M datasets with google benchmark
假设我想针对三个不同的数据集测试 serialization/deserialization 例程。这样就会产生 2x3=6 个基准测试。
理想情况下,我想实现以下目标:
- 避免代码重复
- 每次运行可执行文件时,每个数据集生成器函数只调用一次,并且仅在对应的基准测试未被
--benchmark_filter=...
排除时才调用(生成器函数的开销很大)
- 有意义的基准名称(例如 "Serialize/DatasetAlpha")
指南中提到的功能似乎都不完全符合目的。
到目前为止,我找到的最接近的解决方案是使用可变参数化的 Serialize()
/Deserialize()
函数以及生成器函数,后者将生成的数据作为单例返回。
有没有更好的方法?
这是我想避免的:
#include <benchmark/benchmark.h>
/* library */
/// Placeholder for the library's serializer: hands back an unchanged copy of `data`.
std::string serialize(const std::string& data) {
    return std::string(data);
}
/// Placeholder for the library's deserializer: hands back an unchanged copy of `data`.
std::string deserialize(const std::string& data) {
    return std::string(data);
}
/* helpers */
/// Benchmark body shared by the Serialize* wrappers: each iteration of the
/// state loop runs serialize() on `data`.
void SerializeHelper(benchmark::State& state, const std::string& data) {
    for (auto _ : state) {
        auto encoded = serialize(data);
        // Keep the otherwise-unused result from being optimized away.
        benchmark::DoNotOptimize(encoded);
    }
}
/// Benchmark body shared by the Deserialize* wrappers.
///
/// `data` is serialized once before the state loop; each iteration then runs
/// deserialize() on the resulting bytes.
void DeserializeHelper(benchmark::State& state, const std::string& data) {
    // Prepare the serialized form up front so only deserialize() sits inside the loop.
    const std::string bytes = serialize(data);
    for (auto _ : state) {
        // Fix: feed the serialized bytes (previously the raw `data` was passed
        // and `bytes` was never used).
        std::string data_out = deserialize(bytes);
        // Keep the otherwise-unused result from being optimized away.
        benchmark::DoNotOptimize(data_out);
    }
}
/// Stand-in generator for dataset Alpha; currently yields an empty string.
std::string GenerateDatasetAlpha() {
    return std::string();
}

/// Stand-in generator for dataset Beta; currently yields an empty string.
std::string GenerateDatasetBeta() {
    return std::string();
}

/// Stand-in generator for dataset Gamma; currently yields an empty string.
std::string GenerateDatasetGamma() {
    return std::string();
}
/* oh, my... */
// One thin wrapper per (routine, dataset) pair. Each wrapper calls its
// dataset generator every time it is invoked and forwards the result to the
// matching helper.

/// Serialize benchmark over dataset Alpha.
void SerializeAlpha(benchmark::State& state) {
    const std::string dataset = GenerateDatasetAlpha();
    SerializeHelper(state, dataset);
}

/// Deserialize benchmark over dataset Alpha.
void DeserializeAlpha(benchmark::State& state) {
    const std::string dataset = GenerateDatasetAlpha();
    DeserializeHelper(state, dataset);
}

/// Serialize benchmark over dataset Beta.
void SerializeBeta(benchmark::State& state) {
    const std::string dataset = GenerateDatasetBeta();
    SerializeHelper(state, dataset);
}

/// Deserialize benchmark over dataset Beta.
void DeserializeBeta(benchmark::State& state) {
    const std::string dataset = GenerateDatasetBeta();
    DeserializeHelper(state, dataset);
}

/// Serialize benchmark over dataset Gamma.
void SerializeGamma(benchmark::State& state) {
    const std::string dataset = GenerateDatasetGamma();
    SerializeHelper(state, dataset);
}

/// Deserialize benchmark over dataset Gamma.
void DeserializeGamma(benchmark::State& state) {
    const std::string dataset = GenerateDatasetGamma();
    DeserializeHelper(state, dataset);
}
// Register each per-dataset wrapper as its own benchmark (2 routines x 3 datasets).
BENCHMARK(SerializeAlpha);
BENCHMARK(DeserializeAlpha);
BENCHMARK(SerializeBeta);
BENCHMARK(DeserializeBeta);
BENCHMARK(SerializeGamma);
BENCHMARK(DeserializeGamma);
// Google Benchmark macro that supplies the program entry point.
BENCHMARK_MAIN();
//g++ wtf.cc -o wtf -I benchmark/include/ -lbenchmark -L benchmark/build/src -lpthread -O3
到目前为止我找到的最接近的解决方案是使用模板化的基准测试,并为每个数据集提供一个生成器类:
#include <benchmark/benchmark.h>
/* library */
/// Placeholder for the library's serializer: hands back an unchanged copy of `data`.
std::string serialize(const std::string& data) {
    return std::string(data);
}
/// Placeholder for the library's deserializer: hands back an unchanged copy of `data`.
std::string deserialize(const std::string& data) {
    return std::string(data);
}
/* benchmarks routines */
/// Serialize benchmark parameterized on a dataset generator type.
///
/// `Dataset` must be default-constructible and callable with no arguments,
/// producing a value convertible to std::string. The generator is invoked
/// once per call of this function, before the state loop.
template <typename Dataset>
void SerializeBenchmark(benchmark::State& state) {
    const std::string input = Dataset()();
    for (auto _ : state) {
        auto encoded = serialize(input);
        // Keep the otherwise-unused result from being optimized away.
        benchmark::DoNotOptimize(encoded);
    }
}
/// Deserialize benchmark parameterized on a dataset generator type.
///
/// `Dataset` must be default-constructible and callable with no arguments,
/// producing a value convertible to std::string. The dataset is generated and
/// serialized once before the state loop; each iteration then runs
/// deserialize() on the serialized bytes.
template<typename Dataset>
void DeserializeBenchmark(benchmark::State& state) {
    const std::string data = Dataset()();
    const std::string bytes = serialize(data);
    for (auto _ : state) {
        // Fix: feed the serialized bytes (previously the raw `data` was passed
        // and `bytes` was never used).
        std::string data_out = deserialize(bytes);
        // Keep the otherwise-unused result from being optimized away.
        benchmark::DoNotOptimize(data_out);
    }
}
/* datasets generators and benchmark registration */
/// Generator functor for the first dataset; currently a stub that produces an
/// empty string.
struct Dataset1 {
    // const-qualified: producing the data does not modify the functor, so
    // const instances can be invoked as well.
    std::string operator()() const {
        return ""; // load from file, generate random data, etc
    }
};
// Register both benchmark templates instantiated with Dataset1.
BENCHMARK_TEMPLATE(SerializeBenchmark, Dataset1);
BENCHMARK_TEMPLATE(DeserializeBenchmark, Dataset1);
/// Generator functor for the second dataset; currently a stub that produces an
/// empty string.
struct Dataset2 {
    // const-qualified: producing the data does not modify the functor, so
    // const instances can be invoked as well.
    std::string operator()() const { return ""; }
};
// Register both benchmark templates instantiated with Dataset2.
BENCHMARK_TEMPLATE(SerializeBenchmark, Dataset2);
BENCHMARK_TEMPLATE(DeserializeBenchmark, Dataset2);
/// Generator functor for the third dataset; currently a stub that produces an
/// empty string.
struct Dataset3 {
    // const-qualified: producing the data does not modify the functor, so
    // const instances can be invoked as well.
    std::string operator()() const { return ""; }
};
// Register both benchmark templates instantiated with Dataset3.
BENCHMARK_TEMPLATE(SerializeBenchmark, Dataset3);
BENCHMARK_TEMPLATE(DeserializeBenchmark, Dataset3);
// Google Benchmark macro that supplies the program entry point.
BENCHMARK_MAIN();
这使代码量膨胀保持在相当低的水平。基准名称也很好,例如 SerializeBenchmark<Dataset2>。
数据集生成函数仍然会被多次调用,所以如果你想避免这种情况,就必须把生成的数据存储在延迟加载的单例中。
假设我想针对三个不同的数据集测试 serialization/deserialization 例程。这样就会产生 2x3=6 个基准测试。
理想情况下,我想实现以下目标:
- 避免代码重复
- 每次运行可执行文件时,每个数据集生成器函数只调用一次,并且仅在对应的基准测试未被
--benchmark_filter=...
排除时才调用(生成器函数的开销很大)
- 有意义的基准名称(例如 "Serialize/DatasetAlpha")
指南中提到的功能似乎都不完全符合目的。
到目前为止,我找到的最接近的解决方案是使用可变参数化的 Serialize()
/Deserialize()
函数以及生成器函数,后者将生成的数据作为单例返回。
有没有更好的方法?
这是我想避免的:
#include <benchmark/benchmark.h>
/* library */
/// Placeholder for the library's serializer: hands back an unchanged copy of `data`.
std::string serialize(const std::string& data) {
    return std::string(data);
}
/// Placeholder for the library's deserializer: hands back an unchanged copy of `data`.
std::string deserialize(const std::string& data) {
    return std::string(data);
}
/* helpers */
/// Benchmark body shared by the Serialize* wrappers: each iteration of the
/// state loop runs serialize() on `data`.
void SerializeHelper(benchmark::State& state, const std::string& data) {
    for (auto _ : state) {
        auto encoded = serialize(data);
        // Keep the otherwise-unused result from being optimized away.
        benchmark::DoNotOptimize(encoded);
    }
}
/// Benchmark body shared by the Deserialize* wrappers.
///
/// `data` is serialized once before the state loop; each iteration then runs
/// deserialize() on the resulting bytes.
void DeserializeHelper(benchmark::State& state, const std::string& data) {
    // Prepare the serialized form up front so only deserialize() sits inside the loop.
    const std::string bytes = serialize(data);
    for (auto _ : state) {
        // Fix: feed the serialized bytes (previously the raw `data` was passed
        // and `bytes` was never used).
        std::string data_out = deserialize(bytes);
        // Keep the otherwise-unused result from being optimized away.
        benchmark::DoNotOptimize(data_out);
    }
}
/// Stand-in generator for dataset Alpha; currently yields an empty string.
std::string GenerateDatasetAlpha() {
    return std::string();
}

/// Stand-in generator for dataset Beta; currently yields an empty string.
std::string GenerateDatasetBeta() {
    return std::string();
}

/// Stand-in generator for dataset Gamma; currently yields an empty string.
std::string GenerateDatasetGamma() {
    return std::string();
}
/* oh, my... */
// One thin wrapper per (routine, dataset) pair. Each wrapper calls its
// dataset generator every time it is invoked and forwards the result to the
// matching helper.

/// Serialize benchmark over dataset Alpha.
void SerializeAlpha(benchmark::State& state) {
    const std::string dataset = GenerateDatasetAlpha();
    SerializeHelper(state, dataset);
}

/// Deserialize benchmark over dataset Alpha.
void DeserializeAlpha(benchmark::State& state) {
    const std::string dataset = GenerateDatasetAlpha();
    DeserializeHelper(state, dataset);
}

/// Serialize benchmark over dataset Beta.
void SerializeBeta(benchmark::State& state) {
    const std::string dataset = GenerateDatasetBeta();
    SerializeHelper(state, dataset);
}

/// Deserialize benchmark over dataset Beta.
void DeserializeBeta(benchmark::State& state) {
    const std::string dataset = GenerateDatasetBeta();
    DeserializeHelper(state, dataset);
}

/// Serialize benchmark over dataset Gamma.
void SerializeGamma(benchmark::State& state) {
    const std::string dataset = GenerateDatasetGamma();
    SerializeHelper(state, dataset);
}

/// Deserialize benchmark over dataset Gamma.
void DeserializeGamma(benchmark::State& state) {
    const std::string dataset = GenerateDatasetGamma();
    DeserializeHelper(state, dataset);
}
// Register each per-dataset wrapper as its own benchmark (2 routines x 3 datasets).
BENCHMARK(SerializeAlpha);
BENCHMARK(DeserializeAlpha);
BENCHMARK(SerializeBeta);
BENCHMARK(DeserializeBeta);
BENCHMARK(SerializeGamma);
BENCHMARK(DeserializeGamma);
// Google Benchmark macro that supplies the program entry point.
BENCHMARK_MAIN();
//g++ wtf.cc -o wtf -I benchmark/include/ -lbenchmark -L benchmark/build/src -lpthread -O3
到目前为止我找到的最接近的解决方案是使用模板化的基准测试,并为每个数据集提供一个生成器类:
#include <benchmark/benchmark.h>
/* library */
/// Placeholder for the library's serializer: hands back an unchanged copy of `data`.
std::string serialize(const std::string& data) {
    return std::string(data);
}
/// Placeholder for the library's deserializer: hands back an unchanged copy of `data`.
std::string deserialize(const std::string& data) {
    return std::string(data);
}
/* benchmarks routines */
/// Serialize benchmark parameterized on a dataset generator type.
///
/// `Dataset` must be default-constructible and callable with no arguments,
/// producing a value convertible to std::string. The generator is invoked
/// once per call of this function, before the state loop.
template <typename Dataset>
void SerializeBenchmark(benchmark::State& state) {
    const std::string input = Dataset()();
    for (auto _ : state) {
        auto encoded = serialize(input);
        // Keep the otherwise-unused result from being optimized away.
        benchmark::DoNotOptimize(encoded);
    }
}
/// Deserialize benchmark parameterized on a dataset generator type.
///
/// `Dataset` must be default-constructible and callable with no arguments,
/// producing a value convertible to std::string. The dataset is generated and
/// serialized once before the state loop; each iteration then runs
/// deserialize() on the serialized bytes.
template<typename Dataset>
void DeserializeBenchmark(benchmark::State& state) {
    const std::string data = Dataset()();
    const std::string bytes = serialize(data);
    for (auto _ : state) {
        // Fix: feed the serialized bytes (previously the raw `data` was passed
        // and `bytes` was never used).
        std::string data_out = deserialize(bytes);
        // Keep the otherwise-unused result from being optimized away.
        benchmark::DoNotOptimize(data_out);
    }
}
/* datasets generators and benchmark registration */
/// Generator functor for the first dataset; currently a stub that produces an
/// empty string.
struct Dataset1 {
    // const-qualified: producing the data does not modify the functor, so
    // const instances can be invoked as well.
    std::string operator()() const {
        return ""; // load from file, generate random data, etc
    }
};
// Register both benchmark templates instantiated with Dataset1.
BENCHMARK_TEMPLATE(SerializeBenchmark, Dataset1);
BENCHMARK_TEMPLATE(DeserializeBenchmark, Dataset1);
/// Generator functor for the second dataset; currently a stub that produces an
/// empty string.
struct Dataset2 {
    // const-qualified: producing the data does not modify the functor, so
    // const instances can be invoked as well.
    std::string operator()() const { return ""; }
};
// Register both benchmark templates instantiated with Dataset2.
BENCHMARK_TEMPLATE(SerializeBenchmark, Dataset2);
BENCHMARK_TEMPLATE(DeserializeBenchmark, Dataset2);
/// Generator functor for the third dataset; currently a stub that produces an
/// empty string.
struct Dataset3 {
    // const-qualified: producing the data does not modify the functor, so
    // const instances can be invoked as well.
    std::string operator()() const { return ""; }
};
// Register both benchmark templates instantiated with Dataset3.
BENCHMARK_TEMPLATE(SerializeBenchmark, Dataset3);
BENCHMARK_TEMPLATE(DeserializeBenchmark, Dataset3);
// Google Benchmark macro that supplies the program entry point.
BENCHMARK_MAIN();
这使代码量膨胀保持在相当低的水平。基准名称也很好,例如 SerializeBenchmark<Dataset2>。
数据集生成函数仍然会被多次调用,所以如果你想避免这种情况,就必须把生成的数据存储在延迟加载的单例中。