Test program:
#include <benchmark/benchmark.h>
#include <cmath>
#include <cstdio>
// Minimal reproduction: computes std::pow(10, n) with n == 2.0 and prints the
// result before the benchmark loop, at three points inside it, and after it,
// so the value of `ret` can be tracked around the DoNotOptimize call.
// `state` is the benchmark::State object iterated by the range-for loop.
void BM_stdPowSimple(benchmark::State& state)
{
const double n = 2.00;
// Baseline value computed once before the loop.
double ret = std::pow(10, n);
printf("Before %f %f\n", n, ret);
for (auto _ : state) {
// Value carried into this iteration.
printf("Step 1 %f\n", ret);
ret = std::pow(10, n);
// Freshly recomputed value, printed before DoNotOptimize sees it.
printf("Step 2 %f\n", ret);
// Hand `ret` to DoNotOptimize; Step 3 prints whatever it holds afterwards.
benchmark::DoNotOptimize(ret);
printf("Step 3 %f\n", ret);
}
// Final value of `ret` once the loop has finished.
printf("After %f %f\n", n, ret);
}
// Register the function as a benchmark, configured with Iterations(1).
BENCHMARK(BM_stdPowSimple)->Iterations(1);
Output:
Before 2.000000 100.000000
Step 1 100.000000
Step 2 100.000000
Step 3 0.000000
After 2.000000 0.000000
ret should always be 100, but it becomes zero after the call to benchmark::DoNotOptimize(ret).
If I comment out benchmark::DoNotOptimize(ret);, it works as expected:
#include <benchmark/benchmark.h>
#include <cmath>
#include <cstdio>
// Control variant of the reproduction: same computation and the same printf
// checkpoints, but the DoNotOptimize call is commented out, so nothing touches
// `ret` between Step 2 and Step 3.
// `state` is the benchmark::State object iterated by the range-for loop.
void BM_stdPowSimple(benchmark::State& state)
{
const double n = 2.00;
// Baseline value computed once before the loop.
double ret = std::pow(10, n);
printf("Before %f %f\n", n, ret);
for (auto _ : state) {
// Value carried into this iteration.
printf("Step 1 %f\n", ret);
ret = std::pow(10, n);
// Freshly recomputed value.
printf("Step 2 %f\n", ret);
// benchmark::DoNotOptimize(ret);
// With the call above disabled, Step 3 prints `ret` unchanged since Step 2.
printf("Step 3 %f\n", ret);
}
// Final value of `ret` once the loop has finished.
printf("After %f %f\n", n, ret);
}
// Register the function as a benchmark, configured with Iterations(1).
BENCHMARK(BM_stdPowSimple)->Iterations(1);
Before 2.000000 100.000000
Step 1 100.000000
Step 2 100.000000
Step 3 100.000000
After 2.000000 100.000000
Alternatively, if I declare ret as volatile double, the result is also correct:
#include <benchmark/benchmark.h>
#include <cmath>
#include <cstdio>
// Variant of the reproduction in which `ret` is declared `volatile double`;
// everything else (computation, printf checkpoints, DoNotOptimize call) is
// the same as in the first listing.
// `state` is the benchmark::State object iterated by the range-for loop.
void BM_stdPowSimple(benchmark::State& state)
{
const double n = 2.00;
// `volatile` is the only change relative to the first listing.
volatile double ret = std::pow(10, n);
printf("Before %f %f\n", n, ret);
for (auto _ : state) {
// Value carried into this iteration.
printf("Step 1 %f\n", ret);
ret = std::pow(10, n);
// Freshly recomputed value, printed before DoNotOptimize sees it.
printf("Step 2 %f\n", ret);
// NOTE(review): with a volatile argument, Tp is presumably deduced as
// `volatile double` for the DoNotOptimize(Tp&) overload -- confirm which
// overload is actually selected.
benchmark::DoNotOptimize(ret);
printf("Step 3 %f\n", ret);
}
// Final value of `ret` once the loop has finished.
printf("After %f %f\n", n, ret);
}
// Register the function as a benchmark, configured with Iterations(1).
BENCHMARK(BM_stdPowSimple)->Iterations(1);
Before 2.000000 100.000000
Step 1 100.000000
Step 2 100.000000
Step 3 100.000000
After 2.000000 100.000000
Compiler: g++ (GCC) 12.2.1 20221121 (Red Hat 12.2.1-4)
benchmark::DoNotOptimize() implementation:
// DoNotOptimize overload taking a non-const lvalue reference.
// std::enable_if restricts it to types that are trivially copyable and no
// larger than a pointer (sizeof(Tp) <= sizeof(Tp*)); the return type is void.
template <class Tp>
inline BENCHMARK_ALWAYS_INLINE
typename std::enable_if<std::is_trivially_copyable<Tp>::value &&
(sizeof(Tp) <= sizeof(Tp*))>::type
DoNotOptimize(Tp& value) {
// Inline asm with an empty instruction template. `value` is a read-write
// ("+") operand with two constraint alternatives, memory ("m") or register
// ("r"), and "memory" is listed as a clobber.
// NOTE(review): the surrounding report observes `ret` changing after this
// call under GCC 12.2.1; presumably this is tied to how GCC resolves the
// multi-alternative "+m,r" constraint -- confirm against the GCC and
// Google Benchmark issue trackers before relying on this explanation.
asm volatile("" : "+m,r"(value) : : "memory");
}
I'm not sure whether the problem lies in Google Benchmark or in the g++ compiler. The DoNotOptimize() function is very simple, and I don't understand why calling it would lead to an incorrect result.