Index: bench/nanobench.cpp |
diff --git a/bench/nanobench.cpp b/bench/nanobench.cpp |
index 77df430c07b6c70bc78d89ed88ad43e4b4bc49d5..8927d64bb69263b9fb760ba847a9ffdcf36e79a3 100644 |
--- a/bench/nanobench.cpp |
+++ b/bench/nanobench.cpp |
@@ -17,6 +17,11 @@ |
#include "SkString.h" |
#include "SkSurface.h" |
+#if SK_SUPPORT_GPU |
+ #include "GrContextFactory.h" |
+ GrContextFactory gContextFactory; |
+#endif |
+ |
__SK_FORCE_IMAGE_DECODER_LINKING; |
DEFINE_int32(samples, 10, "Number of samples to measure for each bench."); |
@@ -26,10 +31,11 @@ DEFINE_double(overheadGoal, 0.0001, |
DEFINE_string(match, "", "The usual filters on file names of benchmarks to measure."); |
DEFINE_bool2(quiet, q, false, "Print only bench name and minimum sample."); |
DEFINE_bool2(verbose, v, false, "Print all samples."); |
-DEFINE_string(config, "8888 nonrendering", |
- "Configs to measure. Options: 565 8888 nonrendering"); |
+DEFINE_string(config, "565 8888 gpu nonrendering", |
bsalomon
2014/06/30 19:58:12
Does regular bench have the other gpu configs (msa
mtklein_C
2014/06/30 23:05:29
Yup, done.
|
+ "Configs to measure. Options: 565 8888 gpu nonrendering"); |
+DEFINE_double(gpuMs, 5, "Target bench time in milliseconds for GPU."); |
+DEFINE_int32(gpuFrameLag, 10, "Overestimate of maximum number of frames GPU allows to lag."); |
bsalomon
2014/06/30 19:58:12
I'd default to 5 or fewer.
mtklein_C
2014/06/30 23:05:29
You got it.
|
-// TODO: GPU benches |
static SkString humanize(double ms) { |
if (ms > 1e+3) return SkStringPrintf("%.3gs", ms/1e3); |
@@ -38,53 +44,91 @@ static SkString humanize(double ms) { |
return SkStringPrintf("%.3gms", ms); |
} |
-static double estimate_timer_overhead() { |
- double overhead = 0; |
+static double time(int loops, Benchmark* bench, SkCanvas* canvas, SkGLContextHelper* gl) { |
WallTimer timer; |
- for (int i = 0; i < FLAGS_overheadLoops; i++) { |
- timer.start(); |
- timer.end(); |
- overhead += timer.fWall; |
+ timer.start(); |
+ if (bench) { |
+ bench->draw(loops, canvas); |
} |
- return overhead / FLAGS_overheadLoops; |
-} |
- |
-static void safe_flush(SkCanvas* canvas) { |
if (canvas) { |
canvas->flush(); |
} |
+ if (gl) { |
+ SK_GL(*gl, Flush()); |
+ gl->swapBuffers(); |
+ } |
+ timer.end(); |
+ return timer.fWall; |
} |
-static int guess_loops(double overhead, Benchmark* bench, SkCanvas* canvas) { |
- WallTimer timer; |
+// Averages the wall time of FLAGS_overheadLoops calls to time() made with no bench, |
+// no canvas, and no GL context, i.e. the cost of starting and stopping the timer alone. |
+static double estimate_timer_overhead() { |
+    double total = 0; |
+    int remaining = FLAGS_overheadLoops; |
+    while (remaining-- > 0) { |
+        total += time(1, NULL, NULL, NULL); |
+    } |
+    return total / FLAGS_overheadLoops; |
+} |
- // Measure timer overhead and bench time together. |
+static int cpu_bench(const double overhead, Benchmark* bench, SkCanvas* canvas, double* samples) { |
+ // First figure out approximately how many loops of bench it takes to make overhead negligible. |
+ double bench_plus_overhead; |
do { |
- timer.start(); |
- bench->draw(1, canvas); |
- safe_flush(canvas); |
- timer.end(); |
- } while (timer.fWall < overhead); // Shouldn't normally happen. |
+ bench_plus_overhead = time(1, bench, canvas, NULL); |
+ } while (bench_plus_overhead < overhead); // Shouldn't normally happen. |
- // Later we'll just start and stop the timer once, but loop N times. |
+ // Later we'll just start and stop the timer once but loop N times. |
// We'll pick N to make timer overhead negligible: |
// |
- // Timer Overhead |
- // ------------------------------- < FLAGS_overheadGoal |
- // Timer Overhead + N * Bench Time |
+ // overhead |
+ // ------------------------- < FLAGS_overheadGoal |
+ // overhead + N * Bench Time |
// |
- // where timer.fWall ≈ Timer Overhead + Bench Time. |
+ // where bench_plus_overhead ≈ overhead + Bench Time. |
// |
// Doing some math, we get: |
// |
- // (Timer Overhead / FLAGS_overheadGoal) - Timer Overhead |
- // ----------------------------------------------------- < N |
- // (timer.fWall - Timer Overhead) |
+ // (overhead / FLAGS_overheadGoal) - overhead |
+ // ------------------------------------------ < N |
+ // bench_plus_overhead - overhead |
// |
// Luckily, this also works well in practice. :) |
const double numer = overhead / FLAGS_overheadGoal - overhead; |
- const double denom = timer.fWall - overhead; |
- return (int)ceil(numer / denom); |
+ const double denom = bench_plus_overhead - overhead; |
+ const int loops = ceil(numer / denom); |
+ |
+ for (int i = 0; i < FLAGS_samples; i++) { |
+ samples[i] = time(loops, bench, canvas, NULL) / loops; |
+ } |
+ return loops; |
+} |
+ |
+// Benchmarks bench on the native GL context: picks a loop count that makes one timed run |
+// last roughly FLAGS_gpuMs, then writes FLAGS_samples per-loop times into samples. |
+// Returns the loop count used for every sample. overhead is not used here; the parameter |
+// keeps the signature parallel to cpu_bench(). |
+static int gpu_bench(const double overhead, Benchmark* bench, SkCanvas* canvas, double* samples) { |
+    SkGLContextHelper* gl = gContextFactory.getGLContext(GrContextFactory::kNative_GLContextType); |
+    // SK_GL takes a call expression, as time() does with Flush(); the bare name Finish |
+    // would not invoke anything, leaving earlier GPU work pending when timing starts. |
+    SK_GL(*gl, Finish()); |
+ |
+    // First, figure out how many loops it'll take to get up to FLAGS_gpuMs. |
+    int loops = 1; |
+    double elapsed = 0; |
+    do { |
+        loops *= 2; |
+        // Run FLAGS_gpuFrameLag times and keep only the last timing. Presumably this lets |
+        // frames the GPU is allowed to queue drain, so the kept timing reflects steady |
+        // state -- TODO confirm and explain. |
+        for (int i = 0; i < FLAGS_gpuFrameLag; i++) { |
+            elapsed = time(loops, bench, canvas, gl); |
+        } |
+    } while (elapsed < FLAGS_gpuMs); |
+ |
+    // We've overshot at least a little. Scale back linearly. |
+    loops = (int)ceil(loops * FLAGS_gpuMs / elapsed); |
+ |
+    // Untimed runs at the final loop count, FLAGS_gpuFrameLag of them. Presumably these |
+    // refill the GPU's frame queue with same-sized work before sampling -- TODO confirm. |
+    for (int i = 0; i < FLAGS_gpuFrameLag; i++) { |
+        time(loops, bench, canvas, gl); |
+    } |
+ |
+    // Each sample is the wall time of one run divided by the loop count. |
+    for (int i = 0; i < FLAGS_samples; i++) { |
+        samples[i] = time(loops, bench, canvas, gl) / loops; |
+    } |
+    return loops; |
} |
static bool push_config_if_enabled(const char* config, SkTDArray<const char*>* configs) { |
@@ -104,22 +148,30 @@ static void create_surfaces(Benchmark* bench, |
surfaces->push(NULL); |
} |
- if (bench->isSuitableFor(Benchmark::kRaster_Backend)) { |
- const int w = bench->getSize().fX, |
- h = bench->getSize().fY; |
+ const int w = bench->getSize().fX, |
+ h = bench->getSize().fY; |
+ const SkImageInfo _8888 = { w, h, kN32_SkColorType, kPremul_SkAlphaType }, |
+ _565 = { w, h, kRGB_565_SkColorType, kOpaque_SkAlphaType }; |
+ if (bench->isSuitableFor(Benchmark::kRaster_Backend)) { |
if (push_config_if_enabled("8888", configs)) { |
- const SkImageInfo info = { w, h, kN32_SkColorType, kPremul_SkAlphaType }; |
- surfaces->push(SkSurface::NewRaster(info)); |
+ surfaces->push(SkSurface::NewRaster(_8888)); |
} |
if (push_config_if_enabled("565", configs)) { |
- const SkImageInfo info = { w, h, kRGB_565_SkColorType, kOpaque_SkAlphaType }; |
- surfaces->push(SkSurface::NewRaster(info)); |
+ surfaces->push(SkSurface::NewRaster(_565)); |
} |
} |
+#if SK_SUPPORT_GPU |
+ if (bench->isSuitableFor(Benchmark::kGPU_Backend) |
bsalomon
2014/06/30 19:58:12
style nit, don't we do && on prev line?
mtklein_C
2014/06/30 23:05:29
Dunno, but Done.
|
+ && push_config_if_enabled("gpu", configs)) { |
+ surfaces->push(SkSurface::NewRenderTarget( |
+ gContextFactory.get(GrContextFactory::kNative_GLContextType), _8888, 0)); |
+ } |
+#endif |
} |
+ |
int tool_main(int argc, char** argv); |
int tool_main(int argc, char** argv) { |
SetupCrashHandler(); |
@@ -127,13 +179,16 @@ int tool_main(int argc, char** argv) { |
SkCommandLineFlags::Parse(argc, argv); |
const double overhead = estimate_timer_overhead(); |
+ SkAutoTMalloc<double> samples(FLAGS_samples); |
+ |
+ // TODO: add median to the display, use it in --quiet mode |
if (FLAGS_verbose) { |
// No header. |
} else if (FLAGS_quiet) { |
SkDebugf("min\tbench\tconfig\n"); |
} else { |
- SkDebugf("loops\tmin\tmean\tmax\tstddev\tbench\tconfig\n"); |
+ SkDebugf("loops\tmin\tmean\tmax\tstddev\tconfig\tbench\n"); |
} |
for (const BenchRegistry* r = BenchRegistry::Head(); r != NULL; r = r->next()) { |
@@ -151,19 +206,9 @@ int tool_main(int argc, char** argv) { |
SkCanvas* canvas = surfaces[j] ? surfaces[j]->getCanvas() : NULL; |
const char* config = configs[j]; |
- bench->draw(1, canvas); // Just paranoid warmup. |
- safe_flush(canvas); |
- const int loops = guess_loops(overhead, bench.get(), canvas); |
- |
- SkAutoTMalloc<double> samples(FLAGS_samples); |
- WallTimer timer; |
- for (int i = 0; i < FLAGS_samples; i++) { |
- timer.start(); |
- bench->draw(loops, canvas); |
- safe_flush(canvas); |
- timer.end(); |
- samples[i] = timer.fWall / loops; |
- } |
+ const int loops = 0 == strcmp("gpu", config) |
+ ? gpu_bench(overhead, bench.get(), canvas, samples.get()) |
+ : cpu_bench(overhead, bench.get(), canvas, samples.get()); |
Stats stats(samples.get(), FLAGS_samples); |
@@ -185,8 +230,8 @@ int tool_main(int argc, char** argv) { |
, humanize(stats.mean).c_str() |
, humanize(stats.max).c_str() |
, stddev_percent |
- , bench->getName() |
, config |
+ , bench->getName() |
); |
} |
} |