| Index: bench/nanobench.cpp
|
| diff --git a/bench/nanobench.cpp b/bench/nanobench.cpp
|
| index 77df430c07b6c70bc78d89ed88ad43e4b4bc49d5..a3dbf22306105e9754df5fec0ed6c3cdcef4429f 100644
|
| --- a/bench/nanobench.cpp
|
| +++ b/bench/nanobench.cpp
|
| @@ -5,6 +5,8 @@
|
| * found in the LICENSE file.
|
| */
|
|
|
| +#include <ctype.h>
|
| +
|
| #include "Benchmark.h"
|
| #include "CrashHandler.h"
|
| #include "Stats.h"
|
| @@ -17,6 +19,11 @@
|
| #include "SkString.h"
|
| #include "SkSurface.h"
|
|
|
| +#if SK_SUPPORT_GPU
|
| + #include "GrContextFactory.h"
|
| + GrContextFactory gGrFactory;
|
| +#endif
|
| +
|
| __SK_FORCE_IMAGE_DECODER_LINKING;
|
|
|
| DEFINE_int32(samples, 10, "Number of samples to measure for each bench.");
|
| @@ -26,10 +33,11 @@ DEFINE_double(overheadGoal, 0.0001,
|
| DEFINE_string(match, "", "The usual filters on file names of benchmarks to measure.");
|
| DEFINE_bool2(quiet, q, false, "Print only bench name and minimum sample.");
|
| DEFINE_bool2(verbose, v, false, "Print all samples.");
|
| -DEFINE_string(config, "8888 nonrendering",
|
| - "Configs to measure. Options: 565 8888 nonrendering");
|
| +DEFINE_string(config, "nonrendering 8888 gpu", "Configs to measure. Options: "
|
| + "565 8888 gpu nonrendering debug nullgpu msaa4 msaa16 nvprmsaa4 nvprmsaa16 angle");
|
| +DEFINE_double(gpuMs, 5, "Target bench time in milliseconds for GPU."); // gpu_bench() doubles loops until one timed round takes at least this long.
|
| +DEFINE_int32(gpuFrameLag, 5, "Overestimate of maximum number of frames GPU allows to lag."); // Rounds gpu_bench() runs per calibration step and again as warmup.
|
|
|
| -// TODO: GPU benches
|
|
|
| static SkString humanize(double ms) {
|
| if (ms > 1e+3) return SkStringPrintf("%.3gs", ms/1e3);
|
| @@ -38,86 +46,175 @@ static SkString humanize(double ms) {
|
| return SkStringPrintf("%.3gms", ms);
|
| }
|
|
|
| -static double estimate_timer_overhead() {
|
| - double overhead = 0;
|
| +static double time(int loops, Benchmark* bench, SkCanvas* canvas, SkGLContextHelper* gl) { // Wall time of one round: draw(loops), canvas flush, then GL flush + swap; each step is skipped when its pointer is NULL.
|
| WallTimer timer;
|
| - for (int i = 0; i < FLAGS_overheadLoops; i++) {
|
| - timer.start();
|
| - timer.end();
|
| - overhead += timer.fWall;
|
| + timer.start();
|
| + if (bench) { // estimate_timer_overhead() passes NULL to time the timer by itself.
|
| + bench->draw(loops, canvas);
|
| }
|
| - return overhead / FLAGS_overheadLoops;
|
| -}
|
| -
|
| -static void safe_flush(SkCanvas* canvas) {
|
| if (canvas) {
|
| canvas->flush();
|
| }
|
| +#if SK_SUPPORT_GPU
|
| + if (gl) { // Only gpu_bench() passes a GL context; cpu_bench() passes NULL.
|
| + SK_GL(*gl, Flush());
|
| + gl->swapBuffers();
|
| + }
|
| +#endif
|
| + timer.end();
|
| + return timer.fWall; // Callers compare this against FLAGS_gpuMs and print it via humanize(), i.e. treat it as milliseconds.
|
| }
|
|
|
| -static int guess_loops(double overhead, Benchmark* bench, SkCanvas* canvas) {
|
| - WallTimer timer;
|
| +static double estimate_timer_overhead() { // Mean cost of a start/stop pair, averaged over FLAGS_overheadLoops empty timings.
|
| + double total = 0;
|
| + for (int remaining = FLAGS_overheadLoops; remaining > 0; remaining--) {
|
| + total += time(1, NULL, NULL, NULL); // No bench, canvas or GL context: nothing but the timer runs.
|
| + }
|
| + return total / FLAGS_overheadLoops;
|
| +}
|
|
|
| - // Measure timer overhead and bench time together.
|
| +static int cpu_bench(const double overhead, Benchmark* bench, SkCanvas* canvas, double* samples) { // Fills samples[0..FLAGS_samples) with per-loop times for bench; returns the loop count used.
|
| + // First figure out approximately how many loops of bench it takes to make overhead negligible.
|
| + double bench_plus_overhead;
|
| do {
|
| - timer.start();
|
| - bench->draw(1, canvas);
|
| - safe_flush(canvas);
|
| - timer.end();
|
| - } while (timer.fWall < overhead); // Shouldn't normally happen.
|
| + bench_plus_overhead = time(1, bench, canvas, NULL);
|
| + } while (bench_plus_overhead < overhead); // Shouldn't normally happen.
|
|
|
| - // Later we'll just start and stop the timer once, but loop N times.
|
| + // Later we'll just start and stop the timer once but loop N times.
|
| // We'll pick N to make timer overhead negligible:
|
| //
|
| - // Timer Overhead
|
| - // ------------------------------- < FLAGS_overheadGoal
|
| - // Timer Overhead + N * Bench Time
|
| + // overhead
|
| + // ------------------------- < FLAGS_overheadGoal
|
| + // overhead + N * Bench Time
|
| //
|
| - // where timer.fWall ≈ Timer Overhead + Bench Time.
|
| + // where bench_plus_overhead ≈ overhead + Bench Time.
|
| //
|
| // Doing some math, we get:
|
| //
|
| - // (Timer Overhead / FLAGS_overheadGoal) - Timer Overhead
|
| - // ----------------------------------------------------- < N
|
| - // (timer.fWall - Timer Overhead)
|
| + // (overhead / FLAGS_overheadGoal) - overhead
|
| + // ------------------------------------------ < N
|
| + // (bench_plus_overhead - overhead)
|
| //
|
| // Luckily, this also works well in practice. :)
|
| const double numer = overhead / FLAGS_overheadGoal - overhead;
|
| - const double denom = timer.fWall - overhead;
|
| - return (int)ceil(numer / denom);
|
| + const double denom = bench_plus_overhead - overhead;
|
| + const int loops = (denom > 0 && numer > denom) ? (int)ceil(numer / denom) : 1; // Clamp to >= 1: denom or overhead can be 0, and loops is a divisor below.
|
| +
|
| + for (int i = 0; i < FLAGS_samples; i++) {
|
| + samples[i] = time(loops, bench, canvas, NULL) / loops;
|
| + }
|
| + return loops;
|
| }
|
|
|
| -static bool push_config_if_enabled(const char* config, SkTDArray<const char*>* configs) {
|
| - if (FLAGS_config.contains(config)) {
|
| - configs->push(config);
|
| - return true;
|
| +#if SK_SUPPORT_GPU
|
| +static int gpu_bench(SkGLContextHelper* gl, // Fills samples[0..FLAGS_samples) with per-loop frame times; returns the loops drawn per frame.
|
| + Benchmark* bench,
|
| + SkCanvas* canvas,
|
| + double* samples) {
|
| + // Make sure we're done with whatever came before.
|
| + SK_GL(*gl, Finish()); // Needs the call parentheses, as with Flush() in time(); without them nothing is invoked.
|
| +
|
| + // First, figure out how many loops it'll take to get a frame up to FLAGS_gpuMs.
|
| + int loops = 1;
|
| + double elapsed = 0;
|
| + do {
|
| + loops *= 2;
|
| + // If the GPU lets frames lag at all, we need to make sure we're timing
|
| + // _this_ round, not still timing last round. We force this by looping
|
| + // more times than any reasonable GPU will allow frames to lag.
|
| + for (int i = 0; i < FLAGS_gpuFrameLag; i++) {
|
| + elapsed = time(loops, bench, canvas, gl);
|
| + }
|
| + } while (elapsed < FLAGS_gpuMs);
|
| +
|
| + // We've overshot at least a little. Scale back linearly.
|
| + loops = (int)ceil(loops * FLAGS_gpuMs / elapsed);
|
| +
|
| + // Might as well make sure we're not still timing our calibration.
|
| + SK_GL(*gl, Finish()); // Same fix as above: actually call glFinish.
|
| +
|
| + // Pretty much the same deal as the calibration: do some warmup to make
|
| + // sure we're timing steady-state pipelined frames.
|
| + for (int i = 0; i < FLAGS_gpuFrameLag; i++) {
|
| + time(loops, bench, canvas, gl);
|
| }
|
| - return false;
|
| +
|
| + // Now, actually do the timing!
|
| + for (int i = 0; i < FLAGS_samples; i++) {
|
| + samples[i] = time(loops, bench, canvas, gl) / loops;
|
| + }
|
| + return loops;
|
| }
|
| +#endif
|
| +
|
| +static SkString to_lower(const char* str) { // Returns a copy of str with every char passed through tolower().
|
| + SkString lower(str);
|
| + for (size_t i = 0; i < lower.size(); i++) {
|
| + lower[i] = tolower((unsigned char)lower[i]); // <ctype.h> functions are undefined for negative char values, so widen via unsigned char.
|
| + }
|
| + return lower;
|
| +}
|
| +
|
| +struct Target { // One bench/config pairing to measure; allocated by is_enabled().
|
| + const char* config; // Config name as passed to is_enabled(); tool_main prints it with the results.
|
| + Benchmark::Backend backend; // tool_main uses this to choose gpu_bench() or cpu_bench().
|
| + SkAutoTDelete<SkSurface> surface; // Draw destination; reset to NULL for the nonrendering config.
|
| +#if SK_SUPPORT_GPU
|
| + SkGLContextHelper* gl; // Assigned only by GPU_TARGET in create_targets(); is_enabled() leaves it unset.
|
| +#endif
|
| +};
|
|
|
| -static void create_surfaces(Benchmark* bench,
|
| - SkTDArray<SkSurface*>* surfaces,
|
| - SkTDArray<const char*>* configs) {
|
| +// If bench is suitable for backend and config was requested via --config, returns a newly allocated Target* for them (the caller owns it), otherwise NULL.
|
| +static Target* is_enabled(Benchmark* bench, Benchmark::Backend backend, const char* config) {
|
| + if (!bench->isSuitableFor(backend)) {
|
| + return NULL;
|
| + }
|
|
|
| - if (bench->isSuitableFor(Benchmark::kNonRendering_Backend)
|
| - && push_config_if_enabled("nonrendering", configs)) {
|
| - surfaces->push(NULL);
|
| + for (int i = 0; i < FLAGS_config.count(); i++) {
|
| + if (to_lower(FLAGS_config[i]).equals(config)) { // Only the flag value is lower-cased; config is compared exactly as given.
|
| + Target* target = new Target; // surface (and gl in GPU builds) are filled in by create_targets().
|
| + target->config = config;
|
| + target->backend = backend;
|
| + return target;
|
| + }
|
| }
|
| + return NULL;
|
| +}
|
|
|
| - if (bench->isSuitableFor(Benchmark::kRaster_Backend)) {
|
| - const int w = bench->getSize().fX,
|
| - h = bench->getSize().fY;
|
| +// Appends to targets one new Target for every config that is_enabled() accepts for bench; each Target gets its surface (and GL context for GPU configs) here.
|
| +static void create_targets(Benchmark* bench, SkTDArray<Target*>* targets) {
|
| + const int w = bench->getSize().fX,
|
| + h = bench->getSize().fY;
|
| + const SkImageInfo _8888 = { w, h, kN32_SkColorType, kPremul_SkAlphaType },
|
| + _565 = { w, h, kRGB_565_SkColorType, kOpaque_SkAlphaType };
|
|
|
| - if (push_config_if_enabled("8888", configs)) {
|
| - const SkImageInfo info = { w, h, kN32_SkColorType, kPremul_SkAlphaType };
|
| - surfaces->push(SkSurface::NewRaster(info));
|
| + #define CPU_TARGET(config, backend, code) \
|
| + if (Target* t = is_enabled(bench, Benchmark::backend, #config)) { \
|
| + t->surface.reset(code); \
|
| + targets->push(t); \
|
| }
|
| + CPU_TARGET(nonrendering, kNonRendering_Backend, NULL) // No surface, so tool_main hands the bench a NULL canvas.
|
| + CPU_TARGET(8888, kRaster_Backend, SkSurface::NewRaster(_8888))
|
| + CPU_TARGET(565, kRaster_Backend, SkSurface::NewRaster(_565))
|
|
|
| - if (push_config_if_enabled("565", configs)) {
|
| - const SkImageInfo info = { w, h, kRGB_565_SkColorType, kOpaque_SkAlphaType };
|
| - surfaces->push(SkSurface::NewRaster(info));
|
| +#if SK_SUPPORT_GPU // NOTE(review): neither the context nor the surface is checked for NULL below; confirm what gGrFactory.get() and NewRenderTarget() return for an unavailable context type.
|
| + #define GPU_TARGET(config, ctxType, info, samples) \
|
| + if (Target* t = is_enabled(bench, Benchmark::kGPU_Backend, #config)) { \
|
| + t->surface.reset(SkSurface::NewRenderTarget(gGrFactory.get(ctxType), info, samples)); \
|
| + t->gl = gGrFactory.getGLContext(ctxType); \
|
| + targets->push(t); \
|
| }
|
| - }
|
| + GPU_TARGET(gpu, GrContextFactory::kNative_GLContextType, _8888, 0) // Last argument is the sample count handed to NewRenderTarget().
|
| + GPU_TARGET(msaa4, GrContextFactory::kNative_GLContextType, _8888, 4)
|
| + GPU_TARGET(msaa16, GrContextFactory::kNative_GLContextType, _8888, 16)
|
| + GPU_TARGET(nvprmsaa4, GrContextFactory::kNVPR_GLContextType, _8888, 4)
|
| + GPU_TARGET(nvprmsaa16, GrContextFactory::kNVPR_GLContextType, _8888, 16)
|
| + GPU_TARGET(debug, GrContextFactory::kDebug_GLContextType, _8888, 0)
|
| + GPU_TARGET(nullgpu, GrContextFactory::kNull_GLContextType, _8888, 0)
|
| + #if SK_ANGLE
|
| + GPU_TARGET(angle, GrContextFactory::kANGLE_GLContextType, _8888, 0)
|
| + #endif
|
| +#endif
|
| }
|
|
|
| int tool_main(int argc, char** argv);
|
| @@ -127,13 +224,16 @@ int tool_main(int argc, char** argv) {
|
| SkCommandLineFlags::Parse(argc, argv);
|
|
|
| const double overhead = estimate_timer_overhead();
|
| + SkAutoTMalloc<double> samples(FLAGS_samples);
|
| +
|
| + // TODO: add median to the display, and use it in --quiet mode
|
|
|
| if (FLAGS_verbose) {
|
| // No header.
|
| } else if (FLAGS_quiet) {
|
| SkDebugf("min\tbench\tconfig\n");
|
| } else {
|
| - SkDebugf("loops\tmin\tmean\tmax\tstddev\tbench\tconfig\n");
|
| + SkDebugf("loops\tmin\tmean\tmax\tstddev\tconfig\tbench\n");
|
| }
|
|
|
| for (const BenchRegistry* r = BenchRegistry::Head(); r != NULL; r = r->next()) {
|
| @@ -142,38 +242,31 @@ int tool_main(int argc, char** argv) {
|
| continue;
|
| }
|
|
|
| - SkTDArray<SkSurface*> surfaces;
|
| - SkTDArray<const char*> configs;
|
| - create_surfaces(bench.get(), &surfaces, &configs);
|
| + SkTDArray<Target*> targets;
|
| + create_targets(bench.get(), &targets);
|
|
|
| bench->preDraw();
|
| - for (int j = 0; j < surfaces.count(); j++) {
|
| - SkCanvas* canvas = surfaces[j] ? surfaces[j]->getCanvas() : NULL;
|
| - const char* config = configs[j];
|
| -
|
| - bench->draw(1, canvas); // Just paranoid warmup.
|
| - safe_flush(canvas);
|
| - const int loops = guess_loops(overhead, bench.get(), canvas);
|
| -
|
| - SkAutoTMalloc<double> samples(FLAGS_samples);
|
| - WallTimer timer;
|
| - for (int i = 0; i < FLAGS_samples; i++) {
|
| - timer.start();
|
| - bench->draw(loops, canvas);
|
| - safe_flush(canvas);
|
| - timer.end();
|
| - samples[i] = timer.fWall / loops;
|
| - }
|
| + for (int j = 0; j < targets.count(); j++) {
|
| + SkCanvas* canvas = targets[j]->surface.get() ? targets[j]->surface->getCanvas() : NULL;
|
| +
|
| + const int loops =
|
| +#if SK_SUPPORT_GPU
|
| + Benchmark::kGPU_Backend == targets[j]->backend
|
| + ? gpu_bench(targets[j]->gl, bench.get(), canvas, samples.get())
|
| + :
|
| +#endif
|
| + cpu_bench( overhead, bench.get(), canvas, samples.get());
|
|
|
| Stats stats(samples.get(), FLAGS_samples);
|
|
|
| + const char* config = targets[j]->config;
|
| if (FLAGS_verbose) {
|
| for (int i = 0; i < FLAGS_samples; i++) {
|
| SkDebugf("%s ", humanize(samples[i]).c_str());
|
| }
|
| SkDebugf("%s\n", bench->getName());
|
| } else if (FLAGS_quiet) {
|
| - if (configs.count() == 1) {
|
| + if (targets.count() == 1) {
|
| config = ""; // Only print the config if we run the same bench on more than one.
|
| }
|
| SkDebugf("%s\t%s\t%s\n", humanize(stats.min).c_str(), bench->getName(), config);
|
| @@ -185,12 +278,12 @@ int tool_main(int argc, char** argv) {
|
| , humanize(stats.mean).c_str()
|
| , humanize(stats.max).c_str()
|
| , stddev_percent
|
| - , bench->getName()
|
| , config
|
| + , bench->getName()
|
| );
|
| }
|
| }
|
| - surfaces.deleteAll();
|
| + targets.deleteAll();
|
| }
|
|
|
| return 0;
|
|
|