Index: bench/nanobench.cpp
diff --git a/bench/nanobench.cpp b/bench/nanobench.cpp
index 77df430c07b6c70bc78d89ed88ad43e4b4bc49d5..a3dbf22306105e9754df5fec0ed6c3cdcef4429f 100644
--- a/bench/nanobench.cpp
+++ b/bench/nanobench.cpp
@@ -5,6 +5,8 @@
  * found in the LICENSE file.
  */
+#include <ctype.h>
+
 #include "Benchmark.h"
 #include "CrashHandler.h"
 #include "Stats.h"
@@ -17,6 +19,11 @@
 #include "SkString.h"
 #include "SkSurface.h"
+#if SK_SUPPORT_GPU
+    #include "GrContextFactory.h"
+    GrContextFactory gGrFactory;
+#endif
+
 __SK_FORCE_IMAGE_DECODER_LINKING;
 DEFINE_int32(samples, 10, "Number of samples to measure for each bench.");
@@ -26,10 +33,11 @@ DEFINE_double(overheadGoal, 0.0001,
 DEFINE_string(match, "", "The usual filters on file names of benchmarks to measure.");
 DEFINE_bool2(quiet, q, false, "Print only bench name and minimum sample.");
 DEFINE_bool2(verbose, v, false, "Print all samples.");
-DEFINE_string(config, "8888 nonrendering",
-              "Configs to measure. Options: 565 8888 nonrendering");
+DEFINE_string(config, "nonrendering 8888 gpu", "Configs to measure. Options: "
+              "565 8888 gpu nonrendering debug nullgpu msaa4 msaa16 nvprmsaa4 nvprmsaa16 angle");
+DEFINE_double(gpuMs, 5, "Target bench time in milliseconds for GPU.");
+DEFINE_int32(gpuFrameLag, 5, "Overestimate of maximum number of frames the GPU allows to lag.");
-// TODO: GPU benches
 static SkString humanize(double ms) {
     if (ms > 1e+3) return SkStringPrintf("%.3gs", ms/1e3);
@@ -38,86 +46,175 @@ static SkString humanize(double ms) {
     return SkStringPrintf("%.3gms", ms);
 }
-static double estimate_timer_overhead() {
-    double overhead = 0;
+static double time(int loops, Benchmark* bench, SkCanvas* canvas, SkGLContextHelper* gl) {
     WallTimer timer;
-    for (int i = 0; i < FLAGS_overheadLoops; i++) {
-        timer.start();
-        timer.end();
-        overhead += timer.fWall;
+    timer.start();
+    if (bench) {
+        bench->draw(loops, canvas);
     }
-    return overhead / FLAGS_overheadLoops;
-}
-
-static void safe_flush(SkCanvas* canvas) {
     if (canvas) {
         canvas->flush();
     }
+#if SK_SUPPORT_GPU
+    if (gl) {
+        SK_GL(*gl, Flush());
+        gl->swapBuffers();
+    }
+#endif
+    timer.end();
+    return timer.fWall;
 }
-static int guess_loops(double overhead, Benchmark* bench, SkCanvas* canvas) {
-    WallTimer timer;
+static double estimate_timer_overhead() {
+    double overhead = 0;
+    for (int i = 0; i < FLAGS_overheadLoops; i++) {
+        overhead += time(1, NULL, NULL, NULL);
+    }
+    return overhead / FLAGS_overheadLoops;
+}
-    // Measure timer overhead and bench time together.
+static int cpu_bench(const double overhead, Benchmark* bench, SkCanvas* canvas, double* samples) {
+    // First figure out approximately how many loops of bench it takes to make overhead negligible.
+    double bench_plus_overhead;
     do {
-        timer.start();
-        bench->draw(1, canvas);
-        safe_flush(canvas);
-        timer.end();
-    } while (timer.fWall < overhead);  // Shouldn't normally happen.
+        bench_plus_overhead = time(1, bench, canvas, NULL);
+    } while (bench_plus_overhead < overhead);  // Shouldn't normally happen.
-    // Later we'll just start and stop the timer once, but loop N times.
+    // Later we'll just start and stop the timer once but loop N times.
     // We'll pick N to make timer overhead negligible:
     //
-    //           Timer Overhead
-    //  -------------------------------  < FLAGS_overheadGoal
-    //  Timer Overhead + N * Bench Time
+    //            overhead
+    //  -------------------------  < FLAGS_overheadGoal
+    //  overhead + N * Bench Time
     //
-    // where timer.fWall ≈ Timer Overhead + Bench Time.
+    // where bench_plus_overhead ≈ overhead + Bench Time.
     //
     // Doing some math, we get:
    //
-    //  (Timer Overhead / FLAGS_overheadGoal) - Timer Overhead
-    //  -----------------------------------------------------  < N
-    //             (timer.fWall - Timer Overhead)
+    //  (overhead / FLAGS_overheadGoal) - overhead
+    //  ------------------------------------------  < N
+    //         bench_plus_overhead - overhead
     //
     // Luckily, this also works well in practice. :)
     const double numer = overhead / FLAGS_overheadGoal - overhead;
-    const double denom = timer.fWall - overhead;
-    return (int)ceil(numer / denom);
+    const double denom = bench_plus_overhead - overhead;
+    const int loops = (int)ceil(numer / denom);
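+    // (Purely illustrative numbers, not measured: with overhead around 20ns and
+    // the default FLAGS_overheadGoal of 0.0001, numer is roughly 200µs, so a
+    // bench taking about 1µs per draw gives denom of about 1µs and loops of
+    // about 200.)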
+
+    for (int i = 0; i < FLAGS_samples; i++) {
+        samples[i] = time(loops, bench, canvas, NULL) / loops;
+    }
+    return loops;
 }
-static bool push_config_if_enabled(const char* config, SkTDArray<const char*>* configs) {
-    if (FLAGS_config.contains(config)) {
-        configs->push(config);
-        return true;
+#if SK_SUPPORT_GPU
+static int gpu_bench(SkGLContextHelper* gl,
+                     Benchmark* bench,
+                     SkCanvas* canvas,
+                     double* samples) {
+    // Make sure we're done with whatever came before.
+    SK_GL(*gl, Finish());
+
+    // First, figure out how many loops it'll take to get a frame up to FLAGS_gpuMs.
+    int loops = 1;
+    double elapsed = 0;
+    do {
+        loops *= 2;
+        // If the GPU lets frames lag at all, we need to make sure we're timing
+        // _this_ round, not still timing last round.  We force this by looping
+        // more times than any reasonable GPU will allow frames to lag.
+        for (int i = 0; i < FLAGS_gpuFrameLag; i++) {
+            elapsed = time(loops, bench, canvas, gl);
+        }
+    } while (elapsed < FLAGS_gpuMs);
+
+    // We've overshot at least a little.  Scale back linearly.
+    loops = (int)ceil(loops * FLAGS_gpuMs / elapsed);
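+    // (Hypothetical example: if 64 loops came in at 8ms against the default 5ms
+    // target, we'd scale back to ceil(64 * 5 / 8) = 40 loops.)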
+
+    // Might as well make sure we're not still timing our calibration.
+    SK_GL(*gl, Finish());
+
+    // Pretty much the same deal as the calibration: do some warmup to make
+    // sure we're timing steady-state pipelined frames.
+    for (int i = 0; i < FLAGS_gpuFrameLag; i++) {
+        time(loops, bench, canvas, gl);
     }
-    return false;
+
+    // Now, actually do the timing!
+    for (int i = 0; i < FLAGS_samples; i++) {
+        samples[i] = time(loops, bench, canvas, gl) / loops;
+    }
+    return loops;
 }
+#endif
+
+static SkString to_lower(const char* str) {
+    SkString lower(str);
+    for (size_t i = 0; i < lower.size(); i++) {
+        lower[i] = tolower(lower[i]);
+    }
+    return lower;
+}
+
+struct Target {
+    const char* config;
+    Benchmark::Backend backend;
+    SkAutoTDelete<SkSurface> surface;
+#if SK_SUPPORT_GPU
+    SkGLContextHelper* gl;
+#endif
+};
-static void create_surfaces(Benchmark* bench,
-                            SkTDArray<SkSurface*>* surfaces,
-                            SkTDArray<const char*>* configs) {
+// If bench is enabled for backend/config, returns a Target* for them, otherwise NULL.
+static Target* is_enabled(Benchmark* bench, Benchmark::Backend backend, const char* config) {
+    if (!bench->isSuitableFor(backend)) {
+        return NULL;
+    }
-    if (bench->isSuitableFor(Benchmark::kNonRendering_Backend)
-        && push_config_if_enabled("nonrendering", configs)) {
-        surfaces->push(NULL);
+    for (int i = 0; i < FLAGS_config.count(); i++) {
+        if (to_lower(FLAGS_config[i]).equals(config)) {
+            Target* target = new Target;
+            target->config = config;
+            target->backend = backend;
+            return target;
+        }
     }
+    return NULL;
+}
-    if (bench->isSuitableFor(Benchmark::kRaster_Backend)) {
-        const int w = bench->getSize().fX,
-                  h = bench->getSize().fY;
+// Append all targets that are suitable for bench.
+static void create_targets(Benchmark* bench, SkTDArray<Target*>* targets) {
+    const int w = bench->getSize().fX,
+              h = bench->getSize().fY;
+    const SkImageInfo _8888 = { w, h, kN32_SkColorType, kPremul_SkAlphaType },
+                       _565 = { w, h, kRGB_565_SkColorType, kOpaque_SkAlphaType };
-        if (push_config_if_enabled("8888", configs)) {
-            const SkImageInfo info = { w, h, kN32_SkColorType, kPremul_SkAlphaType };
-            surfaces->push(SkSurface::NewRaster(info));
+    #define CPU_TARGET(config, backend, code) \
+        if (Target* t = is_enabled(bench, Benchmark::backend, #config)) { \
+            t->surface.reset(code); \
+            targets->push(t); \
         }
+    CPU_TARGET(nonrendering, kNonRendering_Backend, NULL)
+    CPU_TARGET(8888, kRaster_Backend, SkSurface::NewRaster(_8888))
+    CPU_TARGET(565, kRaster_Backend, SkSurface::NewRaster(_565))
-        if (push_config_if_enabled("565", configs)) {
-            const SkImageInfo info = { w, h, kRGB_565_SkColorType, kOpaque_SkAlphaType };
-            surfaces->push(SkSurface::NewRaster(info));
+#if SK_SUPPORT_GPU
+    #define GPU_TARGET(config, ctxType, info, samples) \
+        if (Target* t = is_enabled(bench, Benchmark::kGPU_Backend, #config)) { \
+            t->surface.reset(SkSurface::NewRenderTarget(gGrFactory.get(ctxType), info, samples)); \
+            t->gl = gGrFactory.getGLContext(ctxType); \
+            targets->push(t); \
         }
-    }
+    GPU_TARGET(gpu, GrContextFactory::kNative_GLContextType, _8888, 0)
+    GPU_TARGET(msaa4, GrContextFactory::kNative_GLContextType, _8888, 4)
+    GPU_TARGET(msaa16, GrContextFactory::kNative_GLContextType, _8888, 16)
+    GPU_TARGET(nvprmsaa4, GrContextFactory::kNVPR_GLContextType, _8888, 4)
+    GPU_TARGET(nvprmsaa16, GrContextFactory::kNVPR_GLContextType, _8888, 16)
+    GPU_TARGET(debug, GrContextFactory::kDebug_GLContextType, _8888, 0)
+    GPU_TARGET(nullgpu, GrContextFactory::kNull_GLContextType, _8888, 0)
+    #if SK_ANGLE
+        GPU_TARGET(angle, GrContextFactory::kANGLE_GLContextType, _8888, 0)
+    #endif
+#endif
 }
 int tool_main(int argc, char** argv);
@@ -127,13 +224,16 @@ int tool_main(int argc, char** argv) {
     SkCommandLineFlags::Parse(argc, argv);
     const double overhead = estimate_timer_overhead();
+    SkAutoTMalloc<double> samples(FLAGS_samples);
+
+    // TODO: add median to the displayed stats, and use it in --quiet mode.
     if (FLAGS_verbose) {
         // No header.
     } else if (FLAGS_quiet) {
         SkDebugf("min\tbench\tconfig\n");
     } else {
-        SkDebugf("loops\tmin\tmean\tmax\tstddev\tbench\tconfig\n");
+        SkDebugf("loops\tmin\tmean\tmax\tstddev\tconfig\tbench\n");
     }
     for (const BenchRegistry* r = BenchRegistry::Head(); r != NULL; r = r->next()) {
@@ -142,38 +242,31 @@ int tool_main(int argc, char** argv) {
             continue;
         }
-        SkTDArray<SkSurface*> surfaces;
-        SkTDArray<const char*> configs;
-        create_surfaces(bench.get(), &surfaces, &configs);
+        SkTDArray<Target*> targets;
+        create_targets(bench.get(), &targets);
         bench->preDraw();
-        for (int j = 0; j < surfaces.count(); j++) {
-            SkCanvas* canvas = surfaces[j] ? surfaces[j]->getCanvas() : NULL;
-            const char* config = configs[j];
-
-            bench->draw(1, canvas);  // Just paranoid warmup.
-            safe_flush(canvas);
-            const int loops = guess_loops(overhead, bench.get(), canvas);
-
-            SkAutoTMalloc<double> samples(FLAGS_samples);
-            WallTimer timer;
-            for (int i = 0; i < FLAGS_samples; i++) {
-                timer.start();
-                bench->draw(loops, canvas);
-                safe_flush(canvas);
-                timer.end();
-                samples[i] = timer.fWall / loops;
-            }
+        for (int j = 0; j < targets.count(); j++) {
+            SkCanvas* canvas = targets[j]->surface.get() ? targets[j]->surface->getCanvas() : NULL;
+
+            const int loops =
+#if SK_SUPPORT_GPU
+                Benchmark::kGPU_Backend == targets[j]->backend
+                ? gpu_bench(targets[j]->gl, bench.get(), canvas, samples.get())
+                :
+#endif
+                  cpu_bench( overhead, bench.get(), canvas, samples.get());
             Stats stats(samples.get(), FLAGS_samples);
+            const char* config = targets[j]->config;
             if (FLAGS_verbose) {
                 for (int i = 0; i < FLAGS_samples; i++) {
                     SkDebugf("%s ", humanize(samples[i]).c_str());
                 }
                 SkDebugf("%s\n", bench->getName());
             } else if (FLAGS_quiet) {
-                if (configs.count() == 1) {
+                if (targets.count() == 1) {
                     config = "";  // Only print the config if we run the same bench on more than one.
                 }
                 SkDebugf("%s\t%s\t%s\n", humanize(stats.min).c_str(), bench->getName(), config);
@@ -185,12 +278,12 @@ int tool_main(int argc, char** argv) {
                         , humanize(stats.mean).c_str()
                         , humanize(stats.max).c_str()
                         , stddev_percent
-                        , bench->getName()
                         , config
+                        , bench->getName()
                         );
             }
         }
-        surfaces.deleteAll();
+        targets.deleteAll();
     }
     return 0;