Index: tools/kilobench/kilobench.cpp |
diff --git a/tools/kilobench/kilobench.cpp b/tools/kilobench/kilobench.cpp |
index 3869006c63a6342d43c7a4d777ef6a19fb5bb29d..438e582324fb2e8c97f1da061e7d9a3f8760d870 100644 |
--- a/tools/kilobench/kilobench.cpp |
+++ b/tools/kilobench/kilobench.cpp |
@@ -5,11 +5,18 @@ |
* found in the LICENSE file. |
*/ |
+#include "GrCaps.h" |
+#include "GrContextFactory.h" |
#include "Benchmark.h" |
#include "SkCommandLineFlags.h" |
#include "SkOSFile.h" |
#include "SkStream.h" |
+#include "SkSurface.h" |
+#include "SkTime.h" |
+#include "Stats.h" |
+#include "Timer.h" |
#include "VisualSKPBench.h" |
+#include "gl/GrGLDefines.h" |
/* |
* This is an experimental GPU only benchmarking program. The initial implementation will only |
@@ -21,8 +28,25 @@ |
#include "SkImageDecoder.h" |
__SK_FORCE_IMAGE_DECODER_LINKING; |
-DEFINE_string(skps, "skps", "Directory to read skps from."); |
+// Sentinel value for --loops: when the loop count equals this, the count is auto-tuned per bench. |
+static const int kAutoTuneLoops = 0; |
+ |
+// Default for --loops: a single loop when SK_DEBUG is defined, auto-tuning otherwise. |
+static const int kDefaultLoops = |
+#ifdef SK_DEBUG |
+ 1; |
+#else |
+ kAutoTuneLoops; |
+#endif |
+ |
+// Builds the help text for the --loops flag, embedding the auto-tune sentinel value. |
+static SkString loops_help_txt() { |
+    SkString text; |
+    text.printf("Number of times to run each bench. Set this to %d to auto-" |
+                "tune for each bench. Timings are only reported when auto-tuning.", |
+                kAutoTuneLoops); |
+    return text; |
+} |
+ |
+DEFINE_string(skps, "skps", "Directory to read skps from."); |
DEFINE_string2(match, m, nullptr, |
"[~][^]substring[$] [...] of GM name to run.\n" |
"Multiple matches may be separated by spaces.\n" |
@@ -32,6 +56,12 @@ DEFINE_string2(match, m, nullptr, |
"^ and $ requires an exact match\n" |
"If a bench does not match any list entry,\n" |
"it is skipped unless some list entry starts with ~"); |
+// Timing controls: frame-lag fallback, sample count, loop limits, target frame time, PNG output. |
+DEFINE_int32(gpuFrameLag, 5, "If unknown, estimated maximum number of frames GPU allows to lag."); |
+DEFINE_int32(samples, 10, "Number of samples to measure for each bench."); |
+DEFINE_int32(maxLoops, 1000000, "Never run a bench more times than this."); |
+DEFINE_int32(loops, kDefaultLoops, loops_help_txt().c_str()); |
+DEFINE_double(gpuMs, 5, "Target bench time in milliseconds for GPU."); |
+DEFINE_string2(writePath, w, "", "If set, write bitmaps here as .pngs."); |
namespace kilobench { |
class BenchmarkStream { |
@@ -103,14 +133,265 @@ private: |
int fCurrentSKP; |
}; |
+// Bundles the GL context and the render-target SkSurface that one benchmark draws into. |
+struct GPUTarget { |
+    // Start without a GL context so the null check in endTiming() is well defined before init(). |
+    GPUTarget() : gl(nullptr) {} |
+ |
+    // Makes the GL context current and issues a GL Finish. Requires a successful init(). |
+    void setup() { |
+        this->gl->makeCurrent(); |
+        // Make sure we're done with whatever came before. |
+        SK_GL(*this->gl, Finish()); |
+    } |
+ |
+    // Hook called at the start of the timed region; returns the canvas to draw into unchanged. |
+    SkCanvas* beginTiming(SkCanvas* canvas) { return canvas; } |
+ |
+    // Hook called at the end of the timed region: flushes GL and swaps buffers if a context exists. |
+    void endTiming() { |
+        if (this->gl) { |
+            SK_GL(*this->gl, Flush()); |
+            this->gl->swapBuffers(); |
+        } |
+    } |
+ |
+    // Issues a GL Finish on the context. Requires a successful init(). |
+    void fence() { |
+        SK_GL(*this->gl, Finish()); |
+    } |
+ |
+    // Writes the maximum GPU frame lag to |maxFrameLag|, falling back to --gpuFrameLag when the |
+    // GL context cannot report it. Always returns true. |
+    bool needsFrameTiming(int* maxFrameLag) const { |
+        if (!this->gl->getMaxGpuFrameLag(maxFrameLag)) { |
+            // Frame lag is unknown. |
+            *maxFrameLag = FLAGS_gpuFrameLag; |
+        } |
+        return true; |
+    } |
+ |
+    // Creates the render-target surface for |bench|, with each dimension clamped to the context's |
+    // maximum render target size, and records the matching GL context. |
+    // Returns false when no GrContext is available or the surface could not be created. |
+    bool init(Benchmark* bench, GrContextFactory* factory, bool useDfText, |
+              GrContextFactory::GLContextType ctxType, |
+              GrContextFactory::GLContextOptions ctxOptions, int numSamples) { |
+        GrContext* context = factory->get(ctxType, ctxOptions); |
+        if (!context) { |
+            // Guard against a null context; it is dereferenced immediately below. |
+            return false; |
+        } |
+        int maxRTSize = context->caps()->maxRenderTargetSize(); |
+        SkImageInfo info = SkImageInfo::Make(SkTMin(bench->getSize().fX, maxRTSize), |
+                                             SkTMin(bench->getSize().fY, maxRTSize), |
+                                             kN32_SkColorType, kPremul_SkAlphaType); |
+        uint32_t flags = useDfText ? SkSurfaceProps::kUseDeviceIndependentFonts_Flag : |
+                                     0; |
+        SkSurfaceProps props(flags, SkSurfaceProps::kLegacyFontHost_InitType); |
+        this->surface.reset(SkSurface::NewRenderTarget(context, |
+                                                       SkSurface::kNo_Budgeted, info, |
+                                                       numSamples, &props)); |
+        this->gl = factory->getContextInfo(ctxType, ctxOptions).fGLContext; |
+        if (!this->surface.get()) { |
+            return false; |
+        } |
+ |
+        // Kilobench should only be used on platforms with fence sync support |
+        SkASSERT(this->gl->fenceSyncSupport()); |
+        return true; |
+    } |
+ |
+    // Returns the surface's canvas, or nullptr when no surface has been created. |
+    SkCanvas* getCanvas() const { |
+        if (!surface.get()) { |
+            return nullptr; |
+        } |
+        return surface->getCanvas(); |
+    } |
+ |
+    // Reads the canvas pixels back into |bmp|. Returns false when there is no canvas or the |
+    // read-back fails. |
+    bool capturePixels(SkBitmap* bmp) { |
+        SkCanvas* canvas = this->getCanvas(); |
+        if (!canvas) { |
+            return false; |
+        } |
+        bmp->setInfo(canvas->imageInfo()); |
+        if (!canvas->readPixels(bmp, 0, 0)) { |
+            SkDebugf("Can't read canvas pixels.\n"); |
+            return false; |
+        } |
+        return true; |
+    } |
+ |
+private: |
+    //const Config config; |
+    SkGLContext* gl;                   // Assigned in init() from the factory's context info. |
+    SkAutoTDelete<SkSurface> surface;  // Render target created in init(). |
+}; |
+ |
+// Encodes the target's current canvas contents as a PNG at |filename|, creating the parent |
+// directory first. Returns true only if every step succeeds. |
+static bool write_canvas_png(GPUTarget* target, const SkString& filename) { |
+    if (filename.isEmpty()) { |
+        return false; |
+    } |
+ |
+    // A canvas with an unknown color type is rejected up front. |
+    SkCanvas* canvas = target->getCanvas(); |
+    if (canvas && kUnknown_SkColorType == canvas->imageInfo().colorType()) { |
+        return false; |
+    } |
+ |
+    SkBitmap pixels; |
+    if (!target->capturePixels(&pixels)) { |
+        return false; |
+    } |
+ |
+    const char* path = filename.c_str(); |
+    const SkString parentDir = SkOSPath::Dirname(path); |
+    if (!sk_mkdir(parentDir.c_str())) { |
+        SkDebugf("Can't make dir %s.\n", parentDir.c_str()); |
+        return false; |
+    } |
+ |
+    SkFILEWStream out(path); |
+    if (!out.isValid()) { |
+        SkDebugf("Can't write %s.\n", path); |
+        return false; |
+    } |
+ |
+    const bool encoded = SkImageEncoder::EncodeStream(&out, pixels, SkImageEncoder::kPNG_Type, 100); |
+    if (!encoded) { |
+        SkDebugf("Can't encode a PNG.\n"); |
+    } |
+    return encoded; |
+} |
+ |
+// Maps the magic run-forever value (any negative loop count) to SK_MaxS32; other counts pass |
+// through unchanged. |
+static int detect_forever_loops(int loops) { |
+    return (loops < 0) ? SK_MaxS32 : loops; |
+} |
+ |
+// Restricts |loops| to the range [1, FLAGS_maxLoops], logging whenever the value is adjusted. |
+static int clamp_loops(int loops) { |
+    const bool tooFew = loops < 1; |
+    if (!tooFew && loops <= FLAGS_maxLoops) { |
+        // Already in range. |
+        return loops; |
+    } |
+    if (tooFew) { |
+        SkDebugf("ERROR: clamping loops from %d to 1. " |
+                 "There's probably something wrong with the bench.\n", loops); |
+        return 1; |
+    } |
+    SkDebugf("WARNING: clamping loops from %d to FLAGS_maxLoops, %d.\n", loops, FLAGS_maxLoops); |
+    return FLAGS_maxLoops; |
+} |
+ |
+// Current SkTime nanosecond reading, scaled to milliseconds. |
+static double now_ms() { |
+    const auto nanos = SkTime::GetNSecs(); |
+    return nanos * 1e-6; |
+} |
+// Draws |bench| for |loops| iterations into the target's canvas and returns the elapsed time in |
+// milliseconds. The timed window covers beginTiming(), draw(), the canvas flush and endTiming(); |
+// the clear, preDraw() and postDraw() happen outside it. |
+static double time(int loops, Benchmark* bench, GPUTarget* target) { |
+ SkCanvas* canvas = target->getCanvas(); |
+ if (canvas) { |
+ canvas->clear(SK_ColorWHITE); |
+ } |
+ bench->preDraw(canvas); |
+ // Timed window starts here. |
+ double start = now_ms(); |
+ canvas = target->beginTiming(canvas); |
+ bench->draw(loops, canvas); |
+ if (canvas) { |
+ canvas->flush(); |
+ } |
+ target->endTiming(); |
+ // Timed window ends here; postDraw() is deliberately excluded from the measurement. |
+ double elapsed = now_ms() - start; |
+ bench->postDraw(canvas); |
+ return elapsed; |
+} |
+ |
+// Chooses the loop count for |bench| and warms up the GPU pipeline. |
+// |
+// When the bench's loop count equals kAutoTuneLoops, the count is doubled until one timed pass |
+// takes at least FLAGS_gpuMs, then scaled back linearly and clamped with clamp_loops(). Otherwise |
+// the count is used as given, with negative values mapped to "run forever". |
+// Returns the loop count to use for the measured samples. |
+static int setup_gpu_bench(GPUTarget* target, Benchmark* bench, int maxGpuFrameLag) { |
+    // First, figure out how many loops it'll take to get a frame up to FLAGS_gpuMs. |
+    int loops = bench->calculateLoops(FLAGS_loops); |
+    if (kAutoTuneLoops == loops) { |
+        loops = 1; |
+        double elapsed = 0; |
+        do { |
+            if (1<<30 == loops) { |
+                // We're about to wrap.  Something's wrong with the bench. |
+                loops = 0; |
+                break; |
+            } |
+            loops *= 2; |
+            // If the GPU lets frames lag at all, we need to make sure we're timing |
+            // _this_ round, not still timing last round. |
+            for (int i = 0; i < maxGpuFrameLag; i++) { |
+                elapsed = time(loops, bench, target); |
+            } |
+        } while (elapsed < FLAGS_gpuMs); |
+ |
+        // We've overshot at least a little.  Scale back linearly. |
+        // Only scale when there is a real measurement: with no timed pass (maxGpuFrameLag <= 0) |
+        // or a zero elapsed time the division yields NaN/inf, and casting that to int is |
+        // undefined. In that case fall through with 0 so clamp_loops() reports it and picks 1. |
+        if (loops > 0 && elapsed > 0) { |
+            loops = (int)ceil(loops * FLAGS_gpuMs / elapsed); |
+        } else { |
+            loops = 0; |
+        } |
+        loops = clamp_loops(loops); |
+ |
+        // Make sure we're not still timing our calibration. |
+        target->fence(); |
+    } else { |
+        loops = detect_forever_loops(loops); |
+    } |
+ |
+    // Pretty much the same deal as the calibration: do some warmup to make |
+    // sure we're timing steady-state pipelined frames. |
+    for (int i = 0; i < maxGpuFrameLag - 1; i++) { |
+        time(loops, bench, target); |
+    } |
+ |
+    return loops; |
+} |
+ |
+// Formats a millisecond duration for display by delegating to HumanizeMs(). |
+static SkString humanize(double ms) { |
+    SkString text = HumanizeMs(ms); |
+    return text; |
+} |
+// Convenience wrapper yielding a C string that is valid for the enclosing full expression. |
+#define HUMANIZE(ms) humanize(ms).c_str() |
+ |
+// Runs one benchmark on a native GL target: picks the loop count, collects FLAGS_samples timing |
+// samples (milliseconds per loop), prints one tab-separated result row, and optionally writes |
+// the final canvas as a PNG under FLAGS_writePath. |
+void benchmark_inner_loop(Benchmark* bench, GrContextFactory* ctxFactory) { |
+    GPUTarget target; |
+    // Skip the bench if the target cannot be created, rather than only asserting and then |
+    // driving GL calls and draws through a target that has no surface. |
+    if (!target.init(bench, ctxFactory, false, |
+                     GrContextFactory::kNative_GLContextType, |
+                     GrContextFactory::kNone_GLContextOptions, 0)) { |
+        SkDebugf("Could not create a GPU target for %s, skipping.\n", bench->getUniqueName()); |
+        return; |
+    } |
+ |
+    SkCanvas* canvas = target.getCanvas(); |
+    target.setup(); |
+ |
+    bench->perCanvasPreDraw(canvas); |
+    int maxFrameLag; |
+    target.needsFrameTiming(&maxFrameLag); |
+    int loops = setup_gpu_bench(&target, bench, maxFrameLag); |
+ |
+    // Each sample is the time of one pass divided by the loop count, i.e. ms per loop. |
+    SkTArray<double> samples; |
+    samples.reset(FLAGS_samples); |
+    for (int s = 0; s < FLAGS_samples; s++) { |
+        samples[s] = time(loops, bench, &target) / loops; |
+    } |
+ |
+    bench->perCanvasPostDraw(canvas); |
+ |
+    Stats stats(samples); |
+    const double stddev_percent = 100 * sqrt(stats.var) / stats.mean; |
+    SkDebugf("%d\t%s\t%s\t%s\t%s\t%.0f%%\t%s\t%s\t%s\n" |
+             , loops |
+             , HUMANIZE(stats.min) |
+             , HUMANIZE(stats.median) |
+             , HUMANIZE(stats.mean) |
+             , HUMANIZE(stats.max) |
+             , stddev_percent |
+             , stats.plot.c_str() |
+             , "gpu" |
+             , bench->getUniqueName() |
+             ); |
+ |
+    // Output path is <writePath>/gpu/<unique bench name>.png |
+    if (!FLAGS_writePath.isEmpty() && FLAGS_writePath[0]) { |
+        SkString pngFilename = SkOSPath::Join(FLAGS_writePath[0], "gpu"); |
+        pngFilename = SkOSPath::Join(pngFilename.c_str(), bench->getUniqueName()); |
+        pngFilename.append(".png"); |
+        write_canvas_png(&target, pngFilename); |
+    } |
+} |
+ |
} // namespace kilobench |
+// Program body: creates the GrContextFactory, prints the result table header, and runs |
+// benchmark_inner_loop() on every Benchmark produced by the BenchmarkStream. Returns 0. |
int kilobench_main() { |
+ SkAutoTDelete<GrContextFactory> ctxFactory; |
+ |
+ GrContextOptions grContextOpts; |
+ ctxFactory.reset(new GrContextFactory(grContextOpts)); |
+ |
kilobench::BenchmarkStream benchStream; |
+ |
+ // Column header; the "samples" column is left-justified in a field FLAGS_samples wide. |
+ SkDebugf("loops\tmin\tmedian\tmean\tmax\tstddev\t%-*s\tconfig\tbench\n", |
+ FLAGS_samples, "samples"); |
+ |
while (Benchmark* b = benchStream.next()) { |
SkAutoTDelete<Benchmark> bench(b); |
- // TODO actual stuff |
+ kilobench::benchmark_inner_loop(bench.get(), ctxFactory.get()); |
} |
+ |
+ // Make sure we clean up the global GrContextFactory here, otherwise we might race with the |
+ // SkEventTracer destructor |
+ ctxFactory.reset(nullptr); |
return 0; |
} |