OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2016 Google Inc. | 2 * Copyright 2016 Google Inc. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
| 8 #include "GrCaps.h" |
| 9 #include "GrContextFactory.h" |
8 #include "Benchmark.h" | 10 #include "Benchmark.h" |
9 #include "SkCommandLineFlags.h" | 11 #include "SkCommandLineFlags.h" |
10 #include "SkOSFile.h" | 12 #include "SkOSFile.h" |
11 #include "SkStream.h" | 13 #include "SkStream.h" |
| 14 #include "SkSurface.h" |
| 15 #include "SkTime.h" |
| 16 #include "Stats.h" |
| 17 #include "Timer.h" |
12 #include "VisualSKPBench.h" | 18 #include "VisualSKPBench.h" |
| 19 #include "gl/GrGLDefines.h" |
13 | 20 |
14 /* | 21 /* |
15 * This is an experimental GPU only benchmarking program. The initial implement
ation will only | 22 * This is an experimental GPU only benchmarking program. The initial implement
ation will only |
16 * support SKPs. | 23 * support SKPs. |
17 */ | 24 */ |
18 | 25 |
19 // To get image decoders linked in we have to do the below magic | 26 // To get image decoders linked in we have to do the below magic |
20 #include "SkForceLinking.h" | 27 #include "SkForceLinking.h" |
21 #include "SkImageDecoder.h" | 28 #include "SkImageDecoder.h" |
22 __SK_FORCE_IMAGE_DECODER_LINKING; | 29 __SK_FORCE_IMAGE_DECODER_LINKING; |
23 | 30 |
| 31 |
// Value of --loops that asks the harness to choose the loop count itself.
static const int kAutoTuneLoops = 0;

// Default for --loops: one pass in debug builds, auto-tuning everywhere else.
#ifdef SK_DEBUG
static const int kDefaultLoops = 1;
#else
static const int kDefaultLoops = kAutoTuneLoops;
#endif
| 40 |
| 41 static SkString loops_help_txt() { |
| 42 SkString help; |
| 43 help.printf("Number of times to run each bench. Set this to %d to auto-" |
| 44 "tune for each bench. Timings are only reported when auto-tuning
.", |
| 45 kAutoTuneLoops); |
| 46 return help; |
| 47 } |
| 48 |
24 DEFINE_string(skps, "skps", "Directory to read skps from."); | 49 DEFINE_string(skps, "skps", "Directory to read skps from."); |
25 | |
26 DEFINE_string2(match, m, nullptr, | 50 DEFINE_string2(match, m, nullptr, |
27 "[~][^]substring[$] [...] of GM name to run.\n" | 51 "[~][^]substring[$] [...] of GM name to run.\n" |
28 "Multiple matches may be separated by spaces.\n" | 52 "Multiple matches may be separated by spaces.\n" |
29 "~ causes a matching bench to always be skipped\n" | 53 "~ causes a matching bench to always be skipped\n" |
30 "^ requires the start of the bench to match\n" | 54 "^ requires the start of the bench to match\n" |
31 "$ requires the end of the bench to match\n" | 55 "$ requires the end of the bench to match\n" |
32 "^ and $ requires an exact match\n" | 56 "^ and $ requires an exact match\n" |
33 "If a bench does not match any list entry,\n" | 57 "If a bench does not match any list entry,\n" |
34 "it is skipped unless some list entry starts with ~"); | 58 "it is skipped unless some list entry starts with ~"); |
| 59 DEFINE_int32(gpuFrameLag, 5, "If unknown, estimated maximum number of frames GPU
allows to lag."); |
| 60 DEFINE_int32(samples, 10, "Number of samples to measure for each bench."); |
| 61 DEFINE_int32(maxLoops, 1000000, "Never run a bench more times than this."); |
| 62 DEFINE_int32(loops, kDefaultLoops, loops_help_txt().c_str()); |
| 63 DEFINE_double(gpuMs, 5, "Target bench time in millseconds for GPU."); |
| 64 DEFINE_string2(writePath, w, "", "If set, write bitmaps here as .pngs."); |
35 | 65 |
36 namespace kilobench { | 66 namespace kilobench { |
37 class BenchmarkStream { | 67 class BenchmarkStream { |
38 public: | 68 public: |
39 BenchmarkStream() : fCurrentSKP(0) { | 69 BenchmarkStream() : fCurrentSKP(0) { |
40 for (int i = 0; i < FLAGS_skps.count(); i++) { | 70 for (int i = 0; i < FLAGS_skps.count(); i++) { |
41 if (SkStrEndsWith(FLAGS_skps[i], ".skp")) { | 71 if (SkStrEndsWith(FLAGS_skps[i], ".skp")) { |
42 fSKPs.push_back() = FLAGS_skps[i]; | 72 fSKPs.push_back() = FLAGS_skps[i]; |
43 } else { | 73 } else { |
44 SkOSFile::Iter it(FLAGS_skps[i], ".skp"); | 74 SkOSFile::Iter it(FLAGS_skps[i], ".skp"); |
(...skipping 51 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
96 return new VisualSKPBench(name.c_str(), pic.get()); | 126 return new VisualSKPBench(name.c_str(), pic.get()); |
97 } | 127 } |
98 | 128 |
99 return nullptr; | 129 return nullptr; |
100 } | 130 } |
101 | 131 |
102 SkTArray<SkString> fSKPs; | 132 SkTArray<SkString> fSKPs; |
103 int fCurrentSKP; | 133 int fCurrentSKP; |
104 }; | 134 }; |
105 | 135 |
| 136 struct GPUTarget { |
| 137 void setup() { |
| 138 this->gl->makeCurrent(); |
| 139 // Make sure we're done with whatever came before. |
| 140 SK_GL(*this->gl, Finish()); |
| 141 } |
| 142 |
| 143 SkCanvas* beginTiming(SkCanvas* canvas) { return canvas; } |
| 144 |
| 145 void endTiming() { |
| 146 if (this->gl) { |
| 147 SK_GL(*this->gl, Flush()); |
| 148 this->gl->swapBuffers(); |
| 149 } |
| 150 } |
| 151 void fence() { |
| 152 SK_GL(*this->gl, Finish()); |
| 153 } |
| 154 |
| 155 bool needsFrameTiming(int* maxFrameLag) const { |
| 156 if (!this->gl->getMaxGpuFrameLag(maxFrameLag)) { |
| 157 // Frame lag is unknown. |
| 158 *maxFrameLag = FLAGS_gpuFrameLag; |
| 159 } |
| 160 return true; |
| 161 } |
| 162 |
| 163 bool init(Benchmark* bench, GrContextFactory* factory, bool useDfText, |
| 164 GrContextFactory::GLContextType ctxType, |
| 165 GrContextFactory::GLContextOptions ctxOptions, int numSamples) { |
| 166 GrContext* context = factory->get(ctxType, ctxOptions); |
| 167 int maxRTSize = context->caps()->maxRenderTargetSize(); |
| 168 SkImageInfo info = SkImageInfo::Make(SkTMin(bench->getSize().fX, maxRTSi
ze), |
| 169 SkTMin(bench->getSize().fY, maxRTSi
ze), |
| 170 kN32_SkColorType, kPremul_SkAlphaT
ype); |
| 171 uint32_t flags = useDfText ? SkSurfaceProps::kUseDeviceIndependentFonts_
Flag : |
| 172 0; |
| 173 SkSurfaceProps props(flags, SkSurfaceProps::kLegacyFontHost_InitType); |
| 174 this->surface.reset(SkSurface::NewRenderTarget(context, |
| 175 SkSurface::kNo_Budgeted,
info, |
| 176 numSamples, &props)); |
| 177 this->gl = factory->getContextInfo(ctxType, ctxOptions).fGLContext; |
| 178 if (!this->surface.get()) { |
| 179 return false; |
| 180 } |
| 181 |
| 182 // Kilobench should only be used on platforms with fence sync support |
| 183 SkASSERT(this->gl->fenceSyncSupport()); |
| 184 return true; |
| 185 } |
| 186 |
| 187 SkCanvas* getCanvas() const { |
| 188 if (!surface.get()) { |
| 189 return nullptr; |
| 190 } |
| 191 return surface->getCanvas(); |
| 192 } |
| 193 |
| 194 bool capturePixels(SkBitmap* bmp) { |
| 195 SkCanvas* canvas = this->getCanvas(); |
| 196 if (!canvas) { |
| 197 return false; |
| 198 } |
| 199 bmp->setInfo(canvas->imageInfo()); |
| 200 if (!canvas->readPixels(bmp, 0, 0)) { |
| 201 SkDebugf("Can't read canvas pixels.\n"); |
| 202 return false; |
| 203 } |
| 204 return true; |
| 205 } |
| 206 |
| 207 private: |
| 208 //const Config config; |
| 209 SkGLContext* gl; |
| 210 SkAutoTDelete<SkSurface> surface; |
| 211 }; |
| 212 |
| 213 static bool write_canvas_png(GPUTarget* target, const SkString& filename) { |
| 214 |
| 215 if (filename.isEmpty()) { |
| 216 return false; |
| 217 } |
| 218 if (target->getCanvas() && |
| 219 kUnknown_SkColorType == target->getCanvas()->imageInfo().colorType()) { |
| 220 return false; |
| 221 } |
| 222 |
| 223 SkBitmap bmp; |
| 224 |
| 225 if (!target->capturePixels(&bmp)) { |
| 226 return false; |
| 227 } |
| 228 |
| 229 SkString dir = SkOSPath::Dirname(filename.c_str()); |
| 230 if (!sk_mkdir(dir.c_str())) { |
| 231 SkDebugf("Can't make dir %s.\n", dir.c_str()); |
| 232 return false; |
| 233 } |
| 234 SkFILEWStream stream(filename.c_str()); |
| 235 if (!stream.isValid()) { |
| 236 SkDebugf("Can't write %s.\n", filename.c_str()); |
| 237 return false; |
| 238 } |
| 239 if (!SkImageEncoder::EncodeStream(&stream, bmp, SkImageEncoder::kPNG_Type, 1
00)) { |
| 240 SkDebugf("Can't encode a PNG.\n"); |
| 241 return false; |
| 242 } |
| 243 return true; |
| 244 } |
| 245 |
| 246 static int detect_forever_loops(int loops) { |
| 247 // look for a magic run-forever value |
| 248 if (loops < 0) { |
| 249 loops = SK_MaxS32; |
| 250 } |
| 251 return loops; |
| 252 } |
| 253 |
| 254 static int clamp_loops(int loops) { |
| 255 if (loops < 1) { |
| 256 SkDebugf("ERROR: clamping loops from %d to 1. " |
| 257 "There's probably something wrong with the bench.\n", loops); |
| 258 return 1; |
| 259 } |
| 260 if (loops > FLAGS_maxLoops) { |
| 261 SkDebugf("WARNING: clamping loops from %d to FLAGS_maxLoops, %d.\n", loo
ps, FLAGS_maxLoops); |
| 262 return FLAGS_maxLoops; |
| 263 } |
| 264 return loops; |
| 265 } |
| 266 |
| 267 static double now_ms() { return SkTime::GetNSecs() * 1e-6; } |
| 268 static double time(int loops, Benchmark* bench, GPUTarget* target) { |
| 269 SkCanvas* canvas = target->getCanvas(); |
| 270 if (canvas) { |
| 271 canvas->clear(SK_ColorWHITE); |
| 272 } |
| 273 bench->preDraw(canvas); |
| 274 double start = now_ms(); |
| 275 canvas = target->beginTiming(canvas); |
| 276 bench->draw(loops, canvas); |
| 277 if (canvas) { |
| 278 canvas->flush(); |
| 279 } |
| 280 target->endTiming(); |
| 281 double elapsed = now_ms() - start; |
| 282 bench->postDraw(canvas); |
| 283 return elapsed; |
| 284 } |
| 285 |
| 286 static int setup_gpu_bench(GPUTarget* target, Benchmark* bench, int maxGpuFrameL
ag) { |
| 287 // First, figure out how many loops it'll take to get a frame up to FLAGS_gp
uMs. |
| 288 int loops = bench->calculateLoops(FLAGS_loops); |
| 289 if (kAutoTuneLoops == loops) { |
| 290 loops = 1; |
| 291 double elapsed = 0; |
| 292 do { |
| 293 if (1<<30 == loops) { |
| 294 // We're about to wrap. Something's wrong with the bench. |
| 295 loops = 0; |
| 296 break; |
| 297 } |
| 298 loops *= 2; |
| 299 // If the GPU lets frames lag at all, we need to make sure we're tim
ing |
| 300 // _this_ round, not still timing last round. |
| 301 for (int i = 0; i < maxGpuFrameLag; i++) { |
| 302 elapsed = time(loops, bench, target); |
| 303 } |
| 304 } while (elapsed < FLAGS_gpuMs); |
| 305 |
| 306 // We've overshot at least a little. Scale back linearly. |
| 307 loops = (int)ceil(loops * FLAGS_gpuMs / elapsed); |
| 308 loops = clamp_loops(loops); |
| 309 |
| 310 // Make sure we're not still timing our calibration. |
| 311 target->fence(); |
| 312 } else { |
| 313 loops = detect_forever_loops(loops); |
| 314 } |
| 315 |
| 316 // Pretty much the same deal as the calibration: do some warmup to make |
| 317 // sure we're timing steady-state pipelined frames. |
| 318 for (int i = 0; i < maxGpuFrameLag - 1; i++) { |
| 319 time(loops, bench, target); |
| 320 } |
| 321 |
| 322 return loops; |
| 323 } |
| 324 |
| 325 static SkString humanize(double ms) { |
| 326 return HumanizeMs(ms); |
| 327 } |
| 328 #define HUMANIZE(ms) humanize(ms).c_str() |
| 329 |
| 330 void benchmark_inner_loop(Benchmark* bench, GrContextFactory* ctxFactory) { |
| 331 SkTArray<double> samples; |
| 332 GPUTarget target; |
| 333 SkAssertResult(target.init(bench, ctxFactory, false, |
| 334 GrContextFactory::kNative_GLContextType, |
| 335 GrContextFactory::kNone_GLContextOptions, 0)); |
| 336 |
| 337 SkCanvas* canvas = target.getCanvas(); |
| 338 target.setup(); |
| 339 |
| 340 bench->perCanvasPreDraw(canvas); |
| 341 int maxFrameLag; |
| 342 target.needsFrameTiming(&maxFrameLag); |
| 343 int loops = setup_gpu_bench(&target, bench, maxFrameLag); |
| 344 |
| 345 samples.reset(FLAGS_samples); |
| 346 for (int s = 0; s < FLAGS_samples; s++) { |
| 347 samples[s] = time(loops, bench, &target) / loops; |
| 348 } |
| 349 |
| 350 bench->perCanvasPostDraw(canvas); |
| 351 |
| 352 Stats stats(samples); |
| 353 const double stddev_percent = 100 * sqrt(stats.var) / stats.mean; |
| 354 SkDebugf("%d\t%s\t%s\t%s\t%s\t%.0f%%\t%s\t%s\t%s\n" |
| 355 , loops |
| 356 , HUMANIZE(stats.min) |
| 357 , HUMANIZE(stats.median) |
| 358 , HUMANIZE(stats.mean) |
| 359 , HUMANIZE(stats.max) |
| 360 , stddev_percent |
| 361 , stats.plot.c_str() |
| 362 , "gpu" |
| 363 , bench->getUniqueName() |
| 364 ); |
| 365 |
| 366 if (!FLAGS_writePath.isEmpty() && FLAGS_writePath[0]) { |
| 367 SkString pngFilename = SkOSPath::Join(FLAGS_writePath[0], "gpu"); |
| 368 pngFilename = SkOSPath::Join(pngFilename.c_str(), bench->getUniqueName()
); |
| 369 pngFilename.append(".png"); |
| 370 write_canvas_png(&target, pngFilename); |
| 371 } |
| 372 } |
| 373 |
106 } // namespace kilobench | 374 } // namespace kilobench |
107 | 375 |
108 int kilobench_main() { | 376 int kilobench_main() { |
| 377 SkAutoTDelete<GrContextFactory> ctxFactory; |
| 378 |
| 379 GrContextOptions grContextOpts; |
| 380 ctxFactory.reset(new GrContextFactory(grContextOpts)); |
| 381 |
109 kilobench::BenchmarkStream benchStream; | 382 kilobench::BenchmarkStream benchStream; |
| 383 |
| 384 SkDebugf("loops\tmin\tmedian\tmean\tmax\tstddev\t%-*s\tconfig\tbench\n", |
| 385 FLAGS_samples, "samples"); |
| 386 |
110 while (Benchmark* b = benchStream.next()) { | 387 while (Benchmark* b = benchStream.next()) { |
111 SkAutoTDelete<Benchmark> bench(b); | 388 SkAutoTDelete<Benchmark> bench(b); |
112 // TODO actual stuff | 389 kilobench::benchmark_inner_loop(bench.get(), ctxFactory.get()); |
113 } | 390 } |
| 391 |
| 392 // Make sure we clean up the global GrContextFactory here, otherwise we migh
t race with the |
| 393 // SkEventTracer destructor |
| 394 ctxFactory.reset(nullptr); |
114 return 0; | 395 return 0; |
115 } | 396 } |
116 | 397 |
117 #if !defined SK_BUILD_FOR_IOS | 398 #if !defined SK_BUILD_FOR_IOS |
118 int main(int argc, char** argv) { | 399 int main(int argc, char** argv) { |
119 SkCommandLineFlags::Parse(argc, argv); | 400 SkCommandLineFlags::Parse(argc, argv); |
120 return kilobench_main(); | 401 return kilobench_main(); |
121 } | 402 } |
122 #endif | 403 #endif |
OLD | NEW |