| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright 2014 Google Inc. | 2 * Copyright 2014 Google Inc. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
| 5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
| 6 */ | 6 */ |
| 7 | 7 |
| 8 #include <ctype.h> | 8 #include <ctype.h> |
| 9 | 9 |
| 10 #include "Benchmark.h" | 10 #include "Benchmark.h" |
| (...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 43 DEFINE_string(config, "nonrendering 8888 gpu", "Configs to measure. Options: " | 43 DEFINE_string(config, "nonrendering 8888 gpu", "Configs to measure. Options: " |
| 44 "565 8888 gpu nonrendering debug nullgpu msaa4 msaa16 nvprmsaa4 nv
prmsaa16 angle"); | 44 "565 8888 gpu nonrendering debug nullgpu msaa4 msaa16 nvprmsaa4 nv
prmsaa16 angle"); |
| 45 DEFINE_double(gpuMs, 5, "Target bench time in millseconds for GPU."); | 45 DEFINE_double(gpuMs, 5, "Target bench time in millseconds for GPU."); |
| 46 DEFINE_int32(gpuFrameLag, 5, "Overestimate of maximum number of frames GPU allow
s to lag."); | 46 DEFINE_int32(gpuFrameLag, 5, "Overestimate of maximum number of frames GPU allow
s to lag."); |
| 47 | 47 |
| 48 DEFINE_bool(cpu, true, "Master switch for CPU-bound work."); | 48 DEFINE_bool(cpu, true, "Master switch for CPU-bound work."); |
| 49 DEFINE_bool(gpu, true, "Master switch for GPU-bound work."); | 49 DEFINE_bool(gpu, true, "Master switch for GPU-bound work."); |
| 50 | 50 |
| 51 DEFINE_string(outResultsFile, "", "If given, write results here as JSON."); | 51 DEFINE_string(outResultsFile, "", "If given, write results here as JSON."); |
| 52 DEFINE_bool(resetGpuContext, true, "Reset the GrContext before running each benc
h."); | 52 DEFINE_bool(resetGpuContext, true, "Reset the GrContext before running each benc
h."); |
| 53 DEFINE_int32(maxCalibrationAttempts, 3, |
| 54 "Try up to this many times to guess loops for a bench, or skip the
bench."); |
| 55 DEFINE_int32(maxLoops, 1000000, "Never run a bench more times than this."); |
| 53 | 56 |
| 54 | 57 |
| 55 static SkString humanize(double ms) { | 58 static SkString humanize(double ms) { |
| 56 if (ms > 1e+3) return SkStringPrintf("%.3gs", ms/1e3); | 59 if (ms > 1e+3) return SkStringPrintf("%.3gs", ms/1e3); |
| 57 if (ms < 1e-3) return SkStringPrintf("%.3gns", ms*1e6); | 60 if (ms < 1e-3) return SkStringPrintf("%.3gns", ms*1e6); |
| 58 #ifdef SK_BUILD_FOR_WIN | 61 #ifdef SK_BUILD_FOR_WIN |
| 59 if (ms < 1) return SkStringPrintf("%.3gus", ms*1e3); | 62 if (ms < 1) return SkStringPrintf("%.3gus", ms*1e3); |
| 60 #else | 63 #else |
| 61 if (ms < 1) return SkStringPrintf("%.3gµs", ms*1e3); | 64 if (ms < 1) return SkStringPrintf("%.3gµs", ms*1e3); |
| 62 #endif | 65 #endif |
| 63 return SkStringPrintf("%.3gms", ms); | 66 return SkStringPrintf("%.3gms", ms); |
| 64 } | 67 } |
| 68 #define HUMANIZE(ms) humanize(ms).c_str() |
| 65 | 69 |
| 66 static double time(int loops, Benchmark* bench, SkCanvas* canvas, SkGLContextHel
per* gl) { | 70 static double time(int loops, Benchmark* bench, SkCanvas* canvas, SkGLContextHel
per* gl) { |
| 67 WallTimer timer; | 71 WallTimer timer; |
| 68 timer.start(); | 72 timer.start(); |
| 69 if (bench) { | 73 if (bench) { |
| 70 bench->draw(loops, canvas); | 74 bench->draw(loops, canvas); |
| 71 } | 75 } |
| 72 if (canvas) { | 76 if (canvas) { |
| 73 canvas->flush(); | 77 canvas->flush(); |
| 74 } | 78 } |
| 75 #if SK_SUPPORT_GPU | 79 #if SK_SUPPORT_GPU |
| 76 if (gl) { | 80 if (gl) { |
| 77 SK_GL(*gl, Flush()); | 81 SK_GL(*gl, Flush()); |
| 78 gl->swapBuffers(); | 82 gl->swapBuffers(); |
| 79 } | 83 } |
| 80 #endif | 84 #endif |
| 81 timer.end(); | 85 timer.end(); |
| 82 return timer.fWall; | 86 return timer.fWall; |
| 83 } | 87 } |
| 84 | 88 |
| 85 static double estimate_timer_overhead() { | 89 static double estimate_timer_overhead() { |
| 86 double overhead = 0; | 90 double overhead = 0; |
| 87 for (int i = 0; i < FLAGS_overheadLoops; i++) { | 91 for (int i = 0; i < FLAGS_overheadLoops; i++) { |
| 88 overhead += time(1, NULL, NULL, NULL); | 92 overhead += time(1, NULL, NULL, NULL); |
| 89 } | 93 } |
| 90 return overhead / FLAGS_overheadLoops; | 94 return overhead / FLAGS_overheadLoops; |
| 91 } | 95 } |
| 92 | 96 |
| 97 static int clamp_loops(int loops) { |
| 98 if (loops < 1) { |
| 99 SkDebugf("ERROR: clamping loops from %d to 1.\n", loops); |
| 100 return 1; |
| 101 } |
| 102 if (loops > FLAGS_maxLoops) { |
| 103 SkDebugf("WARNING: clamping loops from %d to FLAGS_maxLoops, %d.\n", loo
ps, FLAGS_maxLoops); |
| 104 return FLAGS_maxLoops; |
| 105 } |
| 106 return loops; |
| 107 } |
| 108 |
| 93 static int cpu_bench(const double overhead, Benchmark* bench, SkCanvas* canvas,
double* samples) { | 109 static int cpu_bench(const double overhead, Benchmark* bench, SkCanvas* canvas,
double* samples) { |
| 94 // First figure out approximately how many loops of bench it takes to make o
verhead negligible. | 110 // First figure out approximately how many loops of bench it takes to make o
verhead negligible. |
| 95 double bench_plus_overhead; | 111 double bench_plus_overhead; |
| 112 int round = 0; |
| 96 do { | 113 do { |
| 97 bench_plus_overhead = time(1, bench, canvas, NULL); | 114 bench_plus_overhead = time(1, bench, canvas, NULL); |
| 115 if (++round == FLAGS_maxCalibrationAttempts) { |
| 116 SkDebugf("WARNING: Can't estimate loops for %s (%s vs. %s); skipping
.\n", |
| 117 bench->getName(), HUMANIZE(bench_plus_overhead), HUMANIZE(o
verhead)); |
| 118 return 0; |
| 119 } |
| 98 } while (bench_plus_overhead < overhead); | 120 } while (bench_plus_overhead < overhead); |
| 99 | 121 |
| 100 // Later we'll just start and stop the timer once but loop N times. | 122 // Later we'll just start and stop the timer once but loop N times. |
| 101 // We'll pick N to make timer overhead negligible: | 123 // We'll pick N to make timer overhead negligible: |
| 102 // | 124 // |
| 103 // overhead | 125 // overhead |
| 104 // ------------------------- < FLAGS_overheadGoal | 126 // ------------------------- < FLAGS_overheadGoal |
| 105 // overhead + N * Bench Time | 127 // overhead + N * Bench Time |
| 106 // | 128 // |
| 107 // where bench_plus_overhead ≈ overhead + Bench Time. | 129 // where bench_plus_overhead ≈ overhead + Bench Time. |
| 108 // | 130 // |
| 109 // Doing some math, we get: | 131 // Doing some math, we get: |
| 110 // | 132 // |
| 111 // (overhead / FLAGS_overheadGoal) - overhead | 133 // (overhead / FLAGS_overheadGoal) - overhead |
| 112 // ------------------------------------------ < N | 134 // ------------------------------------------ < N |
| 113 // bench_plus_overhead - overhead) | 135 // bench_plus_overhead - overhead) |
| 114 // | 136 // |
| 115 // Luckily, this also works well in practice. :) | 137 // Luckily, this also works well in practice. :) |
| 116 const double numer = overhead / FLAGS_overheadGoal - overhead; | 138 const double numer = overhead / FLAGS_overheadGoal - overhead; |
| 117 const double denom = bench_plus_overhead - overhead; | 139 const double denom = bench_plus_overhead - overhead; |
| 118 const int loops = FLAGS_runOnce ? 1 : (int)ceil(numer / denom); | 140 const int loops = clamp_loops(FLAGS_runOnce ? 1 : (int)ceil(numer / denom)); |
| 119 | 141 |
| 120 for (int i = 0; i < FLAGS_samples; i++) { | 142 for (int i = 0; i < FLAGS_samples; i++) { |
| 121 samples[i] = time(loops, bench, canvas, NULL) / loops; | 143 samples[i] = time(loops, bench, canvas, NULL) / loops; |
| 122 } | 144 } |
| 123 return loops; | 145 return loops; |
| 124 } | 146 } |
| 125 | 147 |
| 126 #if SK_SUPPORT_GPU | 148 #if SK_SUPPORT_GPU |
| 127 static int gpu_bench(SkGLContextHelper* gl, | 149 static int gpu_bench(SkGLContextHelper* gl, |
| 128 Benchmark* bench, | 150 Benchmark* bench, |
| (...skipping 15 matching lines...) Expand all Loading... |
| 144 elapsed = time(loops, bench, canvas, gl); | 166 elapsed = time(loops, bench, canvas, gl); |
| 145 } | 167 } |
| 146 } while (elapsed < FLAGS_gpuMs); | 168 } while (elapsed < FLAGS_gpuMs); |
| 147 | 169 |
| 148 // We've overshot at least a little. Scale back linearly. | 170 // We've overshot at least a little. Scale back linearly. |
| 149 loops = (int)ceil(loops * FLAGS_gpuMs / elapsed); | 171 loops = (int)ceil(loops * FLAGS_gpuMs / elapsed); |
| 150 | 172 |
| 151 // Might as well make sure we're not still timing our calibration. | 173 // Might as well make sure we're not still timing our calibration. |
| 152 SK_GL(*gl, Finish()); | 174 SK_GL(*gl, Finish()); |
| 153 } | 175 } |
| 176 loops = clamp_loops(loops); |
| 154 | 177 |
| 155 // Pretty much the same deal as the calibration: do some warmup to make | 178 // Pretty much the same deal as the calibration: do some warmup to make |
| 156 // sure we're timing steady-state pipelined frames. | 179 // sure we're timing steady-state pipelined frames. |
| 157 for (int i = 0; i < FLAGS_gpuFrameLag; i++) { | 180 for (int i = 0; i < FLAGS_gpuFrameLag; i++) { |
| 158 time(loops, bench, canvas, gl); | 181 time(loops, bench, canvas, gl); |
| 159 } | 182 } |
| 160 | 183 |
| 161 // Now, actually do the timing! | 184 // Now, actually do the timing! |
| 162 for (int i = 0; i < FLAGS_samples; i++) { | 185 for (int i = 0; i < FLAGS_samples; i++) { |
| 163 samples[i] = time(loops, bench, canvas, gl) / loops; | 186 samples[i] = time(loops, bench, canvas, gl) / loops; |
| (...skipping 110 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 274 MultiResultsWriter log; | 297 MultiResultsWriter log; |
| 275 SkAutoTDelete<JSONResultsWriter> json; | 298 SkAutoTDelete<JSONResultsWriter> json; |
| 276 if (!FLAGS_outResultsFile.isEmpty()) { | 299 if (!FLAGS_outResultsFile.isEmpty()) { |
| 277 json.reset(SkNEW(JSONResultsWriter(FLAGS_outResultsFile[0]))); | 300 json.reset(SkNEW(JSONResultsWriter(FLAGS_outResultsFile[0]))); |
| 278 log.add(json.get()); | 301 log.add(json.get()); |
| 279 } | 302 } |
| 280 CallEnd<MultiResultsWriter> ender(log); | 303 CallEnd<MultiResultsWriter> ender(log); |
| 281 fill_static_options(&log); | 304 fill_static_options(&log); |
| 282 | 305 |
| 283 const double overhead = estimate_timer_overhead(); | 306 const double overhead = estimate_timer_overhead(); |
| 284 SkDebugf("Timer overhead: %s\n", humanize(overhead).c_str()); | 307 SkDebugf("Timer overhead: %s\n", HUMANIZE(overhead)); |
| 285 | 308 |
| 286 SkAutoTMalloc<double> samples(FLAGS_samples); | 309 SkAutoTMalloc<double> samples(FLAGS_samples); |
| 287 | 310 |
| 288 if (FLAGS_runOnce) { | 311 if (FLAGS_runOnce) { |
| 289 SkDebugf("--runOnce is true; times would only be misleading so we won't
print them.\n"); | 312 SkDebugf("--runOnce is true; times would only be misleading so we won't
print them.\n"); |
| 290 } else if (FLAGS_verbose) { | 313 } else if (FLAGS_verbose) { |
| 291 // No header. | 314 // No header. |
| 292 } else if (FLAGS_quiet) { | 315 } else if (FLAGS_quiet) { |
| 293 SkDebugf("median\tbench\tconfig\n"); | 316 SkDebugf("median\tbench\tconfig\n"); |
| 294 } else { | 317 } else { |
| (...skipping 18 matching lines...) Expand all Loading... |
| 313 const int loops = | 336 const int loops = |
| 314 #if SK_SUPPORT_GPU | 337 #if SK_SUPPORT_GPU |
| 315 Benchmark::kGPU_Backend == targets[j]->backend | 338 Benchmark::kGPU_Backend == targets[j]->backend |
| 316 ? gpu_bench(targets[j]->gl, bench.get(), canvas, samples.get()) | 339 ? gpu_bench(targets[j]->gl, bench.get(), canvas, samples.get()) |
| 317 : | 340 : |
| 318 #endif | 341 #endif |
| 319 cpu_bench( overhead, bench.get(), canvas, samples.get()); | 342 cpu_bench( overhead, bench.get(), canvas, samples.get()); |
| 320 | 343 |
| 321 if (loops == 0) { | 344 if (loops == 0) { |
| 322 SkDebugf("Unable to time %s\t%s (overhead %s)\n", | 345 SkDebugf("Unable to time %s\t%s (overhead %s)\n", |
| 323 bench->getName(), config, humanize(overhead).c_str()); | 346 bench->getName(), config, HUMANIZE(overhead)); |
| 324 continue; | 347 continue; |
| 325 } | 348 } |
| 326 | 349 |
| 327 Stats stats(samples.get(), FLAGS_samples); | 350 Stats stats(samples.get(), FLAGS_samples); |
| 328 log.config(config); | 351 log.config(config); |
| 329 log.timer("min_ms", stats.min); | 352 log.timer("min_ms", stats.min); |
| 330 log.timer("median_ms", stats.median); | 353 log.timer("median_ms", stats.median); |
| 331 log.timer("mean_ms", stats.mean); | 354 log.timer("mean_ms", stats.mean); |
| 332 log.timer("max_ms", stats.max); | 355 log.timer("max_ms", stats.max); |
| 333 log.timer("stddev_ms", sqrt(stats.var)); | 356 log.timer("stddev_ms", sqrt(stats.var)); |
| 334 | 357 |
| 335 if (FLAGS_runOnce) { | 358 if (FLAGS_runOnce) { |
| 336 if (targets.count() == 1) { | 359 if (targets.count() == 1) { |
| 337 config = ""; // Only print the config if we run the same ben
ch on more than one. | 360 config = ""; // Only print the config if we run the same ben
ch on more than one. |
| 338 } | 361 } |
| 339 SkDebugf("%s\t%s\n", bench->getName(), config); | 362 SkDebugf("%s\t%s\n", bench->getName(), config); |
| 340 } else if (FLAGS_verbose) { | 363 } else if (FLAGS_verbose) { |
| 341 for (int i = 0; i < FLAGS_samples; i++) { | 364 for (int i = 0; i < FLAGS_samples; i++) { |
| 342 SkDebugf("%s ", humanize(samples[i]).c_str()); | 365 SkDebugf("%s ", HUMANIZE(samples[i])); |
| 343 } | 366 } |
| 344 SkDebugf("%s\n", bench->getName()); | 367 SkDebugf("%s\n", bench->getName()); |
| 345 } else if (FLAGS_quiet) { | 368 } else if (FLAGS_quiet) { |
| 346 if (targets.count() == 1) { | 369 if (targets.count() == 1) { |
| 347 config = ""; // Only print the config if we run the same ben
ch on more than one. | 370 config = ""; // Only print the config if we run the same ben
ch on more than one. |
| 348 } | 371 } |
| 349 SkDebugf("%s\t%s\t%s\n", humanize(stats.median).c_str(), bench->
getName(), config); | 372 SkDebugf("%s\t%s\t%s\n", HUMANIZE(stats.median), bench->getName(
), config); |
| 350 } else { | 373 } else { |
| 351 const double stddev_percent = 100 * sqrt(stats.var) / stats.mean
; | 374 const double stddev_percent = 100 * sqrt(stats.var) / stats.mean
; |
| 352 SkDebugf("%d\t%s\t%s\t%s\t%s\t%.0f%%\t%s\t%s\t%s\n" | 375 SkDebugf("%d\t%s\t%s\t%s\t%s\t%.0f%%\t%s\t%s\t%s\n" |
| 353 , loops | 376 , loops |
| 354 , humanize(stats.min).c_str() | 377 , HUMANIZE(stats.min) |
| 355 , humanize(stats.median).c_str() | 378 , HUMANIZE(stats.median) |
| 356 , humanize(stats.mean).c_str() | 379 , HUMANIZE(stats.mean) |
| 357 , humanize(stats.max).c_str() | 380 , HUMANIZE(stats.max) |
| 358 , stddev_percent | 381 , stddev_percent |
| 359 , stats.plot.c_str() | 382 , stats.plot.c_str() |
| 360 , config | 383 , config |
| 361 , bench->getName() | 384 , bench->getName() |
| 362 ); | 385 ); |
| 363 } | 386 } |
| 364 } | 387 } |
| 365 targets.deleteAll(); | 388 targets.deleteAll(); |
| 366 | 389 |
| 367 #if SK_SUPPORT_GPU | 390 #if SK_SUPPORT_GPU |
| 368 if (FLAGS_resetGpuContext) { | 391 if (FLAGS_resetGpuContext) { |
| 369 gGrFactory.destroyContexts(); | 392 gGrFactory.destroyContexts(); |
| 370 } | 393 } |
| 371 #endif | 394 #endif |
| 372 } | 395 } |
| 373 | 396 |
| 374 return 0; | 397 return 0; |
| 375 } | 398 } |
| 376 | 399 |
| 377 #if !defined SK_BUILD_FOR_IOS | 400 #if !defined SK_BUILD_FOR_IOS |
| 378 int main(int argc, char * const argv[]) { | 401 int main(int argc, char * const argv[]) { |
| 379 return tool_main(argc, (char**) argv); | 402 return tool_main(argc, (char**) argv); |
| 380 } | 403 } |
| 381 #endif | 404 #endif |
| OLD | NEW |