Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 /* | 1 /* |
| 2 * Copyright 2014 Google Inc. | 2 * Copyright 2014 Google Inc. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
| 5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
| 6 */ | 6 */ |
| 7 | 7 |
| 8 // TODO: clean up before submitting | |
bsalomon 2014/07/01 14:06:08: I submitted a change that changes the below refere
mtklein_C 2014/07/01 14:08:44: Perfect.
| 9 #ifndef GR_GL_NO_ERROR | |
| 10 #define GR_GL_NO_ERROR 0 | |
| 11 #endif | |
| 12 | |
| 13 #include <ctype.h> | |
| 14 | |
| 8 #include "Benchmark.h" | 15 #include "Benchmark.h" |
| 9 #include "CrashHandler.h" | 16 #include "CrashHandler.h" |
| 10 #include "Stats.h" | 17 #include "Stats.h" |
| 11 #include "Timer.h" | 18 #include "Timer.h" |
| 12 | 19 |
| 13 #include "SkCanvas.h" | 20 #include "SkCanvas.h" |
| 14 #include "SkCommandLineFlags.h" | 21 #include "SkCommandLineFlags.h" |
| 15 #include "SkForceLinking.h" | 22 #include "SkForceLinking.h" |
| 16 #include "SkGraphics.h" | 23 #include "SkGraphics.h" |
| 17 #include "SkString.h" | 24 #include "SkString.h" |
| 18 #include "SkSurface.h" | 25 #include "SkSurface.h" |
| 19 | 26 |
| 27 #if SK_SUPPORT_GPU | |
| 28 #include "GrContextFactory.h" | |
| 29 GrContextFactory gGrFactory; | |
| 30 #endif | |
| 31 | |
| 20 __SK_FORCE_IMAGE_DECODER_LINKING; | 32 __SK_FORCE_IMAGE_DECODER_LINKING; |
| 21 | 33 |
| 22 DEFINE_int32(samples, 10, "Number of samples to measure for each bench."); | 34 DEFINE_int32(samples, 10, "Number of samples to measure for each bench."); |
| 23 DEFINE_int32(overheadLoops, 100000, "Loops to estimate timer overhead."); | 35 DEFINE_int32(overheadLoops, 100000, "Loops to estimate timer overhead."); |
| 24 DEFINE_double(overheadGoal, 0.0001, | 36 DEFINE_double(overheadGoal, 0.0001, |
| 25 "Loop until timer overhead is at most this fraction of our measurm ents."); | 37 "Loop until timer overhead is at most this fraction of our measurm ents."); |
| 26 DEFINE_string(match, "", "The usual filters on file names of benchmarks to measure."); | 38 DEFINE_string(match, "", "The usual filters on file names of benchmarks to measure."); |
| 27 DEFINE_bool2(quiet, q, false, "Print only bench name and minimum sample."); | 39 DEFINE_bool2(quiet, q, false, "Print only bench name and minimum sample."); |
| 28 DEFINE_bool2(verbose, v, false, "Print all samples."); | 40 DEFINE_bool2(verbose, v, false, "Print all samples."); |
| 29 DEFINE_string(config, "8888 nonrendering", | 41 DEFINE_string(config, "nonrendering 8888 gpu", "Configs to measure. Options: " |
| 30 "Configs to measure. Options: 565 8888 nonrendering"); | 42 "565 8888 gpu nonrendering debug nullgpu msaa4 msaa16 nvprmsaa4 nv prmsaa16 angle"); |
| 43 DEFINE_double(gpuMs, 5, "Target bench time in milliseconds for GPU."); | |
| 44 DEFINE_int32(gpuFrameLag, 5, "Overestimate of maximum number of frames GPU allows to lag."); | |
| 31 | 45 |
| 32 // TODO: GPU benches | |
| 33 | 46 |
| 34 static SkString humanize(double ms) { | 47 static SkString humanize(double ms) { |
| 35 if (ms > 1e+3) return SkStringPrintf("%.3gs", ms/1e3); | 48 if (ms > 1e+3) return SkStringPrintf("%.3gs", ms/1e3); |
| 36 if (ms < 1e-3) return SkStringPrintf("%.3gns", ms*1e6); | 49 if (ms < 1e-3) return SkStringPrintf("%.3gns", ms*1e6); |
| 37 if (ms < 1) return SkStringPrintf("%.3gµs", ms*1e3); | 50 if (ms < 1) return SkStringPrintf("%.3gµs", ms*1e3); |
| 38 return SkStringPrintf("%.3gms", ms); | 51 return SkStringPrintf("%.3gms", ms); |
| 39 } | 52 } |
| 40 | 53 |
| 54 static double time(int loops, Benchmark* bench, SkCanvas* canvas, SkGLContextHelper* gl) { | |
| 55 WallTimer timer; | |
| 56 timer.start(); | |
| 57 if (bench) { | |
| 58 bench->draw(loops, canvas); | |
| 59 } | |
| 60 if (canvas) { | |
| 61 canvas->flush(); | |
| 62 } | |
| 63 #if SK_SUPPORT_GPU | |
| 64 if (gl) { | |
| 65 SK_GL(*gl, Flush()); | |
| 66 gl->swapBuffers(); | |
| 67 } | |
| 68 #endif | |
| 69 timer.end(); | |
| 70 return timer.fWall; | |
| 71 } | |
| 72 | |
| 41 static double estimate_timer_overhead() { | 73 static double estimate_timer_overhead() { |
| 42 double overhead = 0; | 74 double overhead = 0; |
| 43 WallTimer timer; | |
| 44 for (int i = 0; i < FLAGS_overheadLoops; i++) { | 75 for (int i = 0; i < FLAGS_overheadLoops; i++) { |
| 45 timer.start(); | 76 overhead += time(1, NULL, NULL, NULL); |
| 46 timer.end(); | |
| 47 overhead += timer.fWall; | |
| 48 } | 77 } |
| 49 return overhead / FLAGS_overheadLoops; | 78 return overhead / FLAGS_overheadLoops; |
| 50 } | 79 } |
| 51 | 80 |
| 52 static void safe_flush(SkCanvas* canvas) { | 81 static int cpu_bench(const double overhead, Benchmark* bench, SkCanvas* canvas, double* samples) { |
| 53 if (canvas) { | 82 // First figure out approximately how many loops of bench it takes to make overhead negligible. |
| 54 canvas->flush(); | 83 double bench_plus_overhead; |
| 55 } | 84 do { |
| 56 } | 85 bench_plus_overhead = time(1, bench, canvas, NULL); |
| 86 } while (bench_plus_overhead < overhead); // Shouldn't normally happen. | |
| 57 | 87 |
| 58 static int guess_loops(double overhead, Benchmark* bench, SkCanvas* canvas) { | 88 // Later we'll just start and stop the timer once but loop N times. |
| 59 WallTimer timer; | |
| 60 | |
| 61 // Measure timer overhead and bench time together. | |
| 62 do { | |
| 63 timer.start(); | |
| 64 bench->draw(1, canvas); | |
| 65 safe_flush(canvas); | |
| 66 timer.end(); | |
| 67 } while (timer.fWall < overhead); // Shouldn't normally happen. | |
| 68 | |
| 69 // Later we'll just start and stop the timer once, but loop N times. | |
| 70 // We'll pick N to make timer overhead negligible: | 89 // We'll pick N to make timer overhead negligible: |
| 71 // | 90 // |
| 72 // Timer Overhead | 91 // overhead |
| 73 // ------------------------------- < FLAGS_overheadGoal | 92 // ------------------------- < FLAGS_overheadGoal |
| 74 // Timer Overhead + N * Bench Time | 93 // overhead + N * Bench Time |
| 75 // | 94 // |
| 76 // where timer.fWall ≈ Timer Overhead + Bench Time. | 95 // where bench_plus_overhead ≈ overhead + Bench Time. |
| 77 // | 96 // |
| 78 // Doing some math, we get: | 97 // Doing some math, we get: |
| 79 // | 98 // |
| 80 // (Timer Overhead / FLAGS_overheadGoal) - Timer Overhead | 99 // (overhead / FLAGS_overheadGoal) - overhead |
| 81 // ----------------------------------------------------- < N | 100 // ------------------------------------------ < N |
| 82 (timer.fWall - Timer Overhead) | 101 (bench_plus_overhead - overhead) |
| 83 // | 102 // |
| 84 // Luckily, this also works well in practice. :) | 103 // Luckily, this also works well in practice. :) |
| 85 const double numer = overhead / FLAGS_overheadGoal - overhead; | 104 const double numer = overhead / FLAGS_overheadGoal - overhead; |
| 86 const double denom = timer.fWall - overhead; | 105 const double denom = bench_plus_overhead - overhead; |
| 87 return (int)ceil(numer / denom); | 106 const int loops = ceil(numer / denom); |
| 107 | |
| 108 for (int i = 0; i < FLAGS_samples; i++) { | |
| 109 samples[i] = time(loops, bench, canvas, NULL) / loops; | |
| 110 } | |
| 111 return loops; | |
| 88 } | 112 } |
| 89 | 113 |
| 90 static bool push_config_if_enabled(const char* config, SkTDArray<const char*>* configs) { | 114 #if SK_SUPPORT_GPU |
| 91 if (FLAGS_config.contains(config)) { | 115 static int gpu_bench(SkGLContextHelper* gl, |
| 92 configs->push(config); | 116 Benchmark* bench, |
| 93 return true; | 117 SkCanvas* canvas, |
| 118 double* samples) { | |
| 119 // Make sure we're done with whatever came before. | |
| 120 SK_GL(*gl, Finish); | |
| 121 | |
| 122 // First, figure out how many loops it'll take to get a frame up to FLAGS_gpuMs. | |
| 123 int loops = 1; | |
| 124 double elapsed = 0; | |
| 125 do { | |
| 126 loops *= 2; | |
| 127 // If the GPU lets frames lag at all, we need to make sure we're timing | |
| 128 // _this_ round, not still timing last round. We force this by looping | |
| 129 // more times than any reasonable GPU will allow frames to lag. | |
| 130 for (int i = 0; i < FLAGS_gpuFrameLag; i++) { | |
| 131 elapsed = time(loops, bench, canvas, gl); | |
| 132 } | |
| 133 } while (elapsed < FLAGS_gpuMs); | |
| 134 | |
| 135 // We've overshot at least a little. Scale back linearly. | |
| 136 loops = (int)ceil(loops * FLAGS_gpuMs / elapsed); | |
| 137 | |
| 138 // Might as well make sure we're not still timing our calibration. | |
| 139 SK_GL(*gl, Finish); | |
| 140 | |
| 141 // Pretty much the same deal as the calibration: do some warmup to make | |
| 142 // sure we're timing steady-state pipelined frames. | |
| 143 for (int i = 0; i < FLAGS_gpuFrameLag; i++) { | |
| 144 time(loops, bench, canvas, gl); | |
| 94 } | 145 } |
| 95 return false; | 146 |
| 147 // Now, actually do the timing! | |
| 148 for (int i = 0; i < FLAGS_samples; i++) { | |
| 149 samples[i] = time(loops, bench, canvas, gl) / loops; | |
| 150 } | |
| 151 return loops; | |
| 152 } | |
| 153 #endif | |
| 154 | |
| 155 static SkString to_lower(const char* str) { | |
| 156 SkString lower(str); | |
| 157 for (size_t i = 0; i < lower.size(); i++) { | |
| 158 lower[i] = tolower(lower[i]); | |
| 159 } | |
| 160 return lower; | |
| 96 } | 161 } |
| 97 | 162 |
| 98 static void create_surfaces(Benchmark* bench, | 163 struct Target { |
| 99 SkTDArray<SkSurface*>* surfaces, | 164 const char* config; |
| 100 SkTDArray<const char*>* configs) { | 165 Benchmark::Backend backend; |
| 166 SkAutoTDelete<SkSurface> surface; | |
| 167 #if SK_SUPPORT_GPU | |
| 168 SkGLContextHelper* gl; | |
| 169 #endif | |
| 170 }; | |
| 101 | 171 |
| 102 if (bench->isSuitableFor(Benchmark::kNonRendering_Backend) | 172 // If bench is enabled for backend/config, returns a Target* for them, otherwise NULL. |
| 103 && push_config_if_enabled("nonrendering", configs)) { | 173 static Target* is_enabled(Benchmark* bench, Benchmark::Backend backend, const ch ar* config) { |
| 104 surfaces->push(NULL); | 174 if (!bench->isSuitableFor(backend)) { |
| 175 return NULL; | |
| 105 } | 176 } |
| 106 | 177 |
| 107 if (bench->isSuitableFor(Benchmark::kRaster_Backend)) { | 178 for (int i = 0; i < FLAGS_config.count(); i++) { |
| 108 const int w = bench->getSize().fX, | 179 if (to_lower(FLAGS_config[i]).equals(config)) { |
| 109 h = bench->getSize().fY; | 180 Target* target = new Target; |
| 110 | 181 target->config = config; |
| 111 if (push_config_if_enabled("8888", configs)) { | 182 target->backend = backend; |
| 112 const SkImageInfo info = { w, h, kN32_SkColorType, kPremul_SkAlphaType }; | 182 target->backend = backend; |
| 113 surfaces->push(SkSurface::NewRaster(info)); | |
| 114 } | |
| 115 | |
| 116 if (push_config_if_enabled("565", configs)) { | |
| 117 const SkImageInfo info = { w, h, kRGB_565_SkColorType, kOpaque_SkAlphaType }; | |
| 118 surfaces->push(SkSurface::NewRaster(info)); | |
| 119 } | 184 } |
| 120 } | 185 } |
| 186 return NULL; | |
| 187 } | |
| 188 | |
| 189 // Append all targets that are suitable for bench. | |
| 190 static void create_targets(Benchmark* bench, SkTDArray<Target*>* targets) { | |
| 191 const int w = bench->getSize().fX, | |
| 192 h = bench->getSize().fY; | |
| 193 const SkImageInfo _8888 = { w, h, kN32_SkColorType, kPremul_SkAlphaType }, | |
| 194 _565 = { w, h, kRGB_565_SkColorType, kOpaque_SkAlphaType }; | |
| 195 | |
| 196 #define CPU_TARGET(config, backend, code) \ | |
| 197 if (Target* t = is_enabled(bench, Benchmark::backend, #config)) { \ | |
| 198 t->surface.reset(code); \ | |
| 199 targets->push(t); \ | |
| 200 } | |
| 201 CPU_TARGET(nonrendering, kNonRendering_Backend, NULL) | |
| 202 CPU_TARGET(8888, kRaster_Backend, SkSurface::NewRaster(_8888)) | |
| 203 CPU_TARGET(565, kRaster_Backend, SkSurface::NewRaster(_565)) | |
| 204 | |
| 205 #if SK_SUPPORT_GPU | |
| 206 #define GPU_TARGET(config, ctxType, info, samples) \ | |
| 207 if (Target* t = is_enabled(bench, Benchmark::kGPU_Backend, #config)) { \ | |
| 208 t->surface.reset(SkSurface::NewRenderTarget(gGrFactory.get(ctxType), info, samples)); \ | |
| 209 t->gl = gGrFactory.getGLContext(ctxType); \ | |
| 210 targets->push(t); \ | |
| 211 } | |
| 212 GPU_TARGET(gpu, GrContextFactory::kNative_GLContextType, _8888, 0) | |
| 213 GPU_TARGET(msaa4, GrContextFactory::kNative_GLContextType, _8888, 4) | |
| 214 GPU_TARGET(msaa16, GrContextFactory::kNative_GLContextType, _8888, 16) | |
| 215 GPU_TARGET(nvprmsaa4, GrContextFactory::kNVPR_GLContextType, _8888, 4) | |
| 216 GPU_TARGET(nvprmsaa16, GrContextFactory::kNVPR_GLContextType, _8888, 16) | |
| 217 GPU_TARGET(debug, GrContextFactory::kDebug_GLContextType, _8888, 0) | |
| 218 GPU_TARGET(nullgpu, GrContextFactory::kNull_GLContextType, _8888, 0) | |
| 219 #if SK_ANGLE | |
| 220 GPU_TARGET(angle, GrContextFactory::kANGLE_GLContextType, _8888, 0) | |
| 221 #endif | |
| 222 #endif | |
| 121 } | 223 } |
| 122 | 224 |
| 123 int tool_main(int argc, char** argv); | 225 int tool_main(int argc, char** argv); |
| 124 int tool_main(int argc, char** argv) { | 226 int tool_main(int argc, char** argv) { |
| 125 SetupCrashHandler(); | 227 SetupCrashHandler(); |
| 126 SkAutoGraphics ag; | 228 SkAutoGraphics ag; |
| 127 SkCommandLineFlags::Parse(argc, argv); | 229 SkCommandLineFlags::Parse(argc, argv); |
| 128 | 230 |
| 129 const double overhead = estimate_timer_overhead(); | 231 const double overhead = estimate_timer_overhead(); |
| 232 SkAutoTMalloc<double> samples(FLAGS_samples); | |
| 233 | |
| 234 // TODO: display add median, use it in --quiet mode | |
| 130 | 235 |
| 131 if (FLAGS_verbose) { | 236 if (FLAGS_verbose) { |
| 132 // No header. | 237 // No header. |
| 133 } else if (FLAGS_quiet) { | 238 } else if (FLAGS_quiet) { |
| 134 SkDebugf("min\tbench\tconfig\n"); | 239 SkDebugf("min\tbench\tconfig\n"); |
| 135 } else { | 240 } else { |
| 136 SkDebugf("loops\tmin\tmean\tmax\tstddev\tbench\tconfig\n"); | 241 SkDebugf("loops\tmin\tmean\tmax\tstddev\tconfig\tbench\n"); |
| 137 } | 242 } |
| 138 | 243 |
| 139 for (const BenchRegistry* r = BenchRegistry::Head(); r != NULL; r = r->next()) { | 244 for (const BenchRegistry* r = BenchRegistry::Head(); r != NULL; r = r->next()) { |
| 140 SkAutoTDelete<Benchmark> bench(r->factory()(NULL)); | 245 SkAutoTDelete<Benchmark> bench(r->factory()(NULL)); |
| 141 if (SkCommandLineFlags::ShouldSkip(FLAGS_match, bench->getName())) { | 246 if (SkCommandLineFlags::ShouldSkip(FLAGS_match, bench->getName())) { |
| 142 continue; | 247 continue; |
| 143 } | 248 } |
| 144 | 249 |
| 145 SkTDArray<SkSurface*> surfaces; | 250 SkTDArray<Target*> targets; |
| 146 SkTDArray<const char*> configs; | 251 create_targets(bench.get(), &targets); |
| 147 create_surfaces(bench.get(), &surfaces, &configs); | |
| 148 | 252 |
| 149 bench->preDraw(); | 253 bench->preDraw(); |
| 150 for (int j = 0; j < surfaces.count(); j++) { | 254 for (int j = 0; j < targets.count(); j++) { |
| 151 SkCanvas* canvas = surfaces[j] ? surfaces[j]->getCanvas() : NULL; | 255 SkCanvas* canvas = targets[j]->surface.get() ? targets[j]->surface->getCanvas() : NULL; |
| 152 const char* config = configs[j]; | |
| 153 | 256 |
| 154 bench->draw(1, canvas); // Just paranoid warmup. | 257 const int loops = |
| 155 safe_flush(canvas); | 258 #if SK_SUPPORT_GPU |
| 156 const int loops = guess_loops(overhead, bench.get(), canvas); | 259 Benchmark::kGPU_Backend == targets[j]->backend |
| 157 | 260 ? gpu_bench(targets[j]->gl, bench.get(), canvas, samples.get()) |
| 158 SkAutoTMalloc<double> samples(FLAGS_samples); | 261 : |
| 159 WallTimer timer; | 262 #endif |
| 160 for (int i = 0; i < FLAGS_samples; i++) { | 263 cpu_bench( overhead, bench.get(), canvas, samples.get()); |
| 161 timer.start(); | |
| 162 bench->draw(loops, canvas); | |
| 163 safe_flush(canvas); | |
| 164 timer.end(); | |
| 165 samples[i] = timer.fWall / loops; | |
| 166 } | |
| 167 | 264 |
| 168 Stats stats(samples.get(), FLAGS_samples); | 265 Stats stats(samples.get(), FLAGS_samples); |
| 169 | 266 |
| 267 const char* config = targets[j]->config; | |
| 170 if (FLAGS_verbose) { | 268 if (FLAGS_verbose) { |
| 171 for (int i = 0; i < FLAGS_samples; i++) { | 269 for (int i = 0; i < FLAGS_samples; i++) { |
| 172 SkDebugf("%s ", humanize(samples[i]).c_str()); | 270 SkDebugf("%s ", humanize(samples[i]).c_str()); |
| 173 } | 271 } |
| 174 SkDebugf("%s\n", bench->getName()); | 272 SkDebugf("%s\n", bench->getName()); |
| 175 } else if (FLAGS_quiet) { | 273 } else if (FLAGS_quiet) { |
| 176 if (configs.count() == 1) { | 274 if (targets.count() == 1) { |
| 177 config = ""; // Only print the config if we run the same ben ch on more than one. | 275 config = ""; // Only print the config if we run the same ben ch on more than one. |
| 178 } | 276 } |
| 179 SkDebugf("%s\t%s\t%s\n", humanize(stats.min).c_str(), bench->getName(), config); | 277 SkDebugf("%s\t%s\t%s\n", humanize(stats.min).c_str(), bench->getName(), config); |
| 180 } else { | 278 } else { |
| 181 const double stddev_percent = 100 * sqrt(stats.var) / stats.mean; | 279 const double stddev_percent = 100 * sqrt(stats.var) / stats.mean; |
| 182 SkDebugf("%d\t%s\t%s\t%s\t%.0f%%\t%s\t%s\n" | 280 SkDebugf("%d\t%s\t%s\t%s\t%.0f%%\t%s\t%s\n" |
| 183 , loops | 281 , loops |
| 184 , humanize(stats.min).c_str() | 282 , humanize(stats.min).c_str() |
| 185 , humanize(stats.mean).c_str() | 283 , humanize(stats.mean).c_str() |
| 186 , humanize(stats.max).c_str() | 284 , humanize(stats.max).c_str() |
| 187 , stddev_percent | 285 , stddev_percent |
| 286 , config | |
| 188 , bench->getName() | 287 , bench->getName() |
| 189 , config | |
| 190 ); | 288 ); |
| 191 } | 289 } |
| 192 } | 290 } |
| 193 surfaces.deleteAll(); | 291 targets.deleteAll(); |
| 194 } | 292 } |
| 195 | 293 |
| 196 return 0; | 294 return 0; |
| 197 } | 295 } |
| 198 | 296 |
| 199 #if !defined SK_BUILD_FOR_IOS | 297 #if !defined SK_BUILD_FOR_IOS |
| 200 int main(int argc, char * const argv[]) { | 298 int main(int argc, char * const argv[]) { |
| 201 return tool_main(argc, (char**) argv); | 299 return tool_main(argc, (char**) argv); |
| 202 } | 300 } |
| 203 #endif | 301 #endif |
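
The loop-count math that cpu_bench() derives in its comment block (new lines 88-106) is easy to sanity-check in isolation. Below is a minimal standalone sketch, not part of the CL: the helper name pick_loops and the timing numbers are made up for illustration, but the formula is the one in the comment.

```cpp
#include <cmath>
#include <cstdio>

// Hypothetical standalone version of the loop-count formula from cpu_bench():
// choose the smallest N so timer overhead stays under `goal` of each sample.
static int pick_loops(double overhead, double bench_plus_overhead, double goal) {
    // Solve  overhead / (overhead + N * benchTime) < goal  for N,
    // with benchTime ~= bench_plus_overhead - overhead.
    const double numer = overhead / goal - overhead;
    const double denom = bench_plus_overhead - overhead;
    return (int)ceil(numer / denom);
}

int main() {
    // Illustrative numbers only (in ms): 20ns of timer overhead, 120ns of bench
    // time per draw, and the default --overheadGoal of 0.0001.
    const double overhead            = 20e-6;
    const double bench_plus_overhead = 140e-6;
    printf("loops = %d\n", pick_loops(overhead, bench_plus_overhead, 1e-4));  // loops = 1667
    return 0;
}
```

With those made-up numbers the timer overhead per sample works out to roughly 20ns / (20ns + 1667 * 120ns) ≈ 0.01% of the measurement, which is exactly the --overheadGoal target.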
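gpu_bench() calibrates differently: it doubles the loop count until a timed frame reaches the --gpuMs target, then scales back linearly. A minimal sketch of just that calibration step, with a fake frame timer standing in for the real time() call and the FLAGS_gpuFrameLag warm-up omitted:

```cpp
#include <cmath>
#include <cstdio>

// Hypothetical sketch of the calibration in gpu_bench(): double `loops` until one
// timed frame takes at least target_ms, then scale back linearly toward the target.
// The FLAGS_gpuFrameLag warm-up loops from the real code are left out here.
static int calibrate_gpu_loops(double (*time_frame)(int), double target_ms) {
    int loops = 1;
    double elapsed = 0;
    do {
        loops *= 2;
        elapsed = time_frame(loops);
    } while (elapsed < target_ms);
    // We've overshot at least a little; assume frame time scales linearly with loops.
    return (int)ceil(loops * target_ms / elapsed);
}

// Toy stand-in for a timed frame: pretend each loop costs 0.75ms on the GPU.
static double fake_time_frame(int loops) { return 0.75 * loops; }

int main() {
    // Doubling gives 2 -> 1.5ms, 4 -> 3ms, 8 -> 6ms (past the 5ms default --gpuMs),
    // then 8 * 5 / 6 rounds up to 7 loops per timed sample.
    printf("loops = %d\n", calibrate_gpu_loops(fake_time_frame, 5.0));  // loops = 7
    return 0;
}
```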