OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2014 Google Inc. | 2 * Copyright 2014 Google Inc. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
| 8 #include <ctype.h> |
| 9 |
8 #include "Benchmark.h" | 10 #include "Benchmark.h" |
9 #include "CrashHandler.h" | 11 #include "CrashHandler.h" |
10 #include "Stats.h" | 12 #include "Stats.h" |
11 #include "Timer.h" | 13 #include "Timer.h" |
12 | 14 |
13 #include "SkCanvas.h" | 15 #include "SkCanvas.h" |
14 #include "SkCommandLineFlags.h" | 16 #include "SkCommandLineFlags.h" |
15 #include "SkForceLinking.h" | 17 #include "SkForceLinking.h" |
16 #include "SkGraphics.h" | 18 #include "SkGraphics.h" |
17 #include "SkString.h" | 19 #include "SkString.h" |
18 #include "SkSurface.h" | 20 #include "SkSurface.h" |
19 | 21 |
| 22 #if SK_SUPPORT_GPU |
| 23 #include "GrContextFactory.h" |
| 24 GrContextFactory gGrFactory; |
| 25 #endif |
| 26 |
20 __SK_FORCE_IMAGE_DECODER_LINKING; | 27 __SK_FORCE_IMAGE_DECODER_LINKING; |
21 | 28 |
22 DEFINE_int32(samples, 10, "Number of samples to measure for each bench."); | 29 DEFINE_int32(samples, 10, "Number of samples to measure for each bench."); |
23 DEFINE_int32(overheadLoops, 100000, "Loops to estimate timer overhead."); | 30 DEFINE_int32(overheadLoops, 100000, "Loops to estimate timer overhead."); |
24 DEFINE_double(overheadGoal, 0.0001, | 31 DEFINE_double(overheadGoal, 0.0001, |
25 "Loop until timer overhead is at most this fraction of our measurm
ents."); | 32 "Loop until timer overhead is at most this fraction of our measurm
ents."); |
26 DEFINE_string(match, "", "The usual filters on file names of benchmarks to measu
re."); | 33 DEFINE_string(match, "", "The usual filters on file names of benchmarks to measu
re."); |
27 DEFINE_bool2(quiet, q, false, "Print only bench name and minimum sample."); | 34 DEFINE_bool2(quiet, q, false, "Print only bench name and minimum sample."); |
28 DEFINE_bool2(verbose, v, false, "Print all samples."); | 35 DEFINE_bool2(verbose, v, false, "Print all samples."); |
29 DEFINE_string(config, "8888 nonrendering", | 36 DEFINE_string(config, "nonrendering 8888 gpu", "Configs to measure. Options: " |
30 "Configs to measure. Options: 565 8888 nonrendering"); | 37 "565 8888 gpu nonrendering debug nullgpu msaa4 msaa16 nvprmsaa4 nv
prmsaa16 angle"); |
| 38 DEFINE_double(gpuMs, 5, "Target bench time in millseconds for GPU."); |
| 39 DEFINE_int32(gpuFrameLag, 5, "Overestimate of maximum number of frames GPU allow
s to lag."); |
31 | 40 |
32 // TODO: GPU benches | |
33 | 41 |
34 static SkString humanize(double ms) { | 42 static SkString humanize(double ms) { |
35 if (ms > 1e+3) return SkStringPrintf("%.3gs", ms/1e3); | 43 if (ms > 1e+3) return SkStringPrintf("%.3gs", ms/1e3); |
36 if (ms < 1e-3) return SkStringPrintf("%.3gns", ms*1e6); | 44 if (ms < 1e-3) return SkStringPrintf("%.3gns", ms*1e6); |
37 if (ms < 1) return SkStringPrintf("%.3gµs", ms*1e3); | 45 if (ms < 1) return SkStringPrintf("%.3gµs", ms*1e3); |
38 return SkStringPrintf("%.3gms", ms); | 46 return SkStringPrintf("%.3gms", ms); |
39 } | 47 } |
40 | 48 |
| 49 static double time(int loops, Benchmark* bench, SkCanvas* canvas, SkGLContextHel
per* gl) { |
| 50 WallTimer timer; |
| 51 timer.start(); |
| 52 if (bench) { |
| 53 bench->draw(loops, canvas); |
| 54 } |
| 55 if (canvas) { |
| 56 canvas->flush(); |
| 57 } |
| 58 #if SK_SUPPORT_GPU |
| 59 if (gl) { |
| 60 SK_GL(*gl, Flush()); |
| 61 gl->swapBuffers(); |
| 62 } |
| 63 #endif |
| 64 timer.end(); |
| 65 return timer.fWall; |
| 66 } |
| 67 |
41 static double estimate_timer_overhead() { | 68 static double estimate_timer_overhead() { |
42 double overhead = 0; | 69 double overhead = 0; |
43 WallTimer timer; | |
44 for (int i = 0; i < FLAGS_overheadLoops; i++) { | 70 for (int i = 0; i < FLAGS_overheadLoops; i++) { |
45 timer.start(); | 71 overhead += time(1, NULL, NULL, NULL); |
46 timer.end(); | |
47 overhead += timer.fWall; | |
48 } | 72 } |
49 return overhead / FLAGS_overheadLoops; | 73 return overhead / FLAGS_overheadLoops; |
50 } | 74 } |
51 | 75 |
52 static void safe_flush(SkCanvas* canvas) { | 76 static int cpu_bench(const double overhead, Benchmark* bench, SkCanvas* canvas,
double* samples) { |
53 if (canvas) { | 77 // First figure out approximately how many loops of bench it takes to make o
verhead negligible. |
54 canvas->flush(); | 78 double bench_plus_overhead; |
55 } | 79 do { |
56 } | 80 bench_plus_overhead = time(1, bench, canvas, NULL); |
| 81 } while (bench_plus_overhead < overhead); // Shouldn't normally happen. |
57 | 82 |
58 static int guess_loops(double overhead, Benchmark* bench, SkCanvas* canvas) { | 83 // Later we'll just start and stop the timer once but loop N times. |
59 WallTimer timer; | |
60 | |
61 // Measure timer overhead and bench time together. | |
62 do { | |
63 timer.start(); | |
64 bench->draw(1, canvas); | |
65 safe_flush(canvas); | |
66 timer.end(); | |
67 } while (timer.fWall < overhead); // Shouldn't normally happen. | |
68 | |
69 // Later we'll just start and stop the timer once, but loop N times. | |
70 // We'll pick N to make timer overhead negligible: | 84 // We'll pick N to make timer overhead negligible: |
71 // | 85 // |
72 // Timer Overhead | 86 // overhead |
73 // ------------------------------- < FLAGS_overheadGoal | 87 // ------------------------- < FLAGS_overheadGoal |
74 // Timer Overhead + N * Bench Time | 88 // overhead + N * Bench Time |
75 // | 89 // |
76 // where timer.fWall ≈ Timer Overhead + Bench Time. | 90 // where bench_plus_overhead ≈ overhead + Bench Time. |
77 // | 91 // |
78 // Doing some math, we get: | 92 // Doing some math, we get: |
79 // | 93 // |
80 // (Timer Overhead / FLAGS_overheadGoal) - Timer Overhead | 94 // (overhead / FLAGS_overheadGoal) - overhead |
81 // ----------------------------------------------------- < N | 95 // ------------------------------------------ < N |
82 // (timer.fWall - Timer Overhead) | 96 // bench_plus_overhead - overhead) |
83 // | 97 // |
84 // Luckily, this also works well in practice. :) | 98 // Luckily, this also works well in practice. :) |
85 const double numer = overhead / FLAGS_overheadGoal - overhead; | 99 const double numer = overhead / FLAGS_overheadGoal - overhead; |
86 const double denom = timer.fWall - overhead; | 100 const double denom = bench_plus_overhead - overhead; |
87 return (int)ceil(numer / denom); | 101 const int loops = (int)ceil(numer / denom); |
| 102 |
| 103 for (int i = 0; i < FLAGS_samples; i++) { |
| 104 samples[i] = time(loops, bench, canvas, NULL) / loops; |
| 105 } |
| 106 return loops; |
88 } | 107 } |
89 | 108 |
#if SK_SUPPORT_GPU
// Benchmarks bench on canvas through the GL context gl (must be non-NULL).
//
// Calibrates a loop count so that one timed round takes about FLAGS_gpuMs,
// runs FLAGS_gpuFrameLag warmup rounds, then writes FLAGS_samples per-loop
// timings into samples.  Returns the loop count used (always in [1, kMaxLoops]).
static int gpu_bench(SkGLContextHelper* gl,
                     Benchmark* bench,
                     SkCanvas* canvas,
                     double* samples) {
    // Upper bound that keeps the doubling below from overflowing int.
    const int kMaxLoops = 1 << 30;

    // Make sure we're done with whatever came before.
    // NOTE(review): written with call parentheses to match SK_GL(*gl, Flush())
    // in time(); a bare `Finish` looks like it would only name the function
    // without calling it -- confirm against the SK_GL macro definition.
    SK_GL(*gl, Finish());

    // Always time at least one calibration round, even if the flag is <= 0,
    // so that elapsed holds a real measurement.
    const int calibrationRounds = FLAGS_gpuFrameLag > 0 ? FLAGS_gpuFrameLag : 1;

    // First, figure out how many loops it'll take to get a frame up to FLAGS_gpuMs.
    int loops = 1;
    double elapsed = 0;
    do {
        loops *= 2;
        // If the GPU lets frames lag at all, we need to make sure we're timing
        // _this_ round, not still timing last round.  We force this by looping
        // more times than any reasonable GPU will allow frames to lag.
        for (int i = 0; i < calibrationRounds; i++) {
            elapsed = time(loops, bench, canvas, gl);
        }
    } while (elapsed < FLAGS_gpuMs && loops <= kMaxLoops / 2);

    // We've overshot at least a little.  Scale back linearly, in double so
    // the result can be clamped before it is converted to int.
    double scaled = elapsed > 0 ? ceil(loops * FLAGS_gpuMs / elapsed) : loops;
    if (!(scaled >= 1)) {  // Also catches NaN; avoids dividing by zero loops below.
        scaled = 1;
    }
    if (scaled > kMaxLoops) {
        scaled = kMaxLoops;
    }
    loops = (int)scaled;

    // Might as well make sure we're not still timing our calibration.
    SK_GL(*gl, Finish());

    // Pretty much the same deal as the calibration: do some warmup to make
    // sure we're timing steady-state pipelined frames.
    for (int i = 0; i < FLAGS_gpuFrameLag; i++) {
        time(loops, bench, canvas, gl);
    }

    // Now, actually do the timing!
    for (int i = 0; i < FLAGS_samples; i++) {
        samples[i] = time(loops, bench, canvas, gl) / loops;
    }
    return loops;
}
#endif
| 149 |
| 150 static SkString to_lower(const char* str) { |
| 151 SkString lower(str); |
| 152 for (size_t i = 0; i < lower.size(); i++) { |
| 153 lower[i] = tolower(lower[i]); |
| 154 } |
| 155 return lower; |
96 } | 156 } |
97 | 157 |
98 static void create_surfaces(Benchmark* bench, | 158 struct Target { |
99 SkTDArray<SkSurface*>* surfaces, | 159 const char* config; |
100 SkTDArray<const char*>* configs) { | 160 Benchmark::Backend backend; |
| 161 SkAutoTDelete<SkSurface> surface; |
| 162 #if SK_SUPPORT_GPU |
| 163 SkGLContextHelper* gl; |
| 164 #endif |
| 165 }; |
101 | 166 |
102 if (bench->isSuitableFor(Benchmark::kNonRendering_Backend) | 167 // If bench is enabled for backend/config, returns a Target* for them, otherwise
NULL. |
103 && push_config_if_enabled("nonrendering", configs)) { | 168 static Target* is_enabled(Benchmark* bench, Benchmark::Backend backend, const ch
ar* config) { |
104 surfaces->push(NULL); | 169 if (!bench->isSuitableFor(backend)) { |
| 170 return NULL; |
105 } | 171 } |
106 | 172 |
107 if (bench->isSuitableFor(Benchmark::kRaster_Backend)) { | 173 for (int i = 0; i < FLAGS_config.count(); i++) { |
108 const int w = bench->getSize().fX, | 174 if (to_lower(FLAGS_config[i]).equals(config)) { |
109 h = bench->getSize().fY; | 175 Target* target = new Target; |
110 | 176 target->config = config; |
111 if (push_config_if_enabled("8888", configs)) { | 177 target->backend = backend; |
112 const SkImageInfo info = { w, h, kN32_SkColorType, kPremul_SkAlphaTy
pe }; | 178 return target; |
113 surfaces->push(SkSurface::NewRaster(info)); | |
114 } | |
115 | |
116 if (push_config_if_enabled("565", configs)) { | |
117 const SkImageInfo info = { w, h, kRGB_565_SkColorType, kOpaque_SkAlp
haType }; | |
118 surfaces->push(SkSurface::NewRaster(info)); | |
119 } | 179 } |
120 } | 180 } |
| 181 return NULL; |
| 182 } |
| 183 |
| 184 // Append all targets that are suitable for bench. |
| 185 static void create_targets(Benchmark* bench, SkTDArray<Target*>* targets) { |
| 186 const int w = bench->getSize().fX, |
| 187 h = bench->getSize().fY; |
| 188 const SkImageInfo _8888 = { w, h, kN32_SkColorType, kPremul_SkAlphaType
}, |
| 189 _565 = { w, h, kRGB_565_SkColorType, kOpaque_SkAlphaType
}; |
| 190 |
| 191 #define CPU_TARGET(config, backend, code) \ |
| 192 if (Target* t = is_enabled(bench, Benchmark::backend, #config)) { \ |
| 193 t->surface.reset(code); \ |
| 194 targets->push(t); \ |
| 195 } |
| 196 CPU_TARGET(nonrendering, kNonRendering_Backend, NULL) |
| 197 CPU_TARGET(8888, kRaster_Backend, SkSurface::NewRaster(_8888)) |
| 198 CPU_TARGET(565, kRaster_Backend, SkSurface::NewRaster(_565)) |
| 199 |
| 200 #if SK_SUPPORT_GPU |
| 201 #define GPU_TARGET(config, ctxType, info, samples)
\ |
| 202 if (Target* t = is_enabled(bench, Benchmark::kGPU_Backend, #config)) {
\ |
| 203 t->surface.reset(SkSurface::NewRenderTarget(gGrFactory.get(ctxType),
info, samples)); \ |
| 204 t->gl = gGrFactory.getGLContext(ctxType);
\ |
| 205 targets->push(t);
\ |
| 206 } |
| 207 GPU_TARGET(gpu, GrContextFactory::kNative_GLContextType, _8888, 0) |
| 208 GPU_TARGET(msaa4, GrContextFactory::kNative_GLContextType, _8888, 4) |
| 209 GPU_TARGET(msaa16, GrContextFactory::kNative_GLContextType, _8888, 16) |
| 210 GPU_TARGET(nvprmsaa4, GrContextFactory::kNVPR_GLContextType, _8888, 4) |
| 211 GPU_TARGET(nvprmsaa16, GrContextFactory::kNVPR_GLContextType, _8888, 16) |
| 212 GPU_TARGET(debug, GrContextFactory::kDebug_GLContextType, _8888, 0) |
| 213 GPU_TARGET(nullgpu, GrContextFactory::kNull_GLContextType, _8888, 0) |
| 214 #if SK_ANGLE |
| 215 GPU_TARGET(angle, GrContextFactory::kANGLE_GLContextType, _8888, 0) |
| 216 #endif |
| 217 #endif |
121 } | 218 } |
122 | 219 |
123 int tool_main(int argc, char** argv); | 220 int tool_main(int argc, char** argv); |
124 int tool_main(int argc, char** argv) { | 221 int tool_main(int argc, char** argv) { |
125 SetupCrashHandler(); | 222 SetupCrashHandler(); |
126 SkAutoGraphics ag; | 223 SkAutoGraphics ag; |
127 SkCommandLineFlags::Parse(argc, argv); | 224 SkCommandLineFlags::Parse(argc, argv); |
128 | 225 |
129 const double overhead = estimate_timer_overhead(); | 226 const double overhead = estimate_timer_overhead(); |
| 227 SkAutoTMalloc<double> samples(FLAGS_samples); |
| 228 |
| 229 // TODO: display add median, use it in --quiet mode |
130 | 230 |
131 if (FLAGS_verbose) { | 231 if (FLAGS_verbose) { |
132 // No header. | 232 // No header. |
133 } else if (FLAGS_quiet) { | 233 } else if (FLAGS_quiet) { |
134 SkDebugf("min\tbench\tconfig\n"); | 234 SkDebugf("min\tbench\tconfig\n"); |
135 } else { | 235 } else { |
136 SkDebugf("loops\tmin\tmean\tmax\tstddev\tbench\tconfig\n"); | 236 SkDebugf("loops\tmin\tmean\tmax\tstddev\tconfig\tbench\n"); |
137 } | 237 } |
138 | 238 |
139 for (const BenchRegistry* r = BenchRegistry::Head(); r != NULL; r = r->next(
)) { | 239 for (const BenchRegistry* r = BenchRegistry::Head(); r != NULL; r = r->next(
)) { |
140 SkAutoTDelete<Benchmark> bench(r->factory()(NULL)); | 240 SkAutoTDelete<Benchmark> bench(r->factory()(NULL)); |
141 if (SkCommandLineFlags::ShouldSkip(FLAGS_match, bench->getName())) { | 241 if (SkCommandLineFlags::ShouldSkip(FLAGS_match, bench->getName())) { |
142 continue; | 242 continue; |
143 } | 243 } |
144 | 244 |
145 SkTDArray<SkSurface*> surfaces; | 245 SkTDArray<Target*> targets; |
146 SkTDArray<const char*> configs; | 246 create_targets(bench.get(), &targets); |
147 create_surfaces(bench.get(), &surfaces, &configs); | |
148 | 247 |
149 bench->preDraw(); | 248 bench->preDraw(); |
150 for (int j = 0; j < surfaces.count(); j++) { | 249 for (int j = 0; j < targets.count(); j++) { |
151 SkCanvas* canvas = surfaces[j] ? surfaces[j]->getCanvas() : NULL; | 250 SkCanvas* canvas = targets[j]->surface.get() ? targets[j]->surface->
getCanvas() : NULL; |
152 const char* config = configs[j]; | |
153 | 251 |
154 bench->draw(1, canvas); // Just paranoid warmup. | 252 const int loops = |
155 safe_flush(canvas); | 253 #if SK_SUPPORT_GPU |
156 const int loops = guess_loops(overhead, bench.get(), canvas); | 254 Benchmark::kGPU_Backend == targets[j]->backend |
157 | 255 ? gpu_bench(targets[j]->gl, bench.get(), canvas, samples.get()) |
158 SkAutoTMalloc<double> samples(FLAGS_samples); | 256 : |
159 WallTimer timer; | 257 #endif |
160 for (int i = 0; i < FLAGS_samples; i++) { | 258 cpu_bench( overhead, bench.get(), canvas, samples.get()); |
161 timer.start(); | |
162 bench->draw(loops, canvas); | |
163 safe_flush(canvas); | |
164 timer.end(); | |
165 samples[i] = timer.fWall / loops; | |
166 } | |
167 | 259 |
168 Stats stats(samples.get(), FLAGS_samples); | 260 Stats stats(samples.get(), FLAGS_samples); |
169 | 261 |
| 262 const char* config = targets[j]->config; |
170 if (FLAGS_verbose) { | 263 if (FLAGS_verbose) { |
171 for (int i = 0; i < FLAGS_samples; i++) { | 264 for (int i = 0; i < FLAGS_samples; i++) { |
172 SkDebugf("%s ", humanize(samples[i]).c_str()); | 265 SkDebugf("%s ", humanize(samples[i]).c_str()); |
173 } | 266 } |
174 SkDebugf("%s\n", bench->getName()); | 267 SkDebugf("%s\n", bench->getName()); |
175 } else if (FLAGS_quiet) { | 268 } else if (FLAGS_quiet) { |
176 if (configs.count() == 1) { | 269 if (targets.count() == 1) { |
177 config = ""; // Only print the config if we run the same ben
ch on more than one. | 270 config = ""; // Only print the config if we run the same ben
ch on more than one. |
178 } | 271 } |
179 SkDebugf("%s\t%s\t%s\n", humanize(stats.min).c_str(), bench->get
Name(), config); | 272 SkDebugf("%s\t%s\t%s\n", humanize(stats.min).c_str(), bench->get
Name(), config); |
180 } else { | 273 } else { |
181 const double stddev_percent = 100 * sqrt(stats.var) / stats.mean
; | 274 const double stddev_percent = 100 * sqrt(stats.var) / stats.mean
; |
182 SkDebugf("%d\t%s\t%s\t%s\t%.0f%%\t%s\t%s\n" | 275 SkDebugf("%d\t%s\t%s\t%s\t%.0f%%\t%s\t%s\n" |
183 , loops | 276 , loops |
184 , humanize(stats.min).c_str() | 277 , humanize(stats.min).c_str() |
185 , humanize(stats.mean).c_str() | 278 , humanize(stats.mean).c_str() |
186 , humanize(stats.max).c_str() | 279 , humanize(stats.max).c_str() |
187 , stddev_percent | 280 , stddev_percent |
| 281 , config |
188 , bench->getName() | 282 , bench->getName() |
189 , config | |
190 ); | 283 ); |
191 } | 284 } |
192 } | 285 } |
193 surfaces.deleteAll(); | 286 targets.deleteAll(); |
194 } | 287 } |
195 | 288 |
196 return 0; | 289 return 0; |
197 } | 290 } |
198 | 291 |
199 #if !defined SK_BUILD_FOR_IOS | 292 #if !defined SK_BUILD_FOR_IOS |
200 int main(int argc, char * const argv[]) { | 293 int main(int argc, char * const argv[]) { |
201 return tool_main(argc, (char**) argv); | 294 return tool_main(argc, (char**) argv); |
202 } | 295 } |
203 #endif | 296 #endif |
OLD | NEW |