OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2016 Google Inc. | 2 * Copyright 2016 Google Inc. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
8 #include "GpuTimer.h" | |
9 #include "GrContextFactory.h" | 8 #include "GrContextFactory.h" |
10 #include "SkCanvas.h" | 9 #include "SkCanvas.h" |
11 #include "SkOSFile.h" | 10 #include "SkOSFile.h" |
12 #include "SkPicture.h" | 11 #include "SkPicture.h" |
13 #include "SkStream.h" | 12 #include "SkStream.h" |
14 #include "SkSurface.h" | 13 #include "SkSurface.h" |
15 #include "SkSurfaceProps.h" | 14 #include "SkSurfaceProps.h" |
16 #include "picture_utils.h" | 15 #include "picture_utils.h" |
17 #include "flags/SkCommandLineFlags.h" | 16 #include "flags/SkCommandLineFlags.h" |
18 #include "flags/SkCommonFlagsConfig.h" | 17 #include "flags/SkCommonFlagsConfig.h" |
19 #include <stdlib.h> | 18 #include <stdlib.h> |
20 #include <algorithm> | 19 #include <algorithm> |
21 #include <array> | 20 #include <array> |
22 #include <chrono> | 21 #include <chrono> |
23 #include <cmath> | 22 #include <cmath> |
24 #include <vector> | 23 #include <vector> |
25 | 24 |
26 /** | 25 /** |
27 * This is a minimalist program whose sole purpose is to open an skp file, bench
mark it on a single | 26 * This is a minimalist program whose sole purpose is to open an skp file, bench
mark it on a single |
28 * config, and exit. It is intended to be used through skpbench.py rather than i
nvoked directly. | 27 * config, and exit. It is intended to be used through skpbench.py rather than i
nvoked directly. |
29 * Limiting the entire process to a single config/skp pair helps to keep the res
ults repeatable. | 28 * Limiting the entire process to a single config/skp pair helps to keep the res
ults repeatable. |
30 * | 29 * |
31 * No tiling, looping, or other fanciness is used; it just draws the skp whole i
nto a size-matched | 30 * No tiling, looping, or other fanciness is used; it just draws the skp whole i
nto a size-matched |
32 * render target and syncs the GPU after each draw. | 31 * render target and syncs the GPU after each draw. |
33 * | 32 * |
34 * Currently, only GPU configs are supported. | 33 * Currently, only GPU configs are supported. |
35 */ | 34 */ |
36 | 35 |
| 36 using sk_gpu_test::PlatformFence; |
| 37 using sk_gpu_test::kInvalidPlatformFence; |
| 38 using sk_gpu_test::FenceSync; |
| 39 |
37 DEFINE_int32(duration, 5000, "number of milliseconds to run the benchmark"); | 40 DEFINE_int32(duration, 5000, "number of milliseconds to run the benchmark"); |
38 DEFINE_int32(sampleMs, 50, "minimum duration of a sample"); | 41 DEFINE_int32(sampleMs, 50, "minimum duration of a sample"); |
39 DEFINE_bool(gpuClock, false, "time on the gpu clock (gpu work only)"); | |
40 DEFINE_bool(fps, false, "use fps instead of ms"); | 42 DEFINE_bool(fps, false, "use fps instead of ms"); |
41 DEFINE_string(skp, "", "path to a single .skp file to benchmark"); | 43 DEFINE_string(skp, "", "path to a single .skp file to benchmark"); |
42 DEFINE_string(png, "", "if set, save a .png proof to disk at this file location"
); | 44 DEFINE_string(png, "", "if set, save a .png proof to disk at this file location"
); |
43 DEFINE_int32(verbosity, 4, "level of verbosity (0=none to 5=debug)"); | 45 DEFINE_int32(verbosity, 4, "level of verbosity (0=none to 5=debug)"); |
44 DEFINE_bool(suppressHeader, false, "don't print a header row before the results"
); | 46 DEFINE_bool(suppressHeader, false, "don't print a header row before the results"
); |
45 | 47 |
46 static const char* header = | 48 static const char* header = |
47 " accum median max min stddev samples sample_ms clock met
ric config bench"; | 49 " accum median max min stddev samples sample_ms metric
config bench"; |
48 | 50 |
49 static const char* resultFormat = | 51 static const char* resultFormat = |
50 "%8.4g %8.4g %8.4g %8.4g %6.3g%% %7li %9i %-5s %-6s %-9s %s"; | 52 "%8.4g %8.4g %8.4g %8.4g %6.3g%% %7li %9i %-6s %-9s %s"; |
51 | 53 |
52 struct Sample { | 54 struct Sample { |
53 using duration = std::chrono::nanoseconds; | 55 using clock = std::chrono::high_resolution_clock; |
54 | 56 |
55 Sample() : fFrames(0), fDuration(0) {} | 57 Sample() : fFrames(0), fDuration(0) {} |
56 double seconds() const { return std::chrono::duration<double>(fDuration).cou
nt(); } | 58 double seconds() const { return std::chrono::duration<double>(fDuration).cou
nt(); } |
57 double ms() const { return std::chrono::duration<double, std::milli>(fDurati
on).count(); } | 59 double ms() const { return std::chrono::duration<double, std::milli>(fDurati
on).count(); } |
58 double value() const { return FLAGS_fps ? fFrames / this->seconds() : this->
ms() / fFrames; } | 60 double value() const { return FLAGS_fps ? fFrames / this->seconds() : this->
ms() / fFrames; } |
59 static const char* metric() { return FLAGS_fps ? "fps" : "ms"; } | 61 static const char* metric() { return FLAGS_fps ? "fps" : "ms"; } |
60 | 62 |
61 int fFrames; | 63 int fFrames; |
62 duration fDuration; | 64 clock::duration fDuration; |
63 }; | 65 }; |
64 | 66 |
65 class GpuSync { | 67 class GpuSync { |
66 public: | 68 public: |
67 GpuSync(const sk_gpu_test::FenceSync* fenceSync); | 69 GpuSync(const FenceSync* fenceSync); |
68 ~GpuSync(); | 70 ~GpuSync(); |
69 | 71 |
70 void syncToPreviousFrame(); | 72 void syncToPreviousFrame(); |
71 | 73 |
72 private: | 74 private: |
73 void updateFence(); | 75 void updateFence(); |
74 | 76 |
75 const sk_gpu_test::FenceSync* const fFenceSync; | 77 const FenceSync* const fFenceSync; |
76 sk_gpu_test::PlatformFence fFence; | 78 PlatformFence fFence; |
77 }; | 79 }; |
78 | 80 |
79 enum class ExitErr { | 81 enum class ExitErr { |
80 kOk = 0, | 82 kOk = 0, |
81 kUsage = 64, | 83 kUsage = 64, |
82 kData = 65, | 84 kData = 65, |
83 kUnavailable = 69, | 85 kUnavailable = 69, |
84 kIO = 74, | 86 kIO = 74, |
85 kSoftware = 70 | 87 kSoftware = 70 |
86 }; | 88 }; |
87 | 89 |
88 static void draw_skp_and_flush(SkCanvas*, const SkPicture*); | 90 static void draw_skp_and_flush(SkCanvas*, const SkPicture*); |
89 static bool mkdir_p(const SkString& name); | 91 static bool mkdir_p(const SkString& name); |
90 static SkString join(const SkCommandLineFlags::StringArray&); | 92 static SkString join(const SkCommandLineFlags::StringArray&); |
91 static void exitf(ExitErr, const char* format, ...); | 93 static void exitf(ExitErr, const char* format, ...); |
92 | 94 |
93 static void run_benchmark(const sk_gpu_test::FenceSync* fenceSync, SkCanvas* can
vas, | 95 static void run_benchmark(const FenceSync* fenceSync, SkCanvas* canvas, const Sk
Picture* skp, |
94 const SkPicture* skp, std::vector<Sample>* samples) { | 96 std::vector<Sample>* samples) { |
95 using clock = std::chrono::high_resolution_clock; | 97 using clock = Sample::clock; |
96 const Sample::duration sampleDuration = std::chrono::milliseconds(FLAGS_samp
leMs); | 98 const clock::duration sampleDuration = std::chrono::milliseconds(FLAGS_sampl
eMs); |
97 const clock::duration benchDuration = std::chrono::milliseconds(FLAGS_durati
on); | 99 const clock::duration benchDuration = std::chrono::milliseconds(FLAGS_durati
on); |
98 | 100 |
99 draw_skp_and_flush(canvas, skp); | 101 draw_skp_and_flush(canvas, skp); |
100 GpuSync gpuSync(fenceSync); | 102 GpuSync gpuSync(fenceSync); |
101 | 103 |
102 draw_skp_and_flush(canvas, skp); | 104 draw_skp_and_flush(canvas, skp); |
103 gpuSync.syncToPreviousFrame(); | 105 gpuSync.syncToPreviousFrame(); |
104 | 106 |
105 clock::time_point now = clock::now(); | 107 clock::time_point now = clock::now(); |
106 const clock::time_point endTime = now + benchDuration; | 108 const clock::time_point endTime = now + benchDuration; |
107 | 109 |
108 do { | 110 do { |
109 clock::time_point sampleStart = now; | 111 clock::time_point sampleStart = now; |
110 samples->emplace_back(); | 112 samples->emplace_back(); |
111 Sample& sample = samples->back(); | 113 Sample& sample = samples->back(); |
112 | 114 |
113 do { | 115 do { |
114 draw_skp_and_flush(canvas, skp); | 116 draw_skp_and_flush(canvas, skp); |
115 gpuSync.syncToPreviousFrame(); | 117 gpuSync.syncToPreviousFrame(); |
116 | 118 |
117 now = clock::now(); | 119 now = clock::now(); |
118 sample.fDuration = now - sampleStart; | 120 sample.fDuration = now - sampleStart; |
119 ++sample.fFrames; | 121 ++sample.fFrames; |
120 } while (sample.fDuration < sampleDuration); | 122 } while (sample.fDuration < sampleDuration); |
121 } while (now < endTime || 0 == samples->size() % 2); | 123 } while (now < endTime || 0 == samples->size() % 2); |
122 } | 124 } |
123 | 125 |
124 static void run_gpu_time_benchmark(sk_gpu_test::GpuTimer* gpuTimer, | |
125 const sk_gpu_test::FenceSync* fenceSync, SkCa
nvas* canvas, | |
126 const SkPicture* skp, std::vector<Sample>* sa
mples) { | |
127 using sk_gpu_test::PlatformTimerQuery; | |
128 using clock = std::chrono::steady_clock; | |
129 const clock::duration sampleDuration = std::chrono::milliseconds(FLAGS_sampl
eMs); | |
130 const clock::duration benchDuration = std::chrono::milliseconds(FLAGS_durati
on); | |
131 | |
132 if (!gpuTimer->disjointSupport()) { | |
133 fprintf(stderr, "WARNING: GPU timer cannot detect disjoint operations; " | |
134 "results may be unreliable\n"); | |
135 } | |
136 | |
137 draw_skp_and_flush(canvas, skp); | |
138 GpuSync gpuSync(fenceSync); | |
139 | |
140 gpuTimer->queueStart(); | |
141 draw_skp_and_flush(canvas, skp); | |
142 PlatformTimerQuery previousTime = gpuTimer->queueStop(); | |
143 gpuSync.syncToPreviousFrame(); | |
144 | |
145 clock::time_point now = clock::now(); | |
146 const clock::time_point endTime = now + benchDuration; | |
147 | |
148 do { | |
149 const clock::time_point sampleEndTime = now + sampleDuration; | |
150 samples->emplace_back(); | |
151 Sample& sample = samples->back(); | |
152 | |
153 do { | |
154 gpuTimer->queueStart(); | |
155 draw_skp_and_flush(canvas, skp); | |
156 PlatformTimerQuery time = gpuTimer->queueStop(); | |
157 gpuSync.syncToPreviousFrame(); | |
158 | |
159 switch (gpuTimer->checkQueryStatus(previousTime)) { | |
160 using QueryStatus = sk_gpu_test::GpuTimer::QueryStatus; | |
161 case QueryStatus::kInvalid: | |
162 exitf(ExitErr::kUnavailable, "GPU timer failed"); | |
163 case QueryStatus::kPending: | |
164 exitf(ExitErr::kUnavailable, "timer query still not ready af
ter fence sync"); | |
165 case QueryStatus::kDisjoint: | |
166 if (FLAGS_verbosity >= 4) { | |
167 fprintf(stderr, "discarding timer query due to disjoint
operations.\n"); | |
168 } | |
169 break; | |
170 case QueryStatus::kAccurate: | |
171 sample.fDuration += gpuTimer->getTimeElapsed(previousTime); | |
172 ++sample.fFrames; | |
173 break; | |
174 } | |
175 gpuTimer->deleteQuery(previousTime); | |
176 previousTime = time; | |
177 now = clock::now(); | |
178 } while (now < sampleEndTime || 0 == sample.fFrames); | |
179 } while (now < endTime || 0 == samples->size() % 2); | |
180 | |
181 gpuTimer->deleteQuery(previousTime); | |
182 } | |
183 | |
184 void print_result(const std::vector<Sample>& samples, const char* config, const
char* bench) { | 126 void print_result(const std::vector<Sample>& samples, const char* config, const
char* bench) { |
185 if (0 == (samples.size() % 2)) { | 127 if (0 == (samples.size() % 2)) { |
186 exitf(ExitErr::kSoftware, "attempted to gather stats on even number of s
amples"); | 128 exitf(ExitErr::kSoftware, "attempted to gather stats on even number of s
amples"); |
187 } | 129 } |
188 | 130 |
189 Sample accum = Sample(); | 131 Sample accum = Sample(); |
190 std::vector<double> values; | 132 std::vector<double> values; |
191 values.reserve(samples.size()); | 133 values.reserve(samples.size()); |
192 for (const Sample& sample : samples) { | 134 for (const Sample& sample : samples) { |
193 accum.fFrames += sample.fFrames; | 135 accum.fFrames += sample.fFrames; |
194 accum.fDuration += sample.fDuration; | 136 accum.fDuration += sample.fDuration; |
195 values.push_back(sample.value()); | 137 values.push_back(sample.value()); |
196 } | 138 } |
197 std::sort(values.begin(), values.end()); | 139 std::sort(values.begin(), values.end()); |
198 | 140 |
199 const double accumValue = accum.value(); | 141 const double accumValue = accum.value(); |
200 double variance = 0; | 142 double variance = 0; |
201 for (double value : values) { | 143 for (double value : values) { |
202 const double delta = value - accumValue; | 144 const double delta = value - accumValue; |
203 variance += delta * delta; | 145 variance += delta * delta; |
204 } | 146 } |
205 variance /= values.size(); | 147 variance /= values.size(); |
206 // Technically, this is the relative standard deviation. | 148 // Technically, this is the relative standard deviation. |
207 const double stddev = 100/*%*/ * sqrt(variance) / accumValue; | 149 const double stddev = 100/*%*/ * sqrt(variance) / accumValue; |
208 | 150 |
209 printf(resultFormat, accumValue, values[values.size() / 2], values.back(), v
alues.front(), | 151 printf(resultFormat, accumValue, values[values.size() / 2], values.back(), v
alues.front(), |
210 stddev, values.size(), FLAGS_sampleMs, FLAGS_gpuClock ? "gpu" : "cpu"
, Sample::metric(), | 152 stddev, values.size(), FLAGS_sampleMs, Sample::metric(), config, benc
h); |
211 config, bench); | |
212 printf("\n"); | 153 printf("\n"); |
213 fflush(stdout); | 154 fflush(stdout); |
214 } | 155 } |
215 | 156 |
216 int main(int argc, char** argv) { | 157 int main(int argc, char** argv) { |
217 SkCommandLineFlags::SetUsage("Use skpbench.py instead. " | 158 SkCommandLineFlags::SetUsage("Use skpbench.py instead. " |
218 "You usually don't want to use this program dir
ectly."); | 159 "You usually don't want to use this program dir
ectly."); |
219 SkCommandLineFlags::Parse(argc, argv); | 160 SkCommandLineFlags::Parse(argc, argv); |
220 | 161 |
221 if (!FLAGS_suppressHeader) { | 162 if (!FLAGS_suppressHeader) { |
(...skipping 77 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
299 if (FLAGS_sampleMs > 0) { | 240 if (FLAGS_sampleMs > 0) { |
300 // +1 because we might take one more sample in order to have an odd numb
er. | 241 // +1 because we might take one more sample in order to have an odd numb
er. |
301 samples.reserve(1 + (FLAGS_duration + FLAGS_sampleMs - 1) / FLAGS_sample
Ms); | 242 samples.reserve(1 + (FLAGS_duration + FLAGS_sampleMs - 1) / FLAGS_sample
Ms); |
302 } else { | 243 } else { |
303 samples.reserve(2 * FLAGS_duration); | 244 samples.reserve(2 * FLAGS_duration); |
304 } | 245 } |
305 | 246 |
306 // Run the benchmark. | 247 // Run the benchmark. |
307 SkCanvas* canvas = surface->getCanvas(); | 248 SkCanvas* canvas = surface->getCanvas(); |
308 canvas->translate(-skp->cullRect().x(), -skp->cullRect().y()); | 249 canvas->translate(-skp->cullRect().x(), -skp->cullRect().y()); |
309 if (!FLAGS_gpuClock) { | 250 run_benchmark(testCtx->fenceSync(), canvas, skp.get(), &samples); |
310 run_benchmark(testCtx->fenceSync(), canvas, skp.get(), &samples); | |
311 } else { | |
312 if (!testCtx->gpuTimingSupport()) { | |
313 exitf(ExitErr::kUnavailable, "GPU does not support timing"); | |
314 } | |
315 run_gpu_time_benchmark(testCtx->gpuTimer(), testCtx->fenceSync(), canvas
, skp.get(), | |
316 &samples); | |
317 } | |
318 print_result(samples, config->getTag().c_str(), SkOSPath::Basename(skpfile).
c_str()); | 251 print_result(samples, config->getTag().c_str(), SkOSPath::Basename(skpfile).
c_str()); |
319 | 252 |
320 // Save a proof (if one was requested). | 253 // Save a proof (if one was requested). |
321 if (!FLAGS_png.isEmpty()) { | 254 if (!FLAGS_png.isEmpty()) { |
322 SkBitmap bmp; | 255 SkBitmap bmp; |
323 bmp.setInfo(info); | 256 bmp.setInfo(info); |
324 if (!surface->getCanvas()->readPixels(&bmp, 0, 0)) { | 257 if (!surface->getCanvas()->readPixels(&bmp, 0, 0)) { |
325 exitf(ExitErr::kUnavailable, "failed to read canvas pixels for png")
; | 258 exitf(ExitErr::kUnavailable, "failed to read canvas pixels for png")
; |
326 } | 259 } |
327 const SkString &dirname = SkOSPath::Dirname(FLAGS_png[0]), | 260 const SkString &dirname = SkOSPath::Dirname(FLAGS_png[0]), |
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
360 static void exitf(ExitErr err, const char* format, ...) { | 293 static void exitf(ExitErr err, const char* format, ...) { |
361 fprintf(stderr, ExitErr::kSoftware == err ? "INTERNAL ERROR: " : "ERROR: "); | 294 fprintf(stderr, ExitErr::kSoftware == err ? "INTERNAL ERROR: " : "ERROR: "); |
362 va_list args; | 295 va_list args; |
363 va_start(args, format); | 296 va_start(args, format); |
364 vfprintf(stderr, format, args); | 297 vfprintf(stderr, format, args); |
365 va_end(args); | 298 va_end(args); |
366 fprintf(stderr, ExitErr::kSoftware == err ? "; this should never happen.\n":
".\n"); | 299 fprintf(stderr, ExitErr::kSoftware == err ? "; this should never happen.\n":
".\n"); |
367 exit((int)err); | 300 exit((int)err); |
368 } | 301 } |
369 | 302 |
370 GpuSync::GpuSync(const sk_gpu_test::FenceSync* fenceSync) | 303 GpuSync::GpuSync(const FenceSync* fenceSync) |
371 : fFenceSync(fenceSync) { | 304 : fFenceSync(fenceSync) { |
372 this->updateFence(); | 305 this->updateFence(); |
373 } | 306 } |
374 | 307 |
375 GpuSync::~GpuSync() { | 308 GpuSync::~GpuSync() { |
376 fFenceSync->deleteFence(fFence); | 309 fFenceSync->deleteFence(fFence); |
377 } | 310 } |
378 | 311 |
379 void GpuSync::syncToPreviousFrame() { | 312 void GpuSync::syncToPreviousFrame() { |
380 if (sk_gpu_test::kInvalidFence == fFence) { | 313 if (kInvalidPlatformFence == fFence) { |
381 exitf(ExitErr::kSoftware, "attempted to sync with invalid fence"); | 314 exitf(ExitErr::kSoftware, "attempted to sync with invalid fence"); |
382 } | 315 } |
383 if (!fFenceSync->waitFence(fFence)) { | 316 if (!fFenceSync->waitFence(fFence)) { |
384 exitf(ExitErr::kUnavailable, "failed to wait for fence"); | 317 exitf(ExitErr::kUnavailable, "failed to wait for fence"); |
385 } | 318 } |
386 fFenceSync->deleteFence(fFence); | 319 fFenceSync->deleteFence(fFence); |
387 this->updateFence(); | 320 this->updateFence(); |
388 } | 321 } |
389 | 322 |
390 void GpuSync::updateFence() { | 323 void GpuSync::updateFence() { |
391 fFence = fFenceSync->insertFence(); | 324 fFence = fFenceSync->insertFence(); |
392 if (sk_gpu_test::kInvalidFence == fFence) { | 325 if (kInvalidPlatformFence == fFence) { |
393 exitf(ExitErr::kUnavailable, "failed to insert fence"); | 326 exitf(ExitErr::kUnavailable, "failed to insert fence"); |
394 } | 327 } |
395 } | 328 } |
OLD | NEW |