Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(710)

Side by Side Diff: tools/skpbench/skpbench.cpp

Issue 2388433003: skpbench: add option for gpu timing (Closed)
Patch Set: Created 4 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * Copyright 2016 Google Inc. 2 * Copyright 2016 Google Inc.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license that can be 4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file. 5 * found in the LICENSE file.
6 */ 6 */
7 7
8 #include "GrContextFactory.h" 8 #include "GrContextFactory.h"
9 #include "SkCanvas.h" 9 #include "SkCanvas.h"
10 #include "SkGpuTimer.h"
10 #include "SkOSFile.h" 11 #include "SkOSFile.h"
11 #include "SkPicture.h" 12 #include "SkPicture.h"
12 #include "SkStream.h" 13 #include "SkStream.h"
13 #include "SkSurface.h" 14 #include "SkSurface.h"
14 #include "SkSurfaceProps.h" 15 #include "SkSurfaceProps.h"
15 #include "picture_utils.h" 16 #include "picture_utils.h"
16 #include "flags/SkCommandLineFlags.h" 17 #include "flags/SkCommandLineFlags.h"
17 #include "flags/SkCommonFlagsConfig.h" 18 #include "flags/SkCommonFlagsConfig.h"
18 #include <stdlib.h> 19 #include <stdlib.h>
19 #include <algorithm> 20 #include <algorithm>
20 #include <array> 21 #include <array>
21 #include <chrono> 22 #include <chrono>
22 #include <cmath> 23 #include <cmath>
23 #include <vector> 24 #include <vector>
24 25
25 /** 26 /**
26 * This is a minimalist program whose sole purpose is to open an skp file, benchmark it on a single 27 * This is a minimalist program whose sole purpose is to open an skp file, benchmark it on a single
27 * config, and exit. It is intended to be used through skpbench.py rather than invoked directly. 28 * config, and exit. It is intended to be used through skpbench.py rather than invoked directly.
28 * Limiting the entire process to a single config/skp pair helps to keep the results repeatable. 29 * Limiting the entire process to a single config/skp pair helps to keep the results repeatable.
29 * 30 *
30 * No tiling, looping, or other fanciness is used; it just draws the skp whole into a size-matched 31 * No tiling, looping, or other fanciness is used; it just draws the skp whole into a size-matched
31 * render target and syncs the GPU after each draw. 32 * render target and syncs the GPU after each draw.
32 * 33 *
33 * Currently, only GPU configs are supported. 34 * Currently, only GPU configs are supported.
34 */ 35 */
35 36
36 DEFINE_int32(duration, 5000, "number of milliseconds to run the benchmark"); 37 DEFINE_int32(duration, 5000, "number of milliseconds to run the benchmark");
37 DEFINE_int32(sampleMs, 50, "minimum duration of a sample"); 38 DEFINE_int32(sampleMs, 50, "minimum duration of a sample");
39 DEFINE_bool(gpuClock, false, "time on the gpu clock (gpu work only)");
38 DEFINE_bool(fps, false, "use fps instead of ms"); 40 DEFINE_bool(fps, false, "use fps instead of ms");
39 DEFINE_string(skp, "", "path to a single .skp file to benchmark"); 41 DEFINE_string(skp, "", "path to a single .skp file to benchmark");
40 DEFINE_string(png, "", "if set, save a .png proof to disk at this file location" ); 42 DEFINE_string(png, "", "if set, save a .png proof to disk at this file location" );
41 DEFINE_int32(verbosity, 4, "level of verbosity (0=none to 5=debug)"); 43 DEFINE_int32(verbosity, 4, "level of verbosity (0=none to 5=debug)");
42 DEFINE_bool(suppressHeader, false, "don't print a header row before the results" ); 44 DEFINE_bool(suppressHeader, false, "don't print a header row before the results" );
43 45
44 static const char* header = 46 static const char* header =
45 " accum median max min stddev samples sample_ms metric config bench"; 47 " accum median max min stddev samples sample_ms clock met ric config bench";
46 48
47 static const char* resultFormat = 49 static const char* resultFormat =
48 "%8.4g %8.4g %8.4g %8.4g %6.3g%% %7li %9i %-6s %-9s %s"; 50 "%8.4g %8.4g %8.4g %8.4g %6.3g%% %7li %9i %-5s %-6s %-9s %s";
49 51
50 struct Sample { 52 struct Sample {
51 using clock = std::chrono::high_resolution_clock; 53 using duration = std::chrono::nanoseconds;
52 54
53 Sample() : fFrames(0), fDuration(0) {} 55 Sample() : fFrames(0), fDuration(0) {}
54 double seconds() const { return std::chrono::duration<double>(fDuration).cou nt(); } 56 double seconds() const { return std::chrono::duration<double>(fDuration).cou nt(); }
55 double ms() const { return std::chrono::duration<double, std::milli>(fDurati on).count(); } 57 double ms() const { return std::chrono::duration<double, std::milli>(fDurati on).count(); }
56 double value() const { return FLAGS_fps ? fFrames / this->seconds() : this-> ms() / fFrames; } 58 double value() const { return FLAGS_fps ? fFrames / this->seconds() : this-> ms() / fFrames; }
57 static const char* metric() { return FLAGS_fps ? "fps" : "ms"; } 59 static const char* metric() { return FLAGS_fps ? "fps" : "ms"; }
58 60
59 int fFrames; 61 int fFrames;
60 clock::duration fDuration; 62 duration fDuration;
61 }; 63 };
62 64
63 class GpuSync { 65 class GpuSync {
64 public: 66 public:
65 GpuSync(const SkGpuFenceSync* fenceSync); 67 GpuSync(const SkGpuFenceSync* fenceSync);
66 ~GpuSync(); 68 ~GpuSync();
67 69
68 void syncToPreviousFrame(); 70 void syncToPreviousFrame();
69 71
70 private: 72 private:
(...skipping 12 matching lines...) Expand all
83 kSoftware = 70 85 kSoftware = 70
84 }; 86 };
85 87
86 static void draw_skp_and_flush(SkCanvas*, const SkPicture*); 88 static void draw_skp_and_flush(SkCanvas*, const SkPicture*);
87 static bool mkdir_p(const SkString& name); 89 static bool mkdir_p(const SkString& name);
88 static SkString join(const SkCommandLineFlags::StringArray&); 90 static SkString join(const SkCommandLineFlags::StringArray&);
89 static void exitf(ExitErr, const char* format, ...); 91 static void exitf(ExitErr, const char* format, ...);
90 92
91 static void run_benchmark(const SkGpuFenceSync* fenceSync, SkCanvas* canvas, con st SkPicture* skp, 93 static void run_benchmark(const SkGpuFenceSync* fenceSync, SkCanvas* canvas, con st SkPicture* skp,
92 std::vector<Sample>* samples) { 94 std::vector<Sample>* samples) {
93 using clock = Sample::clock; 95 using clock = std::chrono::high_resolution_clock;
94 const clock::duration sampleDuration = std::chrono::milliseconds(FLAGS_sampl eMs); 96 const Sample::duration sampleDuration = std::chrono::milliseconds(FLAGS_samp leMs);
95 const clock::duration benchDuration = std::chrono::milliseconds(FLAGS_durati on); 97 const clock::duration benchDuration = std::chrono::milliseconds(FLAGS_durati on);
96 98
97 draw_skp_and_flush(canvas, skp); 99 draw_skp_and_flush(canvas, skp);
98 GpuSync gpuSync(fenceSync); 100 GpuSync gpuSync(fenceSync);
99 101
100 draw_skp_and_flush(canvas, skp); 102 draw_skp_and_flush(canvas, skp);
101 gpuSync.syncToPreviousFrame(); 103 gpuSync.syncToPreviousFrame();
102 104
103 clock::time_point now = clock::now(); 105 clock::time_point now = clock::now();
104 const clock::time_point endTime = now + benchDuration; 106 const clock::time_point endTime = now + benchDuration;
105 107
106 do { 108 do {
107 clock::time_point sampleStart = now; 109 clock::time_point sampleStart = now;
108 samples->emplace_back(); 110 samples->emplace_back();
109 Sample& sample = samples->back(); 111 Sample& sample = samples->back();
110 112
111 do { 113 do {
112 draw_skp_and_flush(canvas, skp); 114 draw_skp_and_flush(canvas, skp);
113 gpuSync.syncToPreviousFrame(); 115 gpuSync.syncToPreviousFrame();
114 116
115 now = clock::now(); 117 now = clock::now();
116 sample.fDuration = now - sampleStart; 118 sample.fDuration = now - sampleStart;
117 ++sample.fFrames; 119 ++sample.fFrames;
118 } while (sample.fDuration < sampleDuration); 120 } while (sample.fDuration < sampleDuration);
119 } while (now < endTime || 0 == samples->size() % 2); 121 } while (now < endTime || 0 == samples->size() % 2);
120 } 122 }
121 123
124 static void run_gpu_time_benchmark(SkGpuTimer* gpuTimer, const SkGpuFenceSync* f enceSync,
125 SkCanvas* canvas, const SkPicture* skp,
126 std::vector<Sample>* samples) {
127 using clock = std::chrono::steady_clock;
128 const clock::duration sampleDuration = std::chrono::milliseconds(FLAGS_sampl eMs);
129 const clock::duration benchDuration = std::chrono::milliseconds(FLAGS_durati on);
130
131 if (!gpuTimer->disjointSupport()) {
132 fprintf(stderr, "WARNING: GPU timer cannot detect disjoint operations; "
133 "results may be unreliable\n");
134 }
135
136 draw_skp_and_flush(canvas, skp);
137 GpuSync gpuSync(fenceSync);
138
139 gpuTimer->queueStart();
140 draw_skp_and_flush(canvas, skp);
141 SkPlatformGpuTimerQuery previousTime = gpuTimer->queueStop();
142 gpuSync.syncToPreviousFrame();
143
144 clock::time_point now = clock::now();
145 const clock::time_point endTime = now + benchDuration;
146
147 do {
148 const clock::time_point sampleEndTime = now + sampleDuration;
149 samples->emplace_back();
150 Sample& sample = samples->back();
151
152 do {
153 gpuTimer->queueStart();
154 draw_skp_and_flush(canvas, skp);
155 SkPlatformGpuTimerQuery time = gpuTimer->queueStop();
156 gpuSync.syncToPreviousFrame();
157
158 switch (gpuTimer->checkQueryStatus(previousTime)) {
159 using QueryStatus = SkGpuTimer::QueryStatus;
160 case QueryStatus::kInvalid:
161 exitf(ExitErr::kUnavailable, "GPU timer failed");
162 case QueryStatus::kPending:
163 exitf(ExitErr::kUnavailable, "timer query still not ready af ter fence sync");
164 case QueryStatus::kDisjoint:
165 if (FLAGS_verbosity >= 4) {
166 fprintf(stderr, "discarding timer query due to disjoint operations.\n");
167 }
168 break;
169 case QueryStatus::kAccurate:
170 sample.fDuration += gpuTimer->getTimeElapsed(previousTime);
171 ++sample.fFrames;
172 break;
173 }
174 gpuTimer->deleteQuery(previousTime);
175 previousTime = time;
176 now = clock::now();
177 } while (now < sampleEndTime || 0 == sample.fFrames);
178 } while (now < endTime || 0 == samples->size() % 2);
179
180 gpuTimer->deleteQuery(previousTime);
181 }
182
122 void print_result(const std::vector<Sample>& samples, const char* config, const char* bench) { 183 void print_result(const std::vector<Sample>& samples, const char* config, const char* bench) {
123 if (0 == (samples.size() % 2)) { 184 if (0 == (samples.size() % 2)) {
124 exitf(ExitErr::kSoftware, "attempted to gather stats on even number of s amples"); 185 exitf(ExitErr::kSoftware, "attempted to gather stats on even number of s amples");
125 } 186 }
126 187
127 Sample accum = Sample(); 188 Sample accum = Sample();
128 std::vector<double> values; 189 std::vector<double> values;
129 values.reserve(samples.size()); 190 values.reserve(samples.size());
130 for (const Sample& sample : samples) { 191 for (const Sample& sample : samples) {
131 accum.fFrames += sample.fFrames; 192 accum.fFrames += sample.fFrames;
132 accum.fDuration += sample.fDuration; 193 accum.fDuration += sample.fDuration;
133 values.push_back(sample.value()); 194 values.push_back(sample.value());
134 } 195 }
135 std::sort(values.begin(), values.end()); 196 std::sort(values.begin(), values.end());
136 197
137 const double accumValue = accum.value(); 198 const double accumValue = accum.value();
138 double variance = 0; 199 double variance = 0;
139 for (double value : values) { 200 for (double value : values) {
140 const double delta = value - accumValue; 201 const double delta = value - accumValue;
141 variance += delta * delta; 202 variance += delta * delta;
142 } 203 }
143 variance /= values.size(); 204 variance /= values.size();
144 // Technically, this is the relative standard deviation. 205 // Technically, this is the relative standard deviation.
145 const double stddev = 100/*%*/ * sqrt(variance) / accumValue; 206 const double stddev = 100/*%*/ * sqrt(variance) / accumValue;
146 207
147 printf(resultFormat, accumValue, values[values.size() / 2], values.back(), v alues.front(), 208 printf(resultFormat, accumValue, values[values.size() / 2], values.back(), v alues.front(),
148 stddev, values.size(), FLAGS_sampleMs, Sample::metric(), config, benc h); 209 stddev, values.size(), FLAGS_sampleMs, FLAGS_gpuClock ? "gpu" : "cpu" , Sample::metric(),
210 config, bench);
149 printf("\n"); 211 printf("\n");
150 fflush(stdout); 212 fflush(stdout);
151 } 213 }
152 214
153 int main(int argc, char** argv) { 215 int main(int argc, char** argv) {
154 SkCommandLineFlags::SetUsage("Use skpbench.py instead. " 216 SkCommandLineFlags::SetUsage("Use skpbench.py instead. "
155 "You usually don't want to use this program dir ectly."); 217 "You usually don't want to use this program dir ectly.");
156 SkCommandLineFlags::Parse(argc, argv); 218 SkCommandLineFlags::Parse(argc, argv);
157 219
158 if (!FLAGS_suppressHeader) { 220 if (!FLAGS_suppressHeader) {
(...skipping 77 matching lines...) Expand 10 before | Expand all | Expand 10 after
236 if (FLAGS_sampleMs > 0) { 298 if (FLAGS_sampleMs > 0) {
237 // +1 because we might take one more sample in order to have an odd number. 299 // +1 because we might take one more sample in order to have an odd number.
238 samples.reserve(1 + (FLAGS_duration + FLAGS_sampleMs - 1) / FLAGS_sample Ms); 300 samples.reserve(1 + (FLAGS_duration + FLAGS_sampleMs - 1) / FLAGS_sample Ms);
239 } else { 301 } else {
240 samples.reserve(2 * FLAGS_duration); 302 samples.reserve(2 * FLAGS_duration);
241 } 303 }
242 304
243 // Run the benchmark. 305 // Run the benchmark.
244 SkCanvas* canvas = surface->getCanvas(); 306 SkCanvas* canvas = surface->getCanvas();
245 canvas->translate(-skp->cullRect().x(), -skp->cullRect().y()); 307 canvas->translate(-skp->cullRect().x(), -skp->cullRect().y());
246 run_benchmark(testCtx->fenceSync(), canvas, skp.get(), &samples); 308 if (!FLAGS_gpuClock) {
309 run_benchmark(testCtx->fenceSync(), canvas, skp.get(), &samples);
310 } else {
311 if (!testCtx->gpuTimingSupport()) {
312 exitf(ExitErr::kUnavailable, "GPU does not support timing");
313 }
314 run_gpu_time_benchmark(testCtx->gpuTimer(), testCtx->fenceSync(), canvas , skp.get(),
315 &samples);
316 }
247 print_result(samples, config->getTag().c_str(), SkOSPath::Basename(skpfile). c_str()); 317 print_result(samples, config->getTag().c_str(), SkOSPath::Basename(skpfile). c_str());
248 318
249 // Save a proof (if one was requested). 319 // Save a proof (if one was requested).
250 if (!FLAGS_png.isEmpty()) { 320 if (!FLAGS_png.isEmpty()) {
251 SkBitmap bmp; 321 SkBitmap bmp;
252 bmp.setInfo(info); 322 bmp.setInfo(info);
253 if (!surface->getCanvas()->readPixels(&bmp, 0, 0)) { 323 if (!surface->getCanvas()->readPixels(&bmp, 0, 0)) {
254 exitf(ExitErr::kUnavailable, "failed to read canvas pixels for png") ; 324 exitf(ExitErr::kUnavailable, "failed to read canvas pixels for png") ;
255 } 325 }
256 const SkString &dirname = SkOSPath::Dirname(FLAGS_png[0]), 326 const SkString &dirname = SkOSPath::Dirname(FLAGS_png[0]),
(...skipping 58 matching lines...) Expand 10 before | Expand all | Expand 10 after
315 fFenceSync->deleteFence(fFence); 385 fFenceSync->deleteFence(fFence);
316 this->updateFence(); 386 this->updateFence();
317 } 387 }
318 388
319 void GpuSync::updateFence() { 389 void GpuSync::updateFence() {
320 fFence = fFenceSync->insertFence(); 390 fFence = fFenceSync->insertFence();
321 if (kInvalidPlatformGpuFence == fFence) { 391 if (kInvalidPlatformGpuFence == fFence) {
322 exitf(ExitErr::kUnavailable, "failed to insert fence"); 392 exitf(ExitErr::kUnavailable, "failed to insert fence");
323 } 393 }
324 } 394 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698