OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2016 Google Inc. | 2 * Copyright 2016 Google Inc. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
8 #include "GrContextFactory.h" | 8 #include "GrContextFactory.h" |
9 #include "SkCanvas.h" | 9 #include "SkCanvas.h" |
| 10 #include "SkGpuTimer.h" |
10 #include "SkOSFile.h" | 11 #include "SkOSFile.h" |
11 #include "SkPicture.h" | 12 #include "SkPicture.h" |
12 #include "SkStream.h" | 13 #include "SkStream.h" |
13 #include "SkSurface.h" | 14 #include "SkSurface.h" |
14 #include "SkSurfaceProps.h" | 15 #include "SkSurfaceProps.h" |
15 #include "picture_utils.h" | 16 #include "picture_utils.h" |
16 #include "flags/SkCommandLineFlags.h" | 17 #include "flags/SkCommandLineFlags.h" |
17 #include "flags/SkCommonFlagsConfig.h" | 18 #include "flags/SkCommonFlagsConfig.h" |
18 #include <stdlib.h> | 19 #include <stdlib.h> |
19 #include <algorithm> | 20 #include <algorithm> |
20 #include <array> | 21 #include <array> |
21 #include <chrono> | 22 #include <chrono> |
22 #include <cmath> | 23 #include <cmath> |
23 #include <vector> | 24 #include <vector> |
24 | 25 |
25 /** | 26 /** |
26 * This is a minimalist program whose sole purpose is to open an skp file, bench
mark it on a single | 27 * This is a minimalist program whose sole purpose is to open an skp file, bench
mark it on a single |
27 * config, and exit. It is intended to be used through skpbench.py rather than i
nvoked directly. | 28 * config, and exit. It is intended to be used through skpbench.py rather than i
nvoked directly. |
28 * Limiting the entire process to a single config/skp pair helps to keep the res
ults repeatable. | 29 * Limiting the entire process to a single config/skp pair helps to keep the res
ults repeatable. |
29 * | 30 * |
30 * No tiling, looping, or other fanciness is used; it just draws the skp whole i
nto a size-matched | 31 * No tiling, looping, or other fanciness is used; it just draws the skp whole i
nto a size-matched |
31 * render target and syncs the GPU after each draw. | 32 * render target and syncs the GPU after each draw. |
32 * | 33 * |
33 * Currently, only GPU configs are supported. | 34 * Currently, only GPU configs are supported. |
34 */ | 35 */ |
35 | 36 |
36 DEFINE_int32(duration, 5000, "number of milliseconds to run the benchmark"); | 37 DEFINE_int32(duration, 5000, "number of milliseconds to run the benchmark"); |
37 DEFINE_int32(sampleMs, 50, "minimum duration of a sample"); | 38 DEFINE_int32(sampleMs, 50, "minimum duration of a sample"); |
| 39 DEFINE_bool(gpuClock, false, "time on the gpu clock (gpu work only)"); |
38 DEFINE_bool(fps, false, "use fps instead of ms"); | 40 DEFINE_bool(fps, false, "use fps instead of ms"); |
39 DEFINE_string(skp, "", "path to a single .skp file to benchmark"); | 41 DEFINE_string(skp, "", "path to a single .skp file to benchmark"); |
40 DEFINE_string(png, "", "if set, save a .png proof to disk at this file location"
); | 42 DEFINE_string(png, "", "if set, save a .png proof to disk at this file location"
); |
41 DEFINE_int32(verbosity, 4, "level of verbosity (0=none to 5=debug)"); | 43 DEFINE_int32(verbosity, 4, "level of verbosity (0=none to 5=debug)"); |
42 DEFINE_bool(suppressHeader, false, "don't print a header row before the results"
); | 44 DEFINE_bool(suppressHeader, false, "don't print a header row before the results"
); |
43 | 45 |
44 static const char* header = | 46 static const char* header = |
45 " accum median max min stddev samples sample_ms metric
config bench"; | 47 " accum median max min stddev samples sample_ms clock met
ric config bench"; |
46 | 48 |
47 static const char* resultFormat = | 49 static const char* resultFormat = |
48 "%8.4g %8.4g %8.4g %8.4g %6.3g%% %7li %9i %-6s %-9s %s"; | 50 "%8.4g %8.4g %8.4g %8.4g %6.3g%% %7li %9i %-5s %-6s %-9s %s"; |
49 | 51 |
50 struct Sample { | 52 struct Sample { |
51 using clock = std::chrono::high_resolution_clock; | 53 using duration = std::chrono::nanoseconds; |
52 | 54 |
53 Sample() : fFrames(0), fDuration(0) {} | 55 Sample() : fFrames(0), fDuration(0) {} |
54 double seconds() const { return std::chrono::duration<double>(fDuration).cou
nt(); } | 56 double seconds() const { return std::chrono::duration<double>(fDuration).cou
nt(); } |
55 double ms() const { return std::chrono::duration<double, std::milli>(fDurati
on).count(); } | 57 double ms() const { return std::chrono::duration<double, std::milli>(fDurati
on).count(); } |
56 double value() const { return FLAGS_fps ? fFrames / this->seconds() : this->
ms() / fFrames; } | 58 double value() const { return FLAGS_fps ? fFrames / this->seconds() : this->
ms() / fFrames; } |
57 static const char* metric() { return FLAGS_fps ? "fps" : "ms"; } | 59 static const char* metric() { return FLAGS_fps ? "fps" : "ms"; } |
58 | 60 |
59 int fFrames; | 61 int fFrames; |
60 clock::duration fDuration; | 62 duration fDuration; |
61 }; | 63 }; |
62 | 64 |
63 class GpuSync { | 65 class GpuSync { |
64 public: | 66 public: |
65 GpuSync(const SkGpuFenceSync* fenceSync); | 67 GpuSync(const SkGpuFenceSync* fenceSync); |
66 ~GpuSync(); | 68 ~GpuSync(); |
67 | 69 |
68 void syncToPreviousFrame(); | 70 void syncToPreviousFrame(); |
69 | 71 |
70 private: | 72 private: |
(...skipping 12 matching lines...) Expand all Loading... |
83 kSoftware = 70 | 85 kSoftware = 70 |
84 }; | 86 }; |
85 | 87 |
86 static void draw_skp_and_flush(SkCanvas*, const SkPicture*); | 88 static void draw_skp_and_flush(SkCanvas*, const SkPicture*); |
87 static bool mkdir_p(const SkString& name); | 89 static bool mkdir_p(const SkString& name); |
88 static SkString join(const SkCommandLineFlags::StringArray&); | 90 static SkString join(const SkCommandLineFlags::StringArray&); |
89 static void exitf(ExitErr, const char* format, ...); | 91 static void exitf(ExitErr, const char* format, ...); |
90 | 92 |
91 static void run_benchmark(const SkGpuFenceSync* fenceSync, SkCanvas* canvas, con
st SkPicture* skp, | 93 static void run_benchmark(const SkGpuFenceSync* fenceSync, SkCanvas* canvas, con
st SkPicture* skp, |
92 std::vector<Sample>* samples) { | 94 std::vector<Sample>* samples) { |
93 using clock = Sample::clock; | 95 using clock = std::chrono::high_resolution_clock; |
94 const clock::duration sampleDuration = std::chrono::milliseconds(FLAGS_sampl
eMs); | 96 const Sample::duration sampleDuration = std::chrono::milliseconds(FLAGS_samp
leMs); |
95 const clock::duration benchDuration = std::chrono::milliseconds(FLAGS_durati
on); | 97 const clock::duration benchDuration = std::chrono::milliseconds(FLAGS_durati
on); |
96 | 98 |
97 draw_skp_and_flush(canvas, skp); | 99 draw_skp_and_flush(canvas, skp); |
98 GpuSync gpuSync(fenceSync); | 100 GpuSync gpuSync(fenceSync); |
99 | 101 |
100 draw_skp_and_flush(canvas, skp); | 102 draw_skp_and_flush(canvas, skp); |
101 gpuSync.syncToPreviousFrame(); | 103 gpuSync.syncToPreviousFrame(); |
102 | 104 |
103 clock::time_point now = clock::now(); | 105 clock::time_point now = clock::now(); |
104 const clock::time_point endTime = now + benchDuration; | 106 const clock::time_point endTime = now + benchDuration; |
105 | 107 |
106 do { | 108 do { |
107 clock::time_point sampleStart = now; | 109 clock::time_point sampleStart = now; |
108 samples->emplace_back(); | 110 samples->emplace_back(); |
109 Sample& sample = samples->back(); | 111 Sample& sample = samples->back(); |
110 | 112 |
111 do { | 113 do { |
112 draw_skp_and_flush(canvas, skp); | 114 draw_skp_and_flush(canvas, skp); |
113 gpuSync.syncToPreviousFrame(); | 115 gpuSync.syncToPreviousFrame(); |
114 | 116 |
115 now = clock::now(); | 117 now = clock::now(); |
116 sample.fDuration = now - sampleStart; | 118 sample.fDuration = now - sampleStart; |
117 ++sample.fFrames; | 119 ++sample.fFrames; |
118 } while (sample.fDuration < sampleDuration); | 120 } while (sample.fDuration < sampleDuration); |
119 } while (now < endTime || 0 == samples->size() % 2); | 121 } while (now < endTime || 0 == samples->size() % 2); |
120 } | 122 } |
121 | 123 |
| 124 static void run_gpu_time_benchmark(SkGpuTimer* gpuTimer, const SkGpuFenceSync* f
enceSync, |
| 125 SkCanvas* canvas, const SkPicture* skp, |
| 126 std::vector<Sample>* samples) { |
| 127 using clock = std::chrono::steady_clock; |
| 128 const clock::duration sampleDuration = std::chrono::milliseconds(FLAGS_sampl
eMs); |
| 129 const clock::duration benchDuration = std::chrono::milliseconds(FLAGS_durati
on); |
| 130 |
| 131 if (!gpuTimer->disjointSupport()) { |
| 132 fprintf(stderr, "WARNING: GPU timer cannot detect disjoint operations; " |
| 133 "results may be unreliable\n"); |
| 134 } |
| 135 |
| 136 draw_skp_and_flush(canvas, skp); |
| 137 GpuSync gpuSync(fenceSync); |
| 138 |
| 139 gpuTimer->queueStart(); |
| 140 draw_skp_and_flush(canvas, skp); |
| 141 SkPlatformGpuTimerQuery previousTime = gpuTimer->queueStop(); |
| 142 gpuSync.syncToPreviousFrame(); |
| 143 |
| 144 clock::time_point now = clock::now(); |
| 145 const clock::time_point endTime = now + benchDuration; |
| 146 |
| 147 do { |
| 148 const clock::time_point sampleEndTime = now + sampleDuration; |
| 149 samples->emplace_back(); |
| 150 Sample& sample = samples->back(); |
| 151 |
| 152 do { |
| 153 gpuTimer->queueStart(); |
| 154 draw_skp_and_flush(canvas, skp); |
| 155 SkPlatformGpuTimerQuery time = gpuTimer->queueStop(); |
| 156 gpuSync.syncToPreviousFrame(); |
| 157 |
| 158 switch (gpuTimer->checkQueryStatus(previousTime)) { |
| 159 using QueryStatus = SkGpuTimer::QueryStatus; |
| 160 case QueryStatus::kInvalid: |
| 161 exitf(ExitErr::kUnavailable, "GPU timer failed"); |
| 162 case QueryStatus::kPending: |
| 163 exitf(ExitErr::kUnavailable, "timer query still not ready af
ter fence sync"); |
| 164 case QueryStatus::kDisjoint: |
| 165 if (FLAGS_verbosity >= 4) { |
| 166 fprintf(stderr, "discarding timer query due to disjoint
operations.\n"); |
| 167 } |
| 168 break; |
| 169 case QueryStatus::kAccurate: |
| 170 sample.fDuration += gpuTimer->getTimeElapsed(previousTime); |
| 171 ++sample.fFrames; |
| 172 break; |
| 173 } |
| 174 gpuTimer->deleteQuery(previousTime); |
| 175 previousTime = time; |
| 176 now = clock::now(); |
| 177 } while (now < sampleEndTime || 0 == sample.fFrames); |
| 178 } while (now < endTime || 0 == samples->size() % 2); |
| 179 |
| 180 gpuTimer->deleteQuery(previousTime); |
| 181 } |
| 182 |
122 void print_result(const std::vector<Sample>& samples, const char* config, const
char* bench) { | 183 void print_result(const std::vector<Sample>& samples, const char* config, const
char* bench) { |
123 if (0 == (samples.size() % 2)) { | 184 if (0 == (samples.size() % 2)) { |
124 exitf(ExitErr::kSoftware, "attempted to gather stats on even number of s
amples"); | 185 exitf(ExitErr::kSoftware, "attempted to gather stats on even number of s
amples"); |
125 } | 186 } |
126 | 187 |
127 Sample accum = Sample(); | 188 Sample accum = Sample(); |
128 std::vector<double> values; | 189 std::vector<double> values; |
129 values.reserve(samples.size()); | 190 values.reserve(samples.size()); |
130 for (const Sample& sample : samples) { | 191 for (const Sample& sample : samples) { |
131 accum.fFrames += sample.fFrames; | 192 accum.fFrames += sample.fFrames; |
132 accum.fDuration += sample.fDuration; | 193 accum.fDuration += sample.fDuration; |
133 values.push_back(sample.value()); | 194 values.push_back(sample.value()); |
134 } | 195 } |
135 std::sort(values.begin(), values.end()); | 196 std::sort(values.begin(), values.end()); |
136 | 197 |
137 const double accumValue = accum.value(); | 198 const double accumValue = accum.value(); |
138 double variance = 0; | 199 double variance = 0; |
139 for (double value : values) { | 200 for (double value : values) { |
140 const double delta = value - accumValue; | 201 const double delta = value - accumValue; |
141 variance += delta * delta; | 202 variance += delta * delta; |
142 } | 203 } |
143 variance /= values.size(); | 204 variance /= values.size(); |
144 // Technically, this is the relative standard deviation. | 205 // Technically, this is the relative standard deviation. |
145 const double stddev = 100/*%*/ * sqrt(variance) / accumValue; | 206 const double stddev = 100/*%*/ * sqrt(variance) / accumValue; |
146 | 207 |
147 printf(resultFormat, accumValue, values[values.size() / 2], values.back(), v
alues.front(), | 208 printf(resultFormat, accumValue, values[values.size() / 2], values.back(), v
alues.front(), |
148 stddev, values.size(), FLAGS_sampleMs, Sample::metric(), config, benc
h); | 209 stddev, values.size(), FLAGS_sampleMs, FLAGS_gpuClock ? "gpu" : "cpu"
, Sample::metric(), |
| 210 config, bench); |
149 printf("\n"); | 211 printf("\n"); |
150 fflush(stdout); | 212 fflush(stdout); |
151 } | 213 } |
152 | 214 |
153 int main(int argc, char** argv) { | 215 int main(int argc, char** argv) { |
154 SkCommandLineFlags::SetUsage("Use skpbench.py instead. " | 216 SkCommandLineFlags::SetUsage("Use skpbench.py instead. " |
155 "You usually don't want to use this program dir
ectly."); | 217 "You usually don't want to use this program dir
ectly."); |
156 SkCommandLineFlags::Parse(argc, argv); | 218 SkCommandLineFlags::Parse(argc, argv); |
157 | 219 |
158 if (!FLAGS_suppressHeader) { | 220 if (!FLAGS_suppressHeader) { |
(...skipping 77 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
236 if (FLAGS_sampleMs > 0) { | 298 if (FLAGS_sampleMs > 0) { |
237 // +1 because we might take one more sample in order to have an odd numb
er. | 299 // +1 because we might take one more sample in order to have an odd numb
er. |
238 samples.reserve(1 + (FLAGS_duration + FLAGS_sampleMs - 1) / FLAGS_sample
Ms); | 300 samples.reserve(1 + (FLAGS_duration + FLAGS_sampleMs - 1) / FLAGS_sample
Ms); |
239 } else { | 301 } else { |
240 samples.reserve(2 * FLAGS_duration); | 302 samples.reserve(2 * FLAGS_duration); |
241 } | 303 } |
242 | 304 |
243 // Run the benchmark. | 305 // Run the benchmark. |
244 SkCanvas* canvas = surface->getCanvas(); | 306 SkCanvas* canvas = surface->getCanvas(); |
245 canvas->translate(-skp->cullRect().x(), -skp->cullRect().y()); | 307 canvas->translate(-skp->cullRect().x(), -skp->cullRect().y()); |
246 run_benchmark(testCtx->fenceSync(), canvas, skp.get(), &samples); | 308 if (!FLAGS_gpuClock) { |
| 309 run_benchmark(testCtx->fenceSync(), canvas, skp.get(), &samples); |
| 310 } else { |
| 311 if (!testCtx->gpuTimingSupport()) { |
| 312 exitf(ExitErr::kUnavailable, "GPU does not support timing"); |
| 313 } |
| 314 run_gpu_time_benchmark(testCtx->gpuTimer(), testCtx->fenceSync(), canvas
, skp.get(), |
| 315 &samples); |
| 316 } |
247 print_result(samples, config->getTag().c_str(), SkOSPath::Basename(skpfile).
c_str()); | 317 print_result(samples, config->getTag().c_str(), SkOSPath::Basename(skpfile).
c_str()); |
248 | 318 |
249 // Save a proof (if one was requested). | 319 // Save a proof (if one was requested). |
250 if (!FLAGS_png.isEmpty()) { | 320 if (!FLAGS_png.isEmpty()) { |
251 SkBitmap bmp; | 321 SkBitmap bmp; |
252 bmp.setInfo(info); | 322 bmp.setInfo(info); |
253 if (!surface->getCanvas()->readPixels(&bmp, 0, 0)) { | 323 if (!surface->getCanvas()->readPixels(&bmp, 0, 0)) { |
254 exitf(ExitErr::kUnavailable, "failed to read canvas pixels for png")
; | 324 exitf(ExitErr::kUnavailable, "failed to read canvas pixels for png")
; |
255 } | 325 } |
256 const SkString &dirname = SkOSPath::Dirname(FLAGS_png[0]), | 326 const SkString &dirname = SkOSPath::Dirname(FLAGS_png[0]), |
(...skipping 58 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
315 fFenceSync->deleteFence(fFence); | 385 fFenceSync->deleteFence(fFence); |
316 this->updateFence(); | 386 this->updateFence(); |
317 } | 387 } |
318 | 388 |
319 void GpuSync::updateFence() { | 389 void GpuSync::updateFence() { |
320 fFence = fFenceSync->insertFence(); | 390 fFence = fFenceSync->insertFence(); |
321 if (kInvalidPlatformGpuFence == fFence) { | 391 if (kInvalidPlatformGpuFence == fFence) { |
322 exitf(ExitErr::kUnavailable, "failed to insert fence"); | 392 exitf(ExitErr::kUnavailable, "failed to insert fence"); |
323 } | 393 } |
324 } | 394 } |
OLD | NEW |