OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2016 Google Inc. | 2 * Copyright 2016 Google Inc. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
| 8 #include "GpuTimer.h" |
8 #include "GrContextFactory.h" | 9 #include "GrContextFactory.h" |
9 #include "SkCanvas.h" | 10 #include "SkCanvas.h" |
10 #include "SkOSFile.h" | 11 #include "SkOSFile.h" |
11 #include "SkPicture.h" | 12 #include "SkPicture.h" |
12 #include "SkStream.h" | 13 #include "SkStream.h" |
13 #include "SkSurface.h" | 14 #include "SkSurface.h" |
14 #include "SkSurfaceProps.h" | 15 #include "SkSurfaceProps.h" |
15 #include "picture_utils.h" | 16 #include "picture_utils.h" |
16 #include "flags/SkCommandLineFlags.h" | 17 #include "flags/SkCommandLineFlags.h" |
17 #include "flags/SkCommonFlagsConfig.h" | 18 #include "flags/SkCommonFlagsConfig.h" |
(...skipping 14 matching lines...) Expand all Loading... |
32 * | 33 * |
33 * Currently, only GPU configs are supported. | 34 * Currently, only GPU configs are supported. |
34 */ | 35 */ |
35 | 36 |
36 using sk_gpu_test::PlatformFence; | 37 using sk_gpu_test::PlatformFence; |
37 using sk_gpu_test::kInvalidPlatformFence; | 38 using sk_gpu_test::kInvalidPlatformFence; |
38 using sk_gpu_test::FenceSync; | 39 using sk_gpu_test::FenceSync; |
39 | 40 |
40 DEFINE_int32(duration, 5000, "number of milliseconds to run the benchmark"); | 41 DEFINE_int32(duration, 5000, "number of milliseconds to run the benchmark"); |
41 DEFINE_int32(sampleMs, 50, "minimum duration of a sample"); | 42 DEFINE_int32(sampleMs, 50, "minimum duration of a sample"); |
| 43 DEFINE_bool(gpuClock, false, "time on the gpu clock (gpu work only)"); |
42 DEFINE_bool(fps, false, "use fps instead of ms"); | 44 DEFINE_bool(fps, false, "use fps instead of ms"); |
43 DEFINE_string(skp, "", "path to a single .skp file to benchmark"); | 45 DEFINE_string(skp, "", "path to a single .skp file to benchmark"); |
44 DEFINE_string(png, "", "if set, save a .png proof to disk at this file location"
); | 46 DEFINE_string(png, "", "if set, save a .png proof to disk at this file location"
); |
45 DEFINE_int32(verbosity, 4, "level of verbosity (0=none to 5=debug)"); | 47 DEFINE_int32(verbosity, 4, "level of verbosity (0=none to 5=debug)"); |
46 DEFINE_bool(suppressHeader, false, "don't print a header row before the results"
); | 48 DEFINE_bool(suppressHeader, false, "don't print a header row before the results"
); |
47 | 49 |
| 50 using sk_gpu_test::FenceSync; |
| 51 using sk_gpu_test::PlatformFence; |
| 52 using sk_gpu_test::kInvalidPlatformFence; |
| 53 using sk_gpu_test::GpuTimer; |
| 54 using sk_gpu_test::PlatformGpuTimerQuery; |
| 55 |
48 static const char* header = | 56 static const char* header = |
49 " accum median max min stddev samples sample_ms metric
config bench"; | 57 " accum median max min stddev samples sample_ms clock met
ric config bench"; |
50 | 58 |
51 static const char* resultFormat = | 59 static const char* resultFormat = |
52 "%8.4g %8.4g %8.4g %8.4g %6.3g%% %7li %9i %-6s %-9s %s"; | 60 "%8.4g %8.4g %8.4g %8.4g %6.3g%% %7li %9i %-5s %-6s %-9s %s"; |
53 | 61 |
54 struct Sample { | 62 struct Sample { |
55 using clock = std::chrono::high_resolution_clock; | 63 using duration = std::chrono::nanoseconds; |
56 | 64 |
57 Sample() : fFrames(0), fDuration(0) {} | 65 Sample() : fFrames(0), fDuration(0) {} |
58 double seconds() const { return std::chrono::duration<double>(fDuration).cou
nt(); } | 66 double seconds() const { return std::chrono::duration<double>(fDuration).cou
nt(); } |
59 double ms() const { return std::chrono::duration<double, std::milli>(fDurati
on).count(); } | 67 double ms() const { return std::chrono::duration<double, std::milli>(fDurati
on).count(); } |
60 double value() const { return FLAGS_fps ? fFrames / this->seconds() : this->
ms() / fFrames; } | 68 double value() const { return FLAGS_fps ? fFrames / this->seconds() : this->
ms() / fFrames; } |
61 static const char* metric() { return FLAGS_fps ? "fps" : "ms"; } | 69 static const char* metric() { return FLAGS_fps ? "fps" : "ms"; } |
62 | 70 |
63 int fFrames; | 71 int fFrames; |
64 clock::duration fDuration; | 72 duration fDuration; |
65 }; | 73 }; |
66 | 74 |
67 class GpuSync { | 75 class GpuSync { |
68 public: | 76 public: |
69 GpuSync(const FenceSync* fenceSync); | 77 GpuSync(const FenceSync* fenceSync); |
70 ~GpuSync(); | 78 ~GpuSync(); |
71 | 79 |
72 void syncToPreviousFrame(); | 80 void syncToPreviousFrame(); |
73 | 81 |
74 private: | 82 private: |
(...skipping 12 matching lines...) Expand all Loading... |
87 kSoftware = 70 | 95 kSoftware = 70 |
88 }; | 96 }; |
89 | 97 |
90 static void draw_skp_and_flush(SkCanvas*, const SkPicture*); | 98 static void draw_skp_and_flush(SkCanvas*, const SkPicture*); |
91 static bool mkdir_p(const SkString& name); | 99 static bool mkdir_p(const SkString& name); |
92 static SkString join(const SkCommandLineFlags::StringArray&); | 100 static SkString join(const SkCommandLineFlags::StringArray&); |
93 static void exitf(ExitErr, const char* format, ...); | 101 static void exitf(ExitErr, const char* format, ...); |
94 | 102 |
95 static void run_benchmark(const FenceSync* fenceSync, SkCanvas* canvas, const Sk
Picture* skp, | 103 static void run_benchmark(const FenceSync* fenceSync, SkCanvas* canvas, const Sk
Picture* skp, |
96 std::vector<Sample>* samples) { | 104 std::vector<Sample>* samples) { |
97 using clock = Sample::clock; | 105 using clock = std::chrono::high_resolution_clock; |
98 const clock::duration sampleDuration = std::chrono::milliseconds(FLAGS_sampl
eMs); | 106 const Sample::duration sampleDuration = std::chrono::milliseconds(FLAGS_samp
leMs); |
99 const clock::duration benchDuration = std::chrono::milliseconds(FLAGS_durati
on); | 107 const clock::duration benchDuration = std::chrono::milliseconds(FLAGS_durati
on); |
100 | 108 |
101 draw_skp_and_flush(canvas, skp); | 109 draw_skp_and_flush(canvas, skp); |
102 GpuSync gpuSync(fenceSync); | 110 GpuSync gpuSync(fenceSync); |
103 | 111 |
104 draw_skp_and_flush(canvas, skp); | 112 draw_skp_and_flush(canvas, skp); |
105 gpuSync.syncToPreviousFrame(); | 113 gpuSync.syncToPreviousFrame(); |
106 | 114 |
107 clock::time_point now = clock::now(); | 115 clock::time_point now = clock::now(); |
108 const clock::time_point endTime = now + benchDuration; | 116 const clock::time_point endTime = now + benchDuration; |
109 | 117 |
110 do { | 118 do { |
111 clock::time_point sampleStart = now; | 119 clock::time_point sampleStart = now; |
112 samples->emplace_back(); | 120 samples->emplace_back(); |
113 Sample& sample = samples->back(); | 121 Sample& sample = samples->back(); |
114 | 122 |
115 do { | 123 do { |
116 draw_skp_and_flush(canvas, skp); | 124 draw_skp_and_flush(canvas, skp); |
117 gpuSync.syncToPreviousFrame(); | 125 gpuSync.syncToPreviousFrame(); |
118 | 126 |
119 now = clock::now(); | 127 now = clock::now(); |
120 sample.fDuration = now - sampleStart; | 128 sample.fDuration = now - sampleStart; |
121 ++sample.fFrames; | 129 ++sample.fFrames; |
122 } while (sample.fDuration < sampleDuration); | 130 } while (sample.fDuration < sampleDuration); |
123 } while (now < endTime || 0 == samples->size() % 2); | 131 } while (now < endTime || 0 == samples->size() % 2); |
124 } | 132 } |
125 | 133 |
| 134 static void run_gpu_time_benchmark(GpuTimer* gpuTimer, const FenceSync* fenceSyn
c, SkCanvas* canvas, |
| 135 const SkPicture* skp, std::vector<Sample>* sa
mples) { |
| 136 using clock = std::chrono::steady_clock; |
| 137 const clock::duration sampleDuration = std::chrono::milliseconds(FLAGS_sampl
eMs); |
| 138 const clock::duration benchDuration = std::chrono::milliseconds(FLAGS_durati
on); |
| 139 |
| 140 if (!gpuTimer->disjointSupport()) { |
| 141 fprintf(stderr, "WARNING: GPU timer cannot detect disjoint operations; " |
| 142 "results may be unreliable\n"); |
| 143 } |
| 144 |
| 145 draw_skp_and_flush(canvas, skp); |
| 146 GpuSync gpuSync(fenceSync); |
| 147 |
| 148 gpuTimer->queueStart(); |
| 149 draw_skp_and_flush(canvas, skp); |
| 150 PlatformGpuTimerQuery previousTime = gpuTimer->queueStop(); |
| 151 gpuSync.syncToPreviousFrame(); |
| 152 |
| 153 clock::time_point now = clock::now(); |
| 154 const clock::time_point endTime = now + benchDuration; |
| 155 |
| 156 do { |
| 157 const clock::time_point sampleEndTime = now + sampleDuration; |
| 158 samples->emplace_back(); |
| 159 Sample& sample = samples->back(); |
| 160 |
| 161 do { |
| 162 gpuTimer->queueStart(); |
| 163 draw_skp_and_flush(canvas, skp); |
| 164 PlatformGpuTimerQuery time = gpuTimer->queueStop(); |
| 165 gpuSync.syncToPreviousFrame(); |
| 166 |
| 167 switch (gpuTimer->checkQueryStatus(previousTime)) { |
| 168 using QueryStatus = GpuTimer::QueryStatus; |
| 169 case QueryStatus::kInvalid: |
| 170 exitf(ExitErr::kUnavailable, "GPU timer failed"); |
| 171 case QueryStatus::kPending: |
| 172 exitf(ExitErr::kUnavailable, "timer query still not ready af
ter fence sync"); |
| 173 case QueryStatus::kDisjoint: |
| 174 if (FLAGS_verbosity >= 4) { |
| 175 fprintf(stderr, "discarding timer query due to disjoint
operations.\n"); |
| 176 } |
| 177 break; |
| 178 case QueryStatus::kAccurate: |
| 179 sample.fDuration += gpuTimer->getTimeElapsed(previousTime); |
| 180 ++sample.fFrames; |
| 181 break; |
| 182 } |
| 183 gpuTimer->deleteQuery(previousTime); |
| 184 previousTime = time; |
| 185 now = clock::now(); |
| 186 } while (now < sampleEndTime || 0 == sample.fFrames); |
| 187 } while (now < endTime || 0 == samples->size() % 2); |
| 188 |
| 189 gpuTimer->deleteQuery(previousTime); |
| 190 } |
| 191 |
126 void print_result(const std::vector<Sample>& samples, const char* config, const
char* bench) { | 192 void print_result(const std::vector<Sample>& samples, const char* config, const
char* bench) { |
127 if (0 == (samples.size() % 2)) { | 193 if (0 == (samples.size() % 2)) { |
128 exitf(ExitErr::kSoftware, "attempted to gather stats on even number of s
amples"); | 194 exitf(ExitErr::kSoftware, "attempted to gather stats on even number of s
amples"); |
129 } | 195 } |
130 | 196 |
131 Sample accum = Sample(); | 197 Sample accum = Sample(); |
132 std::vector<double> values; | 198 std::vector<double> values; |
133 values.reserve(samples.size()); | 199 values.reserve(samples.size()); |
134 for (const Sample& sample : samples) { | 200 for (const Sample& sample : samples) { |
135 accum.fFrames += sample.fFrames; | 201 accum.fFrames += sample.fFrames; |
136 accum.fDuration += sample.fDuration; | 202 accum.fDuration += sample.fDuration; |
137 values.push_back(sample.value()); | 203 values.push_back(sample.value()); |
138 } | 204 } |
139 std::sort(values.begin(), values.end()); | 205 std::sort(values.begin(), values.end()); |
140 | 206 |
141 const double accumValue = accum.value(); | 207 const double accumValue = accum.value(); |
142 double variance = 0; | 208 double variance = 0; |
143 for (double value : values) { | 209 for (double value : values) { |
144 const double delta = value - accumValue; | 210 const double delta = value - accumValue; |
145 variance += delta * delta; | 211 variance += delta * delta; |
146 } | 212 } |
147 variance /= values.size(); | 213 variance /= values.size(); |
148 // Technically, this is the relative standard deviation. | 214 // Technically, this is the relative standard deviation. |
149 const double stddev = 100/*%*/ * sqrt(variance) / accumValue; | 215 const double stddev = 100/*%*/ * sqrt(variance) / accumValue; |
150 | 216 |
151 printf(resultFormat, accumValue, values[values.size() / 2], values.back(), v
alues.front(), | 217 printf(resultFormat, accumValue, values[values.size() / 2], values.back(), v
alues.front(), |
152 stddev, values.size(), FLAGS_sampleMs, Sample::metric(), config, benc
h); | 218 stddev, values.size(), FLAGS_sampleMs, FLAGS_gpuClock ? "gpu" : "cpu"
, Sample::metric(), |
| 219 config, bench); |
153 printf("\n"); | 220 printf("\n"); |
154 fflush(stdout); | 221 fflush(stdout); |
155 } | 222 } |
156 | 223 |
157 int main(int argc, char** argv) { | 224 int main(int argc, char** argv) { |
158 SkCommandLineFlags::SetUsage("Use skpbench.py instead. " | 225 SkCommandLineFlags::SetUsage("Use skpbench.py instead. " |
159 "You usually don't want to use this program dir
ectly."); | 226 "You usually don't want to use this program dir
ectly."); |
160 SkCommandLineFlags::Parse(argc, argv); | 227 SkCommandLineFlags::Parse(argc, argv); |
161 | 228 |
162 if (!FLAGS_suppressHeader) { | 229 if (!FLAGS_suppressHeader) { |
(...skipping 77 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
240 if (FLAGS_sampleMs > 0) { | 307 if (FLAGS_sampleMs > 0) { |
241 // +1 because we might take one more sample in order to have an odd numb
er. | 308 // +1 because we might take one more sample in order to have an odd numb
er. |
242 samples.reserve(1 + (FLAGS_duration + FLAGS_sampleMs - 1) / FLAGS_sample
Ms); | 309 samples.reserve(1 + (FLAGS_duration + FLAGS_sampleMs - 1) / FLAGS_sample
Ms); |
243 } else { | 310 } else { |
244 samples.reserve(2 * FLAGS_duration); | 311 samples.reserve(2 * FLAGS_duration); |
245 } | 312 } |
246 | 313 |
247 // Run the benchmark. | 314 // Run the benchmark. |
248 SkCanvas* canvas = surface->getCanvas(); | 315 SkCanvas* canvas = surface->getCanvas(); |
249 canvas->translate(-skp->cullRect().x(), -skp->cullRect().y()); | 316 canvas->translate(-skp->cullRect().x(), -skp->cullRect().y()); |
250 run_benchmark(testCtx->fenceSync(), canvas, skp.get(), &samples); | 317 if (!FLAGS_gpuClock) { |
| 318 run_benchmark(testCtx->fenceSync(), canvas, skp.get(), &samples); |
| 319 } else { |
| 320 if (!testCtx->gpuTimingSupport()) { |
| 321 exitf(ExitErr::kUnavailable, "GPU does not support timing"); |
| 322 } |
| 323 run_gpu_time_benchmark(testCtx->gpuTimer(), testCtx->fenceSync(), canvas
, skp.get(), |
| 324 &samples); |
| 325 } |
251 print_result(samples, config->getTag().c_str(), SkOSPath::Basename(skpfile).
c_str()); | 326 print_result(samples, config->getTag().c_str(), SkOSPath::Basename(skpfile).
c_str()); |
252 | 327 |
253 // Save a proof (if one was requested). | 328 // Save a proof (if one was requested). |
254 if (!FLAGS_png.isEmpty()) { | 329 if (!FLAGS_png.isEmpty()) { |
255 SkBitmap bmp; | 330 SkBitmap bmp; |
256 bmp.setInfo(info); | 331 bmp.setInfo(info); |
257 if (!surface->getCanvas()->readPixels(&bmp, 0, 0)) { | 332 if (!surface->getCanvas()->readPixels(&bmp, 0, 0)) { |
258 exitf(ExitErr::kUnavailable, "failed to read canvas pixels for png")
; | 333 exitf(ExitErr::kUnavailable, "failed to read canvas pixels for png")
; |
259 } | 334 } |
260 const SkString &dirname = SkOSPath::Dirname(FLAGS_png[0]), | 335 const SkString &dirname = SkOSPath::Dirname(FLAGS_png[0]), |
(...skipping 58 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
319 fFenceSync->deleteFence(fFence); | 394 fFenceSync->deleteFence(fFence); |
320 this->updateFence(); | 395 this->updateFence(); |
321 } | 396 } |
322 | 397 |
323 void GpuSync::updateFence() { | 398 void GpuSync::updateFence() { |
324 fFence = fFenceSync->insertFence(); | 399 fFence = fFenceSync->insertFence(); |
325 if (kInvalidPlatformFence == fFence) { | 400 if (kInvalidPlatformFence == fFence) { |
326 exitf(ExitErr::kUnavailable, "failed to insert fence"); | 401 exitf(ExitErr::kUnavailable, "failed to insert fence"); |
327 } | 402 } |
328 } | 403 } |
OLD | NEW |