OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2016 Google Inc. | 2 * Copyright 2016 Google Inc. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
8 #include "GrCaps.h" | 8 #include "GrCaps.h" |
9 #include "GrContextFactory.h" | 9 #include "GrContextFactory.h" |
10 #include "Benchmark.h" | 10 #include "Benchmark.h" |
11 #include "ResultsWriter.h" | 11 #include "ResultsWriter.h" |
12 #include "SkCommandLineFlags.h" | 12 #include "SkCommandLineFlags.h" |
13 #include "SkOSFile.h" | 13 #include "SkOSFile.h" |
14 #include "SkStream.h" | 14 #include "SkStream.h" |
15 #include "SkSurface.h" | 15 #include "SkSurface.h" |
16 #include "SkTime.h" | 16 #include "SkTime.h" |
| 17 #include "SkTLList.h" |
| 18 #include "SkThreadUtils.h" |
17 #include "Stats.h" | 19 #include "Stats.h" |
18 #include "Timer.h" | 20 #include "Timer.h" |
19 #include "VisualSKPBench.h" | 21 #include "VisualSKPBench.h" |
20 #include "gl/GrGLDefines.h" | 22 #include "gl/GrGLDefines.h" |
| 23 #include "../private/SkMutex.h" |
| 24 #include "../private/SkSemaphore.h" |
| 25 #include "../private/SkGpuFenceSync.h" |
21 | 26 |
22 // posix only for now | 27 // posix only for now |
23 #include <unistd.h> | 28 #include <unistd.h> |
24 #include <sys/types.h> | 29 #include <sys/types.h> |
25 #include <sys/wait.h> | 30 #include <sys/wait.h> |
26 | 31 |
27 /* | 32 /* |
28 * This is an experimental GPU only benchmarking program. The initial implementation will only | 33 * This is an experimental GPU only benchmarking program. The initial implementation will only |
29 * support SKPs. | 34 * support SKPs. |
30 */ | 35 */ |
31 | 36 |
32 // To get image decoders linked in we have to do the below magic | 37 // To get image decoders linked in we have to do the below magic |
33 #include "SkForceLinking.h" | 38 #include "SkForceLinking.h" |
34 #include "SkImageDecoder.h" | 39 #include "SkImageDecoder.h" |
35 __SK_FORCE_IMAGE_DECODER_LINKING; | 40 __SK_FORCE_IMAGE_DECODER_LINKING; |
36 | 41 |
37 | |
38 static const int kAutoTuneLoops = 0; | 42 static const int kAutoTuneLoops = 0; |
39 | 43 |
40 static const int kDefaultLoops = | 44 static const int kDefaultLoops = |
41 #ifdef SK_DEBUG | 45 #ifdef SK_DEBUG |
42 1; | 46 1; |
43 #else | 47 #else |
44 kAutoTuneLoops; | 48 kAutoTuneLoops; |
45 #endif | 49 #endif |
46 | 50 |
47 static SkString loops_help_txt() { | 51 static SkString loops_help_txt() { |
(...skipping 13 matching lines...)
61 "$ requires the end of the bench to match\n" | 65 "$ requires the end of the bench to match\n" |
62 "^ and $ requires an exact match\n" | 66 "^ and $ requires an exact match\n" |
63 "If a bench does not match any list entry,\n" | 67 "If a bench does not match any list entry,\n" |
64 "it is skipped unless some list entry starts with ~"); | 68 "it is skipped unless some list entry starts with ~"); |
65 DEFINE_int32(gpuFrameLag, 5, "If unknown, estimated maximum number of frames GPU allows to lag."); | 69 DEFINE_int32(gpuFrameLag, 5, "If unknown, estimated maximum number of frames GPU allows to lag."); |
66 DEFINE_int32(samples, 10, "Number of samples to measure for each bench."); | 70 DEFINE_int32(samples, 10, "Number of samples to measure for each bench."); |
67 DEFINE_int32(maxLoops, 1000000, "Never run a bench more times than this."); | 71 DEFINE_int32(maxLoops, 1000000, "Never run a bench more times than this."); |
68 DEFINE_int32(loops, kDefaultLoops, loops_help_txt().c_str()); | 72 DEFINE_int32(loops, kDefaultLoops, loops_help_txt().c_str()); |
69 DEFINE_double(gpuMs, 5, "Target bench time in milliseconds for GPU."); | 73 DEFINE_double(gpuMs, 5, "Target bench time in milliseconds for GPU."); |
70 DEFINE_string2(writePath, w, "", "If set, write bitmaps here as .pngs."); | 74 DEFINE_string2(writePath, w, "", "If set, write bitmaps here as .pngs."); |
| 75 DEFINE_bool(useBackgroundThread, true, "If false, kilobench will time cpu / gpu work together"); |
| 76 DEFINE_bool(useMultiProcess, true, "If false, kilobench will run all tests in one process"); |
71 | 77 |
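Taken together, the two new flags select the harness structure used later in this patch: useMultiProcess (default true) forks a fresh child process per sample, while useBackgroundThread (default true) spins up a second GL context that measures GPU frame time with fence syncs; setting useBackgroundThread to false reports only the combined cpu/gpu wall time, as the flag text above notes.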
72 static SkString humanize(double ms) { | 78 static SkString humanize(double ms) { |
73 return HumanizeMs(ms); | 79 return HumanizeMs(ms); |
74 } | 80 } |
75 #define HUMANIZE(ms) humanize(ms).c_str() | 81 #define HUMANIZE(ms) humanize(ms).c_str() |
76 | 82 |
77 namespace kilobench { | 83 namespace kilobench { |
78 class BenchmarkStream { | 84 class BenchmarkStream { |
79 public: | 85 public: |
80 BenchmarkStream() : fCurrentSKP(0) { | 86 BenchmarkStream() : fCurrentSKP(0) { |
(...skipping 58 matching lines...)
139 | 145 |
140 return nullptr; | 146 return nullptr; |
141 } | 147 } |
142 | 148 |
143 SkTArray<SkString> fSKPs; | 149 SkTArray<SkString> fSKPs; |
144 int fCurrentSKP; | 150 int fCurrentSKP; |
145 }; | 151 }; |
146 | 152 |
147 struct GPUTarget { | 153 struct GPUTarget { |
148 void setup() { | 154 void setup() { |
149 this->gl->makeCurrent(); | 155 fGL->makeCurrent(); |
150 // Make sure we're done with whatever came before. | 156 // Make sure we're done with whatever came before. |
151 SK_GL(*this->gl, Finish()); | 157 SK_GL(*fGL, Finish()); |
152 } | 158 } |
153 | 159 |
154 SkCanvas* beginTiming(SkCanvas* canvas) { return canvas; } | 160 SkCanvas* beginTiming(SkCanvas* canvas) { return canvas; } |
155 | 161 |
156 void endTiming() { | 162 void endTiming(bool usePlatformSwapBuffers) { |
157 if (this->gl) { | 163 if (fGL) { |
158 SK_GL(*this->gl, Flush()); | 164 SK_GL(*fGL, Flush()); |
159 this->gl->swapBuffers(); | 165 if (usePlatformSwapBuffers) { |
| 166 fGL->swapBuffers(); |
| 167 } else { |
| 168 fGL->waitOnSyncOrSwap(); |
| 169 } |
160 } | 170 } |
161 } | 171 } |
162 void fence() { | 172 void finish() { |
163 SK_GL(*this->gl, Finish()); | 173 SK_GL(*fGL, Finish()); |
164 } | 174 } |
165 | 175 |
166 bool needsFrameTiming(int* maxFrameLag) const { | 176 bool needsFrameTiming(int* maxFrameLag) const { |
167 if (!this->gl->getMaxGpuFrameLag(maxFrameLag)) { | 177 if (!fGL->getMaxGpuFrameLag(maxFrameLag)) { |
168 // Frame lag is unknown. | 178 // Frame lag is unknown. |
169 *maxFrameLag = FLAGS_gpuFrameLag; | 179 *maxFrameLag = FLAGS_gpuFrameLag; |
170 } | 180 } |
171 return true; | 181 return true; |
172 } | 182 } |
173 | 183 |
174 bool init(Benchmark* bench, GrContextFactory* factory, bool useDfText, | 184 bool init(Benchmark* bench, GrContextFactory* factory, bool useDfText, |
175 GrContextFactory::GLContextType ctxType, | 185 GrContextFactory::GLContextType ctxType, |
176 GrContextFactory::GLContextOptions ctxOptions, int numSamples) { | 186 GrContextFactory::GLContextOptions ctxOptions, int numSamples) { |
177 GrContext* context = factory->get(ctxType, ctxOptions); | 187 GrContext* context = factory->get(ctxType, ctxOptions); |
178 int maxRTSize = context->caps()->maxRenderTargetSize(); | 188 int maxRTSize = context->caps()->maxRenderTargetSize(); |
179 SkImageInfo info = SkImageInfo::Make(SkTMin(bench->getSize().fX, maxRTSize), | 189 SkImageInfo info = SkImageInfo::Make(SkTMin(bench->getSize().fX, maxRTSize), |
180 SkTMin(bench->getSize().fY, maxRTSize), | 190 SkTMin(bench->getSize().fY, maxRTSize), |
181 kN32_SkColorType, kPremul_SkAlphaType); | 191 kN32_SkColorType, kPremul_SkAlphaType); |
182 uint32_t flags = useDfText ? SkSurfaceProps::kUseDeviceIndependentFonts_Flag : | 192 uint32_t flags = useDfText ? SkSurfaceProps::kUseDeviceIndependentFonts_Flag : |
183 0; | 193 0; |
184 SkSurfaceProps props(flags, SkSurfaceProps::kLegacyFontHost_InitType); | 194 SkSurfaceProps props(flags, SkSurfaceProps::kLegacyFontHost_InitType); |
185 this->surface.reset(SkSurface::NewRenderTarget(context, | 195 fSurface.reset(SkSurface::NewRenderTarget(context, |
186 SkSurface::kNo_Budgeted, info, | 196 SkSurface::kNo_Budgeted, info, |
187 numSamples, &props)); | 197 numSamples, &props)); |
188 this->gl = factory->getContextInfo(ctxType, ctxOptions).fGLContext; | 198 fGL = factory->getContextInfo(ctxType, ctxOptions).fGLContext; |
189 if (!this->surface.get()) { | 199 if (!fSurface.get()) { |
190 return false; | 200 return false; |
191 } | 201 } |
192 | 202 |
193 // Kilobench should only be used on platforms with fence sync support | 203 // Kilobench should only be used on platforms with fence sync support |
194 SkASSERT(this->gl->fenceSyncSupport()); | 204 SkASSERT(fGL->fenceSyncSupport()); |
195 return true; | 205 return true; |
196 } | 206 } |
197 | 207 |
198 SkCanvas* getCanvas() const { | 208 SkCanvas* getCanvas() const { |
199 if (!surface.get()) { | 209 if (!fSurface.get()) { |
200 return nullptr; | 210 return nullptr; |
201 } | 211 } |
202 return surface->getCanvas(); | 212 return fSurface->getCanvas(); |
203 } | 213 } |
204 | 214 |
205 bool capturePixels(SkBitmap* bmp) { | 215 bool capturePixels(SkBitmap* bmp) { |
206 SkCanvas* canvas = this->getCanvas(); | 216 SkCanvas* canvas = this->getCanvas(); |
207 if (!canvas) { | 217 if (!canvas) { |
208 return false; | 218 return false; |
209 } | 219 } |
210 bmp->setInfo(canvas->imageInfo()); | 220 bmp->setInfo(canvas->imageInfo()); |
211 if (!canvas->readPixels(bmp, 0, 0)) { | 221 if (!canvas->readPixels(bmp, 0, 0)) { |
212 SkDebugf("Can't read canvas pixels.\n"); | 222 SkDebugf("Can't read canvas pixels.\n"); |
213 return false; | 223 return false; |
214 } | 224 } |
215 return true; | 225 return true; |
216 } | 226 } |
217 | 227 |
| 228 SkGLContext* gl() { return fGL; } |
| 229 |
218 private: | 230 private: |
219 //const Config config; | 231 SkGLContext* fGL; |
220 SkGLContext* gl; | 232 SkAutoTDelete<SkSurface> fSurface; |
221 SkAutoTDelete<SkSurface> surface; | |
222 }; | 233 }; |
223 | 234 |
224 static bool write_canvas_png(GPUTarget* target, const SkString& filename) { | 235 static bool write_canvas_png(GPUTarget* target, const SkString& filename) { |
225 | 236 |
226 if (filename.isEmpty()) { | 237 if (filename.isEmpty()) { |
227 return false; | 238 return false; |
228 } | 239 } |
229 if (target->getCanvas() && | 240 if (target->getCanvas() && |
230 kUnknown_SkColorType == target->getCanvas()->imageInfo().colorType()) { | 241 kUnknown_SkColorType == target->getCanvas()->imageInfo().colorType()) { |
231 return false; | 242 return false; |
(...skipping 37 matching lines...)
269 return 1; | 280 return 1; |
270 } | 281 } |
271 if (loops > FLAGS_maxLoops) { | 282 if (loops > FLAGS_maxLoops) { |
272 SkDebugf("WARNING: clamping loops from %d to FLAGS_maxLoops, %d.\n", loo
ps, FLAGS_maxLoops); | 283 SkDebugf("WARNING: clamping loops from %d to FLAGS_maxLoops, %d.\n", loo
ps, FLAGS_maxLoops); |
273 return FLAGS_maxLoops; | 284 return FLAGS_maxLoops; |
274 } | 285 } |
275 return loops; | 286 return loops; |
276 } | 287 } |
277 | 288 |
278 static double now_ms() { return SkTime::GetNSecs() * 1e-6; } | 289 static double now_ms() { return SkTime::GetNSecs() * 1e-6; } |
279 static double time(int loops, Benchmark* bench, GPUTarget* target) { | 290 |
| 291 struct TimingThread { |
| 292 TimingThread(SkGLContext* mainContext) |
| 293 : fFenceSync(mainContext->fenceSync()) |
| 294 , fMainContext(mainContext) |
| 295 , fDone(false) {} |
| 296 |
| 297 static void Loop(void* data) { |
| 298 TimingThread* timingThread = reinterpret_cast<TimingThread*>(data); |
| 299 timingThread->timingLoop(); |
| 300 } |
| 301 |
| 302 // To ensure waiting for the sync actually does something, we check to make sure that we exceed |
| 303 // some small value |
| 304 const double kMinElapsed = 1e-6; |
| 305 bool sanity(double start) const { |
| 306 double elapsed = now_ms() - start; |
| 307 return elapsed > kMinElapsed; |
| 308 } |
| 309 |
| 310 void waitFence(SkPlatformGpuFence sync) { |
| 311 SkDEBUGCODE(double start = now_ms()); |
| 312 fFenceSync->waitFence(sync, false); |
| 313 SkASSERT(sanity(start)); |
| 314 } |
| 315 |
| 316 void timingLoop() { |
| 317 // Create a context which shares display lists with the main thread |
| 318 SkAutoTDelete<SkGLContext> glContext(SkCreatePlatformGLContext(kNone_GrG
LStandard, |
| 319 fMainCont
ext)); |
| 320 glContext->makeCurrent(); |
| 321 |
| 322 // Basic timing methodology is: |
| 323 // 1) Wait on semaphore until main thread indicates it's time to start timing the frame |
| 324 // 2) Wait on frame start sync, record time. This is the start of the frame. |
| 325 // 3) Wait on semaphore until main thread indicates it's time to finish timing the frame |
| 326 // 4) Wait on frame end sync, record time. FrameEndTime - FrameStartTime = frame time |
| 327 // 5) Wait on semaphore until main thread indicates we should time the next frame or quit |
| 328 while (true) { |
| 329 fSemaphore.wait(); |
| 330 |
| 331 // get start sync |
| 332 SkPlatformGpuFence startSync = this->popStartSync(); |
| 333 |
| 334 // wait on sync |
| 335 this->waitFence(startSync); |
| 336 double start = kilobench::now_ms(); |
| 337 |
| 338 // do we want to sleep here? |
| 339 // wait for end sync |
| 340 fSemaphore.wait(); |
| 341 |
| 342 // get end sync |
| 343 SkPlatformGpuFence endSync = this->popEndSync(); |
| 344 |
| 345 // wait on sync |
| 346 this->waitFence(endSync); |
| 347 double elapsed = kilobench::now_ms() - start; |
| 348 |
| 349 // No mutex needed, client won't touch timings until we're done |
| 350 fTimings.push_back(elapsed); |
| 351 |
| 352 // clean up fences |
| 353 fFenceSync->deleteFence(startSync); |
| 354 fFenceSync->deleteFence(endSync); |
| 355 |
| 356 fSemaphore.wait(); |
| 357 if (this->isDone()) { |
| 358 break; |
| 359 } |
| 360 } |
| 361 } |
| 362 |
| 363 void pushStartSync() { this->pushSync(&fFrameStartSyncs, &fFrameStartSyncsMutex); } |
| 364 |
| 365 SkPlatformGpuFence popStartSync() { |
| 366 return this->popSync(&fFrameStartSyncs, &fFrameStartSyncsMutex); |
| 367 } |
| 368 |
| 369 void pushEndSync() { this->pushSync(&fFrameEndSyncs, &fFrameEndSyncsMutex); } |
| 370 |
| 371 SkPlatformGpuFence popEndSync() { return this->popSync(&fFrameEndSyncs, &fFrameEndSyncsMutex); } |
| 372 |
| 373 void setDone() { |
| 374 SkAutoMutexAcquire done(fDoneMutex); |
| 375 fDone = true; |
| 376 fSemaphore.signal(); |
| 377 } |
| 378 |
| 379 typedef SkTLList<SkPlatformGpuFence, 1> SyncQueue; |
| 380 |
| 381 void pushSync(SyncQueue* queue, SkMutex* mutex) { |
| 382 SkAutoMutexAcquire am(mutex); |
| 383 *queue->addToHead() = fFenceSync->insertFence(); |
| 384 fSemaphore.signal(); |
| 385 } |
| 386 |
| 387 SkPlatformGpuFence popSync(SyncQueue* queue, SkMutex* mutex) { |
| 388 SkAutoMutexAcquire am(mutex); |
| 389 SkPlatformGpuFence sync = *queue->head(); |
| 390 queue->popHead(); |
| 391 return sync; |
| 392 } |
| 393 |
| 394 bool isDone() { |
| 395 SkAutoMutexAcquire am1(fFrameStartSyncsMutex); |
| 396 SkAutoMutexAcquire done(fDoneMutex); |
| 397 if (fDone && fFrameStartSyncs.isEmpty()) { |
| 398 return true; |
| 399 } else { |
| 400 return false; |
| 401 } |
| 402 } |
| 403 |
| 404 const SkTArray<double>& timings() const { SkASSERT(fDone); return fTimings;
} |
| 405 |
| 406 private: |
| 407 SkGpuFenceSync* fFenceSync; |
| 408 SkSemaphore fSemaphore; |
| 409 SkMutex fFrameStartSyncsMutex; |
| 410 SyncQueue fFrameStartSyncs; |
| 411 SkMutex fFrameEndSyncsMutex; |
| 412 SyncQueue fFrameEndSyncs; |
| 413 SkTArray<double> fTimings; |
| 414 SkMutex fDoneMutex; |
| 415 SkGLContext* fMainContext; |
| 416 bool fDone; |
| 417 }; |
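For orientation, here is a condensed sketch of the main-thread side of the handshake that timingLoop() expects. It mirrors the time()/time_sample() changes further down in this patch and is illustrative only, not part of the diff (bench, canvas, loops, and target are assumed to be set up by the surrounding harness):

    TimingThread timingThread(target->gl());
    SkAutoTDelete<SkThread> thread(new SkThread(TimingThread::Loop, &timingThread));
    thread->start();

    timingThread.pushStartSync();      // steps 1-2: fence marks the GPU frame start
    bench->draw(loops, canvas);
    canvas->flush();
    target->endTiming(true);           // swap so the frame-end fence has work behind it
    timingThread.pushEndSync();        // steps 3-4: fence marks the GPU frame end
    timingThread.setDone();            // step 5: no more frames coming

    thread->join();
    // timingThread.timings() now holds one GPU duration per timed frame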
| 418 |
| 419 static double time(int loops, Benchmark* bench, GPUTarget* target, TimingThread* timingThread) { |
280 SkCanvas* canvas = target->getCanvas(); | 420 SkCanvas* canvas = target->getCanvas(); |
281 if (canvas) { | 421 canvas->clear(SK_ColorWHITE); |
282 canvas->clear(SK_ColorWHITE); | 422 bench->preDraw(canvas); |
| 423 |
| 424 if (timingThread) { |
| 425 timingThread->pushStartSync(); |
283 } | 426 } |
284 bench->preDraw(canvas); | |
285 double start = now_ms(); | 427 double start = now_ms(); |
286 canvas = target->beginTiming(canvas); | 428 canvas = target->beginTiming(canvas); |
287 bench->draw(loops, canvas); | 429 bench->draw(loops, canvas); |
288 if (canvas) { | 430 canvas->flush(); |
289 canvas->flush(); | 431 target->endTiming(timingThread ? true : false); |
| 432 |
| 433 double elapsed = now_ms() - start; |
| 434 if (timingThread) { |
| 435 timingThread->pushEndSync(); |
| 436 timingThread->setDone(); |
290 } | 437 } |
291 target->endTiming(); | |
292 double elapsed = now_ms() - start; | |
293 bench->postDraw(canvas); | 438 bench->postDraw(canvas); |
294 return elapsed; | 439 return elapsed; |
295 } | 440 } |
296 | 441 |
| 442 // TODO For now we don't use the background timing thread to tune loops |
297 static int setup_gpu_bench(GPUTarget* target, Benchmark* bench, int maxGpuFrameLag) { | 443 static int setup_gpu_bench(GPUTarget* target, Benchmark* bench, int maxGpuFrameLag) { |
298 // First, figure out how many loops it'll take to get a frame up to FLAGS_gpuMs. | 444 // First, figure out how many loops it'll take to get a frame up to FLAGS_gpuMs. |
299 int loops = bench->calculateLoops(FLAGS_loops); | 445 int loops = bench->calculateLoops(FLAGS_loops); |
300 if (kAutoTuneLoops == loops) { | 446 if (kAutoTuneLoops == loops) { |
301 loops = 1; | 447 loops = 1; |
302 double elapsed = 0; | 448 double elapsed = 0; |
303 do { | 449 do { |
304 if (1<<30 == loops) { | 450 if (1<<30 == loops) { |
305 // We're about to wrap. Something's wrong with the bench. | 451 // We're about to wrap. Something's wrong with the bench. |
306 loops = 0; | 452 loops = 0; |
307 break; | 453 break; |
308 } | 454 } |
309 loops *= 2; | 455 loops *= 2; |
310 // If the GPU lets frames lag at all, we need to make sure we're timing | 456 // If the GPU lets frames lag at all, we need to make sure we're timing |
311 // _this_ round, not still timing last round. | 457 // _this_ round, not still timing last round. |
312 for (int i = 0; i < maxGpuFrameLag; i++) { | 458 for (int i = 0; i < maxGpuFrameLag; i++) { |
313 elapsed = time(loops, bench, target); | 459 elapsed = time(loops, bench, target, nullptr); |
314 } | 460 } |
315 } while (elapsed < FLAGS_gpuMs); | 461 } while (elapsed < FLAGS_gpuMs); |
316 | 462 |
317 // We've overshot at least a little. Scale back linearly. | 463 // We've overshot at least a little. Scale back linearly. |
318 loops = (int)ceil(loops * FLAGS_gpuMs / elapsed); | 464 loops = (int)ceil(loops * FLAGS_gpuMs / elapsed); |
319 loops = clamp_loops(loops); | 465 loops = clamp_loops(loops); |
320 | 466 |
321 // Make sure we're not still timing our calibration. | 467 // Make sure we're not still timing our calibration. |
322 target->fence(); | 468 target->finish(); |
323 } else { | 469 } else { |
324 loops = detect_forever_loops(loops); | 470 loops = detect_forever_loops(loops); |
325 } | 471 } |
326 | 472 |
327 // Pretty much the same deal as the calibration: do some warmup to make | 473 // Pretty much the same deal as the calibration: do some warmup to make |
328 // sure we're timing steady-state pipelined frames. | 474 // sure we're timing steady-state pipelined frames. |
329 for (int i = 0; i < maxGpuFrameLag - 1; i++) { | 475 for (int i = 0; i < maxGpuFrameLag - 1; i++) { |
330 time(loops, bench, target); | 476 time(loops, bench, target, nullptr); |
331 } | 477 } |
332 | 478 |
333 return loops; | 479 return loops; |
334 } | 480 } |
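As a concrete, made-up example of the scale-back step above: with FLAGS_gpuMs at its default of 5, if the doubling loop stops at loops = 32 with elapsed = 7.5 ms, the linear scale-back yields ceil(32 * 5 / 7.5) = 22 loops, which clamp_loops() then bounds by FLAGS_maxLoops.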
335 | 481 |
336 struct AutoSetupContextBenchAndTarget { | 482 struct AutoSetupContextBenchAndTarget { |
337 AutoSetupContextBenchAndTarget(Benchmark* bench) : fBenchmark(bench) { | 483 AutoSetupContextBenchAndTarget(Benchmark* bench) : fBenchmark(bench) { |
338 GrContextOptions grContextOpts; | 484 GrContextOptions grContextOpts; |
339 fCtxFactory.reset(new GrContextFactory(grContextOpts)); | 485 fCtxFactory.reset(new GrContextFactory(grContextOpts)); |
340 | 486 |
341 SkAssertResult(fTarget.init(bench, fCtxFactory, false, | 487 SkAssertResult(fTarget.init(bench, fCtxFactory, false, |
342 GrContextFactory::kNative_GLContextType, | 488 GrContextFactory::kNative_GLContextType, |
343 GrContextFactory::kNone_GLContextOptions, 0)); | 489 GrContextFactory::kNone_GLContextOptions, 0)); |
344 | 490 |
345 fCanvas = fTarget.getCanvas(); | 491 fCanvas = fTarget.getCanvas(); |
346 fTarget.setup(); | 492 fTarget.setup(); |
347 | 493 |
348 bench->perCanvasPreDraw(fCanvas); | 494 bench->perCanvasPreDraw(fCanvas); |
349 fTarget.needsFrameTiming(&fMaxFrameLag); | 495 fTarget.needsFrameTiming(&fMaxFrameLag); |
350 } | 496 } |
351 | 497 |
352 int getLoops() { return setup_gpu_bench(&fTarget, fBenchmark, fMaxFrameLag); } | 498 int getLoops() { return setup_gpu_bench(&fTarget, fBenchmark, fMaxFrameLag); } |
353 | 499 |
354 double timeSample(int loops) { | 500 double timeSample(int loops, TimingThread* timingThread) { |
355 for (int i = 0; i < fMaxFrameLag; i++) { | 501 for (int i = 0; i < fMaxFrameLag; i++) { |
356 time(loops, fBenchmark, &fTarget); | 502 time(loops, fBenchmark, &fTarget, timingThread); |
357 } | 503 } |
358 | 504 |
359 return time(loops, fBenchmark, &fTarget) / loops; | 505 return time(loops, fBenchmark, &fTarget, timingThread) / loops; |
360 } | 506 } |
| 507 |
361 void teardownBench() { fBenchmark->perCanvasPostDraw(fCanvas); } | 508 void teardownBench() { fBenchmark->perCanvasPostDraw(fCanvas); } |
362 | 509 |
363 SkAutoTDelete<GrContextFactory> fCtxFactory; | 510 SkAutoTDelete<GrContextFactory> fCtxFactory; |
364 GPUTarget fTarget; | 511 GPUTarget fTarget; |
365 SkCanvas* fCanvas; | 512 SkCanvas* fCanvas; |
366 Benchmark* fBenchmark; | 513 Benchmark* fBenchmark; |
367 int fMaxFrameLag; | 514 int fMaxFrameLag; |
368 }; | 515 }; |
369 | 516 |
370 int setup_loops(Benchmark* bench) { | 517 int setup_loops(Benchmark* bench) { |
371 AutoSetupContextBenchAndTarget ascbt(bench); | 518 AutoSetupContextBenchAndTarget ascbt(bench); |
372 int loops = ascbt.getLoops(); | 519 int loops = ascbt.getLoops(); |
373 ascbt.teardownBench(); | 520 ascbt.teardownBench(); |
374 | 521 |
375 if (!FLAGS_writePath.isEmpty() && FLAGS_writePath[0]) { | 522 if (!FLAGS_writePath.isEmpty() && FLAGS_writePath[0]) { |
376 SkString pngFilename = SkOSPath::Join(FLAGS_writePath[0], "gpu"); | 523 SkString pngFilename = SkOSPath::Join(FLAGS_writePath[0], "gpu"); |
377 pngFilename = SkOSPath::Join(pngFilename.c_str(), bench->getUniqueName()); | 524 pngFilename = SkOSPath::Join(pngFilename.c_str(), bench->getUniqueName()); |
378 pngFilename.append(".png"); | 525 pngFilename.append(".png"); |
379 write_canvas_png(&ascbt.fTarget, pngFilename); | 526 write_canvas_png(&ascbt.fTarget, pngFilename); |
380 } | 527 } |
381 return loops; | 528 return loops; |
382 } | 529 } |
383 | 530 |
384 double time_sample(Benchmark* bench, int loops) { | 531 struct Sample { |
| 532 double fCpu; |
| 533 double fGpu; |
| 534 }; |
| 535 |
| 536 Sample time_sample(Benchmark* bench, int loops) { |
385 AutoSetupContextBenchAndTarget ascbt(bench); | 537 AutoSetupContextBenchAndTarget ascbt(bench); |
386 double sample = ascbt.timeSample(loops); | 538 |
| 539 Sample sample; |
| 540 if (FLAGS_useBackgroundThread) { |
| 541 TimingThread timingThread(ascbt.fTarget.gl()); |
| 542 SkAutoTDelete<SkThread> nativeThread(new SkThread(TimingThread::Loop, &t
imingThread)); |
| 543 nativeThread->start(); |
| 544 sample.fCpu = ascbt.timeSample(loops, &timingThread); |
| 545 nativeThread->join(); |
| 546 |
| 547 // return the min |
| 548 double min = SK_ScalarMax; |
| 549 for (int i = 0; i < timingThread.timings().count(); i++) { |
| 550 min = SkTMin(min, timingThread.timings()[i]); |
| 551 } |
| 552 sample.fGpu = min; |
| 553 } else { |
| 554 sample.fCpu = ascbt.timeSample(loops, nullptr); |
| 555 } |
| 556 |
387 ascbt.teardownBench(); | 557 ascbt.teardownBench(); |
388 | 558 |
389 return sample; | 559 return sample; |
390 } | 560 } |
391 | 561 |
392 } // namespace kilobench | 562 } // namespace kilobench |
393 | 563 |
394 static const int kOutResultSize = 1024; | 564 static const int kOutResultSize = 1024; |
395 | 565 |
| 566 void printResult(const SkTArray<double>& samples, int loops, const char* name, const char* mod) { |
| 567 SkString newName(name); |
| 568 newName.appendf("_%s", mod); |
| 569 Stats stats(samples); |
| 570 const double stddev_percent = 100 * sqrt(stats.var) / stats.mean; |
| 571 SkDebugf("%d\t%s\t%s\t%s\t%s\t%.0f%%\t%s\t%s\t%s\n" |
| 572 , loops |
| 573 , HUMANIZE(stats.min) |
| 574 , HUMANIZE(stats.median) |
| 575 , HUMANIZE(stats.mean) |
| 576 , HUMANIZE(stats.max) |
| 577 , stddev_percent |
| 578 , stats.plot.c_str() |
| 579 , "gpu" |
| 580 , newName.c_str() |
| 581 ); |
| 582 } |
| 583 |
396 int kilobench_main() { | 584 int kilobench_main() { |
397 kilobench::BenchmarkStream benchStream; | 585 kilobench::BenchmarkStream benchStream; |
398 | 586 |
399 SkDebugf("loops\tmin\tmedian\tmean\tmax\tstddev\t%-*s\tconfig\tbench\n", | 587 SkDebugf("loops\tmin\tmedian\tmean\tmax\tstddev\t%-*s\tconfig\tbench\n", |
400 FLAGS_samples, "samples"); | 588 FLAGS_samples, "samples"); |
401 | 589 |
402 int descriptors[2]; | 590 int descriptors[2]; |
403 if (pipe(descriptors) != 0) { | 591 if (pipe(descriptors) != 0) { |
404 SkFAIL("Failed to open a pipe\n"); | 592 SkFAIL("Failed to open a pipe\n"); |
405 } | 593 } |
406 | 594 |
407 while (Benchmark* b = benchStream.next()) { | 595 while (Benchmark* b = benchStream.next()) { |
408 SkAutoTDelete<Benchmark> bench(b); | 596 SkAutoTDelete<Benchmark> bench(b); |
409 | 597 |
410 int loops; | 598 int loops = 1; |
411 SkTArray<double> samples; | 599 SkTArray<double> cpuSamples; |
| 600 SkTArray<double> gpuSamples; |
412 for (int i = 0; i < FLAGS_samples + 1; i++) { | 601 for (int i = 0; i < FLAGS_samples + 1; i++) { |
413 // We fork off a new process to setup the grcontext and run the test while we wait | 602 // We fork off a new process to setup the grcontext and run the test while we wait |
414 int childPid = fork(); | 603 if (FLAGS_useMultiProcess) { |
415 if (childPid > 0) { | 604 int childPid = fork(); |
416 char result[kOutResultSize]; | 605 if (childPid > 0) { |
417 if (read(descriptors[0], result, kOutResultSize) < 0) { | 606 char result[kOutResultSize]; |
418 SkFAIL("Failed to read from pipe\n"); | 607 if (read(descriptors[0], result, kOutResultSize) < 0) { |
| 608 SkFAIL("Failed to read from pipe\n"); |
| 609 } |
| 610 |
| 611 // if samples == 0 then parse # of loops |
| 612 // else parse float |
| 613 if (i == 0) { |
| 614 sscanf(result, "%d", &loops); |
| 615 } else { |
| 616 sscanf(result, "%lf %lf", &cpuSamples.push_back(), |
| 617 &gpuSamples.push_back()); |
| 618 } |
| 619 |
| 620 // wait until exit |
| 621 int status; |
| 622 waitpid(childPid, &status, 0); |
| 623 } else if (0 == childPid) { |
| 624 char result[kOutResultSize]; |
| 625 if (i == 0) { |
| 626 sprintf(result, "%d", kilobench::setup_loops(bench)); |
| 627 } else { |
| 628 kilobench::Sample sample = kilobench::time_sample(bench, loops); |
| 629 sprintf(result, "%lf %lf", sample.fCpu, sample.fGpu); |
| 630 } |
| 631 |
| 632 // Make sure to write the null terminator |
| 633 if (write(descriptors[1], result, strlen(result) + 1) < 0) { |
| 634 SkFAIL("Failed to write to pipe\n"); |
| 635 } |
| 636 return 0; |
| 637 } else { |
| 638 SkFAIL("Fork failed\n"); |
419 } | 639 } |
420 | 640 } else { |
421 // if samples == 0 then parse # of loops | |
422 // else parse float | |
423 if (i == 0) { | 641 if (i == 0) { |
424 sscanf(result, "%d", &loops); | 642 loops = kilobench::setup_loops(bench); |
425 } else { | 643 } else { |
426 sscanf(result, "%lf", &samples.push_back()); | 644 kilobench::Sample sample = kilobench::time_sample(bench, loo
ps); |
| 645 cpuSamples.push_back(sample.fCpu); |
| 646 gpuSamples.push_back(sample.fGpu); |
427 } | 647 } |
428 | |
429 // wait until exit | |
430 int status; | |
431 waitpid(childPid, &status, 0); | |
432 } else if (0 == childPid) { | |
433 char result[kOutResultSize]; | |
434 if (i == 0) { | |
435 sprintf(result, "%d", kilobench::setup_loops(bench)); | |
436 } else { | |
437 sprintf(result, "%lf", kilobench::time_sample(bench, loops)); | |
438 } | |
439 | |
440 // Make sure to write the null terminator | |
441 if (write(descriptors[1], result, strlen(result) + 1) < 0) { | |
442 SkFAIL("Failed to write to pipe\n"); | |
443 } | |
444 return 0; | |
445 } else { | |
446 SkFAIL("Fork failed\n"); | |
447 } | 648 } |
448 } | 649 } |
449 | 650 |
450 Stats stats(samples); | 651 printResult(cpuSamples, loops, bench->getUniqueName(), "cpu"); |
451 const double stddev_percent = 100 * sqrt(stats.var) / stats.mean; | 652 if (FLAGS_useBackgroundThread) { |
452 SkDebugf("%d\t%s\t%s\t%s\t%s\t%.0f%%\t%s\t%s\t%s\n" | 653 printResult(gpuSamples, loops, bench->getUniqueName(), "gpu"); |
453 , loops | 654 } |
454 , HUMANIZE(stats.min) | |
455 , HUMANIZE(stats.median) | |
456 , HUMANIZE(stats.mean) | |
457 , HUMANIZE(stats.max) | |
458 , stddev_percent | |
459 , stats.plot.c_str() | |
460 , "gpu" | |
461 , bench->getUniqueName() | |
462 ); | |
463 | |
464 } | 655 } |
465 return 0; | 656 return 0; |
466 } | 657 } |
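A note on the flow above: in the default multi-process mode each iteration forks a fresh child, so every measurement builds its own GrContext and GL state; the first child writes the tuned loop count back over the pipe, and each later child writes a single "cpu gpu" sample pair that the parent parses into cpuSamples and gpuSamples before printing one result row per timing source (cpu, and gpu when the background thread is enabled).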
467 | 658 |
468 #if !defined SK_BUILD_FOR_IOS | 659 #if !defined SK_BUILD_FOR_IOS |
469 int main(int argc, char** argv) { | 660 int main(int argc, char** argv) { |
470 SkCommandLineFlags::Parse(argc, argv); | 661 SkCommandLineFlags::Parse(argc, argv); |
471 return kilobench_main(); | 662 return kilobench_main(); |
472 } | 663 } |
473 #endif | 664 #endif |